Diffstat (limited to 'drivers/gpu/drm/i915')
491 files changed, 67604 insertions, 41087 deletions
diff --git a/drivers/gpu/drm/i915/.gitignore b/drivers/gpu/drm/i915/.gitignore new file mode 100644 index 000000000000..d9a77f3b59b2 --- /dev/null +++ b/drivers/gpu/drm/i915/.gitignore @@ -0,0 +1 @@ +*.hdrtest diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 0d21402945ab..ba9595960bbe 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -76,7 +76,7 @@ config DRM_I915_CAPTURE_ERROR This option enables capturing the GPU state when a hang is detected. This information is vital for triaging hangs and assists in debugging. Please report any hang to - https://bugs.freedesktop.org/enter_bug.cgi?product=DRI + https://bugs.freedesktop.org/enter_bug.cgi?product=DRI for triaging. If in doubt, say "Y". @@ -105,11 +105,11 @@ config DRM_I915_USERPTR If in doubt, say "Y". config DRM_I915_GVT - bool "Enable Intel GVT-g graphics virtualization host support" - depends on DRM_I915 - depends on 64BIT - default n - help + bool "Enable Intel GVT-g graphics virtualization host support" + depends on DRM_I915 + depends on 64BIT + default n + help Choose this option if you want to enable Intel GVT-g graphics virtualization technology host support with integrated graphics. With GVT-g, it's possible to have one integrated graphics @@ -148,3 +148,9 @@ menu "drm/i915 Profile Guided Optimisation" depends on DRM_I915 source "drivers/gpu/drm/i915/Kconfig.profile" endmenu + +menu "drm/i915 Unstable Evolution" + visible if EXPERT && STAGING && BROKEN + depends on DRM_I915 + source "drivers/gpu/drm/i915/Kconfig.unstable" +endmenu diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 8d922bb4d953..1cb28c20807c 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -1,47 +1,48 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_I915_WERROR - bool "Force GCC to throw an error instead of a warning when compiling" - # As this may inadvertently break the build, only allow the user - # to shoot oneself in the foot iff they aim really hard - depends on EXPERT - # We use the dependency on !COMPILE_TEST to not be enabled in - # allmodconfig or allyesconfig configurations - depends on !COMPILE_TEST - default n - help - Add -Werror to the build flags for (and only for) i915.ko. - Do not enable this unless you are writing code for the i915.ko module. - - Recommended for driver developers only. - - If in doubt, say "N". + bool "Force GCC to throw an error instead of a warning when compiling" + # As this may inadvertently break the build, only allow the user + # to shoot oneself in the foot iff they aim really hard + depends on EXPERT + # We use the dependency on !COMPILE_TEST to not be enabled in + # allmodconfig or allyesconfig configurations + depends on !COMPILE_TEST + default n + help + Add -Werror to the build flags for (and only for) i915.ko. + Do not enable this unless you are writing code for the i915.ko module. + + Recommended for driver developers only. + + If in doubt, say "N". 
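(Editor's aside on the new "drm/i915 Unstable Evolution" menu sourced from Kconfig.unstable above: the options it provides are ordinary Kconfig symbols, so prototype code can be fenced off at compile time and never reaches distro kernels, since the menu depends on BROKEN. A minimal sketch of that gating pattern using the kernel's IS_ENABLED() helper; the function below is hypothetical and not part of this series.)

    #include <linux/kconfig.h>	/* IS_ENABLED() */

    /* Hypothetical init hook: the fake local-memory path only exists when
     * CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM (added further down, in
     * Kconfig.unstable) is built in; otherwise the branch is compiled out.
     */
    static int example_init_fake_lmem(void)
    {
            if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM))
                    return 0;	/* unstable API disabled: nothing to do */

            /* ... carve a chunk of system memory out as a fake LMEM region ... */
            return 0;
    }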
config DRM_I915_DEBUG - bool "Enable additional driver debugging" - depends on DRM_I915 - select DEBUG_FS - select PREEMPT_COUNT - select REFCOUNT_FULL - select I2C_CHARDEV - select STACKDEPOT - select DRM_DP_AUX_CHARDEV - select X86_MSR # used by igt/pm_rpm - select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) - select DRM_DEBUG_MM if DRM=y + bool "Enable additional driver debugging" + depends on DRM_I915 + select DEBUG_FS + select PREEMPT_COUNT + select I2C_CHARDEV + select STACKDEPOT + select DRM_DP_AUX_CHARDEV + select X86_MSR # used by igt/pm_rpm + select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) + select DRM_DEBUG_MM if DRM=y + select DRM_EXPORT_FOR_TESTS if m select DRM_DEBUG_SELFTEST + select DMABUF_SELFTESTS select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST select DRM_I915_DEBUG_RUNTIME_PM select DRM_I915_DEBUG_MMIO - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG_MMIO bool "Always insert extra checks around mmio access by default" @@ -57,16 +58,16 @@ config DRM_I915_DEBUG_MMIO If in doubt, say "N". config DRM_I915_DEBUG_GEM - bool "Insert extra checks into the GEM internals" - default n - depends on DRM_I915_WERROR - help - Enable extra sanity checks (including BUGs) along the GEM driver - paths that may slow the system down and if hit hang the machine. + bool "Insert extra checks into the GEM internals" + default n + depends on DRM_I915_WERROR + help + Enable extra sanity checks (including BUGs) along the GEM driver + paths that may slow the system down and if hit hang the machine. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_ERRLOG_GEM bool "Insert extra logging (very verbose) for common GEM errors" @@ -94,47 +95,62 @@ config DRM_I915_TRACE_GEM If in doubt, say "N". +config DRM_I915_TRACE_GTT + bool "Insert extra ftrace output from the GTT internals" + depends on DRM_I915_DEBUG_GEM + select TRACING + default n + help + Enable additional and verbose debugging output that will spam + ordinary tests, but may be vital for post-mortem debugging when + used with /proc/sys/kernel/ftrace_dump_on_oops + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_I915_SW_FENCE_DEBUG_OBJECTS - bool "Enable additional driver debugging for fence objects" - depends on DRM_I915 - select DEBUG_OBJECTS - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + bool "Enable additional driver debugging for fence objects" + depends on DRM_I915 + select DEBUG_OBJECTS + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". 
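(Aside: CONFIG_DRM_I915_DEBUG_GEM above only gates extra sanity checks, so the usual implementation pattern is a macro that becomes a hard BUG in debug builds and generates no code, beyond type checking, otherwise. A simplified sketch of that pattern, assumed here for illustration rather than quoted from the driver.)

    #include <linux/bug.h>	/* BUG_ON(), BUILD_BUG_ON_INVALID() */

    #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
    /* debug build: stop as soon as an internal invariant is violated */
    #define EXAMPLE_GEM_BUG_ON(expr)	BUG_ON(expr)
    #else
    /* production build: expression is type-checked but emits no code */
    #define EXAMPLE_GEM_BUG_ON(expr)	BUILD_BUG_ON_INVALID(expr)
    #endif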
config DRM_I915_SW_FENCE_CHECK_DAG - bool "Enable additional driver debugging for detecting dependency cycles" - depends on DRM_I915 - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + bool "Enable additional driver debugging for detecting dependency cycles" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG_GUC - bool "Enable additional driver debugging for GuC" - depends on DRM_I915 - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will help resolve GuC related issues. + bool "Enable additional driver debugging for GuC" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will help resolve GuC related issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_SELFTEST bool "Enable selftests upon driver load" depends on DRM_I915 default n + select DRM_EXPORT_FOR_TESTS if m select FAULT_INJECTION select PRIME_NUMBERS help @@ -162,15 +178,15 @@ config DRM_I915_SELFTEST_BROKEN If in doubt, say "N". config DRM_I915_LOW_LEVEL_TRACEPOINTS - bool "Enable low level request tracing events" - depends on DRM_I915 - default n - help - Choose this option to turn on low level request tracing events. - This provides the ability to precisely monitor engine utilisation - and also analyze the request dependency resolving timeline. - - If in doubt, say "N". + bool "Enable low level request tracing events" + depends on DRM_I915 + default n + help + Choose this option to turn on low level request tracing events. + This provides the ability to precisely monitor engine utilisation + and also analyze the request dependency resolving timeline. + + If in doubt, say "N". config DRM_I915_DEBUG_VBLANK_EVADE bool "Enable extra debug warnings for vblank evasion" diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 48df8889a88a..c280b6ae38eb 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -12,6 +12,29 @@ config DRM_I915_USERFAULT_AUTOSUSPEND May be 0 to disable the extra delay and solely use the device level runtime pm autosuspend delay tunable. +config DRM_I915_HEARTBEAT_INTERVAL + int "Interval between heartbeat pulses (ms)" + default 2500 # milliseconds + help + The driver sends a periodic heartbeat down all active engines to + check the health of the GPU and undertake regular house-keeping of + internal driver state. + + May be 0 to disable heartbeats and therefore disable automatic GPU + hang detection. + +config DRM_I915_PREEMPT_TIMEOUT + int "Preempt timeout (ms, jiffy granularity)" + default 640 # milliseconds + help + How long to wait (in milliseconds) for a preemption event to occur + when submitting a new context via execlists. If the current context + does not hit an arbitration point and yield to HW before the timer + expires, the HW will be reset to allow the more important context + to execute. + + May be 0 to disable the timeout. 
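(Aside on the two new Kconfig.profile tunables above, DRM_I915_HEARTBEAT_INTERVAL and DRM_I915_PREEMPT_TIMEOUT: both are plain millisecond integers, and 0 disables the feature. A minimal sketch of how such a value is typically turned into a timer delay, assuming the usual msecs_to_jiffies() conversion; the helper name is made up for illustration.)

    #include <linux/jiffies.h>

    static unsigned long example_heartbeat_delay(void)
    {
            const unsigned int interval_ms = CONFIG_DRM_I915_HEARTBEAT_INTERVAL;

            if (!interval_ms)
                    return 0;	/* heartbeats, and thus automatic hang detection, disabled */

            /* rounded up to whole jiffies, hence the "jiffy granularity" wording */
            return msecs_to_jiffies(interval_ms);
    }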
+ config DRM_I915_SPIN_REQUEST int "Busywait for request completion (us)" default 5 # microseconds @@ -25,3 +48,29 @@ config DRM_I915_SPIN_REQUEST May be 0 to disable the initial spin. In practice, we estimate the cost of enabling the interrupt (if currently disabled) to be a few microseconds. + +config DRM_I915_STOP_TIMEOUT + int "How long to wait for an engine to quiesce gracefully before reset (ms)" + default 100 # milliseconds + help + By stopping submission and sleeping for a short time before resetting + the GPU, we allow the innocent contexts also on the system to quiesce. + It is then less likely for a hanging context to cause collateral + damage as the system is reset in order to recover. The corollary is + that the reset itself may take longer and so be more disruptive to + interactive or low latency workloads. + +config DRM_I915_TIMESLICE_DURATION + int "Scheduling quantum for userspace batches (ms, jiffy granularity)" + default 1 # milliseconds + help + When two user batches of equal priority are executing, we will + alternate execution of each batch to ensure forward progress of + all users. This is necessary in some cases where there may be + an implicit dependency between those batches that requires + concurrent execution in order for them to proceed, e.g. they + interact with each other via userspace semaphores. Each context + is scheduled for execution for the timeslice duration, before + switching to the next context. + + May be 0 to disable timeslicing. diff --git a/drivers/gpu/drm/i915/Kconfig.unstable b/drivers/gpu/drm/i915/Kconfig.unstable new file mode 100644 index 000000000000..0c2276155c2b --- /dev/null +++ b/drivers/gpu/drm/i915/Kconfig.unstable @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_I915_UNSTABLE + bool "Enable unstable API for early prototype development" + depends on EXPERT + depends on STAGING + depends on BROKEN # should never be enabled by distros! + # We use the dependency on !COMPILE_TEST to not be enabled in + # allmodconfig or allyesconfig configurations + depends on !COMPILE_TEST + default n + help + Enable prototype uAPI under general discussion before they are + finalized. Such prototypes may be withdrawn or substantially + changed before release. They are only enabled here so that a wide + number of interested parties (userspace driver developers) can + verify that the uAPI meet their expectations. These uAPI should + never be used in production. + + Recommended for driver developers _only_. + + If in the slightest bit of doubt, say "N". + +config DRM_I915_UNSTABLE_FAKE_LMEM + bool "Enable the experimental fake lmem" + depends on DRM_I915_UNSTABLE + default n + help + Convert some system memory into a fake local memory region for + testing. diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 8cace65f50ce..b8c5f8934dbd 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -26,28 +26,30 @@ subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable CFLAGS_i915_pci.o = $(call cc-disable-warning, override-init) -CFLAGS_intel_fbdev.o = $(call cc-disable-warning, override-init) +CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init) subdir-ccflags-y += \ $(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA) -# Extra header tests -include $(src)/Makefile.header-test - -subdir-ccflags-y += -I$(src) +subdir-ccflags-y += -I$(srctree)/$(src) # Please keep these build lists sorted! 
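(Aside, referring back to DRM_I915_SPIN_REQUEST in Kconfig.profile above: the option bounds an optimistic busy-wait before the driver arms an interrupt and sleeps. A sketch of that poll-then-sleep idea, with a hypothetical completion flag standing in for a real request; this is an assumed illustration, not the driver's actual wait path.)

    #include <linux/ktime.h>
    #include <linux/sched.h>

    static bool example_spin_until_signaled(const bool *signaled)
    {
            ktime_t timeout = ktime_add_us(ktime_get(), CONFIG_DRM_I915_SPIN_REQUEST);

            do {
                    if (READ_ONCE(*signaled))
                            return true;	/* completed within the busywait budget */
                    cpu_relax();
            } while (!need_resched() && ktime_before(ktime_get(), timeout));

            /* caller falls back to enabling the interrupt and sleeping */
            return READ_ONCE(*signaled);
    }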
# core driver code i915-y += i915_drv.o \ i915_irq.o \ + i915_getparam.o \ i915_params.o \ i915_pci.o \ i915_scatterlist.o \ i915_suspend.o \ + i915_switcheroo.o \ i915_sysfs.o \ + i915_utils.o \ intel_csr.o \ intel_device_info.o \ + intel_memory_region.o \ + intel_pch.o \ intel_pm.o \ intel_runtime_pm.o \ intel_sideband.o \ @@ -59,6 +61,7 @@ i915-y += \ i915_memcpy.o \ i915_mm.o \ i915_sw_fence.o \ + i915_sw_fence_work.o \ i915_syncmap.o \ i915_user_extensions.o @@ -67,26 +70,48 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o display/intel_pipe_crc.o i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # "Graphics Technology" (aka we talk to the gpu) -obj-y += gt/ gt-y += \ + gt/debugfs_engines.o \ + gt/debugfs_gt.o \ + gt/debugfs_gt_pm.o \ + gt/gen6_ppgtt.o \ + gt/gen8_ppgtt.o \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ gt/intel_engine_cs.o \ + gt/intel_engine_heartbeat.o \ gt/intel_engine_pm.o \ + gt/intel_engine_pool.o \ + gt/intel_engine_user.o \ + gt/intel_ggtt.o \ + gt/intel_gt.o \ + gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ - gt/intel_hangcheck.o \ + gt/intel_gt_pm_irq.o \ + gt/intel_gt_requests.o \ + gt/intel_gtt.o \ + gt/intel_llc.o \ gt/intel_lrc.o \ - gt/intel_reset.o \ - gt/intel_ringbuffer.o \ gt/intel_mocs.o \ + gt/intel_ppgtt.o \ + gt/intel_rc6.o \ + gt/intel_renderstate.o \ + gt/intel_reset.o \ + gt/intel_ring.o \ + gt/intel_ring_submission.o \ + gt/intel_rps.o \ gt/intel_sseu.o \ + gt/intel_timeline.o \ gt/intel_workarounds.o -gt-$(CONFIG_DRM_I915_SELFTEST) += \ - gt/mock_engine.o +# autogenerated null render state +gt-y += \ + gt/gen6_renderstate.o \ + gt/gen7_renderstate.o \ + gt/gen8_renderstate.o \ + gt/gen9_renderstate.o i915-y += $(gt-y) # GEM (Graphics Execution Management) code -obj-y += gem/ gem-y += \ gem/i915_gem_busy.o \ gem/i915_gem_clflush.o \ @@ -99,10 +124,12 @@ gem-y += \ gem/i915_gem_internal.o \ gem/i915_gem_object.o \ gem/i915_gem_object_blt.o \ + gem/i915_gem_lmem.o \ gem/i915_gem_mman.o \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ gem/i915_gem_pm.o \ + gem/i915_gem_region.o \ gem/i915_gem_shmem.o \ gem/i915_gem_shrinker.o \ gem/i915_gem_stolen.o \ @@ -114,42 +141,34 @@ gem-y += \ i915-y += \ $(gem-y) \ i915_active.o \ + i915_buddy.o \ i915_cmd_parser.o \ - i915_gem_batch_pool.o \ i915_gem_evict.o \ i915_gem_fence_reg.o \ i915_gem_gtt.o \ i915_gem.o \ - i915_gem_render_state.o \ i915_globals.o \ i915_query.o \ i915_request.o \ i915_scheduler.o \ - i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ + intel_region_lmem.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support -i915-y += intel_uc.o \ - intel_uc_fw.o \ - intel_guc.o \ - intel_guc_ads.o \ - intel_guc_ct.o \ - intel_guc_fw.o \ - intel_guc_log.o \ - intel_guc_submission.o \ - intel_huc.o \ - intel_huc_fw.o - -# autogenerated null render state -i915-y += intel_renderstate_gen6.o \ - intel_renderstate_gen7.o \ - intel_renderstate_gen8.o \ - intel_renderstate_gen9.o +i915-y += gt/uc/intel_uc.o \ + gt/uc/intel_uc_fw.o \ + gt/uc/intel_guc.o \ + gt/uc/intel_guc_ads.o \ + gt/uc/intel_guc_ct.o \ + gt/uc/intel_guc_fw.o \ + gt/uc/intel_guc_log.o \ + gt/uc/intel_guc_submission.o \ + gt/uc/intel_huc.o \ + gt/uc/intel_huc_fw.o # modesetting core code -obj-y += display/ i915-y += \ display/intel_atomic.o \ display/intel_atomic_plane.o \ @@ -164,6 +183,7 @@ i915-y += \ display/intel_display_power.o \ display/intel_dpio_phy.o \ display/intel_dpll_mgr.o \ + display/intel_dsb.o \ display/intel_fbc.o \ display/intel_fifo_underrun.o \ display/intel_frontbuffer.o \ @@ -173,7 +193,9 @@ i915-y += \ 
display/intel_overlay.o \ display/intel_psr.o \ display/intel_quirks.o \ - display/intel_sprite.o + display/intel_sprite.o \ + display/intel_tc.o \ + display/intel_vga.o i915-$(CONFIG_ACPI) += \ display/intel_acpi.o \ display/intel_opregion.o @@ -210,37 +232,41 @@ i915-y += \ display/vlv_dsi.o \ display/vlv_dsi_pll.o +# perf code +i915-y += \ + oa/i915_oa_hsw.o \ + oa/i915_oa_bdw.o \ + oa/i915_oa_chv.o \ + oa/i915_oa_sklgt2.o \ + oa/i915_oa_sklgt3.o \ + oa/i915_oa_sklgt4.o \ + oa/i915_oa_bxt.o \ + oa/i915_oa_kblgt2.o \ + oa/i915_oa_kblgt3.o \ + oa/i915_oa_glk.o \ + oa/i915_oa_cflgt2.o \ + oa/i915_oa_cflgt3.o \ + oa/i915_oa_cnl.o \ + oa/i915_oa_icl.o \ + oa/i915_oa_tgl.o +i915-y += i915_perf.o + # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ gem/selftests/igt_gem_utils.o \ selftests/i915_random.o \ selftests/i915_selftest.o \ + selftests/igt_atomic.o \ selftests/igt_flush_test.o \ selftests/igt_live_test.o \ + selftests/igt_mmap.o \ selftests/igt_reset.o \ selftests/igt_spinner.o # virtual gpu code i915-y += i915_vgpu.o -# perf code -i915-y += i915_perf.o \ - i915_oa_hsw.o \ - i915_oa_bdw.o \ - i915_oa_chv.o \ - i915_oa_sklgt2.o \ - i915_oa_sklgt3.o \ - i915_oa_sklgt4.o \ - i915_oa_bxt.o \ - i915_oa_kblgt2.o \ - i915_oa_kblgt3.o \ - i915_oa_glk.o \ - i915_oa_cflgt2.o \ - i915_oa_cflgt3.o \ - i915_oa_cnl.o \ - i915_oa_icl.o - ifeq ($(CONFIG_DRM_I915_GVT),y) i915-y += intel_gvt.o include $(src)/gvt/Makefile @@ -248,3 +274,27 @@ endif obj-$(CONFIG_DRM_I915) += i915.o obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o + +# header test + +# exclude some broken headers from the test coverage +no-header-test := \ + display/intel_vbt_defs.h \ + gvt/execlist.h \ + gvt/fb_decoder.h \ + gvt/gtt.h \ + gvt/gvt.h \ + gvt/interrupt.h \ + gvt/mmio_context.h \ + gvt/mpt.h \ + gvt/scheduler.h + +extra-$(CONFIG_DRM_I915_WERROR) += \ + $(patsubst %.h,%.hdrtest, $(filter-out $(no-header-test), \ + $(shell cd $(srctree)/$(src) && find * -name '*.h'))) + +quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) + cmd_hdrtest = $(CC) $(c_flags) -S -o /dev/null -x c /dev/null -include $<; touch $@ + +$(obj)/%.hdrtest: $(src)/%.h FORCE + $(call if_changed_dep,hdrtest) diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test deleted file mode 100644 index 7cde0ec34615..000000000000 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ /dev/null @@ -1,22 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2019 Intel Corporation - -# Test the headers are compilable as standalone units -header-test-$(CONFIG_DRM_I915_WERROR) := \ - i915_active_types.h \ - i915_debugfs.h \ - i915_drv.h \ - i915_irq.h \ - i915_params.h \ - i915_priolist_types.h \ - i915_reg.h \ - i915_scheduler_types.h \ - i915_timeline_types.h \ - i915_utils.h \ - intel_csr.h \ - intel_drv.h \ - intel_pm.h \ - intel_runtime_pm.h \ - intel_sideband.h \ - intel_uncore.h \ - intel_wakeref.h diff --git a/drivers/gpu/drm/i915/display/Makefile b/drivers/gpu/drm/i915/display/Makefile deleted file mode 100644 index 1c75b5c9790c..000000000000 --- a/drivers/gpu/drm/i915/display/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# Extra header tests -include $(src)/Makefile.header-test diff --git a/drivers/gpu/drm/i915/display/Makefile.header-test b/drivers/gpu/drm/i915/display/Makefile.header-test deleted file mode 100644 index fc7d4e5bd2c6..000000000000 --- a/drivers/gpu/drm/i915/display/Makefile.header-test +++ /dev/null @@ -1,16 +0,0 @@ -# 
SPDX-License-Identifier: MIT -# Copyright © 2019 Intel Corporation - -# Test the headers are compilable as standalone units -header_test := $(notdir $(filter-out %/intel_vbt_defs.h,$(wildcard $(src)/*.h))) - -quiet_cmd_header_test = HDRTEST $@ - cmd_header_test = echo "\#include \"$(<F)\"" > $@ - -header_test_%.c: %.h - $(call cmd,header_test) - -extra-$(CONFIG_DRM_I915_WERROR) += \ - $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h))) - -clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h))) diff --git a/drivers/gpu/drm/i915/display/dvo_ch7017.c b/drivers/gpu/drm/i915/display/dvo_ch7017.c index 602380fe74f3..0589994dde11 100644 --- a/drivers/gpu/drm/i915/display/dvo_ch7017.c +++ b/drivers/gpu/drm/i915/display/dvo_ch7017.c @@ -25,7 +25,7 @@ * */ -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dvo_dev.h" #define CH7017_TV_DISPLAY_MODE 0x00 diff --git a/drivers/gpu/drm/i915/display/dvo_ch7xxx.c b/drivers/gpu/drm/i915/display/dvo_ch7xxx.c index e070bebee7b5..54f58ba44b9f 100644 --- a/drivers/gpu/drm/i915/display/dvo_ch7xxx.c +++ b/drivers/gpu/drm/i915/display/dvo_ch7xxx.c @@ -26,7 +26,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dvo_dev.h" #define CH7xxx_REG_VID 0x4a diff --git a/drivers/gpu/drm/i915/display/dvo_ivch.c b/drivers/gpu/drm/i915/display/dvo_ivch.c index 09dba35f3ffa..f43d8c610d3f 100644 --- a/drivers/gpu/drm/i915/display/dvo_ivch.c +++ b/drivers/gpu/drm/i915/display/dvo_ivch.c @@ -29,7 +29,7 @@ * */ -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dvo_dev.h" /* diff --git a/drivers/gpu/drm/i915/display/dvo_ns2501.c b/drivers/gpu/drm/i915/display/dvo_ns2501.c index c83a5d88d62b..a724a8755673 100644 --- a/drivers/gpu/drm/i915/display/dvo_ns2501.c +++ b/drivers/gpu/drm/i915/display/dvo_ns2501.c @@ -28,7 +28,7 @@ #include "i915_drv.h" #include "i915_reg.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dvo_dev.h" #define NS2501_VID 0x1305 diff --git a/drivers/gpu/drm/i915/display/dvo_sil164.c b/drivers/gpu/drm/i915/display/dvo_sil164.c index 04698eaeb632..0dfa0a0209ff 100644 --- a/drivers/gpu/drm/i915/display/dvo_sil164.c +++ b/drivers/gpu/drm/i915/display/dvo_sil164.c @@ -26,7 +26,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
**************************************************************************/ -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dvo_dev.h" #define SIL164_VID 0x0001 diff --git a/drivers/gpu/drm/i915/display/dvo_tfp410.c b/drivers/gpu/drm/i915/display/dvo_tfp410.c index 623114ee73cd..009d65b0f3e9 100644 --- a/drivers/gpu/drm/i915/display/dvo_tfp410.c +++ b/drivers/gpu/drm/i915/display/dvo_tfp410.c @@ -25,7 +25,7 @@ * */ -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dvo_dev.h" /* register definitions according to the TFP410 data sheet */ diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 74448e6bf749..f8e882101396 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -34,6 +34,7 @@ #include "intel_ddi.h" #include "intel_dsi.h" #include "intel_panel.h" +#include "intel_vdsc.h" static inline int header_credits_available(struct drm_i915_private *dev_priv, enum transcoder dsi_trans) @@ -76,7 +77,7 @@ static enum transcoder dsi_port_to_transcoder(enum port port) static void wait_for_cmds_dispatched_to_panel(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct mipi_dsi_device *dsi; enum port port; enum transcoder dsi_trans; @@ -201,64 +202,63 @@ static int dsi_send_pkt_payld(struct intel_dsi_host *host, static void dsi_program_swing_and_deemphasis(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + enum phy phy; u32 tmp; int lane; - for_each_dsi_port(port, intel_dsi->ports) { - + for_each_dsi_phy(phy, intel_dsi->phys) { /* * Program voltage swing and pre-emphasis level values as per * table in BSPEC under DDI buffer programing */ - tmp = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + tmp = I915_READ(ICL_PORT_TX_DW5_LN0(phy)); tmp &= ~(SCALING_MODE_SEL_MASK | RTERM_SELECT_MASK); tmp |= SCALING_MODE_SEL(0x2); tmp |= TAP2_DISABLE | TAP3_DISABLE; tmp |= RTERM_SELECT(0x6); - I915_WRITE(ICL_PORT_TX_DW5_GRP(port), tmp); + I915_WRITE(ICL_PORT_TX_DW5_GRP(phy), tmp); - tmp = I915_READ(ICL_PORT_TX_DW5_AUX(port)); + tmp = I915_READ(ICL_PORT_TX_DW5_AUX(phy)); tmp &= ~(SCALING_MODE_SEL_MASK | RTERM_SELECT_MASK); tmp |= SCALING_MODE_SEL(0x2); tmp |= TAP2_DISABLE | TAP3_DISABLE; tmp |= RTERM_SELECT(0x6); - I915_WRITE(ICL_PORT_TX_DW5_AUX(port), tmp); + I915_WRITE(ICL_PORT_TX_DW5_AUX(phy), tmp); - tmp = I915_READ(ICL_PORT_TX_DW2_LN0(port)); + tmp = I915_READ(ICL_PORT_TX_DW2_LN0(phy)); tmp &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | RCOMP_SCALAR_MASK); tmp |= SWING_SEL_UPPER(0x2); tmp |= SWING_SEL_LOWER(0x2); tmp |= RCOMP_SCALAR(0x98); - I915_WRITE(ICL_PORT_TX_DW2_GRP(port), tmp); + I915_WRITE(ICL_PORT_TX_DW2_GRP(phy), tmp); - tmp = I915_READ(ICL_PORT_TX_DW2_AUX(port)); + tmp = I915_READ(ICL_PORT_TX_DW2_AUX(phy)); tmp &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | RCOMP_SCALAR_MASK); tmp |= SWING_SEL_UPPER(0x2); tmp |= SWING_SEL_LOWER(0x2); tmp |= RCOMP_SCALAR(0x98); - I915_WRITE(ICL_PORT_TX_DW2_AUX(port), tmp); + I915_WRITE(ICL_PORT_TX_DW2_AUX(phy), tmp); - tmp = I915_READ(ICL_PORT_TX_DW4_AUX(port)); + tmp = I915_READ(ICL_PORT_TX_DW4_AUX(phy)); tmp &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | 
CURSOR_COEFF_MASK); tmp |= POST_CURSOR_1(0x0); tmp |= POST_CURSOR_2(0x0); tmp |= CURSOR_COEFF(0x3f); - I915_WRITE(ICL_PORT_TX_DW4_AUX(port), tmp); + I915_WRITE(ICL_PORT_TX_DW4_AUX(phy), tmp); for (lane = 0; lane <= 3; lane++) { /* Bspec: must not use GRP register for write */ - tmp = I915_READ(ICL_PORT_TX_DW4_LN(lane, port)); + tmp = I915_READ(ICL_PORT_TX_DW4_LN(lane, phy)); tmp &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | CURSOR_COEFF_MASK); tmp |= POST_CURSOR_1(0x0); tmp |= POST_CURSOR_2(0x0); tmp |= CURSOR_COEFF(0x3f); - I915_WRITE(ICL_PORT_TX_DW4_LN(lane, port), tmp); + I915_WRITE(ICL_PORT_TX_DW4_LN(lane, phy), tmp); } } } @@ -267,7 +267,7 @@ static void configure_dual_link_mode(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 dss_ctl1; dss_ctl1 = I915_READ(DSS_CTL1); @@ -277,7 +277,7 @@ static void configure_dual_link_mode(struct intel_encoder *encoder, if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) { const struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &pipe_config->hw.adjusted_mode; u32 dss_ctl2; u16 hactive = adjusted_mode->crtc_hdisplay; u16 dl_buffer_depth; @@ -302,18 +302,31 @@ static void configure_dual_link_mode(struct intel_encoder *encoder, I915_WRITE(DSS_CTL1, dss_ctl1); } -static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder) +/* aka DSI 8X clock */ +static int afe_clk(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + int bpp; + + if (crtc_state->dsc.compression_enable) + bpp = crtc_state->dsc.compressed_bpp; + else + bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); + + return DIV_ROUND_CLOSEST(intel_dsi->pclk * bpp, intel_dsi->lane_count); +} + +static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; - u32 bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); - u32 afe_clk_khz; /* 8X Clock */ + int afe_clk_khz; u32 esc_clk_div_m; - afe_clk_khz = DIV_ROUND_CLOSEST(intel_dsi->pclk * bpp, - intel_dsi->lane_count); - + afe_clk_khz = afe_clk(encoder, crtc_state); esc_clk_div_m = DIV_ROUND_UP(afe_clk_khz, DSI_MAX_ESC_CLK); for_each_dsi_port(port, intel_dsi->ports) { @@ -347,7 +360,7 @@ static void get_dsi_io_power_domains(struct drm_i915_private *dev_priv, static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 tmp; @@ -363,46 +376,59 @@ static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + enum phy phy; - for_each_dsi_port(port, intel_dsi->ports) - intel_combo_phy_power_up_lanes(dev_priv, port, true, + 
for_each_dsi_phy(phy, intel_dsi->phys) + intel_combo_phy_power_up_lanes(dev_priv, phy, true, intel_dsi->lane_count, false); } static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + enum phy phy; u32 tmp; int lane; /* Step 4b(i) set loadgen select for transmit and aux lanes */ - for_each_dsi_port(port, intel_dsi->ports) { - tmp = I915_READ(ICL_PORT_TX_DW4_AUX(port)); + for_each_dsi_phy(phy, intel_dsi->phys) { + tmp = I915_READ(ICL_PORT_TX_DW4_AUX(phy)); tmp &= ~LOADGEN_SELECT; - I915_WRITE(ICL_PORT_TX_DW4_AUX(port), tmp); + I915_WRITE(ICL_PORT_TX_DW4_AUX(phy), tmp); for (lane = 0; lane <= 3; lane++) { - tmp = I915_READ(ICL_PORT_TX_DW4_LN(lane, port)); + tmp = I915_READ(ICL_PORT_TX_DW4_LN(lane, phy)); tmp &= ~LOADGEN_SELECT; if (lane != 2) tmp |= LOADGEN_SELECT; - I915_WRITE(ICL_PORT_TX_DW4_LN(lane, port), tmp); + I915_WRITE(ICL_PORT_TX_DW4_LN(lane, phy), tmp); } } /* Step 4b(ii) set latency optimization for transmit and aux lanes */ - for_each_dsi_port(port, intel_dsi->ports) { - tmp = I915_READ(ICL_PORT_TX_DW2_AUX(port)); + for_each_dsi_phy(phy, intel_dsi->phys) { + tmp = I915_READ(ICL_PORT_TX_DW2_AUX(phy)); tmp &= ~FRC_LATENCY_OPTIM_MASK; tmp |= FRC_LATENCY_OPTIM_VAL(0x5); - I915_WRITE(ICL_PORT_TX_DW2_AUX(port), tmp); - tmp = I915_READ(ICL_PORT_TX_DW2_LN0(port)); + I915_WRITE(ICL_PORT_TX_DW2_AUX(phy), tmp); + tmp = I915_READ(ICL_PORT_TX_DW2_LN0(phy)); tmp &= ~FRC_LATENCY_OPTIM_MASK; tmp |= FRC_LATENCY_OPTIM_VAL(0x5); - I915_WRITE(ICL_PORT_TX_DW2_GRP(port), tmp); + I915_WRITE(ICL_PORT_TX_DW2_GRP(phy), tmp); + + /* For EHL, TGL, set latency optimization for PCS_DW1 lanes */ + if (IS_ELKHARTLAKE(dev_priv) || (INTEL_GEN(dev_priv) >= 12)) { + tmp = I915_READ(ICL_PORT_PCS_DW1_AUX(phy)); + tmp &= ~LATENCY_OPTIM_MASK; + tmp |= LATENCY_OPTIM_VAL(0); + I915_WRITE(ICL_PORT_PCS_DW1_AUX(phy), tmp); + + tmp = I915_READ(ICL_PORT_PCS_DW1_LN0(phy)); + tmp &= ~LATENCY_OPTIM_MASK; + tmp |= LATENCY_OPTIM_VAL(0x1); + I915_WRITE(ICL_PORT_PCS_DW1_GRP(phy), tmp); + } } } @@ -410,18 +436,18 @@ static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder) static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; - enum port port; + enum phy phy; /* clear common keeper enable bit */ - for_each_dsi_port(port, intel_dsi->ports) { - tmp = I915_READ(ICL_PORT_PCS_DW1_LN0(port)); + for_each_dsi_phy(phy, intel_dsi->phys) { + tmp = I915_READ(ICL_PORT_PCS_DW1_LN0(phy)); tmp &= ~COMMON_KEEPER_EN; - I915_WRITE(ICL_PORT_PCS_DW1_GRP(port), tmp); - tmp = I915_READ(ICL_PORT_PCS_DW1_AUX(port)); + I915_WRITE(ICL_PORT_PCS_DW1_GRP(phy), tmp); + tmp = I915_READ(ICL_PORT_PCS_DW1_AUX(phy)); tmp &= ~COMMON_KEEPER_EN; - I915_WRITE(ICL_PORT_PCS_DW1_AUX(port), tmp); + I915_WRITE(ICL_PORT_PCS_DW1_AUX(phy), tmp); } /* @@ -429,40 +455,40 @@ static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder) * Note: loadgen select program is done * as part of lane phy sequence configuration */ - for_each_dsi_port(port, intel_dsi->ports) { - tmp = I915_READ(ICL_PORT_CL_DW5(port)); + for_each_dsi_phy(phy, intel_dsi->phys) { + tmp = I915_READ(ICL_PORT_CL_DW5(phy)); tmp |= 
SUS_CLOCK_CONFIG; - I915_WRITE(ICL_PORT_CL_DW5(port), tmp); + I915_WRITE(ICL_PORT_CL_DW5(phy), tmp); } /* Clear training enable to change swing values */ - for_each_dsi_port(port, intel_dsi->ports) { - tmp = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + for_each_dsi_phy(phy, intel_dsi->phys) { + tmp = I915_READ(ICL_PORT_TX_DW5_LN0(phy)); tmp &= ~TX_TRAINING_EN; - I915_WRITE(ICL_PORT_TX_DW5_GRP(port), tmp); - tmp = I915_READ(ICL_PORT_TX_DW5_AUX(port)); + I915_WRITE(ICL_PORT_TX_DW5_GRP(phy), tmp); + tmp = I915_READ(ICL_PORT_TX_DW5_AUX(phy)); tmp &= ~TX_TRAINING_EN; - I915_WRITE(ICL_PORT_TX_DW5_AUX(port), tmp); + I915_WRITE(ICL_PORT_TX_DW5_AUX(phy), tmp); } /* Program swing and de-emphasis */ dsi_program_swing_and_deemphasis(encoder); /* Set training enable to trigger update */ - for_each_dsi_port(port, intel_dsi->ports) { - tmp = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + for_each_dsi_phy(phy, intel_dsi->phys) { + tmp = I915_READ(ICL_PORT_TX_DW5_LN0(phy)); tmp |= TX_TRAINING_EN; - I915_WRITE(ICL_PORT_TX_DW5_GRP(port), tmp); - tmp = I915_READ(ICL_PORT_TX_DW5_AUX(port)); + I915_WRITE(ICL_PORT_TX_DW5_GRP(phy), tmp); + tmp = I915_READ(ICL_PORT_TX_DW5_AUX(phy)); tmp |= TX_TRAINING_EN; - I915_WRITE(ICL_PORT_TX_DW5_AUX(port), tmp); + I915_WRITE(ICL_PORT_TX_DW5_AUX(phy), tmp); } } static void gen11_dsi_enable_ddi_buffer(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum port port; @@ -478,12 +504,15 @@ static void gen11_dsi_enable_ddi_buffer(struct intel_encoder *encoder) } } -static void gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder) +static void +gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum port port; + enum phy phy; /* Program T-INIT master registers */ for_each_dsi_port(port, intel_dsi->ports) { @@ -517,18 +546,28 @@ static void gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder) * a value '0' inside TA_PARAM_REGISTERS otherwise * leave all fields at HW default values. 
*/ - if (intel_dsi_bitrate(intel_dsi) <= 800000) { - for_each_dsi_port(port, intel_dsi->ports) { - tmp = I915_READ(DPHY_TA_TIMING_PARAM(port)); - tmp &= ~TA_SURE_MASK; - tmp |= TA_SURE_OVERRIDE | TA_SURE(0); - I915_WRITE(DPHY_TA_TIMING_PARAM(port), tmp); - - /* shadow register inside display core */ - tmp = I915_READ(DSI_TA_TIMING_PARAM(port)); - tmp &= ~TA_SURE_MASK; - tmp |= TA_SURE_OVERRIDE | TA_SURE(0); - I915_WRITE(DSI_TA_TIMING_PARAM(port), tmp); + if (IS_GEN(dev_priv, 11)) { + if (afe_clk(encoder, crtc_state) <= 800000) { + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(DPHY_TA_TIMING_PARAM(port)); + tmp &= ~TA_SURE_MASK; + tmp |= TA_SURE_OVERRIDE | TA_SURE(0); + I915_WRITE(DPHY_TA_TIMING_PARAM(port), tmp); + + /* shadow register inside display core */ + tmp = I915_READ(DSI_TA_TIMING_PARAM(port)); + tmp &= ~TA_SURE_MASK; + tmp |= TA_SURE_OVERRIDE | TA_SURE(0); + I915_WRITE(DSI_TA_TIMING_PARAM(port), tmp); + } + } + } + + if (IS_ELKHARTLAKE(dev_priv)) { + for_each_dsi_phy(phy, intel_dsi->phys) { + tmp = I915_READ(ICL_DPHY_CHKN(phy)); + tmp |= ICL_DPHY_CHKN_AFE_OVER_PPI_STRAP; + I915_WRITE(ICL_DPHY_CHKN(phy), tmp); } } } @@ -536,34 +575,32 @@ static void gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder) static void gen11_dsi_gate_clocks(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; - enum port port; + enum phy phy; mutex_lock(&dev_priv->dpll_lock); - tmp = I915_READ(DPCLKA_CFGCR0_ICL); - for_each_dsi_port(port, intel_dsi->ports) { - tmp |= DPCLKA_CFGCR0_DDI_CLK_OFF(port); - } + tmp = I915_READ(ICL_DPCLKA_CFGCR0); + for_each_dsi_phy(phy, intel_dsi->phys) + tmp |= ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); - I915_WRITE(DPCLKA_CFGCR0_ICL, tmp); + I915_WRITE(ICL_DPCLKA_CFGCR0, tmp); mutex_unlock(&dev_priv->dpll_lock); } static void gen11_dsi_ungate_clocks(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; - enum port port; + enum phy phy; mutex_lock(&dev_priv->dpll_lock); - tmp = I915_READ(DPCLKA_CFGCR0_ICL); - for_each_dsi_port(port, intel_dsi->ports) { - tmp &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); - } + tmp = I915_READ(ICL_DPCLKA_CFGCR0); + for_each_dsi_phy(phy, intel_dsi->phys) + tmp &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); - I915_WRITE(DPCLKA_CFGCR0_ICL, tmp); + I915_WRITE(ICL_DPCLKA_CFGCR0, tmp); mutex_unlock(&dev_priv->dpll_lock); } @@ -571,26 +608,29 @@ static void gen11_dsi_map_pll(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct intel_shared_dpll *pll = crtc_state->shared_dpll; - enum port port; + enum phy phy; u32 val; mutex_lock(&dev_priv->dpll_lock); - val = I915_READ(DPCLKA_CFGCR0_ICL); - for_each_dsi_port(port, intel_dsi->ports) { - val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); - val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); + val = I915_READ(ICL_DPCLKA_CFGCR0); + for_each_dsi_phy(phy, intel_dsi->phys) { + val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + val |= ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy); } - I915_WRITE(DPCLKA_CFGCR0_ICL, val); + 
I915_WRITE(ICL_DPCLKA_CFGCR0, val); - for_each_dsi_port(port, intel_dsi->ports) { - val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); + for_each_dsi_phy(phy, intel_dsi->phys) { + if (INTEL_GEN(dev_priv) >= 12) + val |= ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); + else + val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); } - I915_WRITE(DPCLKA_CFGCR0_ICL, val); + I915_WRITE(ICL_DPCLKA_CFGCR0, val); - POSTING_READ(DPCLKA_CFGCR0_ICL); + POSTING_READ(ICL_DPCLKA_CFGCR0); mutex_unlock(&dev_priv->dpll_lock); } @@ -600,8 +640,8 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); enum pipe pipe = intel_crtc->pipe; u32 tmp; enum port port; @@ -617,7 +657,7 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, tmp |= EOTP_DISABLED; /* enable link calibration if freq > 1.5Gbps */ - if (intel_dsi_bitrate(intel_dsi) >= 1500 * 1000) { + if (afe_clk(encoder, pipe_config) >= 1500 * 1000) { tmp &= ~LINK_CALIBRATION_MASK; tmp |= CALIBRATION_ENABLED_INITIAL_ONLY; } @@ -643,22 +683,31 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, /* select pixel format */ tmp &= ~PIX_FMT_MASK; - switch (intel_dsi->pixel_format) { - default: - MISSING_CASE(intel_dsi->pixel_format); - /* fallthrough */ - case MIPI_DSI_FMT_RGB565: - tmp |= PIX_FMT_RGB565; - break; - case MIPI_DSI_FMT_RGB666_PACKED: - tmp |= PIX_FMT_RGB666_PACKED; - break; - case MIPI_DSI_FMT_RGB666: - tmp |= PIX_FMT_RGB666_LOOSE; - break; - case MIPI_DSI_FMT_RGB888: - tmp |= PIX_FMT_RGB888; - break; + if (pipe_config->dsc.compression_enable) { + tmp |= PIX_FMT_COMPRESSED; + } else { + switch (intel_dsi->pixel_format) { + default: + MISSING_CASE(intel_dsi->pixel_format); + /* fallthrough */ + case MIPI_DSI_FMT_RGB565: + tmp |= PIX_FMT_RGB565; + break; + case MIPI_DSI_FMT_RGB666_PACKED: + tmp |= PIX_FMT_RGB666_PACKED; + break; + case MIPI_DSI_FMT_RGB666: + tmp |= PIX_FMT_RGB666_LOOSE; + break; + case MIPI_DSI_FMT_RGB888: + tmp |= PIX_FMT_RGB888; + break; + } + } + + if (INTEL_GEN(dev_priv) >= 12) { + if (is_vid_mode(intel_dsi)) + tmp |= BLANKING_PACKET_ENABLE; } /* program DSI operation mode */ @@ -716,6 +765,9 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, case PIPE_C: tmp |= TRANS_DDI_EDP_INPUT_C_ONOFF; break; + case PIPE_D: + tmp |= TRANS_DDI_EDP_INPUT_D_ONOFF; + break; } /* enable DDI buffer */ @@ -734,27 +786,39 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, static void gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config) + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); const struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; enum port port; enum transcoder dsi_trans; /* horizontal timings */ u16 htotal, hactive, hsync_start, hsync_end, hsync_size; - u16 hfront_porch, hback_porch; + u16 hback_porch; /* vertical timings */ u16 vtotal, vactive, vsync_start, vsync_end, vsync_shift; + int mul = 1, div = 1; + + /* + * Adjust horizontal 
timings (htotal, hsync_start, hsync_end) to account + * for slower link speed if DSC is enabled. + * + * The compression frequency ratio is the ratio between compressed and + * non-compressed link speeds, and simplifies down to the ratio between + * compressed and non-compressed bpp. + */ + if (crtc_state->dsc.compression_enable) { + mul = crtc_state->dsc.compressed_bpp; + div = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); + } hactive = adjusted_mode->crtc_hdisplay; - htotal = adjusted_mode->crtc_htotal; - hsync_start = adjusted_mode->crtc_hsync_start; - hsync_end = adjusted_mode->crtc_hsync_end; + htotal = DIV_ROUND_UP(adjusted_mode->crtc_htotal * mul, div); + hsync_start = DIV_ROUND_UP(adjusted_mode->crtc_hsync_start * mul, div); + hsync_end = DIV_ROUND_UP(adjusted_mode->crtc_hsync_end * mul, div); hsync_size = hsync_end - hsync_start; - hfront_porch = (adjusted_mode->crtc_hsync_start - - adjusted_mode->crtc_hdisplay); hback_porch = (adjusted_mode->crtc_htotal - adjusted_mode->crtc_hsync_end); vactive = adjusted_mode->crtc_vdisplay; @@ -845,12 +909,21 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, dsi_trans = dsi_port_to_transcoder(port); I915_WRITE(VSYNCSHIFT(dsi_trans), vsync_shift); } + + /* program TRANS_VBLANK register, should be same as vtotal programmed */ + if (INTEL_GEN(dev_priv) >= 12) { + for_each_dsi_port(port, intel_dsi->ports) { + dsi_trans = dsi_port_to_transcoder(port); + I915_WRITE(VBLANK(dsi_trans), + (vactive - 1) | ((vtotal - 1) << 16)); + } + } } static void gen11_dsi_enable_transcoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; u32 tmp; @@ -862,18 +935,17 @@ static void gen11_dsi_enable_transcoder(struct intel_encoder *encoder) I915_WRITE(PIPECONF(dsi_trans), tmp); /* wait for transcoder to be enabled */ - if (intel_wait_for_register(&dev_priv->uncore, - PIPECONF(dsi_trans), - I965_PIPECONF_ACTIVE, - I965_PIPECONF_ACTIVE, 10)) + if (intel_de_wait_for_set(dev_priv, PIPECONF(dsi_trans), + I965_PIPECONF_ACTIVE, 10)) DRM_ERROR("DSI transcoder not enabled\n"); } } -static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder) +static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; u32 tmp, hs_tx_timeout, lp_rx_timeout, ta_timeout, divisor, mul; @@ -885,7 +957,7 @@ static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder) * TIME_NS = (BYTE_CLK_COUNT * 8 * 10^6)/ Bitrate * ESCAPE_CLK_COUNT = TIME_NS/ESC_CLK_NS */ - divisor = intel_dsi_tlpx_ns(intel_dsi) * intel_dsi_bitrate(intel_dsi) * 1000; + divisor = intel_dsi_tlpx_ns(intel_dsi) * afe_clk(encoder, crtc_state) * 1000; mul = 8 * 1000000; hs_tx_timeout = DIV_ROUND_UP(intel_dsi->hs_tx_timeout * mul, divisor); @@ -921,8 +993,10 @@ static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder) static void gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config) + const struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + /* step 4a: power up all lanes of the DDI used by 
DSI */ gen11_dsi_power_up_lanes(encoder); @@ -936,22 +1010,23 @@ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder, gen11_dsi_enable_ddi_buffer(encoder); /* setup D-PHY timings */ - gen11_dsi_setup_dphy_timings(encoder); + gen11_dsi_setup_dphy_timings(encoder, crtc_state); /* step 4h: setup DSI protocol timeouts */ - gen11_dsi_setup_timeouts(encoder); + gen11_dsi_setup_timeouts(encoder, crtc_state); /* Step (4h, 4i, 4j, 4k): Configure transcoder */ - gen11_dsi_configure_transcoder(encoder, pipe_config); + gen11_dsi_configure_transcoder(encoder, crtc_state); /* Step 4l: Gate DDI clocks */ - gen11_dsi_gate_clocks(encoder); + if (IS_GEN(dev_priv, 11)) + gen11_dsi_gate_clocks(encoder); } static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct mipi_dsi_device *dsi; enum port port; enum transcoder dsi_trans; @@ -988,21 +1063,21 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) } static void gen11_dsi_pre_pll_enable(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, + const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { /* step2: enable IO power */ gen11_dsi_enable_io_power(encoder); /* step3: enable DSI PLL */ - gen11_dsi_program_esc_clk_div(encoder); + gen11_dsi_program_esc_clk_div(encoder, crtc_state); } static void gen11_dsi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); /* step3b */ gen11_dsi_map_pll(encoder, pipe_config); @@ -1013,6 +1088,8 @@ static void gen11_dsi_pre_enable(struct intel_encoder *encoder, /* step5: program and powerup panel */ gen11_dsi_powerup_panel(encoder); + intel_dsc_enable(encoder, pipe_config); + /* step6c: configure transcoder timings */ gen11_dsi_set_transcoder_timings(encoder, pipe_config); @@ -1027,7 +1104,7 @@ static void gen11_dsi_pre_enable(struct intel_encoder *encoder, static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; u32 tmp; @@ -1041,16 +1118,15 @@ static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder) I915_WRITE(PIPECONF(dsi_trans), tmp); /* wait for transcoder to be disabled */ - if (intel_wait_for_register(&dev_priv->uncore, - PIPECONF(dsi_trans), - I965_PIPECONF_ACTIVE, 0, 50)) + if (intel_de_wait_for_clear(dev_priv, PIPECONF(dsi_trans), + I965_PIPECONF_ACTIVE, 50)) DRM_ERROR("DSI trancoder not disabled\n"); } } static void gen11_dsi_powerdown_panel(struct intel_encoder *encoder) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); @@ -1063,7 +1139,7 @@ static void gen11_dsi_powerdown_panel(struct intel_encoder *encoder) static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi 
*intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; u32 tmp; @@ -1104,7 +1180,7 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) static void gen11_dsi_disable_port(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum port port; @@ -1126,7 +1202,7 @@ static void gen11_dsi_disable_port(struct intel_encoder *encoder) static void gen11_dsi_disable_io_power(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 tmp; @@ -1153,7 +1229,7 @@ static void gen11_dsi_disable(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); /* step1: turn off backlight */ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); @@ -1175,12 +1251,42 @@ static void gen11_dsi_disable(struct intel_encoder *encoder, gen11_dsi_disable_io_power(encoder); } +static void gen11_dsi_post_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + intel_crtc_vblank_off(old_crtc_state); + + intel_dsc_disable(old_crtc_state); + + skl_scaler_disable(old_crtc_state); +} + +static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + /* FIXME: DSC? */ + return intel_dsi_mode_valid(connector, mode); +} + static void gen11_dsi_get_timings(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &pipe_config->hw.adjusted_mode; + + if (pipe_config->dsc.compressed_bpp) { + int div = pipe_config->dsc.compressed_bpp; + int mul = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); + + adjusted_mode->crtc_htotal = + DIV_ROUND_UP(adjusted_mode->crtc_htotal * mul, div); + adjusted_mode->crtc_hsync_start = + DIV_ROUND_UP(adjusted_mode->crtc_hsync_start * mul, div); + adjusted_mode->crtc_hsync_end = + DIV_ROUND_UP(adjusted_mode->crtc_hsync_end * mul, div); + } if (intel_dsi->dual_link) { adjusted_mode->crtc_hdisplay *= 2; @@ -1206,22 +1312,66 @@ static void gen11_dsi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + + intel_dsc_get_config(encoder, pipe_config); /* FIXME: adapt icl_ddi_clock_get() for DSI and use that? 
*/ pipe_config->port_clock = cnl_calc_wrpll_link(dev_priv, &pipe_config->dpll_hw_state); - pipe_config->base.adjusted_mode.crtc_clock = intel_dsi->pclk; + pipe_config->hw.adjusted_mode.crtc_clock = intel_dsi->pclk; if (intel_dsi->dual_link) - pipe_config->base.adjusted_mode.crtc_clock *= 2; + pipe_config->hw.adjusted_mode.crtc_clock *= 2; gen11_dsi_get_timings(encoder, pipe_config); pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI); pipe_config->pipe_bpp = bdw_get_pipemisc_bpp(crtc); } +static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; + int dsc_max_bpc = INTEL_GEN(dev_priv) >= 12 ? 12 : 10; + bool use_dsc; + int ret; + + use_dsc = intel_bios_get_dsc_params(encoder, crtc_state, dsc_max_bpc); + if (!use_dsc) + return 0; + + if (crtc_state->pipe_bpp < 8 * 3) + return -EINVAL; + + /* FIXME: split only when necessary */ + if (crtc_state->dsc.slice_count > 1) + crtc_state->dsc.dsc_split = true; + + vdsc_cfg->convert_rgb = true; + + ret = intel_dsc_compute_params(encoder, crtc_state); + if (ret) + return ret; + + /* DSI specific sanity checks on the common code */ + WARN_ON(vdsc_cfg->vbr_enable); + WARN_ON(vdsc_cfg->simple_422); + WARN_ON(vdsc_cfg->pic_width % vdsc_cfg->slice_width); + WARN_ON(vdsc_cfg->slice_height < 8); + WARN_ON(vdsc_cfg->pic_height % vdsc_cfg->slice_height); + + ret = drm_dsc_compute_rc_parameters(vdsc_cfg); + if (ret) + return ret; + + crtc_state->dsc.compression_enable = true; + + return 0; +} + static int gen11_dsi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -1229,11 +1379,11 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi, base); struct intel_connector *intel_connector = intel_dsi->attached_connector; - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); const struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &pipe_config->hw.adjusted_mode; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; intel_fixed_panel_mode(fixed_mode, adjusted_mode); @@ -1247,8 +1397,17 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, else pipe_config->cpu_transcoder = TRANSCODER_DSI_0; + if (intel_dsi->pixel_format == MIPI_DSI_FMT_RGB888) + pipe_config->pipe_bpp = 24; + else + pipe_config->pipe_bpp = 18; + pipe_config->clock_set = true; - pipe_config->port_clock = intel_dsi_bitrate(intel_dsi) / 5; + + if (gen11_dsi_dsc_compute_config(encoder, pipe_config)) + DRM_DEBUG_KMS("Attempting to use DSC failed\n"); + + pipe_config->port_clock = afe_clk(encoder, pipe_config) / 5; return 0; } @@ -1256,15 +1415,21 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, static void gen11_dsi_get_power_domains(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { - get_dsi_io_power_domains(to_i915(encoder->base.dev), - enc_to_intel_dsi(&encoder->base)); + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + get_dsi_io_power_domains(i915, + enc_to_intel_dsi(encoder)); + + if (crtc_state->dsc.compression_enable) + intel_display_power_get(i915, + intel_dsc_power_domain(crtc_state)); } static bool 
gen11_dsi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum transcoder dsi_trans; intel_wakeref_t wakeref; enum port port; @@ -1289,6 +1454,9 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, case TRANS_DDI_EDP_INPUT_C_ONOFF: *pipe = PIPE_C; break; + case TRANS_DDI_EDP_INPUT_D_ONOFF: + *pipe = PIPE_D; + break; default: DRM_ERROR("Invalid PIPE input\n"); goto out; @@ -1324,7 +1492,7 @@ static const struct drm_connector_funcs gen11_dsi_connector_funcs = { static const struct drm_connector_helper_funcs gen11_dsi_connector_helper_funcs = { .get_modes = intel_dsi_get_modes, - .mode_valid = intel_dsi_mode_valid, + .mode_valid = gen11_dsi_mode_valid, .atomic_check = intel_digital_connector_atomic_check, }; @@ -1487,6 +1655,26 @@ static void icl_dphy_param_init(struct intel_dsi *intel_dsi) intel_dsi_log_params(intel_dsi); } +static void icl_dsi_add_properties(struct intel_connector *connector) +{ + u32 allowed_scalers; + + allowed_scalers = BIT(DRM_MODE_SCALE_ASPECT) | + BIT(DRM_MODE_SCALE_FULLSCREEN) | + BIT(DRM_MODE_SCALE_CENTER); + + drm_connector_attach_scaling_mode_property(&connector->base, + allowed_scalers); + + connector->base.state->scaling_mode = DRM_MODE_SCALE_ASPECT; + + connector->base.display_info.panel_orientation = + intel_dsi_get_panel_orientation(connector); + drm_connector_init_panel_orientation_property(&connector->base, + connector->panel.fixed_mode->hdisplay, + connector->panel.fixed_mode->vdisplay); +} + void icl_dsi_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; @@ -1521,6 +1709,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->pre_pll_enable = gen11_dsi_pre_pll_enable; encoder->pre_enable = gen11_dsi_pre_enable; encoder->disable = gen11_dsi_disable; + encoder->post_disable = gen11_dsi_post_disable; encoder->port = port; encoder->get_config = gen11_dsi_get_config; encoder->update_pipe = intel_panel_update_backlight; @@ -1528,7 +1717,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->get_hw_state = gen11_dsi_get_hw_state; encoder->type = INTEL_OUTPUT_DSI; encoder->cloneable = 0; - encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + encoder->pipe_mask = ~0; encoder->power_domain = POWER_DOMAIN_PORT_DSI; encoder->get_power_domains = gen11_dsi_get_power_domains; @@ -1580,6 +1769,8 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) } icl_dphy_param_init(intel_dsi); + + icl_dsi_add_properties(intel_connector); return; err: diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 90ca11a4ae88..c362eecdd414 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -35,8 +35,9 @@ #include <drm/drm_plane_helper.h> #include "intel_atomic.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_hdcp.h" +#include "intel_psr.h" #include "intel_sprite.h" /** @@ -129,6 +130,7 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, struct drm_crtc_state *crtc_state; intel_hdcp_atomic_check(conn, old_state, new_state); + intel_psr_atomic_check(conn, old_state, new_state); if (!new_state->crtc) return 0; @@ -175,6 +177,38 @@ intel_digital_connector_duplicate_state(struct drm_connector *connector) } /** + * intel_connector_needs_modeset - check if 
connector needs a modeset + */ +bool +intel_connector_needs_modeset(struct intel_atomic_state *state, + struct drm_connector *connector) +{ + const struct drm_connector_state *old_conn_state, *new_conn_state; + + old_conn_state = drm_atomic_get_old_connector_state(&state->base, connector); + new_conn_state = drm_atomic_get_new_connector_state(&state->base, connector); + + return old_conn_state->crtc != new_conn_state->crtc || + (new_conn_state->crtc && + drm_atomic_crtc_needs_modeset(drm_atomic_get_new_crtc_state(&state->base, + new_conn_state->crtc))); +} + +struct intel_digital_connector_state * +intel_atomic_get_digital_connector_state(struct intel_atomic_state *state, + struct intel_connector *connector) +{ + struct drm_connector_state *conn_state; + + conn_state = drm_atomic_get_connector_state(&state->base, + &connector->base); + if (IS_ERR(conn_state)) + return ERR_CAST(conn_state); + + return to_intel_digital_connector_state(conn_state); +} + +/** * intel_crtc_duplicate_state - duplicate crtc state * @crtc: drm crtc * @@ -186,26 +220,57 @@ intel_digital_connector_duplicate_state(struct drm_connector *connector) struct drm_crtc_state * intel_crtc_duplicate_state(struct drm_crtc *crtc) { + const struct intel_crtc_state *old_crtc_state = to_intel_crtc_state(crtc->state); struct intel_crtc_state *crtc_state; - crtc_state = kmemdup(crtc->state, sizeof(*crtc_state), GFP_KERNEL); + crtc_state = kmemdup(old_crtc_state, sizeof(*crtc_state), GFP_KERNEL); if (!crtc_state) return NULL; - __drm_atomic_helper_crtc_duplicate_state(crtc, &crtc_state->base); + __drm_atomic_helper_crtc_duplicate_state(crtc, &crtc_state->uapi); + + /* copy color blobs */ + if (crtc_state->hw.degamma_lut) + drm_property_blob_get(crtc_state->hw.degamma_lut); + if (crtc_state->hw.ctm) + drm_property_blob_get(crtc_state->hw.ctm); + if (crtc_state->hw.gamma_lut) + drm_property_blob_get(crtc_state->hw.gamma_lut); crtc_state->update_pipe = false; crtc_state->disable_lp_wm = false; crtc_state->disable_cxsr = false; crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; - crtc_state->fb_changed = false; crtc_state->fifo_changed = false; + crtc_state->preload_luts = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; crtc_state->update_planes = 0; - return &crtc_state->base; + return &crtc_state->uapi; +} + +static void intel_crtc_put_color_blobs(struct intel_crtc_state *crtc_state) +{ + drm_property_blob_put(crtc_state->hw.degamma_lut); + drm_property_blob_put(crtc_state->hw.gamma_lut); + drm_property_blob_put(crtc_state->hw.ctm); +} + +void intel_crtc_free_hw_state(struct intel_crtc_state *crtc_state) +{ + intel_crtc_put_color_blobs(crtc_state); +} + +void intel_crtc_copy_color_blobs(struct intel_crtc_state *crtc_state) +{ + drm_property_replace_blob(&crtc_state->hw.degamma_lut, + crtc_state->uapi.degamma_lut); + drm_property_replace_blob(&crtc_state->hw.gamma_lut, + crtc_state->uapi.gamma_lut); + drm_property_replace_blob(&crtc_state->hw.ctm, + crtc_state->uapi.ctm); } /** @@ -220,7 +285,11 @@ void intel_crtc_destroy_state(struct drm_crtc *crtc, struct drm_crtc_state *state) { - drm_atomic_helper_crtc_destroy_state(crtc, state); + struct intel_crtc_state *crtc_state = to_intel_crtc_state(state); + + __drm_atomic_helper_crtc_destroy_state(&crtc_state->uapi); + intel_crtc_free_hw_state(crtc_state); + kfree(crtc_state); } static void intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_state, @@ -249,10 +318,10 @@ static void intel_atomic_setup_scaler(struct 
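The intel_atomic.c hunks above hinge on drm_property_blob reference counting: intel_crtc_duplicate_state() takes extra references on the hw colour blobs, intel_crtc_destroy_state() drops them via intel_crtc_free_hw_state(), and intel_crtc_copy_color_blobs() relies on drm_property_replace_blob() to swap references. Below is a minimal user-space analogue of that ownership pattern; the blob type and the blob_get/blob_put/blob_replace helpers are hypothetical stand-ins written for this sketch, not the DRM API:

#include <stdio.h>
#include <stdlib.h>

struct blob {
        int refcount;
};

static struct blob *blob_get(struct blob *b)
{
        if (b)
                b->refcount++;
        return b;
}

static void blob_put(struct blob *b)
{
        if (b && --b->refcount == 0)
                free(b);
}

/* Loosely mirrors drm_property_replace_blob(): take the new ref, drop the old. */
static void blob_replace(struct blob **slot, struct blob *new)
{
        struct blob *old = *slot;

        *slot = blob_get(new);
        blob_put(old);
}

int main(void)
{
        struct blob *gamma = calloc(1, sizeof(*gamma));
        struct blob *hw_slot = NULL;

        if (!gamma)
                return 1;

        gamma->refcount = 1;            /* the "uapi" side owns one reference */
        blob_replace(&hw_slot, gamma);  /* the "hw" view now holds a second one */
        printf("refcount after copy: %d\n", gamma->refcount);  /* prints 2 */

        blob_put(hw_slot);              /* destroying the hw view drops its ref */
        blob_put(gamma);                /* uapi reference released last, frees */
        return 0;
}

The point of the pattern is that every state view that can outlive the uapi state holds its own reference, so the views can be torn down in any order without dropping a blob that is still in use.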
intel_crtc_scaler_state *scaler_sta return; /* set scaler mode */ - if (plane_state && plane_state->base.fb && - plane_state->base.fb->format->is_yuv && - plane_state->base.fb->format->num_planes > 1) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + if (plane_state && plane_state->hw.fb && + plane_state->hw.fb->format->is_yuv && + plane_state->hw.fb->format->num_planes > 1) { + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); if (IS_GEN(dev_priv, 9) && !IS_GEMINILAKE(dev_priv)) { mode = SKL_PS_SCALER_MODE_NV12; @@ -264,10 +333,13 @@ static void intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_sta */ mode = PS_SCALER_MODE_NORMAL; } else { + struct intel_plane *linked = + plane_state->planar_linked_plane; + mode = PS_SCALER_MODE_PLANAR; - if (plane_state->linked_plane) - mode |= PS_PLANE_Y_SEL(plane_state->linked_plane->id); + if (linked) + mode |= PS_PLANE_Y_SEL(linked->id); } } else if (INTEL_GEN(dev_priv) > 9 || IS_GEMINILAKE(dev_priv)) { mode = PS_SCALER_MODE_NORMAL; @@ -316,7 +388,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_plane_state *plane_state = NULL; struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; - struct drm_atomic_state *drm_state = crtc_state->base.state; + struct drm_atomic_state *drm_state = crtc_state->uapi.state; struct intel_atomic_state *intel_state = to_intel_atomic_state(drm_state); int num_scalers_need; int i; @@ -371,6 +443,15 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, */ if (!plane) { struct drm_plane_state *state; + + /* + * GLK+ scalers don't have a HQ mode so it + * isn't necessary to change between HQ and dyn mode + * on those platforms. + */ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + continue; + plane = drm_plane_from_index(&dev_priv->drm, i); state = drm_atomic_get_plane_state(drm_state, plane); if (IS_ERR(state)) { @@ -378,13 +459,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, plane->base.id); return PTR_ERR(state); } - - /* - * the plane is added after plane checks are run, - * but since this plane is unchanged just do the - * minimum required validation. 
- */ - crtc_state->base.planes_changed = true; } intel_plane = to_intel_plane(plane); @@ -425,6 +499,13 @@ void intel_atomic_state_clear(struct drm_atomic_state *s) struct intel_atomic_state *state = to_intel_atomic_state(s); drm_atomic_state_default_clear(&state->base); state->dpll_set = state->modeset = false; + state->global_state_changed = false; + state->active_pipes = 0; + memset(&state->min_cdclk, 0, sizeof(state->min_cdclk)); + memset(&state->min_voltage_level, 0, sizeof(state->min_voltage_level)); + memset(&state->cdclk.logical, 0, sizeof(state->cdclk.logical)); + memset(&state->cdclk.actual, 0, sizeof(state->cdclk.actual)); + state->cdclk.pipe = INVALID_PIPE; } struct intel_crtc_state * @@ -438,3 +519,40 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state, return to_intel_crtc_state(crtc_state); } + +int intel_atomic_lock_global_state(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + + state->global_state_changed = true; + + for_each_intel_crtc(&dev_priv->drm, crtc) { + int ret; + + ret = drm_modeset_lock(&crtc->base.mutex, + state->base.acquire_ctx); + if (ret) + return ret; + } + + return 0; +} + +int intel_atomic_serialize_global_state(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + + state->global_state_changed = true; + + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h index 58065d3161a3..74c749dbfb4f 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.h +++ b/drivers/gpu/drm/i915/display/intel_atomic.h @@ -16,6 +16,8 @@ struct drm_crtc_state; struct drm_device; struct drm_i915_private; struct drm_property; +struct intel_atomic_state; +struct intel_connector; struct intel_crtc; struct intel_crtc_state; @@ -31,10 +33,17 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, struct drm_atomic_state *state); struct drm_connector_state * intel_digital_connector_duplicate_state(struct drm_connector *connector); +bool intel_connector_needs_modeset(struct intel_atomic_state *state, + struct drm_connector *connector); +struct intel_digital_connector_state * +intel_atomic_get_digital_connector_state(struct intel_atomic_state *state, + struct intel_connector *connector); struct drm_crtc_state *intel_crtc_duplicate_state(struct drm_crtc *crtc); void intel_crtc_destroy_state(struct drm_crtc *crtc, struct drm_crtc_state *state); +void intel_crtc_free_hw_state(struct intel_crtc_state *crtc_state); +void intel_crtc_copy_color_blobs(struct intel_crtc_state *crtc_state); struct drm_atomic_state *intel_atomic_state_alloc(struct drm_device *dev); void intel_atomic_state_clear(struct drm_atomic_state *state); @@ -46,4 +55,8 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state); +int intel_atomic_lock_global_state(struct intel_atomic_state *state); + +int intel_atomic_serialize_global_state(struct intel_atomic_state *state); + #endif /* __INTEL_ATOMIC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 30bd4e76fff9..3e97af682b1b 100644 --- 
a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -35,11 +35,22 @@ #include <drm/drm_fourcc.h> #include <drm/drm_plane_helper.h> +#include "i915_trace.h" #include "intel_atomic_plane.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_pm.h" #include "intel_sprite.h" +static void intel_plane_state_reset(struct intel_plane_state *plane_state, + struct intel_plane *plane) +{ + memset(plane_state, 0, sizeof(*plane_state)); + + __drm_atomic_helper_plane_state_reset(&plane_state->uapi, &plane->base); + + plane_state->scaler_id = -1; +} + struct intel_plane *intel_plane_alloc(void) { struct intel_plane_state *plane_state; @@ -55,8 +66,9 @@ struct intel_plane *intel_plane_alloc(void) return ERR_PTR(-ENOMEM); } - __drm_atomic_helper_plane_reset(&plane->base, &plane_state->base); - plane_state->scaler_id = -1; + intel_plane_state_reset(plane_state, plane); + + plane->base.state = &plane_state->uapi; return plane; } @@ -79,22 +91,24 @@ void intel_plane_free(struct intel_plane *plane) struct drm_plane_state * intel_plane_duplicate_state(struct drm_plane *plane) { - struct drm_plane_state *state; struct intel_plane_state *intel_state; - intel_state = kmemdup(plane->state, sizeof(*intel_state), GFP_KERNEL); + intel_state = to_intel_plane_state(plane->state); + intel_state = kmemdup(intel_state, sizeof(*intel_state), GFP_KERNEL); if (!intel_state) return NULL; - state = &intel_state->base; - - __drm_atomic_helper_plane_duplicate_state(plane, state); + __drm_atomic_helper_plane_duplicate_state(plane, &intel_state->uapi); intel_state->vma = NULL; intel_state->flags = 0; - return state; + /* add reference to fb */ + if (intel_state->hw.fb) + drm_framebuffer_get(intel_state->hw.fb); + + return &intel_state->uapi; } /** @@ -109,18 +123,22 @@ void intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { - WARN_ON(to_intel_plane_state(state)->vma); + struct intel_plane_state *plane_state = to_intel_plane_state(state); + WARN_ON(plane_state->vma); - drm_atomic_helper_plane_destroy_state(plane, state); + __drm_atomic_helper_plane_destroy_state(&plane_state->uapi); + if (plane_state->hw.fb) + drm_framebuffer_put(plane_state->hw.fb); + kfree(plane_state); } unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_framebuffer *fb = plane_state->hw.fb; unsigned int cpp; - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) return 0; cpp = fb->format->cpp[0]; @@ -137,21 +155,90 @@ unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state, return cpp * crtc_state->pixel_rate; } +bool intel_plane_calc_min_cdclk(struct intel_atomic_state *state, + struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct intel_plane_state *plane_state = + intel_atomic_get_new_plane_state(state, plane); + struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc); + struct intel_crtc_state *crtc_state; + + if (!plane_state->uapi.visible || !plane->min_cdclk) + return false; + + crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + + crtc_state->min_cdclk[plane->id] = + plane->min_cdclk(crtc_state, plane_state); + + /* + * Does the cdclk need to be bumbed up? 
+ * + * Note: we obviously need to be called before the new + * cdclk frequency is calculated so state->cdclk.logical + * hasn't been populated yet. Hence we look at the old + * cdclk state under dev_priv->cdclk.logical. This is + * safe as long we hold at least one crtc mutex (which + * must be true since we have crtc_state). + */ + if (crtc_state->min_cdclk[plane->id] > dev_priv->cdclk.logical.cdclk) { + DRM_DEBUG_KMS("[PLANE:%d:%s] min_cdclk (%d kHz) > logical cdclk (%d kHz)\n", + plane->base.base.id, plane->base.name, + crtc_state->min_cdclk[plane->id], + dev_priv->cdclk.logical.cdclk); + return true; + } + + return false; +} + +static void intel_plane_clear_hw_state(struct intel_plane_state *plane_state) +{ + if (plane_state->hw.fb) + drm_framebuffer_put(plane_state->hw.fb); + + memset(&plane_state->hw, 0, sizeof(plane_state->hw)); +} + +void intel_plane_copy_uapi_to_hw_state(struct intel_plane_state *plane_state, + const struct intel_plane_state *from_plane_state) +{ + intel_plane_clear_hw_state(plane_state); + + plane_state->hw.crtc = from_plane_state->uapi.crtc; + plane_state->hw.fb = from_plane_state->uapi.fb; + if (plane_state->hw.fb) + drm_framebuffer_get(plane_state->hw.fb); + + plane_state->hw.alpha = from_plane_state->uapi.alpha; + plane_state->hw.pixel_blend_mode = + from_plane_state->uapi.pixel_blend_mode; + plane_state->hw.rotation = from_plane_state->uapi.rotation; + plane_state->hw.color_encoding = from_plane_state->uapi.color_encoding; + plane_state->hw.color_range = from_plane_state->uapi.color_range; +} + int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *new_crtc_state, const struct intel_plane_state *old_plane_state, struct intel_plane_state *new_plane_state) { - struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane); + const struct drm_framebuffer *fb; int ret; + intel_plane_copy_uapi_to_hw_state(new_plane_state, new_plane_state); + fb = new_plane_state->hw.fb; + new_crtc_state->active_planes &= ~BIT(plane->id); new_crtc_state->nv12_planes &= ~BIT(plane->id); new_crtc_state->c8_planes &= ~BIT(plane->id); new_crtc_state->data_rate[plane->id] = 0; - new_plane_state->base.visible = false; + new_crtc_state->min_cdclk[plane->id] = 0; + new_plane_state->uapi.visible = false; - if (!new_plane_state->base.crtc && !old_plane_state->base.crtc) + if (!new_plane_state->hw.crtc && !old_plane_state->hw.crtc) return 0; ret = plane->check_plane(new_crtc_state, new_plane_state); @@ -159,50 +246,63 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ return ret; /* FIXME pre-g4x don't work like this */ - if (new_plane_state->base.visible) + if (new_plane_state->uapi.visible) new_crtc_state->active_planes |= BIT(plane->id); - if (new_plane_state->base.visible && - is_planar_yuv_format(new_plane_state->base.fb->format->format)) + if (new_plane_state->uapi.visible && + intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier)) new_crtc_state->nv12_planes |= BIT(plane->id); - if (new_plane_state->base.visible && - new_plane_state->base.fb->format->format == DRM_FORMAT_C8) + if (new_plane_state->uapi.visible && + fb->format->format == DRM_FORMAT_C8) new_crtc_state->c8_planes |= BIT(plane->id); - if (new_plane_state->base.visible || old_plane_state->base.visible) + if (new_plane_state->uapi.visible || old_plane_state->uapi.visible) new_crtc_state->update_planes |= BIT(plane->id); 
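intel_plane_calc_min_cdclk() above boils down to one comparison: the plane's computed cdclk requirement against the old logical cdclk, because the new logical value has not been derived yet at that point. A small sketch of that decision, not part of the patch; the 652800 and 556800 kHz figures are illustrative assumptions:

#include <stdbool.h>
#include <stdio.h>

static bool plane_needs_cdclk_recalc(int plane_min_cdclk_khz,
                                     int logical_cdclk_khz)
{
        /*
         * Checked against the old logical cdclk; returning true tells the
         * caller the cdclk state has to be recomputed for this commit.
         */
        return plane_min_cdclk_khz > logical_cdclk_khz;
}

int main(void)
{
        /* e.g. a heavily downscaled plane asking for more than the current cdclk */
        printf("recalc needed: %d\n",
               plane_needs_cdclk_recalc(652800, 556800));
        return 0;
}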
new_crtc_state->data_rate[plane->id] = intel_plane_data_rate(new_crtc_state, new_plane_state); - return intel_plane_atomic_calc_changes(old_crtc_state, - &new_crtc_state->base, - old_plane_state, - &new_plane_state->base); + return intel_plane_atomic_calc_changes(old_crtc_state, new_crtc_state, + old_plane_state, new_plane_state); } -static int intel_plane_atomic_check(struct drm_plane *plane, - struct drm_plane_state *new_plane_state) +static struct intel_crtc * +get_crtc_from_states(const struct intel_plane_state *old_plane_state, + const struct intel_plane_state *new_plane_state) { - struct drm_atomic_state *state = new_plane_state->state; - const struct drm_plane_state *old_plane_state = - drm_atomic_get_old_plane_state(state, plane); - struct drm_crtc *crtc = new_plane_state->crtc ?: old_plane_state->crtc; - const struct drm_crtc_state *old_crtc_state; - struct drm_crtc_state *new_crtc_state; - - new_plane_state->visible = false; + if (new_plane_state->uapi.crtc) + return to_intel_crtc(new_plane_state->uapi.crtc); + + if (old_plane_state->uapi.crtc) + return to_intel_crtc(old_plane_state->uapi.crtc); + + return NULL; +} + +int intel_plane_atomic_check(struct intel_atomic_state *state, + struct intel_plane *plane) +{ + struct intel_plane_state *new_plane_state = + intel_atomic_get_new_plane_state(state, plane); + const struct intel_plane_state *old_plane_state = + intel_atomic_get_old_plane_state(state, plane); + struct intel_crtc *crtc = + get_crtc_from_states(old_plane_state, new_plane_state); + const struct intel_crtc_state *old_crtc_state; + struct intel_crtc_state *new_crtc_state; + + new_plane_state->uapi.visible = false; if (!crtc) return 0; - old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); - new_crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); + new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - return intel_plane_atomic_check_with_state(to_intel_crtc_state(old_crtc_state), - to_intel_crtc_state(new_crtc_state), - to_intel_plane_state(old_plane_state), - to_intel_plane_state(new_plane_state)); + return intel_plane_atomic_check_with_state(old_crtc_state, + new_crtc_state, + old_plane_state, + new_plane_state); } static struct intel_plane * @@ -253,26 +353,16 @@ void intel_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); trace_intel_update_plane(&plane->base, crtc); plane->update_plane(plane, crtc_state, plane_state); } -void intel_update_slave(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - - trace_intel_update_plane(&plane->base, crtc); - plane->update_slave(plane, crtc_state, plane_state); -} - void intel_disable_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); trace_intel_disable_plane(&plane->base, crtc); plane->disable_plane(plane, crtc_state); @@ -301,25 +391,9 @@ void skl_update_planes_on_crtc(struct intel_atomic_state *state, struct intel_plane_state *new_plane_state = intel_atomic_get_new_plane_state(state, plane); - if (new_plane_state->base.visible) { + if 
(new_plane_state->uapi.visible || + new_plane_state->planar_slave) { intel_update_plane(plane, new_crtc_state, new_plane_state); - } else if (new_plane_state->slave) { - struct intel_plane *master = - new_plane_state->linked_plane; - - /* - * We update the slave plane from this function because - * programming it from the master plane's update_plane - * callback runs into issues when the Y plane is - * reassigned, disabled or used by a different plane. - * - * The slave plane is updated with the master plane's - * plane_state. - */ - new_plane_state = - intel_atomic_get_new_plane_state(state, master); - - intel_update_slave(plane, new_crtc_state, new_plane_state); } else { intel_disable_plane(plane, new_crtc_state); } @@ -341,7 +415,7 @@ void i9xx_update_planes_on_crtc(struct intel_atomic_state *state, !(update_mask & BIT(plane->id))) continue; - if (new_plane_state->base.visible) + if (new_plane_state->uapi.visible) intel_update_plane(plane, new_crtc_state, new_plane_state); else intel_disable_plane(plane, new_crtc_state); @@ -351,5 +425,4 @@ void i9xx_update_planes_on_crtc(struct intel_atomic_state *state, const struct drm_plane_helper_funcs intel_plane_helper_funcs = { .prepare_fb = intel_prepare_plane_fb, .cleanup_fb = intel_cleanup_plane_fb, - .atomic_check = intel_plane_atomic_check, }; diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h index 1437a8797e10..5cedafdddb55 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h @@ -8,7 +8,6 @@ #include <linux/types.h> -struct drm_crtc_state; struct drm_plane; struct drm_property; struct intel_atomic_state; @@ -21,12 +20,11 @@ extern const struct drm_plane_helper_funcs intel_plane_helper_funcs; unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); +void intel_plane_copy_uapi_to_hw_state(struct intel_plane_state *plane_state, + const struct intel_plane_state *from_plane_state); void intel_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); -void intel_update_slave(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state); void intel_disable_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state); struct intel_plane *intel_plane_alloc(void); @@ -42,9 +40,13 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, struct intel_plane_state *intel_state); +int intel_plane_atomic_check(struct intel_atomic_state *state, + struct intel_plane *plane); int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state, - struct drm_crtc_state *crtc_state, + struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, - struct drm_plane_state *plane_state); + struct intel_plane_state *plane_state); +bool intel_plane_calc_min_cdclk(struct intel_atomic_state *state, + struct intel_plane *plane); #endif /* __INTEL_ATOMIC_PLANE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 840daff12246..b18040793d9e 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -28,8 +28,9 @@ #include <drm/i915_component.h> #include "i915_drv.h" +#include 
"intel_atomic.h" #include "intel_audio.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_lpe_audio.h" /** @@ -72,6 +73,13 @@ struct dp_aud_n_m { u16 n; }; +struct hdmi_aud_ncts { + int sample_rate; + int clock; + int n; + int cts; +}; + /* Values according to DP 1.4 Table 2-104 */ static const struct dp_aud_n_m dp_aud_n_m[] = { { 32000, LC_162M, 1024, 10125 }, @@ -148,12 +156,7 @@ static const struct { #define TMDS_594M 594000 #define TMDS_593M 593407 -static const struct { - int sample_rate; - int clock; - int n; - int cts; -} hdmi_aud_ncts[] = { +static const struct hdmi_aud_ncts hdmi_aud_ncts_24bpp[] = { { 32000, TMDS_296M, 5824, 421875 }, { 32000, TMDS_297M, 3072, 222750 }, { 32000, TMDS_593M, 5824, 843750 }, @@ -184,11 +187,54 @@ static const struct { { 192000, TMDS_594M, 24576, 594000 }, }; +/* Appendix C - N & CTS values for deep color from HDMI 2.0 spec*/ +/* HDMI N/CTS table for 10 bit deep color(30 bpp)*/ +#define TMDS_371M 371250 +#define TMDS_370M 370878 + +static const struct hdmi_aud_ncts hdmi_aud_ncts_30bpp[] = { + { 32000, TMDS_370M, 5824, 527344 }, + { 32000, TMDS_371M, 6144, 556875 }, + { 44100, TMDS_370M, 8918, 585938 }, + { 44100, TMDS_371M, 4704, 309375 }, + { 88200, TMDS_370M, 17836, 585938 }, + { 88200, TMDS_371M, 9408, 309375 }, + { 176400, TMDS_370M, 35672, 585938 }, + { 176400, TMDS_371M, 18816, 309375 }, + { 48000, TMDS_370M, 11648, 703125 }, + { 48000, TMDS_371M, 5120, 309375 }, + { 96000, TMDS_370M, 23296, 703125 }, + { 96000, TMDS_371M, 10240, 309375 }, + { 192000, TMDS_370M, 46592, 703125 }, + { 192000, TMDS_371M, 20480, 309375 }, +}; + +/* HDMI N/CTS table for 12 bit deep color(36 bpp)*/ +#define TMDS_445_5M 445500 +#define TMDS_445M 445054 + +static const struct hdmi_aud_ncts hdmi_aud_ncts_36bpp[] = { + { 32000, TMDS_445M, 5824, 632813 }, + { 32000, TMDS_445_5M, 4096, 445500 }, + { 44100, TMDS_445M, 8918, 703125 }, + { 44100, TMDS_445_5M, 4704, 371250 }, + { 88200, TMDS_445M, 17836, 703125 }, + { 88200, TMDS_445_5M, 9408, 371250 }, + { 176400, TMDS_445M, 35672, 703125 }, + { 176400, TMDS_445_5M, 18816, 371250 }, + { 48000, TMDS_445M, 5824, 421875 }, + { 48000, TMDS_445_5M, 5120, 371250 }, + { 96000, TMDS_445M, 11648, 421875 }, + { 96000, TMDS_445_5M, 10240, 371250 }, + { 192000, TMDS_445M, 23296, 421875 }, + { 192000, TMDS_445_5M, 20480, 371250 }, +}; + /* get AUD_CONFIG_PIXEL_CLOCK_HDMI_* value for mode */ static u32 audio_config_hdmi_pixel_clock(const struct intel_crtc_state *crtc_state) { const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; int i; for (i = 0; i < ARRAY_SIZE(hdmi_audio_clock); i++) { @@ -212,14 +258,24 @@ static u32 audio_config_hdmi_pixel_clock(const struct intel_crtc_state *crtc_sta static int audio_config_hdmi_get_n(const struct intel_crtc_state *crtc_state, int rate) { - const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; - int i; + const struct hdmi_aud_ncts *hdmi_ncts_table; + int i, size; + + if (crtc_state->pipe_bpp == 36) { + hdmi_ncts_table = hdmi_aud_ncts_36bpp; + size = ARRAY_SIZE(hdmi_aud_ncts_36bpp); + } else if (crtc_state->pipe_bpp == 30) { + hdmi_ncts_table = hdmi_aud_ncts_30bpp; + size = ARRAY_SIZE(hdmi_aud_ncts_30bpp); + } else { + hdmi_ncts_table = hdmi_aud_ncts_24bpp; + size = ARRAY_SIZE(hdmi_aud_ncts_24bpp); + } - for (i = 0; i < ARRAY_SIZE(hdmi_aud_ncts); i++) { - if (rate == hdmi_aud_ncts[i].sample_rate && - adjusted_mode->crtc_clock == hdmi_aud_ncts[i].clock) { - return hdmi_aud_ncts[i].n; + for 
(i = 0; i < size; i++) { + if (rate == hdmi_ncts_table[i].sample_rate && + crtc_state->port_clock == hdmi_ncts_table[i].clock) { + return hdmi_ncts_table[i].n; } } return 0; @@ -499,14 +555,15 @@ static void ilk_audio_codec_disable(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); enum pipe pipe = crtc->pipe; enum port port = encoder->port; u32 tmp, eldv; i915_reg_t aud_config, aud_cntrl_st2; - DRM_DEBUG_KMS("Disable audio codec on port %c, pipe %c\n", - port_name(port), pipe_name(pipe)); + DRM_DEBUG_KMS("Disable audio codec on [ENCODER:%d:%s], pipe %c\n", + encoder->base.base.id, encoder->base.name, + pipe_name(pipe)); if (WARN_ON(port == PORT_A)) return; @@ -545,7 +602,7 @@ static void ilk_audio_codec_enable(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_connector *connector = conn_state->connector; enum pipe pipe = crtc->pipe; enum port port = encoder->port; @@ -554,8 +611,9 @@ static void ilk_audio_codec_enable(struct intel_encoder *encoder, int len, i; i915_reg_t hdmiw_hdmiedid, aud_config, aud_cntl_st, aud_cntrl_st2; - DRM_DEBUG_KMS("Enable audio codec on port %c, pipe %c, %u bytes ELD\n", - port_name(port), pipe_name(pipe), drm_eld_size(eld)); + DRM_DEBUG_KMS("Enable audio codec on [ENCODER:%d:%s], pipe %c, %u bytes ELD\n", + encoder->base.base.id, encoder->base.name, + pipe_name(pipe), drm_eld_size(eld)); if (WARN_ON(port == PORT_A)) return; @@ -634,10 +692,10 @@ void intel_audio_codec_enable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_connector *connector = conn_state->connector; const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; enum port port = encoder->port; enum pipe pipe = crtc->pipe; @@ -649,8 +707,8 @@ void intel_audio_codec_enable(struct intel_encoder *encoder, DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, connector->name, - connector->encoder->base.id, - connector->encoder->name); + encoder->base.base.id, + encoder->base.name); connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2; @@ -695,7 +753,7 @@ void intel_audio_codec_disable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); enum port port = encoder->port; enum pipe pipe = crtc->pipe; @@ -761,13 +819,8 @@ retry: to_intel_atomic_state(state)->cdclk.force_min_cdclk = enable ? 2 * 96000 : 0; - /* - * Protects dev_priv->cdclk.force_min_cdclk - * Need to lock this here in case we have no active pipes - * and thus wouldn't lock it during the commit otherwise. 
- */ - ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, - &ctx); + /* Protects dev_priv->cdclk.force_min_cdclk */ + ret = intel_atomic_lock_global_state(to_intel_atomic_state(state)); if (!ret) ret = drm_atomic_commit(state); @@ -795,11 +848,23 @@ static unsigned long i915_audio_component_get_power(struct device *kdev) ret = intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); - /* Force CDCLK to 2*BCLK as long as we need audio to be powered. */ - if (dev_priv->audio_power_refcount++ == 0) - if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + if (dev_priv->audio_power_refcount++ == 0) { + if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) { + I915_WRITE(AUD_FREQ_CNTRL, dev_priv->audio_freq_cntrl); + DRM_DEBUG_KMS("restored AUD_FREQ_CNTRL to 0x%x\n", + dev_priv->audio_freq_cntrl); + } + + /* Force CDCLK to 2*BCLK as long as we need audio powered. */ + if (IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, true); + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + I915_WRITE(AUD_PIN_BUF_CTL, + (I915_READ(AUD_PIN_BUF_CTL) | + AUD_PIN_BUF_ENABLE)); + } + return ret; } @@ -810,7 +875,7 @@ static void i915_audio_component_put_power(struct device *kdev, /* Stop forcing CDCLK to 2*BCLK if no need for audio to be powered. */ if (--dev_priv->audio_power_refcount == 0) - if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, false); intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO, cookie); @@ -1059,6 +1124,12 @@ static void i915_audio_component_init(struct drm_i915_private *dev_priv) return; } + if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) { + dev_priv->audio_freq_cntrl = I915_READ(AUD_FREQ_CNTRL); + DRM_DEBUG_KMS("init value of AUD_FREQ_CNTRL of 0x%x\n", + dev_priv->audio_freq_cntrl); + } + dev_priv->audio_component_registered = true; } diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 3ef4e9f573cf..ef4017a1baba 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -28,6 +28,8 @@ #include <drm/drm_dp_helper.h> #include <drm/i915_drm.h> +#include "display/intel_display.h" +#include "display/intel_display_types.h" #include "display/intel_gmbus.h" #include "i915_drv.h" @@ -57,6 +59,13 @@ * that. 
*/ +/* Wrapper for VBT child device config */ +struct display_device_data { + struct child_device_config child; + struct dsc_compression_parameters_entry *dsc; + struct list_head node; +}; + #define SLAVE_ADDR1 0x70 #define SLAVE_ADDR2 0x72 @@ -201,17 +210,12 @@ get_lvds_fp_timing(const struct bdb_header *bdb, return (const struct lvds_fp_timing *)((const u8 *)bdb + ofs); } -/* Try to find integrated panel data */ +/* Parse general panel options */ static void -parse_lfp_panel_data(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +parse_panel_options(struct drm_i915_private *dev_priv, + const struct bdb_header *bdb) { const struct bdb_lvds_options *lvds_options; - const struct bdb_lvds_lfp_data *lvds_lfp_data; - const struct bdb_lvds_lfp_data_ptrs *lvds_lfp_data_ptrs; - const struct lvds_dvo_timing *panel_dvo_timing; - const struct lvds_fp_timing *fp_timing; - struct drm_display_mode *panel_fixed_mode; int panel_type; int drrs_mode; int ret; @@ -260,6 +264,19 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("DRRS not supported (VBT input)\n"); break; } +} + +/* Try to find integrated panel timing data */ +static void +parse_lfp_panel_dtd(struct drm_i915_private *dev_priv, + const struct bdb_header *bdb) +{ + const struct bdb_lvds_lfp_data *lvds_lfp_data; + const struct bdb_lvds_lfp_data_ptrs *lvds_lfp_data_ptrs; + const struct lvds_dvo_timing *panel_dvo_timing; + const struct lvds_fp_timing *fp_timing; + struct drm_display_mode *panel_fixed_mode; + int panel_type = dev_priv->vbt.panel_type; lvds_lfp_data = find_section(bdb, BDB_LVDS_LFP_DATA); if (!lvds_lfp_data) @@ -281,7 +298,7 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv, dev_priv->vbt.lfp_lvds_vbt_mode = panel_fixed_mode; - DRM_DEBUG_KMS("Found panel mode in BIOS VBT tables:\n"); + DRM_DEBUG_KMS("Found panel mode in BIOS VBT legacy lfp table:\n"); drm_mode_debug_printmodeline(panel_fixed_mode); fp_timing = get_lvds_fp_timing(bdb, lvds_lfp_data, @@ -299,6 +316,100 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv, } static void +parse_generic_dtd(struct drm_i915_private *dev_priv, + const struct bdb_header *bdb) +{ + const struct bdb_generic_dtd *generic_dtd; + const struct generic_dtd_entry *dtd; + struct drm_display_mode *panel_fixed_mode; + int num_dtd; + + generic_dtd = find_section(bdb, BDB_GENERIC_DTD); + if (!generic_dtd) + return; + + if (generic_dtd->gdtd_size < sizeof(struct generic_dtd_entry)) { + DRM_ERROR("GDTD size %u is too small.\n", + generic_dtd->gdtd_size); + return; + } else if (generic_dtd->gdtd_size != + sizeof(struct generic_dtd_entry)) { + DRM_ERROR("Unexpected GDTD size %u\n", generic_dtd->gdtd_size); + /* DTD has unknown fields, but keep going */ + } + + num_dtd = (get_blocksize(generic_dtd) - + sizeof(struct bdb_generic_dtd)) / generic_dtd->gdtd_size; + if (dev_priv->vbt.panel_type >= num_dtd) { + DRM_ERROR("Panel type %d not found in table of %d DTD's\n", + dev_priv->vbt.panel_type, num_dtd); + return; + } + + dtd = &generic_dtd->dtd[dev_priv->vbt.panel_type]; + + panel_fixed_mode = kzalloc(sizeof(*panel_fixed_mode), GFP_KERNEL); + if (!panel_fixed_mode) + return; + + panel_fixed_mode->hdisplay = dtd->hactive; + panel_fixed_mode->hsync_start = + panel_fixed_mode->hdisplay + dtd->hfront_porch; + panel_fixed_mode->hsync_end = + panel_fixed_mode->hsync_start + dtd->hsync; + panel_fixed_mode->htotal = + panel_fixed_mode->hdisplay + dtd->hblank; + + panel_fixed_mode->vdisplay = dtd->vactive; + panel_fixed_mode->vsync_start = + 
panel_fixed_mode->vdisplay + dtd->vfront_porch; + panel_fixed_mode->vsync_end = + panel_fixed_mode->vsync_start + dtd->vsync; + panel_fixed_mode->vtotal = + panel_fixed_mode->vdisplay + dtd->vblank; + + panel_fixed_mode->clock = dtd->pixel_clock; + panel_fixed_mode->width_mm = dtd->width_mm; + panel_fixed_mode->height_mm = dtd->height_mm; + + panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED; + drm_mode_set_name(panel_fixed_mode); + + if (dtd->hsync_positive_polarity) + panel_fixed_mode->flags |= DRM_MODE_FLAG_PHSYNC; + else + panel_fixed_mode->flags |= DRM_MODE_FLAG_NHSYNC; + + if (dtd->vsync_positive_polarity) + panel_fixed_mode->flags |= DRM_MODE_FLAG_PVSYNC; + else + panel_fixed_mode->flags |= DRM_MODE_FLAG_NVSYNC; + + DRM_DEBUG_KMS("Found panel mode in BIOS VBT generic dtd table:\n"); + drm_mode_debug_printmodeline(panel_fixed_mode); + + dev_priv->vbt.lfp_lvds_vbt_mode = panel_fixed_mode; +} + +static void +parse_panel_dtd(struct drm_i915_private *dev_priv, + const struct bdb_header *bdb) +{ + /* + * Older VBTs provided provided DTD information for internal displays + * through the "LFP panel DTD" block (42). As of VBT revision 229, + * that block is now deprecated and DTD information should be provided + * via a newer "generic DTD" block (58). Just to be safe, we'll + * try the new generic DTD block first on VBT >= 229, but still fall + * back to trying the old LFP block if that fails. + */ + if (bdb->version >= 229) + parse_generic_dtd(dev_priv, bdb); + if (!dev_priv->vbt.lfp_lvds_vbt_mode) + parse_lfp_panel_dtd(dev_priv, bdb); +} + +static void parse_lfp_backlight(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) { @@ -448,8 +559,9 @@ static void parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, u8 bdb_version) { struct sdvo_device_mapping *mapping; + const struct display_device_data *devdata; const struct child_device_config *child; - int i, count = 0; + int count = 0; /* * Only parse SDVO mappings on gens that could have SDVO. This isn't @@ -460,8 +572,8 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, u8 bdb_version) return; } - for (i = 0, count = 0; i < dev_priv->vbt.child_dev_num; i++) { - child = dev_priv->vbt.child_dev + i; + list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) { + child = &devdata->child; if (child->slave_addr != SLAVE_ADDR1 && child->slave_addr != SLAVE_ADDR2) { @@ -551,16 +663,45 @@ parse_driver_features(struct drm_i915_private *dev_priv, dev_priv->vbt.int_lvds_support = 0; } - DRM_DEBUG_KMS("DRRS State Enabled:%d\n", driver->drrs_enabled); + if (bdb->version < 228) { + DRM_DEBUG_KMS("DRRS State Enabled:%d\n", driver->drrs_enabled); + /* + * If DRRS is not supported, drrs_type has to be set to 0. + * This is because, VBT is configured in such a way that + * static DRRS is 0 and DRRS not supported is represented by + * driver->drrs_enabled=false + */ + if (!driver->drrs_enabled) + dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED; + + dev_priv->vbt.psr.enable = driver->psr_enabled; + } +} + +static void +parse_power_conservation_features(struct drm_i915_private *dev_priv, + const struct bdb_header *bdb) +{ + const struct bdb_lfp_power *power; + u8 panel_type = dev_priv->vbt.panel_type; + + if (bdb->version < 228) + return; + + power = find_section(bdb, BDB_LVDS_POWER); + if (!power) + return; + + dev_priv->vbt.psr.enable = power->psr & BIT(panel_type); + /* * If DRRS is not supported, drrs_type has to be set to 0. 
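parse_generic_dtd() above builds a drm_display_mode by summing the DTD's porch, sync and blank fields onto the active size, and assigns the DTD pixel clock straight to the mode's clock field (drm_display_mode.clock is in kHz). A user-space sketch of the same sums with an illustrative 1920x1080@60 entry; the struct and the numbers are hypothetical, not a real VBT entry:

#include <stdio.h>

/* Hypothetical subset of the generic DTD fields used above. */
struct dtd_example {
        int pixel_clock;        /* kHz */
        int hactive, hblank, hfront_porch, hsync;
        int vactive, vblank, vfront_porch, vsync;
};

int main(void)
{
        /* Illustrative CEA 1920x1080@60 timings. */
        struct dtd_example dtd = {
                .pixel_clock = 148500,
                .hactive = 1920, .hblank = 280, .hfront_porch = 88, .hsync = 44,
                .vactive = 1080, .vblank = 45,  .vfront_porch = 4,  .vsync = 5,
        };
        int hsync_start = dtd.hactive + dtd.hfront_porch;
        int hsync_end = hsync_start + dtd.hsync;
        int htotal = dtd.hactive + dtd.hblank;
        int vsync_start = dtd.vactive + dtd.vfront_porch;
        int vsync_end = vsync_start + dtd.vsync;
        int vtotal = dtd.vactive + dtd.vblank;

        printf("h: %d %d %d %d, v: %d %d %d %d\n",
               dtd.hactive, hsync_start, hsync_end, htotal,
               dtd.vactive, vsync_start, vsync_end, vtotal);
        /* consistency check: 148500 kHz over a 2200x1125 raster is 60 Hz */
        printf("refresh ~= %d Hz\n",
               dtd.pixel_clock * 1000 / (htotal * vtotal));
        return 0;
}

The final print is a quick sanity check on the assumed numbers: 148500 kHz divided by the 2200x1125 total raster gives exactly 60 Hz.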
* This is because, VBT is configured in such a way that * static DRRS is 0 and DRRS not supported is represented by - * driver->drrs_enabled=false + * power->drrs & BIT(panel_type)=false */ - if (!driver->drrs_enabled) + if (!(power->drrs & BIT(panel_type))) dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED; - dev_priv->vbt.psr.enable = driver->psr_enabled; } static void @@ -1229,6 +1370,57 @@ err: memset(dev_priv->vbt.dsi.sequence, 0, sizeof(dev_priv->vbt.dsi.sequence)); } +static void +parse_compression_parameters(struct drm_i915_private *i915, + const struct bdb_header *bdb) +{ + const struct bdb_compression_parameters *params; + struct display_device_data *devdata; + const struct child_device_config *child; + u16 block_size; + int index; + + if (bdb->version < 198) + return; + + params = find_section(bdb, BDB_COMPRESSION_PARAMETERS); + if (params) { + /* Sanity checks */ + if (params->entry_size != sizeof(params->data[0])) { + DRM_DEBUG_KMS("VBT: unsupported compression param entry size\n"); + return; + } + + block_size = get_blocksize(params); + if (block_size < sizeof(*params)) { + DRM_DEBUG_KMS("VBT: expected 16 compression param entries\n"); + return; + } + } + + list_for_each_entry(devdata, &i915->vbt.display_devices, node) { + child = &devdata->child; + + if (!child->compression_enable) + continue; + + if (!params) { + DRM_DEBUG_KMS("VBT: compression params not available\n"); + continue; + } + + if (child->compression_method_cps) { + DRM_DEBUG_KMS("VBT: CPS compression not supported\n"); + continue; + } + + index = child->compression_structure_index; + + devdata->dsc = kmemdup(¶ms->data[index], + sizeof(*devdata->dsc), GFP_KERNEL); + } +} + static u8 translate_iboost(u8 val) { static const u8 mapping[] = { 1, 3, 7 }; /* See VBT spec */ @@ -1245,7 +1437,7 @@ static enum port get_port_by_ddc_pin(struct drm_i915_private *i915, u8 ddc_pin) const struct ddi_vbt_port_info *info; enum port port; - for (port = PORT_A; port < I915_MAX_PORTS; port++) { + for_each_port(port) { info = &i915->vbt.ddi_port_info[port]; if (info->child && ddc_pin == info->alternate_ddc_pin) @@ -1269,7 +1461,7 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, " "disabling port %c DVI/HDMI support\n", port_name(port), info->alternate_ddc_pin, - port_name(p), port_name(port)); + port_name(p), port_name(p)); /* * If we have multiple ports supposedly sharing the @@ -1277,9 +1469,14 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, * port. Otherwise they share the same ddc bin and * system couldn't communicate with them separately. * - * Give child device order the priority, first come first - * served. + * Give inverse child device order the priority, + * last one wins. Yes, there are real machines + * (eg. 
Asrock B250M-HDV) where VBT has both + * port A and port E with the same AUX ch and + * we must pick port E :( */ + info = &dev_priv->vbt.ddi_port_info[p]; + info->supports_dvi = false; info->supports_hdmi = false; info->alternate_ddc_pin = 0; @@ -1291,7 +1488,7 @@ static enum port get_port_by_aux_ch(struct drm_i915_private *i915, u8 aux_ch) const struct ddi_vbt_port_info *info; enum port port; - for (port = PORT_A; port < I915_MAX_PORTS; port++) { + for_each_port(port) { info = &i915->vbt.ddi_port_info[port]; if (info->child && aux_ch == info->alternate_aux_channel) @@ -1315,7 +1512,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, " "disabling port %c DP support\n", port_name(port), info->alternate_aux_channel, - port_name(p), port_name(port)); + port_name(p), port_name(p)); /* * If we have multiple ports supposedlt sharing the @@ -1323,9 +1520,14 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, * port. Otherwise they share the same aux channel * and system couldn't communicate with them separately. * - * Give child device order the priority, first come first - * served. + * Give inverse child device order the priority, + * last one wins. Yes, there are real machines + * (eg. Asrock B250M-HDV) where VBT has both + * port A and port E with the same AUX ch and + * we must pick port E :( */ + info = &dev_priv->vbt.ddi_port_info[p]; + info->supports_dp = false; info->alternate_aux_channel = 0; } @@ -1342,16 +1544,13 @@ static const u8 cnp_ddc_pin_map[] = { static const u8 icp_ddc_pin_map[] = { [ICL_DDC_BUS_DDI_A] = GMBUS_PIN_1_BXT, [ICL_DDC_BUS_DDI_B] = GMBUS_PIN_2_BXT, + [TGL_DDC_BUS_DDI_C] = GMBUS_PIN_3_BXT, [ICL_DDC_BUS_PORT_1] = GMBUS_PIN_9_TC1_ICP, [ICL_DDC_BUS_PORT_2] = GMBUS_PIN_10_TC2_ICP, [ICL_DDC_BUS_PORT_3] = GMBUS_PIN_11_TC3_ICP, [ICL_DDC_BUS_PORT_4] = GMBUS_PIN_12_TC4_ICP, -}; - -static const u8 mcc_ddc_pin_map[] = { - [MCC_DDC_BUS_DDI_A] = GMBUS_PIN_1_BXT, - [MCC_DDC_BUS_DDI_B] = GMBUS_PIN_2_BXT, - [MCC_DDC_BUS_DDI_C] = GMBUS_PIN_9_TC1_ICP, + [TGL_DDC_BUS_PORT_5] = GMBUS_PIN_13_TC5_TGP, + [TGL_DDC_BUS_PORT_6] = GMBUS_PIN_14_TC6_TGP, }; static u8 map_ddc_pin(struct drm_i915_private *dev_priv, u8 vbt_pin) @@ -1359,10 +1558,7 @@ static u8 map_ddc_pin(struct drm_i915_private *dev_priv, u8 vbt_pin) const u8 *ddc_pin_map; int n_entries; - if (HAS_PCH_MCC(dev_priv)) { - ddc_pin_map = mcc_ddc_pin_map; - n_entries = ARRAY_SIZE(mcc_ddc_pin_map); - } else if (HAS_PCH_ICP(dev_priv)) { + if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) { ddc_pin_map = icp_ddc_pin_map; n_entries = ARRAY_SIZE(icp_ddc_pin_map); } else if (HAS_PCH_CNP(dev_priv)) { @@ -1394,6 +1590,7 @@ static enum port dvo_port_to_port(u8 dvo_port) [PORT_D] = { DVO_PORT_HDMID, DVO_PORT_DPD, -1}, [PORT_E] = { DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE}, [PORT_F] = { DVO_PORT_HDMIF, DVO_PORT_DPF, -1}, + [PORT_G] = { DVO_PORT_HDMIG, DVO_PORT_DPG, -1}, }; enum port port; int i; @@ -1412,9 +1609,10 @@ static enum port dvo_port_to_port(u8 dvo_port) } static void parse_ddi_port(struct drm_i915_private *dev_priv, - const struct child_device_config *child, + struct display_device_data *devdata, u8 bdb_version) { + const struct child_device_config *child = &devdata->child; struct ddi_vbt_port_info *info; bool is_dvi, is_hdmi, is_dp, is_edp, is_crt; enum port port; @@ -1437,7 +1635,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, is_hdmi = is_dvi && (child->device_type & DEVICE_TYPE_NOT_HDMI_OUTPUT) == 0; is_edp = is_dp && 
(child->device_type & DEVICE_TYPE_INTERNAL_CONNECTOR); - if (port == PORT_A && is_dvi) { + if (port == PORT_A && is_dvi && INTEL_GEN(dev_priv) < 12) { DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n", is_hdmi ? "/HDMI" : ""); is_dvi = false; @@ -1455,26 +1653,11 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, if (bdb_version >= 209) info->supports_tbt = child->tbt; - DRM_DEBUG_KMS("Port %c VBT info: CRT:%d DVI:%d HDMI:%d DP:%d eDP:%d LSPCON:%d USB-Type-C:%d TBT:%d\n", + DRM_DEBUG_KMS("Port %c VBT info: CRT:%d DVI:%d HDMI:%d DP:%d eDP:%d LSPCON:%d USB-Type-C:%d TBT:%d DSC:%d\n", port_name(port), is_crt, is_dvi, is_hdmi, is_dp, is_edp, HAS_LSPCON(dev_priv) && child->lspcon, - info->supports_typec_usb, info->supports_tbt); - - if (is_edp && is_dvi) - DRM_DEBUG_KMS("Internal DP port %c is TMDS compatible\n", - port_name(port)); - if (is_crt && port != PORT_E) - DRM_DEBUG_KMS("Port %c is analog\n", port_name(port)); - if (is_crt && (is_dvi || is_dp)) - DRM_DEBUG_KMS("Analog port %c is also DP or TMDS compatible\n", - port_name(port)); - if (is_dvi && (port == PORT_A || port == PORT_E)) - DRM_DEBUG_KMS("Port %c is TMDS compatible\n", port_name(port)); - if (!is_dvi && !is_dp && !is_crt) - DRM_DEBUG_KMS("Port %c is not DP/TMDS/CRT compatible\n", - port_name(port)); - if (is_edp && (port == PORT_B || port == PORT_C || port == PORT_E)) - DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port)); + info->supports_typec_usb, info->supports_tbt, + devdata->dsc != NULL); if (is_dvi) { u8 ddc_pin; @@ -1503,6 +1686,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, port_name(port), hdmi_level_shift); info->hdmi_level_shift = hdmi_level_shift; + info->hdmi_level_shift_set = true; } if (bdb_version >= 204) { @@ -1565,8 +1749,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version) { - const struct child_device_config *child; - int i; + struct display_device_data *devdata; if (!HAS_DDI(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; @@ -1574,11 +1757,8 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version) if (bdb_version < 155) return; - for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - child = dev_priv->vbt.child_dev + i; - - parse_ddi_port(dev_priv, child, bdb_version); - } + list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) + parse_ddi_port(dev_priv, devdata, bdb_version); } static void @@ -1586,8 +1766,9 @@ parse_general_definitions(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) { const struct bdb_general_definitions *defs; + struct display_device_data *devdata; const struct child_device_config *child; - int i, child_device_num, count; + int i, child_device_num; u8 expected_size; u16 block_size; int bus_pin; @@ -1620,7 +1801,7 @@ parse_general_definitions(struct drm_i915_private *dev_priv, expected_size = 37; } else if (bdb->version <= 215) { expected_size = 38; - } else if (bdb->version <= 216) { + } else if (bdb->version <= 229) { expected_size = 39; } else { expected_size = sizeof(*child); @@ -1643,48 +1824,38 @@ parse_general_definitions(struct drm_i915_private *dev_priv, /* get the number of child device */ child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size; - count = 0; - /* get the number of child device that is present */ - for (i = 0; i < child_device_num; i++) { - child = child_device_ptr(defs, i); - if (!child->device_type) - continue; - count++; - } - if (!count) { - 
DRM_DEBUG_KMS("no child dev is parsed from VBT\n"); - return; - } - dev_priv->vbt.child_dev = kcalloc(count, sizeof(*child), GFP_KERNEL); - if (!dev_priv->vbt.child_dev) { - DRM_DEBUG_KMS("No memory space for child device\n"); - return; - } - dev_priv->vbt.child_dev_num = count; - count = 0; for (i = 0; i < child_device_num; i++) { child = child_device_ptr(defs, i); if (!child->device_type) continue; + DRM_DEBUG_KMS("Found VBT child device with type 0x%x\n", + child->device_type); + + devdata = kzalloc(sizeof(*devdata), GFP_KERNEL); + if (!devdata) + break; + /* * Copy as much as we know (sizeof) and is available - * (child_dev_size) of the child device. Accessing the data must - * depend on VBT version. + * (child_dev_size) of the child device config. Accessing the + * data must depend on VBT version. */ - memcpy(dev_priv->vbt.child_dev + count, child, + memcpy(&devdata->child, child, min_t(size_t, defs->child_dev_size, sizeof(*child))); - count++; + + list_add_tail(&devdata->node, &dev_priv->vbt.display_devices); } + + if (list_empty(&dev_priv->vbt.display_devices)) + DRM_DEBUG_KMS("no child dev is parsed from VBT\n"); } /* Common defaults which may be overridden by VBT. */ static void init_vbt_defaults(struct drm_i915_private *dev_priv) { - enum port port; - dev_priv->vbt.crt_ddc_pin = GMBUS_PIN_VGADDC; /* Default to having backlight */ @@ -1712,13 +1883,6 @@ init_vbt_defaults(struct drm_i915_private *dev_priv) dev_priv->vbt.lvds_ssc_freq = intel_bios_ssc_frequency(dev_priv, !HAS_PCH_SPLIT(dev_priv)); DRM_DEBUG_KMS("Set default to SSC at %d kHz\n", dev_priv->vbt.lvds_ssc_freq); - - for (port = PORT_A; port < I915_MAX_PORTS; port++) { - struct ddi_vbt_port_info *info = - &dev_priv->vbt.ddi_port_info[port]; - - info->hdmi_level_shift = HDMI_LEVEL_SHIFT_UNKNOWN; - } } /* Defaults to initialize only if there is no VBT. */ @@ -1727,15 +1891,16 @@ init_vbt_missing_defaults(struct drm_i915_private *dev_priv) { enum port port; - for (port = PORT_A; port < I915_MAX_PORTS; port++) { + for_each_port(port) { struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; + enum phy phy = intel_port_to_phy(dev_priv, port); /* * VBT has the TypeC mode (native,TBT/USB) and we don't want * to detect it. */ - if (intel_port_is_tc(dev_priv, port)) + if (intel_phy_is_tc(dev_priv, phy)) continue; info->supports_dvi = (port != PORT_A && port != PORT_E); @@ -1777,6 +1942,13 @@ bool intel_bios_is_valid_vbt(const void *buf, size_t size) return false; } + if (vbt->vbt_size > size) { + DRM_DEBUG_DRIVER("VBT incomplete (vbt_size overflows)\n"); + return false; + } + + size = vbt->vbt_size; + if (range_overflows_t(size_t, vbt->bdb_offset, sizeof(struct bdb_header), @@ -1794,28 +1966,61 @@ bool intel_bios_is_valid_vbt(const void *buf, size_t size) return vbt; } -static const struct vbt_header *find_vbt(void __iomem *bios, size_t size) +static struct vbt_header *oprom_get_vbt(struct drm_i915_private *dev_priv) { - size_t i; + struct pci_dev *pdev = dev_priv->drm.pdev; + void __iomem *p = NULL, *oprom; + struct vbt_header *vbt; + u16 vbt_size; + size_t i, size; - /* Scour memory looking for the VBT signature. */ - for (i = 0; i + 4 < size; i++) { - void *vbt; + oprom = pci_map_rom(pdev, &size); + if (!oprom) + return NULL; - if (ioread32(bios + i) != *((const u32 *) "$VBT")) + /* Scour memory looking for the VBT signature. 
*/ + for (i = 0; i + 4 < size; i += 4) { + if (ioread32(oprom + i) != *((const u32 *)"$VBT")) continue; - /* - * This is the one place where we explicitly discard the address - * space (__iomem) of the BIOS/VBT. - */ - vbt = (void __force *) bios + i; - if (intel_bios_is_valid_vbt(vbt, size - i)) - return vbt; - + p = oprom + i; + size -= i; break; } + if (!p) + goto err_unmap_oprom; + + if (sizeof(struct vbt_header) > size) { + DRM_DEBUG_DRIVER("VBT header incomplete\n"); + goto err_unmap_oprom; + } + + vbt_size = ioread16(p + offsetof(struct vbt_header, vbt_size)); + if (vbt_size > size) { + DRM_DEBUG_DRIVER("VBT incomplete (vbt_size overflows)\n"); + goto err_unmap_oprom; + } + + /* The rest will be validated by intel_bios_is_valid_vbt() */ + vbt = kmalloc(vbt_size, GFP_KERNEL); + if (!vbt) + goto err_unmap_oprom; + + memcpy_fromio(vbt, p, vbt_size); + + if (!intel_bios_is_valid_vbt(vbt, vbt_size)) + goto err_free_vbt; + + pci_unmap_rom(pdev, oprom); + + return vbt; + +err_free_vbt: + kfree(vbt); +err_unmap_oprom: + pci_unmap_rom(pdev, oprom); + return NULL; } @@ -1829,12 +2034,13 @@ static const struct vbt_header *find_vbt(void __iomem *bios, size_t size) */ void intel_bios_init(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev_priv->drm.pdev; const struct vbt_header *vbt = dev_priv->opregion.vbt; + struct vbt_header *oprom_vbt = NULL; const struct bdb_header *bdb; - u8 __iomem *bios = NULL; - if (!HAS_DISPLAY(dev_priv)) { + INIT_LIST_HEAD(&dev_priv->vbt.display_devices); + + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) { DRM_DEBUG_KMS("Skipping VBT init due to disabled display.\n"); return; } @@ -1843,15 +2049,11 @@ void intel_bios_init(struct drm_i915_private *dev_priv) /* If the OpRegion does not have VBT, look in PCI ROM. 
*/ if (!vbt) { - size_t size; - - bios = pci_map_rom(pdev, &size); - if (!bios) + oprom_vbt = oprom_get_vbt(dev_priv); + if (!oprom_vbt) goto out; - vbt = find_vbt(bios, size); - if (!vbt) - goto out; + vbt = oprom_vbt; DRM_DEBUG_KMS("Found valid VBT in PCI ROM\n"); } @@ -1864,15 +2066,20 @@ void intel_bios_init(struct drm_i915_private *dev_priv) /* Grab useful general definitions */ parse_general_features(dev_priv, bdb); parse_general_definitions(dev_priv, bdb); - parse_lfp_panel_data(dev_priv, bdb); + parse_panel_options(dev_priv, bdb); + parse_panel_dtd(dev_priv, bdb); parse_lfp_backlight(dev_priv, bdb); parse_sdvo_panel_data(dev_priv, bdb); parse_driver_features(dev_priv, bdb); + parse_power_conservation_features(dev_priv, bdb); parse_edp(dev_priv, bdb); parse_psr(dev_priv, bdb); parse_mipi_config(dev_priv, bdb); parse_mipi_sequence(dev_priv, bdb); + /* Depends on child device list */ + parse_compression_parameters(dev_priv, bdb); + /* Further processing on pre-parsed data */ parse_sdvo_device_mapping(dev_priv, bdb->version); parse_ddi_ports(dev_priv, bdb->version); @@ -1883,19 +2090,23 @@ out: init_vbt_missing_defaults(dev_priv); } - if (bios) - pci_unmap_rom(pdev, bios); + kfree(oprom_vbt); } /** - * intel_bios_cleanup - Free any resources allocated by intel_bios_init() + * intel_bios_driver_remove - Free any resources allocated by intel_bios_init() * @dev_priv: i915 device instance */ -void intel_bios_cleanup(struct drm_i915_private *dev_priv) +void intel_bios_driver_remove(struct drm_i915_private *dev_priv) { - kfree(dev_priv->vbt.child_dev); - dev_priv->vbt.child_dev = NULL; - dev_priv->vbt.child_dev_num = 0; + struct display_device_data *devdata, *n; + + list_for_each_entry_safe(devdata, n, &dev_priv->vbt.display_devices, node) { + list_del(&devdata->node); + kfree(devdata->dsc); + kfree(devdata); + } + kfree(dev_priv->vbt.sdvo_lvds_vbt_mode); dev_priv->vbt.sdvo_lvds_vbt_mode = NULL; kfree(dev_priv->vbt.lfp_lvds_vbt_mode); @@ -1919,17 +2130,18 @@ void intel_bios_cleanup(struct drm_i915_private *dev_priv) */ bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv) { + const struct display_device_data *devdata; const struct child_device_config *child; - int i; if (!dev_priv->vbt.int_tv_support) return false; - if (!dev_priv->vbt.child_dev_num) + if (list_empty(&dev_priv->vbt.display_devices)) return true; - for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - child = dev_priv->vbt.child_dev + i; + list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) { + child = &devdata->child; + /* * If the device type is not TV, continue. */ @@ -1961,14 +2173,14 @@ bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv) */ bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin) { + const struct display_device_data *devdata; const struct child_device_config *child; - int i; - if (!dev_priv->vbt.child_dev_num) + if (list_empty(&dev_priv->vbt.display_devices)) return true; - for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - child = dev_priv->vbt.child_dev + i; + list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) { + child = &devdata->child; /* If the device type is not LFP, continue. 
* We have to check both the new identifiers as well as the @@ -2010,6 +2222,7 @@ bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin) */ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port) { + const struct display_device_data *devdata; const struct child_device_config *child; static const struct { u16 dp, hdmi; @@ -2020,7 +2233,6 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, }, [PORT_F] = { DVO_PORT_DPF, DVO_PORT_HDMIF, }, }; - int i; if (HAS_DDI(dev_priv)) { const struct ddi_vbt_port_info *port_info = @@ -2035,11 +2247,8 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por if (WARN_ON(port == PORT_A) || port >= ARRAY_SIZE(port_mapping)) return false; - if (!dev_priv->vbt.child_dev_num) - return false; - - for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - child = dev_priv->vbt.child_dev + i; + list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) { + child = &devdata->child; if ((child->dvo_port == port_mapping[port].dp || child->dvo_port == port_mapping[port].hdmi) && @@ -2060,6 +2269,7 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por */ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port) { + const struct display_device_data *devdata; const struct child_device_config *child; static const short port_mapping[] = { [PORT_B] = DVO_PORT_DPB, @@ -2068,16 +2278,12 @@ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port) [PORT_E] = DVO_PORT_DPE, [PORT_F] = DVO_PORT_DPF, }; - int i; if (HAS_DDI(dev_priv)) return dev_priv->vbt.ddi_port_info[port].supports_edp; - if (!dev_priv->vbt.child_dev_num) - return false; - - for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - child = dev_priv->vbt.child_dev + i; + list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) { + child = &devdata->child; if (child->dvo_port == port_mapping[port] && (child->device_type & DEVICE_TYPE_eDP_BITS) == @@ -2126,13 +2332,10 @@ static bool child_dev_is_dp_dual_mode(const struct child_device_config *child, bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port) { - const struct child_device_config *child; - int i; - - for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - child = dev_priv->vbt.child_dev + i; + const struct display_device_data *devdata; - if (child_dev_is_dp_dual_mode(child, port)) + list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) { + if (child_dev_is_dp_dual_mode(&devdata->child, port)) return true; } @@ -2149,12 +2352,12 @@ bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port) { + const struct display_device_data *devdata; const struct child_device_config *child; u8 dvo_port; - int i; - for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - child = dev_priv->vbt.child_dev + i; + list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) { + child = &devdata->child; if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT)) continue; @@ -2178,6 +2381,104 @@ bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, return false; } +static void fill_dsc(struct intel_crtc_state *crtc_state, + struct dsc_compression_parameters_entry *dsc, + int dsc_max_bpc) +{ + struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; + int bpc = 8; + + vdsc_cfg->dsc_version_major = 
dsc->version_major; + vdsc_cfg->dsc_version_minor = dsc->version_minor; + + if (dsc->support_12bpc && dsc_max_bpc >= 12) + bpc = 12; + else if (dsc->support_10bpc && dsc_max_bpc >= 10) + bpc = 10; + else if (dsc->support_8bpc && dsc_max_bpc >= 8) + bpc = 8; + else + DRM_DEBUG_KMS("VBT: Unsupported BPC %d for DCS\n", + dsc_max_bpc); + + crtc_state->pipe_bpp = bpc * 3; + + crtc_state->dsc.compressed_bpp = min(crtc_state->pipe_bpp, + VBT_DSC_MAX_BPP(dsc->max_bpp)); + + /* + * FIXME: This is ugly, and slice count should take DSC engine + * throughput etc. into account. + * + * Also, per spec DSI supports 1, 2, 3 or 4 horizontal slices. + */ + if (dsc->slices_per_line & BIT(2)) { + crtc_state->dsc.slice_count = 4; + } else if (dsc->slices_per_line & BIT(1)) { + crtc_state->dsc.slice_count = 2; + } else { + /* FIXME */ + if (!(dsc->slices_per_line & BIT(0))) + DRM_DEBUG_KMS("VBT: Unsupported DSC slice count for DSI\n"); + + crtc_state->dsc.slice_count = 1; + } + + if (crtc_state->hw.adjusted_mode.crtc_hdisplay % + crtc_state->dsc.slice_count != 0) + DRM_DEBUG_KMS("VBT: DSC hdisplay %d not divisible by slice count %d\n", + crtc_state->hw.adjusted_mode.crtc_hdisplay, + crtc_state->dsc.slice_count); + + /* + * FIXME: Use VBT rc_buffer_block_size and rc_buffer_size for the + * implementation specific physical rate buffer size. Currently we use + * the required rate buffer model size calculated in + * drm_dsc_compute_rc_parameters() according to VESA DSC Annex E. + * + * The VBT rc_buffer_block_size and rc_buffer_size definitions + * correspond to DP 1.4 DPCD offsets 0x62 and 0x63. The DP DSC + * implementation should also use the DPCD (or perhaps VBT for eDP) + * provided value for the buffer size. + */ + + /* FIXME: DSI spec says bpc + 1 for this one */ + vdsc_cfg->line_buf_depth = VBT_DSC_LINE_BUFFER_DEPTH(dsc->line_buffer_depth); + + vdsc_cfg->block_pred_enable = dsc->block_prediction_enable; + + vdsc_cfg->slice_height = dsc->slice_height; +} + +/* FIXME: initially DSI specific */ +bool intel_bios_get_dsc_params(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + int dsc_max_bpc) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + const struct display_device_data *devdata; + const struct child_device_config *child; + + list_for_each_entry(devdata, &i915->vbt.display_devices, node) { + child = &devdata->child; + + if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT)) + continue; + + if (child->dvo_port - DVO_PORT_MIPIA == encoder->port) { + if (!devdata->dsc) + return false; + + if (crtc_state) + fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc); + + return true; + } + } + + return false; +} + /** * intel_bios_is_port_hpd_inverted - is HPD inverted for %port * @i915: i915 device instance @@ -2249,6 +2550,9 @@ enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, case DP_AUX_F: aux_ch = AUX_CH_F; break; + case DP_AUX_G: + aux_ch = AUX_CH_G; + break; default: MISSING_CASE(info->alternate_aux_channel); aux_ch = AUX_CH_A; diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index 4e42cfaf61a7..d6a0c29d37ac 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -1,5 +1,5 @@ /* - * Copyright © 2016 Intel Corporation + * Copyright © 2016-2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -35,6 +35,9 @@ #include <drm/i915_drm.h> struct 
drm_i915_private; +struct intel_crtc_state; +struct intel_encoder; +enum port; enum intel_backlight_type { INTEL_BACKLIGHT_PMIC, @@ -42,6 +45,7 @@ enum intel_backlight_type { INTEL_BACKLIGHT_DISPLAY_DDI, INTEL_BACKLIGHT_DSI_DCS, INTEL_BACKLIGHT_PANEL_DRIVER_INTERFACE, + INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE, }; struct edp_power_seq { @@ -227,7 +231,7 @@ struct mipi_pps_data { } __packed; void intel_bios_init(struct drm_i915_private *dev_priv); -void intel_bios_cleanup(struct drm_i915_private *dev_priv); +void intel_bios_driver_remove(struct drm_i915_private *dev_priv); bool intel_bios_is_valid_vbt(const void *buf, size_t size); bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv); bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin); @@ -240,5 +244,8 @@ bool intel_bios_is_port_hpd_inverted(const struct drm_i915_private *i915, bool intel_bios_is_lspcon_present(const struct drm_i915_private *i915, enum port port); enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, enum port port); +bool intel_bios_get_dsc_params(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + int dsc_max_bpc); #endif /* _INTEL_BIOS_H_ */ diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 7b908e10d32e..b228671d5a5d 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -6,7 +6,7 @@ #include <drm/drm_atomic_state_helper.h> #include "intel_bw.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_sideband.h" /* Parameters for Qclk Geyserville (QGV) */ @@ -15,7 +15,7 @@ struct intel_qgv_point { }; struct intel_qgv_info { - struct intel_qgv_point points[3]; + struct intel_qgv_point points[I915_NUM_QGV_POINTS]; u8 num_points; u8 num_channels; u8 t_bl; @@ -35,28 +35,54 @@ static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv, if (ret) return ret; - switch (val & 0xf) { - case 0: - qi->dram_type = INTEL_DRAM_DDR4; - break; - case 1: - qi->dram_type = INTEL_DRAM_DDR3; - break; - case 2: - qi->dram_type = INTEL_DRAM_LPDDR3; - break; - case 3: - qi->dram_type = INTEL_DRAM_LPDDR3; - break; - default: - MISSING_CASE(val & 0xf); - break; + if (IS_GEN(dev_priv, 12)) { + switch (val & 0xf) { + case 0: + qi->dram_type = INTEL_DRAM_DDR4; + break; + case 3: + qi->dram_type = INTEL_DRAM_LPDDR4; + break; + case 4: + qi->dram_type = INTEL_DRAM_DDR3; + break; + case 5: + qi->dram_type = INTEL_DRAM_LPDDR3; + break; + default: + MISSING_CASE(val & 0xf); + break; + } + } else if (IS_GEN(dev_priv, 11)) { + switch (val & 0xf) { + case 0: + qi->dram_type = INTEL_DRAM_DDR4; + break; + case 1: + qi->dram_type = INTEL_DRAM_DDR3; + break; + case 2: + qi->dram_type = INTEL_DRAM_LPDDR3; + break; + case 3: + qi->dram_type = INTEL_DRAM_LPDDR4; + break; + default: + MISSING_CASE(val & 0xf); + break; + } + } else { + MISSING_CASE(INTEL_GEN(dev_priv)); + qi->dram_type = INTEL_DRAM_LPDDR3; /* Conservative default */ } qi->num_channels = (val & 0xf0) >> 4; qi->num_points = (val & 0xf00) >> 8; - qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 8; + if (IS_GEN(dev_priv, 12)) + qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 16; + else if (IS_GEN(dev_priv, 11)) + qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 
4 : 8; return 0; } @@ -65,7 +91,7 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, struct intel_qgv_point *sp, int point) { - u32 val = 0, val2; + u32 val = 0, val2 = 0; int ret; ret = sandybridge_pcode_read(dev_priv, @@ -132,20 +158,25 @@ static int icl_sagv_max_dclk(const struct intel_qgv_info *qi) } struct intel_sa_info { - u8 deburst, mpagesize, deprogbwlimit, displayrtids; + u16 displayrtids; + u8 deburst, deprogbwlimit; }; static const struct intel_sa_info icl_sa_info = { .deburst = 8, - .mpagesize = 16, .deprogbwlimit = 25, /* GB/s */ .displayrtids = 128, }; -static int icl_get_bw_info(struct drm_i915_private *dev_priv) +static const struct intel_sa_info tgl_sa_info = { + .deburst = 16, + .deprogbwlimit = 34, /* GB/s */ + .displayrtids = 256, +}; + +static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) { struct intel_qgv_info qi = {}; - const struct intel_sa_info *sa = &icl_sa_info; bool is_y_tile = true; /* assume y tile may be used */ int num_channels; int deinterleave; @@ -233,24 +264,41 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, void intel_bw_init_hw(struct drm_i915_private *dev_priv) { - if (IS_GEN(dev_priv, 11)) - icl_get_bw_info(dev_priv); + if (!HAS_DISPLAY(dev_priv)) + return; + + if (IS_GEN(dev_priv, 12)) + icl_get_bw_info(dev_priv, &tgl_sa_info); + else if (IS_GEN(dev_priv, 11)) + icl_get_bw_info(dev_priv, &icl_sa_info); } static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv, int num_planes) { - if (IS_GEN(dev_priv, 11)) + if (INTEL_GEN(dev_priv) >= 11) { + /* + * Any bw group has same amount of QGV points + */ + const struct intel_bw_info *bi = + &dev_priv->max_bw[0]; + unsigned int min_bw = UINT_MAX; + int i; + /* * FIXME with SAGV disabled maybe we can assume * point 1 will always be used? Seems to match * the behaviour observed in the wild. 
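With num_qgv_points no longer hard-coded to three, the max-data-rate computation in this hunk becomes a plain minimum over however many QGV points the platform reports, instead of the old min3(). A standalone sketch of that reduction; the per-point limits are a made-up array standing in for icl_max_bw().

#include <limits.h>
#include <stdio.h>

/* Made-up per-QGV-point bandwidth limits (MB/s); in the driver each value
 * would come from icl_max_bw(dev_priv, num_planes, i). */
static const unsigned int qgv_max_bw[] = { 38400, 25600, 12800, 21300 };

/*
 * The usable budget is the worst case over all points, since SAGV may put
 * the memory subsystem at any of them while the display is running.
 */
static unsigned int max_data_rate(const unsigned int *bw, int num_points)
{
	unsigned int min_bw = UINT_MAX;
	int i;

	for (i = 0; i < num_points; i++)
		min_bw = bw[i] < min_bw ? bw[i] : min_bw;

	return min_bw;
}

int main(void)
{
	printf("usable bandwidth: %u MB/s\n", max_data_rate(qgv_max_bw, 4));
	return 0;
}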
*/ - return min3(icl_max_bw(dev_priv, num_planes, 0), - icl_max_bw(dev_priv, num_planes, 1), - icl_max_bw(dev_priv, num_planes, 2)); - else + for (i = 0; i < bi->num_qgv_points; i++) { + unsigned int bw = icl_max_bw(dev_priv, num_planes, i); + + min_bw = min(bw, min_bw); + } + return min_bw; + } else { return UINT_MAX; + } } static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state) @@ -264,7 +312,7 @@ static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_stat static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); unsigned int data_rate = 0; enum plane_id plane_id; @@ -285,7 +333,7 @@ static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_ void intel_bw_crtc_update(struct intel_bw_state *bw_state, const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); bw_state->data_rate[crtc->pipe] = intel_bw_crtc_data_rate(crtc_state); @@ -322,6 +370,20 @@ static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv, return data_rate; } +static struct intel_bw_state * +intel_atomic_get_bw_state(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct drm_private_state *bw_state; + + bw_state = drm_atomic_get_private_obj_state(&state->base, + &dev_priv->bw_obj); + if (IS_ERR(bw_state)) + return ERR_CAST(bw_state); + + return to_intel_bw_state(bw_state); +} + int intel_bw_atomic_check(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); @@ -424,3 +486,8 @@ int intel_bw_init(struct drm_i915_private *dev_priv) return 0; } + +void intel_bw_cleanup(struct drm_i915_private *dev_priv) +{ + drm_atomic_private_obj_fini(&dev_priv->bw_obj); +} diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index e9d9c6d63bc3..20b9ad241802 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -8,7 +8,6 @@ #include <drm/drm_atomic.h> -#include "i915_drv.h" #include "intel_display.h" struct drm_i915_private; @@ -24,22 +23,9 @@ struct intel_bw_state { #define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base) -static inline struct intel_bw_state * -intel_atomic_get_bw_state(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct drm_private_state *bw_state; - - bw_state = drm_atomic_get_private_obj_state(&state->base, - &dev_priv->bw_obj); - if (IS_ERR(bw_state)) - return ERR_CAST(bw_state); - - return to_intel_bw_state(bw_state); -} - void intel_bw_init_hw(struct drm_i915_private *dev_priv); int intel_bw_init(struct drm_i915_private *dev_priv); +void intel_bw_cleanup(struct drm_i915_private *dev_priv); int intel_bw_atomic_check(struct intel_atomic_state *state); void intel_bw_crtc_update(struct intel_bw_state *bw_state, const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 0d19bbd08122..0ce5926006ca 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -21,8 +21,9 @@ * DEALINGS IN THE SOFTWARE. 
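intel_atomic_get_bw_state() moves out of the header into intel_bw.c, but the to_intel_bw_state() downcast it relies on stays a plain container_of() over the embedded base object. A tiny self-contained illustration of that embed-and-downcast pattern; the struct names and fields here are invented for the sketch.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Invented stand-ins for drm_private_state / intel_bw_state. */
struct private_state {
	int serial;
};

struct bw_state {
	struct private_state base;	/* embedded base object */
	unsigned int data_rate[4];	/* per-pipe data rate, kB/s */
};

#define to_bw_state(x) container_of((x), struct bw_state, base)

int main(void)
{
	struct bw_state bw = { .base = { .serial = 7 }, .data_rate = { 1000 } };
	struct private_state *base = &bw.base;	/* what the core hands back */

	/* Recover the wrapper from the generic base pointer. */
	printf("data_rate[0] = %u\n", to_bw_state(base)->data_rate[0]);
	return 0;
}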
*/ +#include "intel_atomic.h" #include "intel_cdclk.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_sideband.h" /** @@ -545,10 +546,10 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, /* There are cases where we can end up here with power domains * off and a CDCLK frequency other than the minimum, like when * issuing a modeset without actually changing any display after - * a system suspend. So grab the PIPE-A domain, which covers + * a system suspend. So grab the display core domain, which covers * the HW blocks needed for the following programming. */ - wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_DISPLAY_CORE); vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_CCK) | @@ -606,7 +607,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, vlv_program_pfi_credits(dev_priv); - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); } static void chv_set_cdclk(struct drm_i915_private *dev_priv, @@ -631,10 +632,10 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, /* There are cases where we can end up here with power domains * off and a CDCLK frequency other than the minimum, like when * issuing a modeset without actually changing any display after - * a system suspend. So grab the PIPE-A domain, which covers + * a system suspend. So grab the display core domain, which covers * the HW blocks needed for the following programming. */ - wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_DISPLAY_CORE); vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); @@ -653,7 +654,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, vlv_program_pfi_credits(dev_priv); - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); } static int bdw_calc_cdclk(int min_cdclk) @@ -969,9 +970,7 @@ static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); - if (intel_wait_for_register(&dev_priv->uncore, - LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, - 5)) + if (intel_de_wait_for_set(dev_priv, LCPLL1_CTL, LCPLL_PLL_LOCK, 5)) DRM_ERROR("DPLL0 not locked\n"); dev_priv->cdclk.hw.vco = vco; @@ -983,9 +982,7 @@ static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) static void skl_dpll0_disable(struct drm_i915_private *dev_priv) { I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); - if (intel_wait_for_register(&dev_priv->uncore, - LCPLL1_CTL, LCPLL_PLL_LOCK, 0, - 1)) + if (intel_de_wait_for_clear(dev_priv, LCPLL1_CTL, LCPLL_PLL_LOCK, 1)) DRM_ERROR("Couldn't disable DPLL0\n"); dev_priv->cdclk.hw.vco = 0; @@ -1165,28 +1162,88 @@ static void skl_uninit_cdclk(struct drm_i915_private *dev_priv) skl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } -static int bxt_calc_cdclk(int min_cdclk) -{ - if (min_cdclk > 576000) - return 624000; - else if (min_cdclk > 384000) - return 576000; - else if (min_cdclk > 288000) - return 384000; - else if (min_cdclk > 144000) - return 288000; - else - return 144000; +static const struct intel_cdclk_vals bxt_cdclk_table[] = { + { .refclk = 19200, .cdclk = 144000, .divider = 8, .ratio = 60 }, + { .refclk = 19200, .cdclk = 288000, .divider = 4, .ratio = 60 }, + { .refclk = 19200, 
.cdclk = 384000, .divider = 3, .ratio = 60 }, + { .refclk = 19200, .cdclk = 576000, .divider = 2, .ratio = 60 }, + { .refclk = 19200, .cdclk = 624000, .divider = 2, .ratio = 65 }, + {} +}; + +static const struct intel_cdclk_vals glk_cdclk_table[] = { + { .refclk = 19200, .cdclk = 79200, .divider = 8, .ratio = 33 }, + { .refclk = 19200, .cdclk = 158400, .divider = 4, .ratio = 33 }, + { .refclk = 19200, .cdclk = 316800, .divider = 2, .ratio = 33 }, + {} +}; + +static const struct intel_cdclk_vals cnl_cdclk_table[] = { + { .refclk = 19200, .cdclk = 168000, .divider = 4, .ratio = 35 }, + { .refclk = 19200, .cdclk = 336000, .divider = 2, .ratio = 35 }, + { .refclk = 19200, .cdclk = 528000, .divider = 2, .ratio = 55 }, + + { .refclk = 24000, .cdclk = 168000, .divider = 4, .ratio = 28 }, + { .refclk = 24000, .cdclk = 336000, .divider = 2, .ratio = 28 }, + { .refclk = 24000, .cdclk = 528000, .divider = 2, .ratio = 44 }, + {} +}; + +static const struct intel_cdclk_vals icl_cdclk_table[] = { + { .refclk = 19200, .cdclk = 172800, .divider = 2, .ratio = 18 }, + { .refclk = 19200, .cdclk = 192000, .divider = 2, .ratio = 20 }, + { .refclk = 19200, .cdclk = 307200, .divider = 2, .ratio = 32 }, + { .refclk = 19200, .cdclk = 326400, .divider = 4, .ratio = 68 }, + { .refclk = 19200, .cdclk = 556800, .divider = 2, .ratio = 58 }, + { .refclk = 19200, .cdclk = 652800, .divider = 2, .ratio = 68 }, + + { .refclk = 24000, .cdclk = 180000, .divider = 2, .ratio = 15 }, + { .refclk = 24000, .cdclk = 192000, .divider = 2, .ratio = 16 }, + { .refclk = 24000, .cdclk = 312000, .divider = 2, .ratio = 26 }, + { .refclk = 24000, .cdclk = 324000, .divider = 4, .ratio = 54 }, + { .refclk = 24000, .cdclk = 552000, .divider = 2, .ratio = 46 }, + { .refclk = 24000, .cdclk = 648000, .divider = 2, .ratio = 54 }, + + { .refclk = 38400, .cdclk = 172800, .divider = 2, .ratio = 9 }, + { .refclk = 38400, .cdclk = 192000, .divider = 2, .ratio = 10 }, + { .refclk = 38400, .cdclk = 307200, .divider = 2, .ratio = 16 }, + { .refclk = 38400, .cdclk = 326400, .divider = 4, .ratio = 34 }, + { .refclk = 38400, .cdclk = 556800, .divider = 2, .ratio = 29 }, + { .refclk = 38400, .cdclk = 652800, .divider = 2, .ratio = 34 }, + {} +}; + +static int bxt_calc_cdclk(struct drm_i915_private *dev_priv, int min_cdclk) +{ + const struct intel_cdclk_vals *table = dev_priv->cdclk.table; + int i; + + for (i = 0; table[i].refclk; i++) + if (table[i].refclk == dev_priv->cdclk.hw.ref && + table[i].cdclk >= min_cdclk) + return table[i].cdclk; + + WARN(1, "Cannot satisfy minimum cdclk %d with refclk %u\n", + min_cdclk, dev_priv->cdclk.hw.ref); + return 0; } -static int glk_calc_cdclk(int min_cdclk) +static int bxt_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { - if (min_cdclk > 158400) - return 316800; - else if (min_cdclk > 79200) - return 158400; - else - return 79200; + const struct intel_cdclk_vals *table = dev_priv->cdclk.table; + int i; + + if (cdclk == dev_priv->cdclk.hw.bypass) + return 0; + + for (i = 0; table[i].refclk; i++) + if (table[i].refclk == dev_priv->cdclk.hw.ref && + table[i].cdclk == cdclk) + return dev_priv->cdclk.hw.ref * table[i].ratio; + + WARN(1, "cdclk %d not valid for refclk %u\n", + cdclk, dev_priv->cdclk.hw.ref); + return 0; } static u8 bxt_calc_voltage_level(int cdclk) @@ -1194,69 +1251,101 @@ static u8 bxt_calc_voltage_level(int cdclk) return DIV_ROUND_UP(cdclk, 25000); } -static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +static u8 cnl_calc_voltage_level(int cdclk) { - int ratio; - - if 
(cdclk == dev_priv->cdclk.hw.bypass) + if (cdclk > 336000) + return 2; + else if (cdclk > 168000) + return 1; + else return 0; +} - switch (cdclk) { - default: - MISSING_CASE(cdclk); - /* fall through */ - case 144000: - case 288000: - case 384000: - case 576000: - ratio = 60; - break; - case 624000: - ratio = 65; - break; - } +static u8 icl_calc_voltage_level(int cdclk) +{ + if (cdclk > 556800) + return 2; + else if (cdclk > 312000) + return 1; + else + return 0; +} - return dev_priv->cdclk.hw.ref * ratio; +static u8 ehl_calc_voltage_level(int cdclk) +{ + if (cdclk > 326400) + return 3; + else if (cdclk > 312000) + return 2; + else if (cdclk > 180000) + return 1; + else + return 0; } -static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +static void cnl_readout_refclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - int ratio; + if (I915_READ(SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz) + cdclk_state->ref = 24000; + else + cdclk_state->ref = 19200; +} - if (cdclk == dev_priv->cdclk.hw.bypass) - return 0; +static void icl_readout_refclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 dssm = I915_READ(SKL_DSSM) & ICL_DSSM_CDCLK_PLL_REFCLK_MASK; - switch (cdclk) { + switch (dssm) { default: - MISSING_CASE(cdclk); + MISSING_CASE(dssm); /* fall through */ - case 79200: - case 158400: - case 316800: - ratio = 33; + case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz: + cdclk_state->ref = 24000; + break; + case ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz: + cdclk_state->ref = 19200; + break; + case ICL_DSSM_CDCLK_PLL_REFCLK_38_4MHz: + cdclk_state->ref = 38400; break; } - - return dev_priv->cdclk.hw.ref * ratio; } -static void bxt_de_pll_update(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) +static void bxt_de_pll_readout(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - u32 val; + u32 val, ratio; - cdclk_state->ref = 19200; - cdclk_state->vco = 0; + if (INTEL_GEN(dev_priv) >= 11) + icl_readout_refclk(dev_priv, cdclk_state); + else if (IS_CANNONLAKE(dev_priv)) + cnl_readout_refclk(dev_priv, cdclk_state); + else + cdclk_state->ref = 19200; val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) + if ((val & BXT_DE_PLL_PLL_ENABLE) == 0 || + (val & BXT_DE_PLL_LOCK) == 0) { + /* + * CDCLK PLL is disabled, the VCO/ratio doesn't matter, but + * setting it to zero is a way to signal that. + */ + cdclk_state->vco = 0; return; + } - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; + /* + * CNL+ have the ratio directly in the PLL enable register, gen9lp had + * it in a separate PLL control register. 
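The per-platform cdclk if/else ladders are replaced by a lookup over the intel_cdclk_vals tables introduced above: bxt_calc_cdclk() picks the first (lowest) entry for the current reference clock that satisfies the minimum, and bxt_calc_cdclk_pll_vco() derives the PLL VCO as refclk * ratio. A compact standalone sketch of that lookup using the BXT table values; the struct and function names are simplified stand-ins.

#include <stdio.h>

struct cdclk_vals {
	int refclk;	/* kHz */
	int cdclk;	/* kHz */
	int divider;	/* vco / cdclk (kept for fidelity, unused here) */
	int ratio;	/* PLL ratio */
};

/* Entries are ordered by ascending cdclk; a zero refclk terminates. */
static const struct cdclk_vals bxt_table[] = {
	{ .refclk = 19200, .cdclk = 144000, .divider = 8, .ratio = 60 },
	{ .refclk = 19200, .cdclk = 288000, .divider = 4, .ratio = 60 },
	{ .refclk = 19200, .cdclk = 384000, .divider = 3, .ratio = 60 },
	{ .refclk = 19200, .cdclk = 576000, .divider = 2, .ratio = 60 },
	{ .refclk = 19200, .cdclk = 624000, .divider = 2, .ratio = 65 },
	{}
};

/* Smallest table entry that satisfies @min_cdclk for @refclk, or 0. */
static int calc_cdclk(const struct cdclk_vals *t, int refclk, int min_cdclk)
{
	for (; t->refclk; t++)
		if (t->refclk == refclk && t->cdclk >= min_cdclk)
			return t->cdclk;
	return 0;
}

/* PLL VCO needed for an exact @cdclk at @refclk: refclk * ratio, or 0. */
static int calc_cdclk_pll_vco(const struct cdclk_vals *t, int refclk, int cdclk)
{
	for (; t->refclk; t++)
		if (t->refclk == refclk && t->cdclk == cdclk)
			return refclk * t->ratio;
	return 0;
}

int main(void)
{
	int cdclk = calc_cdclk(bxt_table, 19200, 300000);

	printf("cdclk %d kHz, vco %d kHz\n",
	       cdclk, calc_cdclk_pll_vco(bxt_table, 19200, cdclk));
	return 0;
}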
+ */ + if (INTEL_GEN(dev_priv) >= 10) + ratio = val & CNL_CDCLK_PLL_RATIO_MASK; + else + ratio = I915_READ(BXT_DE_PLL_CTL) & BXT_DE_PLL_RATIO_MASK; - val = I915_READ(BXT_DE_PLL_CTL); - cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; + cdclk_state->vco = ratio * cdclk_state->ref; } static void bxt_get_cdclk(struct drm_i915_private *dev_priv, @@ -1265,12 +1354,19 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, u32 divider; int div; - bxt_de_pll_update(dev_priv, cdclk_state); + bxt_de_pll_readout(dev_priv, cdclk_state); - cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref; + if (INTEL_GEN(dev_priv) >= 12) + cdclk_state->bypass = cdclk_state->ref / 2; + else if (INTEL_GEN(dev_priv) >= 11) + cdclk_state->bypass = 50000; + else + cdclk_state->bypass = cdclk_state->ref; - if (cdclk_state->vco == 0) + if (cdclk_state->vco == 0) { + cdclk_state->cdclk = cdclk_state->bypass; goto out; + } divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; @@ -1279,13 +1375,15 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, div = 2; break; case BXT_CDCLK_CD2X_DIV_SEL_1_5: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + WARN(IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10, + "Unsupported divider\n"); div = 3; break; case BXT_CDCLK_CD2X_DIV_SEL_2: div = 4; break; case BXT_CDCLK_CD2X_DIV_SEL_4: + WARN(INTEL_GEN(dev_priv) >= 10, "Unsupported divider\n"); div = 8; break; default: @@ -1301,7 +1399,7 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, * at least what the CDCLK frequency requires. */ cdclk_state->voltage_level = - bxt_calc_voltage_level(cdclk_state->cdclk); + dev_priv->display.calc_voltage_level(cdclk_state->cdclk); } static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) @@ -1309,9 +1407,8 @@ static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) I915_WRITE(BXT_DE_PLL_ENABLE, 0); /* Timeout 200us */ - if (intel_wait_for_register(&dev_priv->uncore, - BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, - 1)) + if (intel_de_wait_for_clear(dev_priv, + BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) DRM_ERROR("timeout waiting for DE PLL unlock\n"); dev_priv->cdclk.hw.vco = 0; @@ -1330,269 +1427,13 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); /* Timeout 200us */ - if (intel_wait_for_register(&dev_priv->uncore, - BXT_DE_PLL_ENABLE, - BXT_DE_PLL_LOCK, - BXT_DE_PLL_LOCK, - 1)) + if (intel_de_wait_for_set(dev_priv, + BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) DRM_ERROR("timeout waiting for DE PLL lock\n"); dev_priv->cdclk.hw.vco = vco; } -static void bxt_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state, - enum pipe pipe) -{ - int cdclk = cdclk_state->cdclk; - int vco = cdclk_state->vco; - u32 val, divider; - int ret; - - /* cdclk = vco / 2 / div{1,1.5,2,4} */ - switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - default: - WARN_ON(cdclk != dev_priv->cdclk.hw.bypass); - WARN_ON(vco != 0); - /* fall through */ - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - case 3: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 8: - divider = BXT_CDCLK_CD2X_DIV_SEL_4; - break; - } - - /* - * Inform power controller of upcoming frequency change. BSpec - * requires us to wait up to 150usec, but that leads to timeouts; - * the 2ms used here is based on experiment. 
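Voltage level selection stays a simple threshold ladder, but each platform gets its own (cnl/icl/ehl above) and the chosen one is hung off dev_priv->display.calc_voltage_level, so the shared bxt paths never need platform checks. A trivial sketch of that function-pointer pattern; the hook struct here is invented and the levels are simply the values later handed to the PCU.

#include <stdio.h>

/* Threshold ladders copied from the diff above. */
static int cnl_calc_voltage_level(int cdclk)
{
	if (cdclk > 336000)
		return 2;
	else if (cdclk > 168000)
		return 1;
	else
		return 0;
}

static int icl_calc_voltage_level(int cdclk)
{
	if (cdclk > 556800)
		return 2;
	else if (cdclk > 312000)
		return 1;
	else
		return 0;
}

/* Invented stand-in for the dev_priv->display.calc_voltage_level hook. */
struct display_funcs {
	int (*calc_voltage_level)(int cdclk);
};

int main(void)
{
	struct display_funcs icl = { .calc_voltage_level = icl_calc_voltage_level };
	struct display_funcs cnl = { .calc_voltage_level = cnl_calc_voltage_level };

	printf("icl@652800 -> %d, cnl@528000 -> %d\n",
	       icl.calc_voltage_level(652800), cnl.calc_voltage_level(528000));
	return 0;
}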
- */ - ret = sandybridge_pcode_write_timeout(dev_priv, - HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000, 150, 2); - if (ret) { - DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", - ret, cdclk); - return; - } - - if (dev_priv->cdclk.hw.vco != 0 && - dev_priv->cdclk.hw.vco != vco) - bxt_de_pll_disable(dev_priv); - - if (dev_priv->cdclk.hw.vco != vco) - bxt_de_pll_enable(dev_priv, vco); - - val = divider | skl_cdclk_decimal(cdclk); - if (pipe == INVALID_PIPE) - val |= BXT_CDCLK_CD2X_PIPE_NONE; - else - val |= BXT_CDCLK_CD2X_PIPE(pipe); - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (cdclk >= 500000) - val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - I915_WRITE(CDCLK_CTL, val); - - if (pipe != INVALID_PIPE) - intel_wait_for_vblank(dev_priv, pipe); - - /* - * The timeout isn't specified, the 2ms used here is based on - * experiment. - * FIXME: Waiting for the request completion could be delayed until - * the next PCODE request based on BSpec. - */ - ret = sandybridge_pcode_write_timeout(dev_priv, - HSW_PCODE_DE_WRITE_FREQ_REQ, - cdclk_state->voltage_level, 150, 2); - if (ret) { - DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", - ret, cdclk); - return; - } - - intel_update_cdclk(dev_priv); -} - -static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - u32 cdctl, expected; - - intel_update_cdclk(dev_priv); - intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); - - if (dev_priv->cdclk.hw.vco == 0 || - dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Some BIOS versions leave an incorrect decimal frequency value and - * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, - * so sanitize this register. - */ - cdctl = I915_READ(CDCLK_CTL); - /* - * Let's ignore the pipe field, since BIOS could have configured the - * dividers both synching to an active pipe, or asynchronously - * (PIPE_NONE). - */ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; - - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (dev_priv->cdclk.hw.cdclk >= 500000) - expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk.hw.cdclk = 0; - - /* force full PLL disable + enable */ - dev_priv->cdclk.hw.vco = -1; -} - -static void bxt_init_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state cdclk_state; - - bxt_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk.hw.cdclk != 0 && - dev_priv->cdclk.hw.vco != 0) - return; - - cdclk_state = dev_priv->cdclk.hw; - - /* - * FIXME: - * - The initial CDCLK needs to be read from VBT. - * Need to make this change after VBT has changes for BXT. 
- */ - if (IS_GEMINILAKE(dev_priv)) { - cdclk_state.cdclk = glk_calc_cdclk(0); - cdclk_state.vco = glk_de_pll_vco(dev_priv, cdclk_state.cdclk); - } else { - cdclk_state.cdclk = bxt_calc_cdclk(0); - cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk); - } - cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); - - bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); -} - -static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; - - cdclk_state.cdclk = cdclk_state.bypass; - cdclk_state.vco = 0; - cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); - - bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); -} - -static int cnl_calc_cdclk(int min_cdclk) -{ - if (min_cdclk > 336000) - return 528000; - else if (min_cdclk > 168000) - return 336000; - else - return 168000; -} - -static u8 cnl_calc_voltage_level(int cdclk) -{ - if (cdclk > 336000) - return 2; - else if (cdclk > 168000) - return 1; - else - return 0; -} - -static void cnl_cdclk_pll_update(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) -{ - u32 val; - - if (I915_READ(SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz) - cdclk_state->ref = 24000; - else - cdclk_state->ref = 19200; - - cdclk_state->vco = 0; - - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; - - cdclk_state->vco = (val & CNL_CDCLK_PLL_RATIO_MASK) * cdclk_state->ref; -} - -static void cnl_get_cdclk(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) -{ - u32 divider; - int div; - - cnl_cdclk_pll_update(dev_priv, cdclk_state); - - cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref; - - if (cdclk_state->vco == 0) - goto out; - - divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; - - switch (divider) { - case BXT_CDCLK_CD2X_DIV_SEL_1: - div = 2; - break; - case BXT_CDCLK_CD2X_DIV_SEL_2: - div = 4; - break; - default: - MISSING_CASE(divider); - return; - } - - cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); - - out: - /* - * Can't read this out :( Let's assume it's - * at least what the CDCLK frequency requires. 
- */ - cdclk_state->voltage_level = - cnl_calc_voltage_level(cdclk_state->cdclk); -} - static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) { u32 val; @@ -1626,7 +1467,27 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk.hw.vco = vco; } -static void cnl_set_cdclk(struct drm_i915_private *dev_priv, +static u32 bxt_cdclk_cd2x_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) +{ + if (INTEL_GEN(dev_priv) >= 12) { + if (pipe == INVALID_PIPE) + return TGL_CDCLK_CD2X_PIPE_NONE; + else + return TGL_CDCLK_CD2X_PIPE(pipe); + } else if (INTEL_GEN(dev_priv) >= 11) { + if (pipe == INVALID_PIPE) + return ICL_CDCLK_CD2X_PIPE_NONE; + else + return ICL_CDCLK_CD2X_PIPE(pipe); + } else { + if (pipe == INVALID_PIPE) + return BXT_CDCLK_CD2X_PIPE_NONE; + else + return BXT_CDCLK_CD2X_PIPE(pipe); + } +} + +static void bxt_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state, enum pipe pipe) { @@ -1635,17 +1496,28 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider; int ret; - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); + /* Inform power controller of upcoming frequency change. */ + if (INTEL_GEN(dev_priv) >= 10) + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + else + /* + * BSpec requires us to wait up to 150usec, but that leads to + * timeouts; the 2ms used here is based on experiment. + */ + ret = sandybridge_pcode_write_timeout(dev_priv, + HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000, 150, 2); + if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); + DRM_ERROR("Failed to inform PCU about cdclk change (err %d, freq %d)\n", + ret, cdclk); return; } - /* cdclk = vco / 2 / div{1,2} */ + /* cdclk = vco / 2 / div{1,1.5,2,4} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { default: WARN_ON(cdclk != dev_priv->cdclk.hw.bypass); @@ -1654,67 +1526,87 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, case 2: divider = BXT_CDCLK_CD2X_DIV_SEL_1; break; + case 3: + WARN(IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10, + "Unsupported divider\n"); + divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; + break; case 4: divider = BXT_CDCLK_CD2X_DIV_SEL_2; break; + case 8: + WARN(INTEL_GEN(dev_priv) >= 10, "Unsupported divider\n"); + divider = BXT_CDCLK_CD2X_DIV_SEL_4; + break; } - if (dev_priv->cdclk.hw.vco != 0 && - dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_disable(dev_priv); - - if (dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_enable(dev_priv, vco); + if (INTEL_GEN(dev_priv) >= 10) { + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + cnl_cdclk_pll_disable(dev_priv); - val = divider | skl_cdclk_decimal(cdclk); - if (pipe == INVALID_PIPE) - val |= BXT_CDCLK_CD2X_PIPE_NONE; - else - val |= BXT_CDCLK_CD2X_PIPE(pipe); - I915_WRITE(CDCLK_CTL, val); + if (dev_priv->cdclk.hw.vco != vco) + cnl_cdclk_pll_enable(dev_priv, vco); - if (pipe != INVALID_PIPE) - intel_wait_for_vblank(dev_priv, pipe); + } else { + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_disable(dev_priv); - /* inform PCU of the change */ - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, - cdclk_state->voltage_level); + if (dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_enable(dev_priv, vco); + } - intel_update_cdclk(dev_priv); + 
val = divider | skl_cdclk_decimal(cdclk) | + bxt_cdclk_cd2x_pipe(dev_priv, pipe); /* - * Can't read out the voltage level :( - * Let's just assume everything is as expected. + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. */ - dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; -} + if (IS_GEN9_LP(dev_priv) && cdclk >= 500000) + val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + I915_WRITE(CDCLK_CTL, val); -static int cnl_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; + if (pipe != INVALID_PIPE) + intel_wait_for_vblank(dev_priv, pipe); - if (cdclk == dev_priv->cdclk.hw.bypass) - return 0; + if (INTEL_GEN(dev_priv) >= 10) { + ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, + cdclk_state->voltage_level); + } else { + /* + * The timeout isn't specified, the 2ms used here is based on + * experiment. + * FIXME: Waiting for the request completion could be delayed + * until the next PCODE request based on BSpec. + */ + ret = sandybridge_pcode_write_timeout(dev_priv, + HSW_PCODE_DE_WRITE_FREQ_REQ, + cdclk_state->voltage_level, + 150, 2); + } - switch (cdclk) { - default: - MISSING_CASE(cdclk); - /* fall through */ - case 168000: - case 336000: - ratio = dev_priv->cdclk.hw.ref == 19200 ? 35 : 28; - break; - case 528000: - ratio = dev_priv->cdclk.hw.ref == 19200 ? 55 : 44; - break; + if (ret) { + DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", + ret, cdclk); + return; } - return dev_priv->cdclk.hw.ref * ratio; + intel_update_cdclk(dev_priv); + + if (INTEL_GEN(dev_priv) >= 10) + /* + * Can't read out the voltage level :( + * Let's just assume everything is as expected. + */ + dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; } -static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) +static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) { u32 cdctl, expected; + int cdclk, vco; intel_update_cdclk(dev_priv); intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); @@ -1735,239 +1627,65 @@ static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) * dividers both synching to an active pipe, or asynchronously * (PIPE_NONE). 
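The unified bxt_set_cdclk() shown above derives the CD2X divider field purely from the vco/cdclk ratio, warning where a platform does not support a given divider. A standalone sketch of just that mapping; the register field values are symbolic and DIV_ROUND_CLOSEST is open-coded for the sketch.

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))

/* Symbolic stand-ins for the BXT_CDCLK_CD2X_DIV_SEL_* encodings. */
enum cd2x_div_sel { DIV_SEL_1, DIV_SEL_1_5, DIV_SEL_2, DIV_SEL_4, DIV_SEL_INVALID };

/* cdclk = vco / 2 / {1, 1.5, 2, 4}, so vco/cdclk selects the encoding. */
static enum cd2x_div_sel cdclk_cd2x_div_sel(int vco, int cdclk)
{
	switch (DIV_ROUND_CLOSEST(vco, cdclk)) {
	case 2:
		return DIV_SEL_1;
	case 3:
		return DIV_SEL_1_5;	/* not valid on GLK or gen10+ */
	case 4:
		return DIV_SEL_2;
	case 8:
		return DIV_SEL_4;	/* not valid on gen10+ */
	default:
		return DIV_SEL_INVALID;
	}
}

int main(void)
{
	/* 1152000 / 384000 == 3, i.e. the "1.5" divider encoding */
	printf("div_sel = %d\n", cdclk_cd2x_div_sel(1152000, 384000));
	return 0;
}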
*/ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; - - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); - - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; + cdctl &= ~bxt_cdclk_cd2x_pipe(dev_priv, INVALID_PIPE); -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk.hw.cdclk = 0; - - /* force full PLL disable + enable */ - dev_priv->cdclk.hw.vco = -1; -} - -static int icl_calc_cdclk(int min_cdclk, unsigned int ref) -{ - int ranges_24[] = { 312000, 552000, 648000 }; - int ranges_19_38[] = { 307200, 556800, 652800 }; - int *ranges; - - switch (ref) { - default: - MISSING_CASE(ref); - /* fall through */ - case 24000: - ranges = ranges_24; - break; - case 19200: - case 38400: - ranges = ranges_19_38; - break; - } - - if (min_cdclk > ranges[1]) - return ranges[2]; - else if (min_cdclk > ranges[0]) - return ranges[1]; - else - return ranges[0]; -} + /* Make sure this is a legal cdclk value for the platform */ + cdclk = bxt_calc_cdclk(dev_priv, dev_priv->cdclk.hw.cdclk); + if (cdclk != dev_priv->cdclk.hw.cdclk) + goto sanitize; -static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; + /* Make sure the VCO is correct for the cdclk */ + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); + if (vco != dev_priv->cdclk.hw.vco) + goto sanitize; - if (cdclk == dev_priv->cdclk.hw.bypass) - return 0; + expected = skl_cdclk_decimal(cdclk); - switch (cdclk) { - default: - MISSING_CASE(cdclk); - /* fall through */ - case 307200: - case 556800: - case 652800: - WARN_ON(dev_priv->cdclk.hw.ref != 19200 && - dev_priv->cdclk.hw.ref != 38400); + /* Figure out what CD2X divider we should be using for this cdclk */ + switch (DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.vco, + dev_priv->cdclk.hw.cdclk)) { + case 2: + expected |= BXT_CDCLK_CD2X_DIV_SEL_1; break; - case 312000: - case 552000: - case 648000: - WARN_ON(dev_priv->cdclk.hw.ref != 24000); - } - - ratio = cdclk / (dev_priv->cdclk.hw.ref / 2); - - return dev_priv->cdclk.hw.ref * ratio; -} - -static void icl_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state, - enum pipe pipe) -{ - unsigned int cdclk = cdclk_state->cdclk; - unsigned int vco = cdclk_state->vco; - int ret; - - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); - if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); - return; - } - - if (dev_priv->cdclk.hw.vco != 0 && - dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_disable(dev_priv); - - if (dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_enable(dev_priv, vco); - - /* - * On ICL CD2X_DIV can only be 1, so we'll never end up changing the - * divider here synchronized to a pipe while CDCLK is on, nor will we - * need the corresponding vblank wait. - */ - I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE | - skl_cdclk_decimal(cdclk)); - - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, - cdclk_state->voltage_level); - - intel_update_cdclk(dev_priv); - - /* - * Can't read out the voltage level :( - * Let's just assume everything is as expected. 
- */ - dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; -} - -static u8 icl_calc_voltage_level(int cdclk) -{ - if (cdclk > 556800) - return 2; - else if (cdclk > 312000) - return 1; - else - return 0; -} - -static void icl_get_cdclk(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) -{ - u32 val; - - cdclk_state->bypass = 50000; - - val = I915_READ(SKL_DSSM); - switch (val & ICL_DSSM_CDCLK_PLL_REFCLK_MASK) { - default: - MISSING_CASE(val); - /* fall through */ - case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz: - cdclk_state->ref = 24000; + case 3: + expected |= BXT_CDCLK_CD2X_DIV_SEL_1_5; break; - case ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz: - cdclk_state->ref = 19200; + case 4: + expected |= BXT_CDCLK_CD2X_DIV_SEL_2; break; - case ICL_DSSM_CDCLK_PLL_REFCLK_38_4MHz: - cdclk_state->ref = 38400; + case 8: + expected |= BXT_CDCLK_CD2X_DIV_SEL_4; break; + default: + goto sanitize; } - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0 || - (val & BXT_DE_PLL_LOCK) == 0) { - /* - * CDCLK PLL is disabled, the VCO/ratio doesn't matter, but - * setting it to zero is a way to signal that. - */ - cdclk_state->vco = 0; - cdclk_state->cdclk = cdclk_state->bypass; - goto out; - } - - cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; - - val = I915_READ(CDCLK_CTL); - WARN_ON((val & BXT_CDCLK_CD2X_DIV_SEL_MASK) != 0); - - cdclk_state->cdclk = cdclk_state->vco / 2; - -out: /* - * Can't read this out :( Let's assume it's - * at least what the CDCLK frequency requires. + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. */ - cdclk_state->voltage_level = - icl_calc_voltage_level(cdclk_state->cdclk); -} - -static void icl_init_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state sanitized_state; - u32 val; - - /* This sets dev_priv->cdclk.hw. */ - intel_update_cdclk(dev_priv); - intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); - - /* This means CDCLK disabled. 
*/ - if (dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass) - goto sanitize; - - val = I915_READ(CDCLK_CTL); - - if ((val & BXT_CDCLK_CD2X_DIV_SEL_MASK) != 0) - goto sanitize; - - if ((val & CDCLK_FREQ_DECIMAL_MASK) != - skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk)) - goto sanitize; + if (IS_GEN9_LP(dev_priv) && dev_priv->cdclk.hw.cdclk >= 500000) + expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - return; + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; sanitize: DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - sanitized_state.ref = dev_priv->cdclk.hw.ref; - sanitized_state.cdclk = icl_calc_cdclk(0, sanitized_state.ref); - sanitized_state.vco = icl_calc_cdclk_pll_vco(dev_priv, - sanitized_state.cdclk); - sanitized_state.voltage_level = - icl_calc_voltage_level(sanitized_state.cdclk); - - icl_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); -} - -static void icl_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; - - cdclk_state.cdclk = cdclk_state.bypass; - cdclk_state.vco = 0; - cdclk_state.voltage_level = icl_calc_voltage_level(cdclk_state.cdclk); + /* force cdclk programming */ + dev_priv->cdclk.hw.cdclk = 0; - icl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); + /* force full PLL disable + enable */ + dev_priv->cdclk.hw.vco = -1; } -static void cnl_init_cdclk(struct drm_i915_private *dev_priv) +static void bxt_init_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state; - cnl_sanitize_cdclk(dev_priv); + bxt_sanitize_cdclk(dev_priv); if (dev_priv->cdclk.hw.cdclk != 0 && dev_priv->cdclk.hw.vco != 0) @@ -1975,22 +1693,29 @@ static void cnl_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state = dev_priv->cdclk.hw; - cdclk_state.cdclk = cnl_calc_cdclk(0); - cdclk_state.vco = cnl_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); - cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); + /* + * FIXME: + * - The initial CDCLK needs to be read from VBT. + * Need to make this change after VBT has changes for BXT. 
+ */ + cdclk_state.cdclk = bxt_calc_cdclk(dev_priv, 0); + cdclk_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); + cdclk_state.voltage_level = + dev_priv->display.calc_voltage_level(cdclk_state.cdclk); - cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); + bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } -static void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) +static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; cdclk_state.cdclk = cdclk_state.bypass; cdclk_state.vco = 0; - cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); + cdclk_state.voltage_level = + dev_priv->display.calc_voltage_level(cdclk_state.cdclk); - cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); + bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } /** @@ -2004,14 +1729,10 @@ static void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) */ void intel_cdclk_init(struct drm_i915_private *i915) { - if (INTEL_GEN(i915) >= 11) - icl_init_cdclk(i915); - else if (IS_CANNONLAKE(i915)) - cnl_init_cdclk(i915); + if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10) + bxt_init_cdclk(i915); else if (IS_GEN9_BC(i915)) skl_init_cdclk(i915); - else if (IS_GEN9_LP(i915)) - bxt_init_cdclk(i915); } /** @@ -2023,14 +1744,10 @@ void intel_cdclk_init(struct drm_i915_private *i915) */ void intel_cdclk_uninit(struct drm_i915_private *i915) { - if (INTEL_GEN(i915) >= 11) - icl_uninit_cdclk(i915); - else if (IS_CANNONLAKE(i915)) - cnl_uninit_cdclk(i915); + if (INTEL_GEN(i915) >= 10 || IS_GEN9_LP(i915)) + bxt_uninit_cdclk(i915); else if (IS_GEN9_BC(i915)) skl_uninit_cdclk(i915); - else if (IS_GEN9_LP(i915)) - bxt_uninit_cdclk(i915); } /** @@ -2058,9 +1775,9 @@ bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, * Returns: * True if the CDCLK states require just a cd2x divider update, false if not. */ -bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b) +static bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) { /* Older hw doesn't have the capability */ if (INTEL_GEN(dev_priv) < 10 && !IS_GEN9_LP(dev_priv)) @@ -2079,8 +1796,8 @@ bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, * Returns: * True if the CDCLK states don't match, false if they do. 
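intel_cdclk_changed() and intel_cdclk_needs_cd2x_update() become static helpers in this hunk; the distinction is between changes that need the PLL reprogrammed (a real modeset), changes that only touch the cd2x divider, and changes that only bump the voltage level. A hedged sketch of the two simple comparisons; the field-by-field body of needs_modeset() is not shown here, so the version below is an assumption based on the state fields used elsewhere in the diff.

#include <stdbool.h>
#include <stdio.h>

struct cdclk_state {
	int cdclk, vco, ref;
	unsigned char voltage_level;
};

/* Assumption: a full modeset is needed when any clock parameter differs. */
static bool cdclk_needs_modeset(const struct cdclk_state *a,
				const struct cdclk_state *b)
{
	return a->cdclk != b->cdclk || a->vco != b->vco || a->ref != b->ref;
}

/* Mirrors intel_cdclk_changed(): also react to a bare voltage level bump. */
static bool cdclk_changed(const struct cdclk_state *a,
			  const struct cdclk_state *b)
{
	return cdclk_needs_modeset(a, b) ||
	       a->voltage_level != b->voltage_level;
}

int main(void)
{
	struct cdclk_state cur = { .cdclk = 307200, .vco = 614400,
				   .ref = 19200, .voltage_level = 0 };
	struct cdclk_state new = cur;

	new.voltage_level = 1;
	printf("needs_modeset=%d changed=%d\n",
	       cdclk_needs_modeset(&cur, &new), cdclk_changed(&cur, &new));
	return 0;
}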
*/ -bool intel_cdclk_changed(const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b) +static bool intel_cdclk_changed(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) { return intel_cdclk_needs_modeset(a, b) || a->voltage_level != b->voltage_level; @@ -2185,9 +1902,11 @@ intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv, intel_set_cdclk(dev_priv, new_state, pipe); } -static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, - int pixel_rate) +static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + int pixel_rate = crtc_state->pixel_rate; + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) return DIV_ROUND_UP(pixel_rate, 2); else if (IS_GEN(dev_priv, 9) || @@ -2195,20 +1914,35 @@ static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, return pixel_rate; else if (IS_CHERRYVIEW(dev_priv)) return DIV_ROUND_UP(pixel_rate * 100, 95); + else if (crtc_state->double_wide) + return DIV_ROUND_UP(pixel_rate * 100, 90 * 2); else return DIV_ROUND_UP(pixel_rate * 100, 90); } +static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_plane *plane; + int min_cdclk = 0; + + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) + min_cdclk = max(crtc_state->min_cdclk[plane->id], min_cdclk); + + return min_cdclk; +} + int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = - to_i915(crtc_state->base.crtc->dev); + to_i915(crtc_state->uapi.crtc->dev); int min_cdclk; - if (!crtc_state->base.enable) + if (!crtc_state->hw.enable) return 0; - min_cdclk = intel_pixel_rate_to_cdclk(dev_priv, crtc_state->pixel_rate); + min_cdclk = intel_pixel_rate_to_cdclk(crtc_state); /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ if (IS_BROADWELL(dev_priv) && hsw_crtc_state_ips_capable(crtc_state)) @@ -2267,6 +2001,21 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) IS_GEMINILAKE(dev_priv)) min_cdclk = max(158400, min_cdclk); + /* Account for additional needs from the planes */ + min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); + + /* + * HACK. Currently for TGL platforms we calculate + * min_cdclk initially based on pixel_rate divided + * by 2, accounting for also plane requirements, + * however in some cases the lowest possible CDCLK + * doesn't work and causing the underruns. + * Explicitly stating here that this seems to be currently + * rather a Hack, than final solution. 
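intel_crtc_compute_min_cdclk() now also folds in per-plane cdclk requirements and, on Tiger Lake, clamps to the full pixel rate as the stop-gap against underruns described in the comment above. A reduced sketch of how the pieces combine; the platform checks are collapsed into plain flags and only a subset of the divisor branches from intel_pixel_rate_to_cdclk() is reproduced.

#include <stdbool.h>
#include <stdio.h>

#define DIV_ROUND_UP(x, d)	(((x) + (d) - 1) / (d))

/*
 * Subset of the divisor branches: gen10+/GLK run at half the pixel rate,
 * CHV needs a 95% guardband, otherwise 90% (halved again for double-wide).
 */
static int pixel_rate_to_cdclk(int pixel_rate, bool half_rate,
			       bool chv, bool double_wide)
{
	if (half_rate)
		return DIV_ROUND_UP(pixel_rate, 2);
	if (chv)
		return DIV_ROUND_UP(pixel_rate * 100, 95);
	if (double_wide)
		return DIV_ROUND_UP(pixel_rate * 100, 90 * 2);
	return DIV_ROUND_UP(pixel_rate * 100, 90);
}

static int max_int(int a, int b) { return a > b ? a : b; }

/* Combine the pixel-rate floor with per-plane needs and the TGL clamp. */
static int crtc_min_cdclk(int pixel_rate, const int *plane_min_cdclk,
			  int num_planes, bool is_tgl)
{
	int min_cdclk = pixel_rate_to_cdclk(pixel_rate, true, false, false);
	int i;

	for (i = 0; i < num_planes; i++)
		min_cdclk = max_int(min_cdclk, plane_min_cdclk[i]);

	/* HACK carried over from the diff: TGL underruns at lower cdclk */
	if (is_tgl)
		min_cdclk = max_int(min_cdclk, pixel_rate);

	return min_cdclk;
}

int main(void)
{
	int planes[] = { 250000, 0, 0 };

	printf("min cdclk: %d kHz\n", crtc_min_cdclk(533000, planes, 3, true));
	return 0;
}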
+ */ + if (IS_TIGERLAKE(dev_priv)) + min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); + if (min_cdclk > dev_priv->max_cdclk_freq) { DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n", min_cdclk, dev_priv->max_cdclk_freq); @@ -2288,11 +2037,20 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) sizeof(state->min_cdclk)); for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + int ret; + min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); if (min_cdclk < 0) return min_cdclk; + if (state->min_cdclk[i] == min_cdclk) + continue; + state->min_cdclk[i] = min_cdclk; + + ret = intel_atomic_lock_global_state(state); + if (ret) + return ret; } min_cdclk = state->cdclk.force_min_cdclk; @@ -2303,6 +2061,10 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) } /* + * Account for port clock min voltage level requirements. + * This only really does something on CNL+ but can be + * called on earlier platforms as well. + * * Note that this functions assumes that 0 is * the lowest voltage value, and higher values * correspond to increasingly higher voltages. @@ -2311,7 +2073,7 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) * future platforms this code will need to be * adjusted. */ -static u8 cnl_compute_min_voltage_level(struct intel_atomic_state *state) +static int bxt_compute_min_voltage_level(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; @@ -2324,11 +2086,21 @@ static u8 cnl_compute_min_voltage_level(struct intel_atomic_state *state) sizeof(state->min_voltage_level)); for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { - if (crtc_state->base.enable) - state->min_voltage_level[i] = - crtc_state->min_voltage_level; + int ret; + + if (crtc_state->hw.enable) + min_voltage_level = crtc_state->min_voltage_level; else - state->min_voltage_level[i] = 0; + min_voltage_level = 0; + + if (state->min_voltage_level[i] == min_voltage_level) + continue; + + state->min_voltage_level[i] = min_voltage_level; + + ret = intel_atomic_lock_global_state(state); + if (ret) + return ret; } min_voltage_level = 0; @@ -2354,7 +2126,7 @@ static int vlv_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.voltage_level = vlv_calc_voltage_level(dev_priv, cdclk); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = vlv_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); state->cdclk.actual.cdclk = cdclk; @@ -2385,7 +2157,7 @@ static int bdw_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.voltage_level = bdw_calc_voltage_level(cdclk); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = bdw_calc_cdclk(state->cdclk.force_min_cdclk); state->cdclk.actual.cdclk = cdclk; @@ -2410,7 +2182,7 @@ static int skl_dpll0_vco(struct intel_atomic_state *state) vco = dev_priv->skl_preferred_vco_freq; for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { - if (!crtc_state->base.enable) + if (!crtc_state->hw.enable) continue; if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) @@ -2455,7 +2227,7 @@ static int skl_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.voltage_level = skl_calc_voltage_level(cdclk); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = skl_calc_cdclk(state->cdclk.force_min_cdclk, vco); state->cdclk.actual.vco = vco; @@ -2472,38 +2244,33 @@ static int skl_modeset_calc_cdclk(struct intel_atomic_state *state) static int 
bxt_modeset_calc_cdclk(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int min_cdclk, cdclk, vco; + int min_cdclk, min_voltage_level, cdclk, vco; min_cdclk = intel_compute_min_cdclk(state); if (min_cdclk < 0) return min_cdclk; - if (IS_GEMINILAKE(dev_priv)) { - cdclk = glk_calc_cdclk(min_cdclk); - vco = glk_de_pll_vco(dev_priv, cdclk); - } else { - cdclk = bxt_calc_cdclk(min_cdclk); - vco = bxt_de_pll_vco(dev_priv, cdclk); - } + min_voltage_level = bxt_compute_min_voltage_level(state); + if (min_voltage_level < 0) + return min_voltage_level; + + cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; state->cdclk.logical.voltage_level = - bxt_calc_voltage_level(cdclk); - - if (!state->active_crtcs) { - if (IS_GEMINILAKE(dev_priv)) { - cdclk = glk_calc_cdclk(state->cdclk.force_min_cdclk); - vco = glk_de_pll_vco(dev_priv, cdclk); - } else { - cdclk = bxt_calc_cdclk(state->cdclk.force_min_cdclk); - vco = bxt_de_pll_vco(dev_priv, cdclk); - } + max_t(int, min_voltage_level, + dev_priv->display.calc_voltage_level(cdclk)); + + if (!state->active_pipes) { + cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.actual.vco = vco; state->cdclk.actual.cdclk = cdclk; state->cdclk.actual.voltage_level = - bxt_calc_voltage_level(cdclk); + dev_priv->display.calc_voltage_level(cdclk); } else { state->cdclk.actual = state->cdclk.logical; } @@ -2511,70 +2278,138 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) return 0; } -static int cnl_modeset_calc_cdclk(struct intel_atomic_state *state) +static int intel_modeset_all_pipes(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int min_cdclk, cdclk, vco; + struct intel_crtc *crtc; - min_cdclk = intel_compute_min_cdclk(state); - if (min_cdclk < 0) - return min_cdclk; + /* + * Add all pipes to the state, and force + * a modeset on all the active ones. 
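The max() feeding state->cdclk.logical.voltage_level above combines two independent constraints. A minimal illustration with invented level numbers follows; the actual voltage level tables are not part of this hunk.

    /*
     * Hypothetical numbers, purely to illustrate the max():
     *
     *   dev_priv->display.calc_voltage_level(cdclk)  -> 1   (needed for the chosen cdclk)
     *   bxt_compute_min_voltage_level(state)         -> 2   (needed by some active port clock)
     *
     *   logical.voltage_level = max(2, 1) = 2
     *
     * i.e. a port clock can force a higher voltage level than the cdclk
     * frequency alone would require, never a lower one.
     */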
+ */ + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; + int ret; - cdclk = cnl_calc_cdclk(min_cdclk); - vco = cnl_cdclk_pll_vco(dev_priv, cdclk); + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); - state->cdclk.logical.vco = vco; - state->cdclk.logical.cdclk = cdclk; - state->cdclk.logical.voltage_level = - max(cnl_calc_voltage_level(cdclk), - cnl_compute_min_voltage_level(state)); + if (!crtc_state->hw.active || + drm_atomic_crtc_needs_modeset(&crtc_state->uapi)) + continue; - if (!state->active_crtcs) { - cdclk = cnl_calc_cdclk(state->cdclk.force_min_cdclk); - vco = cnl_cdclk_pll_vco(dev_priv, cdclk); + crtc_state->uapi.mode_changed = true; - state->cdclk.actual.vco = vco; - state->cdclk.actual.cdclk = cdclk; - state->cdclk.actual.voltage_level = - cnl_calc_voltage_level(cdclk); - } else { - state->cdclk.actual = state->cdclk.logical; + ret = drm_atomic_add_affected_connectors(&state->base, + &crtc->base); + if (ret) + return ret; + + ret = drm_atomic_add_affected_planes(&state->base, + &crtc->base); + if (ret) + return ret; + + crtc_state->update_planes |= crtc_state->active_planes; } return 0; } -static int icl_modeset_calc_cdclk(struct intel_atomic_state *state) +static int fixed_modeset_calc_cdclk(struct intel_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - unsigned int ref = state->cdclk.logical.ref; - int min_cdclk, cdclk, vco; + int min_cdclk; + /* + * We can't change the cdclk frequency, but we still want to + * check that the required minimum frequency doesn't exceed + * the actual cdclk frequency. + */ min_cdclk = intel_compute_min_cdclk(state); if (min_cdclk < 0) return min_cdclk; - cdclk = icl_calc_cdclk(min_cdclk, ref); - vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk); + return 0; +} - state->cdclk.logical.vco = vco; - state->cdclk.logical.cdclk = cdclk; - state->cdclk.logical.voltage_level = - max(icl_calc_voltage_level(cdclk), - cnl_compute_min_voltage_level(state)); +int intel_modeset_calc_cdclk(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + enum pipe pipe; + int ret; - if (!state->active_crtcs) { - cdclk = icl_calc_cdclk(state->cdclk.force_min_cdclk, ref); - vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk); + ret = dev_priv->display.modeset_calc_cdclk(state); + if (ret) + return ret; - state->cdclk.actual.vco = vco; - state->cdclk.actual.cdclk = cdclk; - state->cdclk.actual.voltage_level = - icl_calc_voltage_level(cdclk); + /* + * Writes to dev_priv->cdclk.{actual,logical} must protected + * by holding all the crtc mutexes even if we don't end up + * touching the hardware + */ + if (intel_cdclk_changed(&dev_priv->cdclk.actual, + &state->cdclk.actual)) { + /* + * Also serialize commits across all crtcs + * if the actual hw needs to be poked. 
+ */ + ret = intel_atomic_serialize_global_state(state); + if (ret) + return ret; + } else if (intel_cdclk_changed(&dev_priv->cdclk.logical, + &state->cdclk.logical)) { + ret = intel_atomic_lock_global_state(state); + if (ret) + return ret; } else { - state->cdclk.actual = state->cdclk.logical; + return 0; + } + + if (is_power_of_2(state->active_pipes) && + intel_cdclk_needs_cd2x_update(dev_priv, + &dev_priv->cdclk.actual, + &state->cdclk.actual)) { + struct intel_crtc *crtc; + struct intel_crtc_state *crtc_state; + + pipe = ilog2(state->active_pipes); + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + if (drm_atomic_crtc_needs_modeset(&crtc_state->uapi)) + pipe = INVALID_PIPE; + } else { + pipe = INVALID_PIPE; + } + + if (pipe != INVALID_PIPE) { + state->cdclk.pipe = pipe; + + DRM_DEBUG_KMS("Can change cdclk with pipe %c active\n", + pipe_name(pipe)); + } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, + &state->cdclk.actual)) { + /* All pipes must be switched off while we change the cdclk. */ + ret = intel_modeset_all_pipes(state); + if (ret) + return ret; + + state->cdclk.pipe = INVALID_PIPE; + + DRM_DEBUG_KMS("Modeset required for cdclk change\n"); } + DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", + state->cdclk.logical.cdclk, + state->cdclk.actual.cdclk); + DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n", + state->cdclk.logical.voltage_level, + state->cdclk.actual.voltage_level); + return 0; } @@ -2605,7 +2440,12 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) */ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) { - if (INTEL_GEN(dev_priv) >= 11) { + if (IS_ELKHARTLAKE(dev_priv)) { + if (dev_priv->cdclk.hw.ref == 24000) + dev_priv->max_cdclk_freq = 552000; + else + dev_priv->max_cdclk_freq = 556800; + } else if (INTEL_GEN(dev_priv) >= 11) { if (dev_priv->cdclk.hw.ref == 24000) dev_priv->max_cdclk_freq = 648000; else @@ -2789,15 +2629,29 @@ void intel_update_rawclk(struct drm_i915_private *dev_priv) */ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { - if (INTEL_GEN(dev_priv) >= 11) { - dev_priv->display.set_cdclk = icl_set_cdclk; - dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk; + if (IS_ELKHARTLAKE(dev_priv)) { + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = ehl_calc_voltage_level; + dev_priv->cdclk.table = icl_cdclk_table; + } else if (INTEL_GEN(dev_priv) >= 11) { + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = icl_calc_voltage_level; + dev_priv->cdclk.table = icl_cdclk_table; } else if (IS_CANNONLAKE(dev_priv)) { - dev_priv->display.set_cdclk = cnl_set_cdclk; - dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = cnl_calc_voltage_level; + dev_priv->cdclk.table = cnl_cdclk_table; } else if (IS_GEN9_LP(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = bxt_calc_voltage_level; + if (IS_GEMINILAKE(dev_priv)) + dev_priv->cdclk.table = glk_cdclk_table; + 
else + dev_priv->cdclk.table = bxt_cdclk_table; } else if (IS_GEN9_BC(dev_priv)) { dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; @@ -2810,13 +2664,11 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv)) { dev_priv->display.set_cdclk = vlv_set_cdclk; dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk; + } else { + dev_priv->display.modeset_calc_cdclk = fixed_modeset_calc_cdclk; } - if (INTEL_GEN(dev_priv) >= 11) - dev_priv->display.get_cdclk = icl_get_cdclk; - else if (IS_CANNONLAKE(dev_priv)) - dev_priv->display.get_cdclk = cnl_get_cdclk; - else if (IS_GEN9_LP(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEN9_LP(dev_priv)) dev_priv->display.get_cdclk = bxt_get_cdclk; else if (IS_GEN9_BC(dev_priv)) dev_priv->display.get_cdclk = skl_get_cdclk; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 4d6f7f5f8930..cf71394cc79c 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -15,6 +15,13 @@ struct intel_atomic_state; struct intel_cdclk_state; struct intel_crtc_state; +struct intel_cdclk_vals { + u16 refclk; + u32 cdclk; + u8 divider; /* CD2X divider * 2 */ + u8 ratio; +}; + int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state); void intel_cdclk_init(struct drm_i915_private *i915); void intel_cdclk_uninit(struct drm_i915_private *i915); @@ -22,13 +29,8 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); void intel_update_max_cdclk(struct drm_i915_private *dev_priv); void intel_update_cdclk(struct drm_i915_private *dev_priv); void intel_update_rawclk(struct drm_i915_private *dev_priv); -bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b); bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b); -bool intel_cdclk_changed(const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b); void intel_cdclk_swap_state(struct intel_atomic_state *state); void intel_set_cdclk_pre_plane_update(struct drm_i915_private *dev_priv, @@ -42,5 +44,6 @@ intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv, enum pipe pipe); void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, const char *context); +int intel_modeset_calc_cdclk(struct intel_atomic_state *state); #endif /* __INTEL_CDCLK_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 23a84dd7989f..3980e8b50c28 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -23,7 +23,7 @@ */ #include "intel_color.h" -#include "intel_drv.h" +#include "intel_display_types.h" #define CTM_COEFF_SIGN (1ULL << 63) @@ -43,6 +43,21 @@ #define LEGACY_LUT_LENGTH 256 /* + * ILK+ csc matrix: + * + * |R/Cr| | c0 c1 c2 | ( |R/Cr| |preoff0| ) |postoff0| + * |G/Y | = | c3 c4 c5 | x ( |G/Y | + |preoff1| ) + |postoff1| + * |B/Cb| | c6 c7 c8 | ( |B/Cb| |preoff2| ) |postoff2| + * + * ILK/SNB don't have explicit post offsets, and instead + * CSC_MODE_YUV_TO_RGB and CSC_BLACK_SCREEN_OFFSET are used: + * CSC_MODE_YUV_TO_RGB=0 + CSC_BLACK_SCREEN_OFFSET=0 -> 1/2, 0, 1/2 + * CSC_MODE_YUV_TO_RGB=0 + CSC_BLACK_SCREEN_OFFSET=1 -> 1/2, 1/16, 1/2 + * CSC_MODE_YUV_TO_RGB=1 + CSC_BLACK_SCREEN_OFFSET=0 -> 0, 0, 0 + * CSC_MODE_YUV_TO_RGB=1 + 
CSC_BLACK_SCREEN_OFFSET=1 -> 1/16, 1/16, 1/16 + */ + +/* * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point * format). This macro takes the coefficient we want transformed and the * number of fractional bits. @@ -59,37 +74,38 @@ #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255) +/* Nop pre/post offsets */ static const u16 ilk_csc_off_zero[3] = {}; +/* Identity matrix */ static const u16 ilk_csc_coeff_identity[9] = { ILK_CSC_COEFF_1_0, 0, 0, 0, ILK_CSC_COEFF_1_0, 0, 0, 0, ILK_CSC_COEFF_1_0, }; +/* Limited range RGB post offsets */ static const u16 ilk_csc_postoff_limited_range[3] = { ILK_CSC_POSTOFF_LIMITED_RANGE, ILK_CSC_POSTOFF_LIMITED_RANGE, ILK_CSC_POSTOFF_LIMITED_RANGE, }; +/* Full range RGB -> limited range RGB matrix */ static const u16 ilk_csc_coeff_limited_range[9] = { ILK_CSC_COEFF_LIMITED_RANGE, 0, 0, 0, ILK_CSC_COEFF_LIMITED_RANGE, 0, 0, 0, ILK_CSC_COEFF_LIMITED_RANGE, }; -/* - * These values are direct register values specified in the Bspec, - * for RGB->YUV conversion matrix (colorspace BT709) - */ +/* BT.709 full range RGB -> limited range YCbCr matrix */ static const u16 ilk_csc_coeff_rgb_to_ycbcr[9] = { 0x1e08, 0x9cc0, 0xb528, 0x2ba8, 0x09d8, 0x37e8, 0xbce8, 0x9ad8, 0x1e08, }; -/* Post offset values for RGB->YCBCR conversion */ +/* Limited range YCbCr post offsets */ static const u16 ilk_csc_postoff_rgb_to_ycbcr[3] = { 0x0800, 0x0100, 0x0800, }; @@ -101,10 +117,10 @@ static bool lut_is_legacy(const struct drm_property_blob *lut) static bool crtc_state_is_legacy_gamma(const struct intel_crtc_state *crtc_state) { - return !crtc_state->base.degamma_lut && - !crtc_state->base.ctm && - crtc_state->base.gamma_lut && - lut_is_legacy(crtc_state->base.gamma_lut); + return !crtc_state->hw.degamma_lut && + !crtc_state->hw.ctm && + crtc_state->hw.gamma_lut && + lut_is_legacy(crtc_state->hw.gamma_lut); } /* @@ -189,7 +205,7 @@ static void icl_update_output_csc(struct intel_crtc *crtc, static bool ilk_csc_limited_range(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); /* * FIXME if there's a gamma LUT after the CSC, we should @@ -203,7 +219,7 @@ static bool ilk_csc_limited_range(const struct intel_crtc_state *crtc_state) static void ilk_csc_convert_ctm(const struct intel_crtc_state *crtc_state, u16 coeffs[9]) { - const struct drm_color_ctm *ctm = crtc_state->base.ctm->data; + const struct drm_color_ctm *ctm = crtc_state->hw.ctm->data; const u64 *input; u64 temp[9]; int i; @@ -254,11 +270,11 @@ static void ilk_csc_convert_ctm(const struct intel_crtc_state *crtc_state, static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); bool limited_color_range = ilk_csc_limited_range(crtc_state); - if (crtc_state->base.ctm) { + if (crtc_state->hw.ctm) { u16 coeff[9]; ilk_csc_convert_ctm(crtc_state, coeff); @@ -293,10 +309,10 @@ static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state) static void icl_load_csc_matrix(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - if (crtc_state->base.ctm) { + if 
(crtc_state->hw.ctm) { u16 coeff[9]; ilk_csc_convert_ctm(crtc_state, coeff); @@ -322,12 +338,12 @@ static void icl_load_csc_matrix(const struct intel_crtc_state *crtc_state) */ static void cherryview_load_csc_matrix(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - if (crtc_state->base.ctm) { - const struct drm_color_ctm *ctm = crtc_state->base.ctm->data; + if (crtc_state->hw.ctm) { + const struct drm_color_ctm *ctm = crtc_state->hw.ctm->data; u16 coeffs[9] = {}; int i; @@ -388,7 +404,7 @@ static u32 ilk_lut_10(const struct drm_color_lut *color) static void i9xx_load_luts_internal(const struct intel_crtc_state *crtc_state, const struct drm_property_blob *blob) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; int i; @@ -419,12 +435,12 @@ static void i9xx_load_luts_internal(const struct intel_crtc_state *crtc_state, static void i9xx_load_luts(const struct intel_crtc_state *crtc_state) { - i9xx_load_luts_internal(crtc_state, crtc_state->base.gamma_lut); + i9xx_load_luts_internal(crtc_state, crtc_state->hw.gamma_lut); } static void i9xx_color_commit(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; u32 val; @@ -437,7 +453,7 @@ static void i9xx_color_commit(const struct intel_crtc_state *crtc_state) static void ilk_color_commit(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; u32 val; @@ -452,7 +468,7 @@ static void ilk_color_commit(const struct intel_crtc_state *crtc_state) static void hsw_color_commit(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); I915_WRITE(GAMMA_MODE(crtc->pipe), crtc_state->gamma_mode); @@ -462,7 +478,7 @@ static void hsw_color_commit(const struct intel_crtc_state *crtc_state) static void skl_color_commit(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; u32 val = 0; @@ -508,8 +524,8 @@ static void i965_load_lut_10p6(struct intel_crtc *crtc, static void i965_load_luts(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) i9xx_load_luts(crtc_state); @@ -531,8 +547,8 @@ static void ilk_load_lut_10(struct intel_crtc *crtc, static void 
ilk_load_luts(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) i9xx_load_luts(crtc_state); @@ -611,12 +627,13 @@ static void bdw_load_lut_10(struct intel_crtc *crtc, static void ivb_load_lut_ext_max(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; /* Program the max register to clamp values > 1.0. */ - I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16); - I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16); - I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16); /* * Program the gc max 2 register to clamp values > 1.0. @@ -624,17 +641,22 @@ static void ivb_load_lut_ext_max(struct intel_crtc *crtc) * from 3.0 to 7.0 */ if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { - I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), 1 << 16); - I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), 1 << 16); - I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 0), + 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 1), + 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 2), + 1 << 16); } + + intel_dsb_put(dsb); } static void ivb_load_luts(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; - const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; + const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut; if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) { i9xx_load_luts(crtc_state); @@ -655,9 +677,9 @@ static void ivb_load_luts(const struct intel_crtc_state *crtc_state) static void bdw_load_luts(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; - const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; + const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut; if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) { i9xx_load_luts(crtc_state); @@ -678,11 +700,11 @@ static void bdw_load_luts(const struct intel_crtc_state *crtc_state) static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; const u32 lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; - const struct drm_color_lut *lut = crtc_state->base.degamma_lut->data; + const struct drm_color_lut *lut = 
crtc_state->hw.degamma_lut->data; u32 i; /* @@ -717,7 +739,7 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state) static void glk_load_degamma_lut_linear(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; const u32 lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; @@ -744,8 +766,8 @@ static void glk_load_degamma_lut_linear(const struct intel_crtc_state *crtc_stat static void glk_load_luts(const struct intel_crtc_state *crtc_state) { - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); /* * On GLK+ both pipe CSC and degamma LUT are controlled @@ -755,7 +777,7 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) * the degama LUT so that we don't have to reload * it every time the pipe CSC is being enabled. */ - if (crtc_state->base.degamma_lut) + if (crtc_state->hw.degamma_lut) glk_load_degamma_lut(crtc_state); else glk_load_degamma_lut_linear(crtc_state); @@ -786,79 +808,84 @@ static void icl_load_gcmax(const struct intel_crtc_state *crtc_state, const struct drm_color_lut *color) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ - I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red); - I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green); - I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue); + intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 0), color->red); + intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 1), color->green); + intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 2), color->blue); + intel_dsb_put(dsb); } static void icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const struct drm_property_blob *blob = crtc_state->base.gamma_lut; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_property_blob *blob = crtc_state->hw.gamma_lut; const struct drm_color_lut *lut = blob->data; + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; u32 i; /* - * Every entry in the multi-segment LUT is corresponding to a superfine - * segment step which is 1/(8 * 128 * 256). + * Program Super Fine segment (let's call it seg1)... * - * Superfine segment has 9 entries, corresponding to values - * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). + * Super Fine segment's step is 1/(8 * 128 * 256) and it has + * 9 entries, corresponding to values 0, 1/(8 * 128 * 256), + * 2/(8 * 128 * 256) ... 8/(8 * 128 * 256). 
*/ - I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); + intel_dsb_reg_write(dsb, PREC_PAL_MULTI_SEG_INDEX(pipe), + PAL_PREC_AUTO_INCREMENT); for (i = 0; i < 9; i++) { const struct drm_color_lut *entry = &lut[i]; - I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), - ilk_lut_12p4_ldw(entry)); - I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), - ilk_lut_12p4_udw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_udw(entry)); } + + intel_dsb_put(dsb); } static void icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const struct drm_property_blob *blob = crtc_state->base.gamma_lut; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_property_blob *blob = crtc_state->hw.gamma_lut; const struct drm_color_lut *lut = blob->data; const struct drm_color_lut *entry; + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; u32 i; /* - * * Program Fine segment (let's call it seg2)... * - * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256), 2/(128*256) - * ... 256/(128*256). So in order to program fine segment of LUT we - * need to pick every 8'th entry in LUT, and program 256 indexes. + * Fine segment's step is 1/(128 * 256) i.e. 1/(128 * 256), 2/(128 * 256) + * ... 256/(128 * 256). So in order to program fine segment of LUT we + * need to pick every 8th entry in the LUT, and program 256 indexes. * * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1], - * with seg2[0] being unused by the hardware. + * seg2[0] being unused by the hardware. */ - I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); + intel_dsb_reg_write(dsb, PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); for (i = 1; i < 257; i++) { entry = &lut[i * 8]; - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_udw(entry)); } /* * Program Coarse segment (let's call it seg3)... * - * Coarse segment's starts from index 0 and it's step is 1/256 ie 0, - * 1/256, 2/256 ...256/256. As per the description of each entry in LUT + * Coarse segment starts from index 0 and it's step is 1/256 ie 0, + * 1/256, 2/256 ... 256/256. As per the description of each entry in LUT * above, we need to pick every (8 * 128)th entry in LUT, and * program 256 of those. 
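Pulling the super fine, fine, and coarse segment comments together, the index arithmetic over the single software gamma LUT works out as below. This is only a summary of the loops in this function (including the GCMAX write just below), not additional programming.

    /*
     * All three hardware segments sample the same software LUT of
     * 8 * 128 * 256 + 1 = 262145 entries:
     *
     *   seg1 (super fine, 9 entries):  lut[0] .. lut[8]                step 1/(8 * 128 * 256)
     *   seg2 (fine, 256 entries):      lut[1 * 8] .. lut[256 * 8]      step 1/(128 * 256), seg2[0] unused
     *   seg3 (coarse, 256 entries):    lut[0], lut[1024] .. lut[255 * 1024]   step 1/256  (1024 = 8 * 128)
     *   GCMAX:                         lut[256 * 8 * 128] = lut[262144], the 1.0 end point
     */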
* @@ -868,38 +895,43 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) */ for (i = 0; i < 256; i++) { entry = &lut[i * 8 * 128]; - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_udw(entry)); } /* The last entry in the LUT is to be programmed in GCMAX */ entry = &lut[256 * 8 * 128]; icl_load_gcmax(crtc_state, entry); ivb_load_lut_ext_max(crtc); + intel_dsb_put(dsb); } static void icl_load_luts(const struct intel_crtc_state *crtc_state) { - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_dsb *dsb = intel_dsb_get(crtc); - if (crtc_state->base.degamma_lut) + if (crtc_state->hw.degamma_lut) glk_load_degamma_lut(crtc_state); switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { case GAMMA_MODE_MODE_8BIT: i9xx_load_luts(crtc_state); break; - case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: icl_program_gamma_superfine_segment(crtc_state); icl_program_gamma_multi_segment(crtc_state); break; - default: bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); ivb_load_lut_ext_max(crtc); } + + intel_dsb_commit(dsb); + intel_dsb_put(dsb); } static u32 chv_cgm_degamma_ldw(const struct drm_color_lut *color) @@ -958,9 +990,9 @@ static void chv_load_cgm_gamma(struct intel_crtc *crtc, static void chv_load_luts(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; - const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; + const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut; cherryview_load_csc_matrix(crtc_state); @@ -978,28 +1010,77 @@ static void chv_load_luts(const struct intel_crtc_state *crtc_state) void intel_color_load_luts(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); dev_priv->display.load_luts(crtc_state); } void intel_color_commit(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); dev_priv->display.color_commit(crtc_state); } +static bool intel_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->uapi.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + return !old_crtc_state->hw.gamma_lut && + !old_crtc_state->hw.degamma_lut; +} + +static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->uapi.state); + const struct intel_crtc_state 
*old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + /* + * CGM_PIPE_MODE is itself single buffered. We'd have to + * somehow split it out from chv_load_luts() if we wanted + * the ability to preload the CGM LUTs/CSC without tearing. + */ + if (old_crtc_state->cgm_mode || new_crtc_state->cgm_mode) + return false; + + return !old_crtc_state->hw.gamma_lut; +} + +static bool glk_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->uapi.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + /* + * The hardware degamma is active whenever the pipe + * CSC is active. Thus even if the old state has no + * software degamma we need to avoid clobbering the + * linear hardware degamma mid scanout. + */ + return !old_crtc_state->csc_enable && + !old_crtc_state->hw.gamma_lut; +} + int intel_color_check(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); return dev_priv->display.color_check(crtc_state); } void intel_color_get_config(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); if (dev_priv->display.read_luts) dev_priv->display.read_luts(crtc_state); @@ -1023,16 +1104,16 @@ static bool need_plane_update(struct intel_plane *plane, static int intel_color_add_affected_planes(struct intel_crtc_state *new_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_atomic_state *state = - to_intel_atomic_state(new_crtc_state->base.state); + to_intel_atomic_state(new_crtc_state->uapi.state); const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); struct intel_plane *plane; - if (!new_crtc_state->base.active || - drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) + if (!new_crtc_state->hw.active || + drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) return 0; if (new_crtc_state->gamma_enable == old_crtc_state->gamma_enable && @@ -1074,9 +1155,9 @@ static int check_lut_size(const struct drm_property_blob *lut, int expected) static int check_luts(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; - const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; + const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut; int gamma_length, degamma_length; u32 gamma_tests, degamma_tests; @@ -1124,7 +1205,7 @@ static int i9xx_color_check(struct intel_crtc_state *crtc_state) return ret; crtc_state->gamma_enable = - crtc_state->base.gamma_lut && + crtc_state->hw.gamma_lut && !crtc_state->c8_planes; crtc_state->gamma_mode = i9xx_gamma_mode(crtc_state); @@ -1133,6 +1214,8 @@ static int i9xx_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = 
intel_can_preload_luts(crtc_state); + return 0; } @@ -1143,11 +1226,11 @@ static u32 chv_cgm_mode(const struct intel_crtc_state *crtc_state) if (crtc_state_is_legacy_gamma(crtc_state)) return 0; - if (crtc_state->base.degamma_lut) + if (crtc_state->hw.degamma_lut) cgm_mode |= CGM_PIPE_MODE_DEGAMMA; - if (crtc_state->base.ctm) + if (crtc_state->hw.ctm) cgm_mode |= CGM_PIPE_MODE_CSC; - if (crtc_state->base.gamma_lut) + if (crtc_state->hw.gamma_lut) cgm_mode |= CGM_PIPE_MODE_GAMMA; return cgm_mode; @@ -1185,6 +1268,8 @@ static int chv_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = chv_can_preload_luts(crtc_state); + return 0; } @@ -1197,6 +1282,21 @@ static u32 ilk_gamma_mode(const struct intel_crtc_state *crtc_state) return GAMMA_MODE_MODE_10BIT; } +static u32 ilk_csc_mode(const struct intel_crtc_state *crtc_state) +{ + /* + * CSC comes after the LUT in RGB->YCbCr mode. + * RGB->YCbCr needs the limited range offsets added to + * the output. RGB limited range output is handled by + * the hw automagically elsewhere. + */ + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + return CSC_BLACK_SCREEN_OFFSET; + + return CSC_MODE_YUV_TO_RGB | + CSC_POSITION_BEFORE_GAMMA; +} + static int ilk_color_check(struct intel_crtc_state *crtc_state) { int ret; @@ -1206,24 +1306,26 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state) return ret; crtc_state->gamma_enable = - crtc_state->base.gamma_lut && + crtc_state->hw.gamma_lut && !crtc_state->c8_planes; /* - * We don't expose the ctm on ilk/snb currently, - * nor do we enable YCbCr output. Also RGB limited - * range output is handled by the hw automagically. + * We don't expose the ctm on ilk/snb currently, also RGB + * limited range output is handled by the hw automagically. */ - crtc_state->csc_enable = false; + crtc_state->csc_enable = + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB; crtc_state->gamma_mode = ilk_gamma_mode(crtc_state); - crtc_state->csc_mode = 0; + crtc_state->csc_mode = ilk_csc_mode(crtc_state); ret = intel_color_add_affected_planes(crtc_state); if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1232,8 +1334,8 @@ static u32 ivb_gamma_mode(const struct intel_crtc_state *crtc_state) if (!crtc_state->gamma_enable || crtc_state_is_legacy_gamma(crtc_state)) return GAMMA_MODE_MODE_8BIT; - else if (crtc_state->base.gamma_lut && - crtc_state->base.degamma_lut) + else if (crtc_state->hw.gamma_lut && + crtc_state->hw.degamma_lut) return GAMMA_MODE_MODE_SPLIT; else return GAMMA_MODE_MODE_10BIT; @@ -1247,7 +1349,7 @@ static u32 ivb_csc_mode(const struct intel_crtc_state *crtc_state) * CSC comes after the LUT in degamma, RGB->YCbCr, * and RGB full->limited range mode. 
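On ILK/SNB the csc_mode bits double as the post offset selection, per the "ILK+ csc matrix" comment near the top of this file. The summary below only ties ilk_csc_mode() back to that table; it adds nothing beyond what those two pieces of the patch already state.

    /*
     *   YCbCr output:  csc_mode = CSC_BLACK_SCREEN_OFFSET
     *                  (YUV_TO_RGB=0, BLACK_SCREEN_OFFSET=1)
     *                  -> implicit post offsets 1/2, 1/16, 1/2,
     *                     i.e. the limited range Cr/Y/Cb offsets
     *                     (128/16/128 on an 8 bit scale)
     *
     *   RGB output:    csc_mode = CSC_MODE_YUV_TO_RGB | CSC_POSITION_BEFORE_GAMMA
     *                  (YUV_TO_RGB=1, BLACK_SCREEN_OFFSET=0)
     *                  -> implicit post offsets 0, 0, 0
     */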
*/ - if (crtc_state->base.degamma_lut || + if (crtc_state->hw.degamma_lut || crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB || limited_color_range) return 0; @@ -1265,13 +1367,13 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state) return ret; crtc_state->gamma_enable = - (crtc_state->base.gamma_lut || - crtc_state->base.degamma_lut) && + (crtc_state->hw.gamma_lut || + crtc_state->hw.degamma_lut) && !crtc_state->c8_planes; crtc_state->csc_enable = crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB || - crtc_state->base.ctm || limited_color_range; + crtc_state->hw.ctm || limited_color_range; crtc_state->gamma_mode = ivb_gamma_mode(crtc_state); @@ -1281,6 +1383,8 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1302,14 +1406,14 @@ static int glk_color_check(struct intel_crtc_state *crtc_state) return ret; crtc_state->gamma_enable = - crtc_state->base.gamma_lut && + crtc_state->hw.gamma_lut && !crtc_state->c8_planes; /* On GLK+ degamma LUT is controlled by csc_enable */ crtc_state->csc_enable = - crtc_state->base.degamma_lut || + crtc_state->hw.degamma_lut || crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB || - crtc_state->base.ctm || crtc_state->limited_color_range; + crtc_state->hw.ctm || crtc_state->limited_color_range; crtc_state->gamma_mode = glk_gamma_mode(crtc_state); @@ -1319,6 +1423,8 @@ static int glk_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = glk_can_preload_luts(crtc_state); + return 0; } @@ -1326,14 +1432,14 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state) { u32 gamma_mode = 0; - if (crtc_state->base.degamma_lut) + if (crtc_state->hw.degamma_lut) gamma_mode |= PRE_CSC_GAMMA_ENABLE; - if (crtc_state->base.gamma_lut && + if (crtc_state->hw.gamma_lut && !crtc_state->c8_planes) gamma_mode |= POST_CSC_GAMMA_ENABLE; - if (!crtc_state->base.gamma_lut || + if (!crtc_state->hw.gamma_lut || crtc_state_is_legacy_gamma(crtc_state)) gamma_mode |= GAMMA_MODE_MODE_8BIT; else @@ -1346,7 +1452,7 @@ static u32 icl_csc_mode(const struct intel_crtc_state *crtc_state) { u32 csc_mode = 0; - if (crtc_state->base.ctm) + if (crtc_state->hw.ctm) csc_mode |= ICL_CSC_ENABLE; if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB || @@ -1368,9 +1474,408 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) crtc_state->csc_mode = icl_csc_mode(crtc_state); + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + + return 0; +} + +static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return 0; + + switch (crtc_state->gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + return 8; + case GAMMA_MODE_MODE_10BIT: + return 16; + default: + MISSING_CASE(crtc_state->gamma_mode); + return 0; + } +} + +static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return 0; + + if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) + return 0; + + switch (crtc_state->gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + return 8; + case GAMMA_MODE_MODE_10BIT: + return 10; + default: + MISSING_CASE(crtc_state->gamma_mode); + return 0; + } +} + +static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) + return 10; + else + return i9xx_gamma_precision(crtc_state); +} + +static int glk_gamma_precision(const 
struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return 0; + + switch (crtc_state->gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + return 8; + case GAMMA_MODE_MODE_10BIT: + return 10; + default: + MISSING_CASE(crtc_state->gamma_mode); + return 0; + } +} + +int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (HAS_GMCH(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv)) + return chv_gamma_precision(crtc_state); + else + return i9xx_gamma_precision(crtc_state); + } else { + if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + return glk_gamma_precision(crtc_state); + else if (IS_IRONLAKE(dev_priv)) + return ilk_gamma_precision(crtc_state); + } + return 0; } +static bool err_check(struct drm_color_lut *lut1, + struct drm_color_lut *lut2, u32 err) +{ + return ((abs((long)lut2->red - lut1->red)) <= err) && + ((abs((long)lut2->blue - lut1->blue)) <= err) && + ((abs((long)lut2->green - lut1->green)) <= err); +} + +static bool intel_color_lut_entry_equal(struct drm_color_lut *lut1, + struct drm_color_lut *lut2, + int lut_size, u32 err) +{ + int i; + + for (i = 0; i < lut_size; i++) { + if (!err_check(&lut1[i], &lut2[i], err)) + return false; + } + + return true; +} + +bool intel_color_lut_equal(struct drm_property_blob *blob1, + struct drm_property_blob *blob2, + u32 gamma_mode, u32 bit_precision) +{ + struct drm_color_lut *lut1, *lut2; + int lut_size1, lut_size2; + u32 err; + + if (!blob1 != !blob2) + return false; + + if (!blob1) + return true; + + lut_size1 = drm_color_lut_size(blob1); + lut_size2 = drm_color_lut_size(blob2); + + /* check sw and hw lut size */ + switch (gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + case GAMMA_MODE_MODE_10BIT: + if (lut_size1 != lut_size2) + return false; + break; + default: + MISSING_CASE(gamma_mode); + return false; + } + + lut1 = blob1->data; + lut2 = blob2->data; + + err = 0xffff >> bit_precision; + + /* check sw and hw lut entry to be equal */ + switch (gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + case GAMMA_MODE_MODE_10BIT: + if (!intel_color_lut_entry_equal(lut1, lut2, + lut_size2, err)) + return false; + break; + default: + MISSING_CASE(gamma_mode); + return false; + } + + return true; +} + +/* convert hw value with given bit_precision to lut property val */ +static u32 intel_color_lut_pack(u32 val, u32 bit_precision) +{ + u32 max = 0xffff >> (16 - bit_precision); + + val = clamp_val(val, 0, max); + + if (bit_precision < 16) + val <<= 16 - bit_precision; + + return val; +} + +static struct drm_property_blob * +i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * LEGACY_LUT_LENGTH, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < LEGACY_LUT_LENGTH; i++) { + if (HAS_GMCH(dev_priv)) + val = I915_READ(PALETTE(pipe, i)); + else + val = I915_READ(LGC_PALETTE(pipe, i)); + + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_RED_MASK, val), 8); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_GREEN_MASK, val), 8); + blob_data[i].blue = 
intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_BLUE_MASK, val), 8); + } + + return blob; +} + +static void i9xx_read_luts(struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return; + + crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state); +} + +static struct drm_property_blob * +i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val1, val2; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < lut_size - 1; i++) { + val1 = I915_READ(PALETTE(pipe, 2 * i + 0)); + val2 = I915_READ(PALETTE(pipe, 2 * i + 1)); + + blob_data[i].red = REG_FIELD_GET(PALETTE_RED_MASK, val2) << 8 | + REG_FIELD_GET(PALETTE_RED_MASK, val1); + blob_data[i].green = REG_FIELD_GET(PALETTE_GREEN_MASK, val2) << 8 | + REG_FIELD_GET(PALETTE_GREEN_MASK, val1); + blob_data[i].blue = REG_FIELD_GET(PALETTE_BLUE_MASK, val2) << 8 | + REG_FIELD_GET(PALETTE_BLUE_MASK, val1); + } + + blob_data[i].red = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + I915_READ(PIPEGCMAX(pipe, 0))); + blob_data[i].green = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + I915_READ(PIPEGCMAX(pipe, 1))); + blob_data[i].blue = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + I915_READ(PIPEGCMAX(pipe, 2))); + + return blob; +} + +static void i965_read_luts(struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return; + + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state); + else + crtc_state->hw.gamma_lut = i965_read_lut_10p6(crtc_state); +} + +static struct drm_property_blob * +chv_read_cgm_lut(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < lut_size; i++) { + val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 0)); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + CGM_PIPE_GAMMA_GREEN_MASK, val), 10); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + CGM_PIPE_GAMMA_BLUE_MASK, val), 10); + + val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 1)); + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + CGM_PIPE_GAMMA_RED_MASK, val), 10); + } + + return blob; +} + +static void chv_read_luts(struct intel_crtc_state *crtc_state) +{ + if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) + crtc_state->hw.gamma_lut = chv_read_cgm_lut(crtc_state); + else + i965_read_luts(crtc_state); +} + +static struct drm_property_blob * +ilk_read_lut_10(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + 
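The precision handling in intel_color_lut_pack() and the error tolerance in intel_color_lut_equal() are easiest to see with one concrete value. The numbers below are worked directly from the code above, nothing more.

    /*
     * For a 10 bit hardware LUT entry:
     *
     *   intel_color_lut_pack(0x3ff, 10):
     *     max = 0xffff >> (16 - 10) = 0x03ff    (clamp limit)
     *     val = 0x3ff << (16 - 10)  = 0xffc0    (scaled to the 16 bit property range)
     *
     *   intel_color_lut_equal(..., bit_precision = 10):
     *     err = 0xffff >> 10 = 0x3f
     *     so a read back entry may differ from the software LUT by up to
     *     63 per channel and still be treated as equal.
     */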
+ blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < lut_size; i++) { + val = I915_READ(PREC_PALETTE(pipe, i)); + + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + PREC_PALETTE_RED_MASK, val), 10); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + PREC_PALETTE_GREEN_MASK, val), 10); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + PREC_PALETTE_BLUE_MASK, val), 10); + } + + return blob; +} + +static void ilk_read_luts(struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return; + + if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) + return; + + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state); + else + crtc_state->hw.gamma_lut = ilk_read_lut_10(crtc_state); +} + +static struct drm_property_blob * +glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + int hw_lut_size = ivb_lut_10_size(prec_index); + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * hw_lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + I915_WRITE(PREC_PAL_INDEX(pipe), prec_index | + PAL_PREC_AUTO_INCREMENT); + + for (i = 0; i < hw_lut_size; i++) { + val = I915_READ(PREC_PAL_DATA(pipe)); + + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + PREC_PAL_DATA_RED_MASK, val), 10); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + PREC_PAL_DATA_GREEN_MASK, val), 10); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + PREC_PAL_DATA_BLUE_MASK, val), 10); + } + + I915_WRITE(PREC_PAL_INDEX(pipe), 0); + + return blob; +} + +static void glk_read_luts(struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return; + + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state); + else + crtc_state->hw.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); +} + void intel_color_init(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1383,14 +1888,17 @@ void intel_color_init(struct intel_crtc *crtc) dev_priv->display.color_check = chv_color_check; dev_priv->display.color_commit = i9xx_color_commit; dev_priv->display.load_luts = chv_load_luts; + dev_priv->display.read_luts = chv_read_luts; } else if (INTEL_GEN(dev_priv) >= 4) { dev_priv->display.color_check = i9xx_color_check; dev_priv->display.color_commit = i9xx_color_commit; dev_priv->display.load_luts = i965_load_luts; + dev_priv->display.read_luts = i965_read_luts; } else { dev_priv->display.color_check = i9xx_color_check; dev_priv->display.color_commit = i9xx_color_commit; dev_priv->display.load_luts = i9xx_load_luts; + dev_priv->display.read_luts = i9xx_read_luts; } } else { if (INTEL_GEN(dev_priv) >= 11) @@ -1409,16 +1917,19 @@ void intel_color_init(struct intel_crtc *crtc) else dev_priv->display.color_commit = ilk_color_commit; - if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 11) { dev_priv->display.load_luts = icl_load_luts; - else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + } else if (IS_CANNONLAKE(dev_priv) || 
IS_GEMINILAKE(dev_priv)) { dev_priv->display.load_luts = glk_load_luts; - else if (INTEL_GEN(dev_priv) >= 8) + dev_priv->display.read_luts = glk_read_luts; + } else if (INTEL_GEN(dev_priv) >= 8) { dev_priv->display.load_luts = bdw_load_luts; - else if (INTEL_GEN(dev_priv) >= 7) + } else if (INTEL_GEN(dev_priv) >= 7) { dev_priv->display.load_luts = ivb_load_luts; - else + } else { dev_priv->display.load_luts = ilk_load_luts; + dev_priv->display.read_luts = ilk_read_luts; + } } drm_crtc_enable_color_mgmt(&crtc->base, diff --git a/drivers/gpu/drm/i915/display/intel_color.h b/drivers/gpu/drm/i915/display/intel_color.h index 057e8ac63555..173727aaa24d 100644 --- a/drivers/gpu/drm/i915/display/intel_color.h +++ b/drivers/gpu/drm/i915/display/intel_color.h @@ -6,13 +6,20 @@ #ifndef __INTEL_COLOR_H__ #define __INTEL_COLOR_H__ +#include <linux/types.h> + struct intel_crtc_state; struct intel_crtc; +struct drm_property_blob; void intel_color_init(struct intel_crtc *crtc); int intel_color_check(struct intel_crtc_state *crtc_state); void intel_color_commit(const struct intel_crtc_state *crtc_state); void intel_color_load_luts(const struct intel_crtc_state *crtc_state); void intel_color_get_config(struct intel_crtc_state *crtc_state); +int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state); +bool intel_color_lut_equal(struct drm_property_blob *blob1, + struct drm_property_blob *blob2, + u32 gamma_mode, u32 bit_precision); #endif /* __INTEL_COLOR_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 841708da5a56..44bbc7e74fc3 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -4,15 +4,15 @@ */ #include "intel_combo_phy.h" -#include "intel_drv.h" +#include "intel_display_types.h" -#define for_each_combo_port(__dev_priv, __port) \ - for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) \ - for_each_if(intel_port_is_combophy(__dev_priv, __port)) +#define for_each_combo_phy(__dev_priv, __phy) \ + for ((__phy) = PHY_A; (__phy) < I915_MAX_PHYS; (__phy)++) \ + for_each_if(intel_phy_is_combo(__dev_priv, __phy)) -#define for_each_combo_port_reverse(__dev_priv, __port) \ - for ((__port) = I915_MAX_PORTS; (__port)-- > PORT_A;) \ - for_each_if(intel_port_is_combophy(__dev_priv, __port)) +#define for_each_combo_phy_reverse(__dev_priv, __phy) \ + for ((__phy) = I915_MAX_PHYS; (__phy)-- > PHY_A;) \ + for_each_if(intel_phy_is_combo(__dev_priv, __phy)) enum { PROCMON_0_85V_DOT_0, @@ -38,18 +38,17 @@ static const struct cnl_procmon { }; /* - * CNL has just one set of registers, while ICL has two sets: one for port A and - * the other for port B. The CNL registers are equivalent to the ICL port A - * registers, that's why we call the ICL macros even though the function has CNL - * on its name. + * CNL has just one set of registers, while gen11 has a set for each combo PHY. + * The CNL registers are equivalent to the gen11 PHY A registers, that's why we + * call the ICL macros even though the function has CNL on its name. 
*/ static const struct cnl_procmon * -cnl_get_procmon_ref_values(struct drm_i915_private *dev_priv, enum port port) +cnl_get_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) { const struct cnl_procmon *procmon; u32 val; - val = I915_READ(ICL_PORT_COMP_DW3(port)); + val = I915_READ(ICL_PORT_COMP_DW3(phy)); switch (val & (PROCESS_INFO_MASK | VOLTAGE_INFO_MASK)) { default: MISSING_CASE(val); @@ -75,32 +74,32 @@ cnl_get_procmon_ref_values(struct drm_i915_private *dev_priv, enum port port) } static void cnl_set_procmon_ref_values(struct drm_i915_private *dev_priv, - enum port port) + enum phy phy) { const struct cnl_procmon *procmon; u32 val; - procmon = cnl_get_procmon_ref_values(dev_priv, port); + procmon = cnl_get_procmon_ref_values(dev_priv, phy); - val = I915_READ(ICL_PORT_COMP_DW1(port)); + val = I915_READ(ICL_PORT_COMP_DW1(phy)); val &= ~((0xff << 16) | 0xff); val |= procmon->dw1; - I915_WRITE(ICL_PORT_COMP_DW1(port), val); + I915_WRITE(ICL_PORT_COMP_DW1(phy), val); - I915_WRITE(ICL_PORT_COMP_DW9(port), procmon->dw9); - I915_WRITE(ICL_PORT_COMP_DW10(port), procmon->dw10); + I915_WRITE(ICL_PORT_COMP_DW9(phy), procmon->dw9); + I915_WRITE(ICL_PORT_COMP_DW10(phy), procmon->dw10); } static bool check_phy_reg(struct drm_i915_private *dev_priv, - enum port port, i915_reg_t reg, u32 mask, + enum phy phy, i915_reg_t reg, u32 mask, u32 expected_val) { u32 val = I915_READ(reg); if ((val & mask) != expected_val) { - DRM_DEBUG_DRIVER("Port %c combo PHY reg %08x state mismatch: " + DRM_DEBUG_DRIVER("Combo PHY %c reg %08x state mismatch: " "current %08x mask %08x expected %08x\n", - port_name(port), + phy_name(phy), reg.reg, val, mask, expected_val); return false; } @@ -109,18 +108,18 @@ static bool check_phy_reg(struct drm_i915_private *dev_priv, } static bool cnl_verify_procmon_ref_values(struct drm_i915_private *dev_priv, - enum port port) + enum phy phy) { const struct cnl_procmon *procmon; bool ret; - procmon = cnl_get_procmon_ref_values(dev_priv, port); + procmon = cnl_get_procmon_ref_values(dev_priv, phy); - ret = check_phy_reg(dev_priv, port, ICL_PORT_COMP_DW1(port), + ret = check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW1(phy), (0xff << 16) | 0xff, procmon->dw1); - ret &= check_phy_reg(dev_priv, port, ICL_PORT_COMP_DW9(port), + ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW9(phy), -1U, procmon->dw9); - ret &= check_phy_reg(dev_priv, port, ICL_PORT_COMP_DW10(port), + ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW10(phy), -1U, procmon->dw10); return ret; @@ -134,15 +133,15 @@ static bool cnl_combo_phy_enabled(struct drm_i915_private *dev_priv) static bool cnl_combo_phy_verify_state(struct drm_i915_private *dev_priv) { - enum port port = PORT_A; + enum phy phy = PHY_A; bool ret; if (!cnl_combo_phy_enabled(dev_priv)) return false; - ret = cnl_verify_procmon_ref_values(dev_priv, port); + ret = cnl_verify_procmon_ref_values(dev_priv, phy); - ret &= check_phy_reg(dev_priv, port, CNL_PORT_CL1CM_DW5, + ret &= check_phy_reg(dev_priv, phy, CNL_PORT_CL1CM_DW5, CL_POWER_DOWN_ENABLE, CL_POWER_DOWN_ENABLE); return ret; @@ -157,7 +156,7 @@ static void cnl_combo_phys_init(struct drm_i915_private *dev_priv) I915_WRITE(CHICKEN_MISC_2, val); /* Dummy PORT_A to get the correct CNL register from the ICL macro */ - cnl_set_procmon_ref_values(dev_priv, PORT_A); + cnl_set_procmon_ref_values(dev_priv, PHY_A); val = I915_READ(CNL_PORT_COMP_DW0); val |= COMP_INIT; @@ -181,35 +180,39 @@ static void cnl_combo_phys_uninit(struct drm_i915_private *dev_priv) } static bool 
icl_combo_phy_enabled(struct drm_i915_private *dev_priv, - enum port port) + enum phy phy) { - return !(I915_READ(ICL_PHY_MISC(port)) & - ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN) && - (I915_READ(ICL_PORT_COMP_DW0(port)) & COMP_INIT); + /* The PHY C added by EHL has no PHY_MISC register */ + if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_C) + return I915_READ(ICL_PORT_COMP_DW0(phy)) & COMP_INIT; + else + return !(I915_READ(ICL_PHY_MISC(phy)) & + ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN) && + (I915_READ(ICL_PORT_COMP_DW0(phy)) & COMP_INIT); } static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv, - enum port port) + enum phy phy) { bool ret; - if (!icl_combo_phy_enabled(dev_priv, port)) + if (!icl_combo_phy_enabled(dev_priv, phy)) return false; - ret = cnl_verify_procmon_ref_values(dev_priv, port); + ret = cnl_verify_procmon_ref_values(dev_priv, phy); - if (port == PORT_A) - ret &= check_phy_reg(dev_priv, port, ICL_PORT_COMP_DW8(port), + if (phy == PHY_A) + ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW8(phy), IREFGEN, IREFGEN); - ret &= check_phy_reg(dev_priv, port, ICL_PORT_CL_DW5(port), + ret &= check_phy_reg(dev_priv, phy, ICL_PORT_CL_DW5(phy), CL_POWER_DOWN_ENABLE, CL_POWER_DOWN_ENABLE); return ret; } void intel_combo_phy_power_up_lanes(struct drm_i915_private *dev_priv, - enum port port, bool is_dsi, + enum phy phy, bool is_dsi, int lane_count, bool lane_reversal) { u8 lane_mask; @@ -254,66 +257,120 @@ void intel_combo_phy_power_up_lanes(struct drm_i915_private *dev_priv, } } - val = I915_READ(ICL_PORT_CL_DW10(port)); + val = I915_READ(ICL_PORT_CL_DW10(phy)); val &= ~PWR_DOWN_LN_MASK; val |= lane_mask << PWR_DOWN_LN_SHIFT; - I915_WRITE(ICL_PORT_CL_DW10(port), val); + I915_WRITE(ICL_PORT_CL_DW10(phy), val); +} + +static u32 ehl_combo_phy_a_mux(struct drm_i915_private *i915, u32 val) +{ + bool ddi_a_present = i915->vbt.ddi_port_info[PORT_A].child != NULL; + bool ddi_d_present = i915->vbt.ddi_port_info[PORT_D].child != NULL; + bool dsi_present = intel_bios_is_dsi_present(i915, NULL); + + /* + * VBT's 'dvo port' field for child devices references the DDI, not + * the PHY. So if combo PHY A is wired up to drive an external + * display, we should see a child device present on PORT_D and + * nothing on PORT_A and no DSI. + */ + if (ddi_d_present && !ddi_a_present && !dsi_present) + return val | ICL_PHY_MISC_MUX_DDID; + + /* + * If we encounter a VBT that claims to have an external display on + * DDI-D _and_ an internal display on DDI-A/DSI leave an error message + * in the log and let the internal display win. + */ + if (ddi_d_present) + DRM_ERROR("VBT claims to have both internal and external displays on PHY A. Configuring for internal.\n"); + + return val & ~ICL_PHY_MISC_MUX_DDID; } static void icl_combo_phys_init(struct drm_i915_private *dev_priv) { - enum port port; + enum phy phy; - for_each_combo_port(dev_priv, port) { + for_each_combo_phy(dev_priv, phy) { u32 val; - if (icl_combo_phy_verify_state(dev_priv, port)) { - DRM_DEBUG_DRIVER("Port %c combo PHY already enabled, won't reprogram it.\n", - port_name(port)); + if (icl_combo_phy_verify_state(dev_priv, phy)) { + DRM_DEBUG_DRIVER("Combo PHY %c already enabled, won't reprogram it.\n", + phy_name(phy)); continue; } - val = I915_READ(ICL_PHY_MISC(port)); + /* + * Although EHL adds a combo PHY C, there's no PHY_MISC + * register for it and no need to program the + * DE_IO_COMP_PWR_DOWN setting on PHY C. 
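
The ehl_combo_phy_a_mux() logic above decides, purely from the VBT child devices, whether combo PHY A should drive an external display through DDI-D or stay on the internal DDI-A/DSI path. A reduced sketch of that decision; the MUX_DDID bit value and the function name are made up for illustration, and the three booleans stand in for the VBT lookups:

        #include <stdbool.h>
        #include <stdio.h>

        #define MUX_DDID (1u << 28)     /* placeholder bit, not the real layout */

        /* Route PHY A to DDI-D only when the VBT lists a child device on DDI-D
         * and nothing internal (DDI-A or DSI) claims the PHY. */
        static unsigned int phy_misc_a_mux(unsigned int val, bool ddi_a_present,
                                           bool ddi_d_present, bool dsi_present)
        {
                if (ddi_d_present && !ddi_a_present && !dsi_present)
                        return val | MUX_DDID;

                if (ddi_d_present)
                        fprintf(stderr, "VBT claims internal and external on PHY A; picking internal\n");

                return val & ~MUX_DDID;
        }

        int main(void)
        {
                /* external-only board: mux to DDI-D */
                printf("0x%08x\n", phy_misc_a_mux(0, false, true, false));
                /* conflicting VBT: internal display wins, as in the driver */
                printf("0x%08x\n", phy_misc_a_mux(MUX_DDID, true, true, false));
                return 0;
        }
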
+ */ + if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_C) + goto skip_phy_misc; + + /* + * EHL's combo PHY A can be hooked up to either an external + * display (via DDI-D) or an internal display (via DDI-A or + * the DSI DPHY). This is a motherboard design decision that + * can't be changed on the fly, so initialize the PHY's mux + * based on whether our VBT indicates the presence of any + * "internal" child devices. + */ + val = I915_READ(ICL_PHY_MISC(phy)); + if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_A) + val = ehl_combo_phy_a_mux(dev_priv, val); val &= ~ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN; - I915_WRITE(ICL_PHY_MISC(port), val); + I915_WRITE(ICL_PHY_MISC(phy), val); - cnl_set_procmon_ref_values(dev_priv, port); +skip_phy_misc: + cnl_set_procmon_ref_values(dev_priv, phy); - if (port == PORT_A) { - val = I915_READ(ICL_PORT_COMP_DW8(port)); + if (phy == PHY_A) { + val = I915_READ(ICL_PORT_COMP_DW8(phy)); val |= IREFGEN; - I915_WRITE(ICL_PORT_COMP_DW8(port), val); + I915_WRITE(ICL_PORT_COMP_DW8(phy), val); } - val = I915_READ(ICL_PORT_COMP_DW0(port)); + val = I915_READ(ICL_PORT_COMP_DW0(phy)); val |= COMP_INIT; - I915_WRITE(ICL_PORT_COMP_DW0(port), val); + I915_WRITE(ICL_PORT_COMP_DW0(phy), val); - val = I915_READ(ICL_PORT_CL_DW5(port)); + val = I915_READ(ICL_PORT_CL_DW5(phy)); val |= CL_POWER_DOWN_ENABLE; - I915_WRITE(ICL_PORT_CL_DW5(port), val); + I915_WRITE(ICL_PORT_CL_DW5(phy), val); } } static void icl_combo_phys_uninit(struct drm_i915_private *dev_priv) { - enum port port; + enum phy phy; - for_each_combo_port_reverse(dev_priv, port) { + for_each_combo_phy_reverse(dev_priv, phy) { u32 val; - if (port == PORT_A && - !icl_combo_phy_verify_state(dev_priv, port)) - DRM_WARN("Port %c combo PHY HW state changed unexpectedly\n", - port_name(port)); + if (phy == PHY_A && + !icl_combo_phy_verify_state(dev_priv, phy)) + DRM_WARN("Combo PHY %c HW state changed unexpectedly\n", + phy_name(phy)); + + /* + * Although EHL adds a combo PHY C, there's no PHY_MISC + * register for it and no need to program the + * DE_IO_COMP_PWR_DOWN setting on PHY C. 
+ */ + if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_C) + goto skip_phy_misc; - val = I915_READ(ICL_PHY_MISC(port)); + val = I915_READ(ICL_PHY_MISC(phy)); val |= ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN; - I915_WRITE(ICL_PHY_MISC(port), val); + I915_WRITE(ICL_PHY_MISC(phy), val); - val = I915_READ(ICL_PORT_COMP_DW0(port)); +skip_phy_misc: + val = I915_READ(ICL_PORT_COMP_DW0(phy)); val &= ~COMP_INIT; - I915_WRITE(ICL_PORT_COMP_DW0(port), val); + I915_WRITE(ICL_PORT_COMP_DW0(phy), val); } } diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.h b/drivers/gpu/drm/i915/display/intel_combo_phy.h index e6e195a83b19..660886f86c59 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.h +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.h @@ -7,14 +7,14 @@ #define __INTEL_COMBO_PHY_H__ #include <linux/types.h> -#include <drm/i915_drm.h> struct drm_i915_private; +enum phy; void intel_combo_phy_init(struct drm_i915_private *dev_priv); void intel_combo_phy_uninit(struct drm_i915_private *dev_priv); void intel_combo_phy_power_up_lanes(struct drm_i915_private *dev_priv, - enum port port, bool is_dsi, + enum phy phy, bool is_dsi, int lane_count, bool lane_reversal); #endif /* __INTEL_COMBO_PHY_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c index 41310f8e5a2a..1133c4e97bb4 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.c +++ b/drivers/gpu/drm/i915/display/intel_connector.c @@ -33,7 +33,7 @@ #include "i915_drv.h" #include "intel_connector.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_hdcp.h" int intel_connector_init(struct intel_connector *connector) @@ -118,7 +118,7 @@ int intel_connector_register(struct drm_connector *connector) if (ret) goto err; - if (i915_inject_load_failure()) { + if (i915_inject_probe_failure(to_i915(connector->dev))) { ret = -EFAULT; goto err_backlight; } @@ -277,7 +277,22 @@ intel_attach_aspect_ratio_property(struct drm_connector *connector) void intel_attach_colorspace_property(struct drm_connector *connector) { - if (!drm_mode_create_colorspace_property(connector)) - drm_object_attach_property(&connector->base, - connector->colorspace_property, 0); + switch (connector->connector_type) { + case DRM_MODE_CONNECTOR_HDMIA: + case DRM_MODE_CONNECTOR_HDMIB: + if (drm_mode_create_hdmi_colorspace_property(connector)) + return; + break; + case DRM_MODE_CONNECTOR_DisplayPort: + case DRM_MODE_CONNECTOR_eDP: + if (drm_mode_create_dp_colorspace_property(connector)) + return; + break; + default: + DRM_DEBUG_KMS("Colorspace property not supported\n"); + return; + } + + drm_object_attach_property(&connector->base, + connector->colorspace_property, 0); } diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 3fcf2f84bcce..f976b800b245 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -38,7 +38,7 @@ #include "intel_connector.h" #include "intel_crt.h" #include "intel_ddi.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_fifo_underrun.h" #include "intel_gmbus.h" #include "intel_hotplug.h" @@ -65,7 +65,7 @@ static struct intel_crt *intel_encoder_to_crt(struct intel_encoder *encoder) return container_of(encoder, struct intel_crt, base); } -static struct intel_crt *intel_attached_crt(struct drm_connector *connector) +static struct intel_crt *intel_attached_crt(struct intel_connector *connector) { return intel_encoder_to_crt(intel_attached_encoder(connector)); } @@ 
-132,9 +132,9 @@ static void intel_crt_get_config(struct intel_encoder *encoder, { pipe_config->output_types |= BIT(INTEL_OUTPUT_ANALOG); - pipe_config->base.adjusted_mode.flags |= intel_crt_get_flags(encoder); + pipe_config->hw.adjusted_mode.flags |= intel_crt_get_flags(encoder); - pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; + pipe_config->hw.adjusted_mode.crtc_clock = pipe_config->port_clock; } static void hsw_crt_get_config(struct intel_encoder *encoder, @@ -144,13 +144,13 @@ static void hsw_crt_get_config(struct intel_encoder *encoder, intel_ddi_get_config(encoder, pipe_config); - pipe_config->base.adjusted_mode.flags &= ~(DRM_MODE_FLAG_PHSYNC | + pipe_config->hw.adjusted_mode.flags &= ~(DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_NVSYNC); - pipe_config->base.adjusted_mode.flags |= intel_crt_get_flags(encoder); + pipe_config->hw.adjusted_mode.flags |= intel_crt_get_flags(encoder); - pipe_config->base.adjusted_mode.crtc_clock = lpt_get_iclkip(dev_priv); + pipe_config->hw.adjusted_mode.crtc_clock = lpt_get_iclkip(dev_priv); } /* Note: The caller is required to filter out dpms modes not supported by the @@ -161,8 +161,8 @@ static void intel_crt_set_dpms(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; u32 adpa; if (INTEL_GEN(dev_priv) >= 5) @@ -241,6 +241,14 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + intel_crtc_vblank_off(old_crtc_state); + + intel_disable_pipe(old_crtc_state); + + intel_ddi_disable_transcoder_func(old_crtc_state); + + ilk_pfit_disable(old_crtc_state); + intel_ddi_disable_pipe_clock(old_crtc_state); pch_post_disable_crt(encoder, old_crtc_state, old_conn_state); @@ -271,14 +279,14 @@ static void hsw_pre_enable_crt(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum pipe pipe = crtc->pipe; WARN_ON(!crtc_state->has_pch_encoder); intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); - dev_priv->display.fdi_link_train(crtc, crtc_state); + hsw_fdi_link_train(encoder, crtc_state); intel_ddi_enable_pipe_clock(crtc_state); } @@ -288,7 +296,7 @@ static void hsw_enable_crt(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum pipe pipe = crtc->pipe; WARN_ON(!crtc_state->has_pch_encoder); @@ -343,7 +351,7 @@ intel_crt_mode_valid(struct drm_connector *connector, /* The FDI receiver on LPT only supports 8bpc and only has 2 lanes. 
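
The comment above notes that the LPT FDI receiver only supports 8 bpc over two lanes, and the check that follows rejects modes needing more than two lanes at 24 bpp on the 270 MHz (2.16 Gbps per lane) link. Assuming the helper keeps roughly a 5% spread-spectrum margin (an assumption; the exact factor lives in ilk_get_lanes_required()), the arithmetic looks like this:

        #include <stdio.h>

        /* lanes ~= ceil(pixel_clock * bpp * margin / (link_clock * 8)) */
        static int fdi_lanes_required(int pixel_clock_khz, int link_clock_khz, int bpp)
        {
                long long bps = (long long)pixel_clock_khz * bpp * 21 / 20; /* ~5% margin, assumed */
                long long per_lane = (long long)link_clock_khz * 8;

                return (int)((bps + per_lane - 1) / per_lane);
        }

        int main(void)
        {
                /* 1080p60 (148.5 MHz): 2 lanes, fits on LPT */
                printf("%d\n", fdi_lanes_required(148500, 270000, 24));
                /* 200 MHz pixel clock: 3 lanes, rejected as MODE_CLOCK_HIGH */
                printf("%d\n", fdi_lanes_required(200000, 270000, 24));
                return 0;
        }
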
*/ if (HAS_PCH_LPT(dev_priv) && - (ironlake_get_lanes_required(mode->clock, 270000, 24) > 2)) + ilk_get_lanes_required(mode->clock, 270000, 24) > 2) return MODE_CLOCK_HIGH; /* HSW/BDW FDI limited to 4k */ @@ -358,7 +366,7 @@ static int intel_crt_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &pipe_config->hw.adjusted_mode; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; @@ -373,7 +381,7 @@ static int pch_crt_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &pipe_config->hw.adjusted_mode; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; @@ -390,7 +398,7 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &pipe_config->hw.adjusted_mode; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; @@ -419,10 +427,10 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, return 0; } -static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) +static bool ilk_crt_detect_hotplug(struct drm_connector *connector) { struct drm_device *dev = connector->dev; - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct drm_i915_private *dev_priv = to_i915(dev); u32 adpa; bool ret; @@ -432,7 +440,7 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) bool turn_off_dac = HAS_PCH_SPLIT(dev_priv); u32 save_adpa; - crt->force_hotplug_required = 0; + crt->force_hotplug_required = false; save_adpa = adpa = I915_READ(crt->adpa_reg); DRM_DEBUG_KMS("trigger hotplug detect cycle: adpa=0x%x\n", adpa); @@ -443,9 +451,9 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) I915_WRITE(crt->adpa_reg, adpa); - if (intel_wait_for_register(&dev_priv->uncore, + if (intel_de_wait_for_clear(dev_priv, crt->adpa_reg, - ADPA_CRT_HOTPLUG_FORCE_TRIGGER, 0, + ADPA_CRT_HOTPLUG_FORCE_TRIGGER, 1000)) DRM_DEBUG_KMS("timed out waiting for FORCE_TRIGGER"); @@ -469,7 +477,7 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) { struct drm_device *dev = connector->dev; - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct drm_i915_private *dev_priv = to_i915(dev); bool reenable_hpd; u32 adpa; @@ -497,10 +505,8 @@ static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) I915_WRITE(crt->adpa_reg, adpa); - if (intel_wait_for_register(&dev_priv->uncore, - crt->adpa_reg, - ADPA_CRT_HOTPLUG_FORCE_TRIGGER, 0, - 1000)) { + if (intel_de_wait_for_clear(dev_priv, crt->adpa_reg, + ADPA_CRT_HOTPLUG_FORCE_TRIGGER, 1000)) { DRM_DEBUG_KMS("timed out waiting for FORCE_TRIGGER"); I915_WRITE(crt->adpa_reg, save_adpa); } @@ -529,7 +535,7 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector) int i, tries = 0; if (HAS_PCH_SPLIT(dev_priv)) - return intel_ironlake_crt_detect_hotplug(connector); + return ilk_crt_detect_hotplug(connector); if (IS_VALLEYVIEW(dev_priv)) return valleyview_crt_detect_hotplug(connector); @@ -550,9 +556,8 @@ 
static bool intel_crt_detect_hotplug(struct drm_connector *connector) CRT_HOTPLUG_FORCE_DETECT, CRT_HOTPLUG_FORCE_DETECT); /* wait for FORCE_DETECT to go off */ - if (intel_wait_for_register(&dev_priv->uncore, PORT_HOTPLUG_EN, - CRT_HOTPLUG_FORCE_DETECT, 0, - 1000)) + if (intel_de_wait_for_clear(dev_priv, PORT_HOTPLUG_EN, + CRT_HOTPLUG_FORCE_DETECT, 1000)) DRM_DEBUG_KMS("timed out waiting for FORCE_DETECT to go off"); } @@ -604,7 +609,7 @@ static int intel_crt_ddc_get_modes(struct drm_connector *connector, static bool intel_crt_detect_ddc(struct drm_connector *connector) { - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev); struct edid *edid; struct i2c_adapter *i2c; @@ -790,7 +795,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct intel_encoder *intel_encoder = &crt->base; intel_wakeref_t wakeref; int status, ret; @@ -847,7 +852,7 @@ load_detect: } /* for pre-945g platforms use load detect */ - ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); + ret = intel_get_load_detect_pipe(connector, &tmp, ctx); if (ret > 0) { if (intel_crt_detect_ddc(connector)) status = connector_status_connected; @@ -867,6 +872,13 @@ load_detect: out: intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); + + /* + * Make sure the refs for power wells enabled during detect are + * dropped to avoid a new detect cycle triggered by HPD polling. + */ + intel_display_power_flush_work(dev_priv); + return status; } @@ -874,7 +886,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct intel_encoder *intel_encoder = &crt->base; intel_wakeref_t wakeref; struct i2c_adapter *i2c; @@ -913,7 +925,7 @@ void intel_crt_reset(struct drm_encoder *encoder) POSTING_READ(crt->adpa_reg); DRM_DEBUG_KMS("crt adpa set to 0x%x\n", adpa); - crt->force_hotplug_required = 1; + crt->force_hotplug_required = true; } } @@ -997,9 +1009,9 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.type = INTEL_OUTPUT_ANALOG; crt->base.cloneable = (1 << INTEL_OUTPUT_DVO) | (1 << INTEL_OUTPUT_HDMI); if (IS_I830(dev_priv)) - crt->base.crtc_mask = (1 << 0); + crt->base.pipe_mask = BIT(PIPE_A); else - crt->base.crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + crt->base.pipe_mask = ~0; if (IS_GEN(dev_priv, 2)) connector->interlace_allowed = 0; @@ -1051,7 +1063,7 @@ void intel_crt_init(struct drm_i915_private *dev_priv) /* * Configure the automatic hotplug detection stuff */ - crt->force_hotplug_required = 0; + crt->force_hotplug_required = false; /* * TODO: find a proper way to discover whether we need to set the the diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 7925a176f900..33f1dc3d7c1a 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -32,10 +32,11 @@ #include "intel_combo_phy.h" #include "intel_connector.h" #include "intel_ddi.h" +#include "intel_display_types.h" #include "intel_dp.h" +#include 
"intel_dp_mst.h" #include "intel_dp_link_training.h" #include "intel_dpio_phy.h" -#include "intel_drv.h" #include "intel_dsi.h" #include "intel_fifo_underrun.h" #include "intel_gmbus.h" @@ -45,6 +46,8 @@ #include "intel_lspcon.h" #include "intel_panel.h" #include "intel_psr.h" +#include "intel_sprite.h" +#include "intel_tc.h" #include "intel_vdsc.h" struct ddi_buf_trans { @@ -585,6 +588,40 @@ static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations[] = { { 0x0, 0x00, 0x00 }, /* 3 0 */ }; +struct tgl_dkl_phy_ddi_buf_trans { + u32 dkl_vswing_control; + u32 dkl_preshoot_control; + u32 dkl_de_emphasis_control; +}; + +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ + { 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */ + { 0x2, 0x0, 0x0b }, /* 0 2 400mV 6 dB */ + { 0x0, 0x0, 0x19 }, /* 0 3 400mV 9.5 dB */ + { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ + { 0x2, 0x0, 0x03 }, /* 1 1 600mV 3.5 dB */ + { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ + { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ + { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ + { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ +}; + +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = { + /* HDMI Preset VS Pre-emph */ + { 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */ + { 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */ + { 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */ + { 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */ + { 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */ + { 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */ + { 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */ + { 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */ + { 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */ + { 0x0, 0x0, 0xA }, /* 10 Full -3 dB */ +}; + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -846,8 +883,8 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } static const struct cnl_ddi_buf_trans * -icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port, - int type, int rate, int *n_entries) +icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, int type, int rate, + int *n_entries) { if (type == INTEL_OUTPUT_HDMI) { *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); @@ -866,13 +903,20 @@ icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port, static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) { + struct ddi_vbt_port_info *port_info = &dev_priv->vbt.ddi_port_info[port]; int n_entries, level, default_entry; + enum phy phy = intel_port_to_phy(dev_priv, port); - level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; - - if (INTEL_GEN(dev_priv) >= 11) { - if (intel_port_is_combophy(dev_priv, port)) - icl_get_combo_buf_trans(dev_priv, port, INTEL_OUTPUT_HDMI, + if (INTEL_GEN(dev_priv) >= 12) { + if (intel_phy_is_combo(dev_priv, phy)) + icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, + 0, &n_entries); + else + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); + default_entry = n_entries - 1; + } else if (INTEL_GEN(dev_priv) == 11) { + if (intel_phy_is_combo(dev_priv, phy)) + icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 0, &n_entries); else n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); @@ -897,12 +941,14 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por return 0; } - /* Choose a good default if VBT is badly populated */ - if (level == HDMI_LEVEL_SHIFT_UNKNOWN || level >= n_entries) - level = 
default_entry; - if (WARN_ON_ONCE(n_entries == 0)) return 0; + + if (port_info->hdmi_level_shift_set) + level = port_info->hdmi_level_shift; + else + level = default_entry; + if (WARN_ON_ONCE(level >= n_entries)) level = n_entries - 1; @@ -1047,6 +1093,8 @@ static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder, case DPLL_ID_ICL_MGPLL2: case DPLL_ID_ICL_MGPLL3: case DPLL_ID_ICL_MGPLL4: + case DPLL_ID_TGL_MGPLL5: + case DPLL_ID_TGL_MGPLL6: return DDI_CLK_SEL_MG; } } @@ -1060,18 +1108,14 @@ static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder, * DDI A (which is used for eDP) */ -void hsw_fdi_link_train(struct intel_crtc *crtc, +void hsw_fdi_link_train(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_encoder *encoder; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 temp, i, rx_ctl_val, ddi_pll_sel; - for_each_encoder_on_crtc(dev, &crtc->base, encoder) { - WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); - intel_prepare_dp_ddi_buffers(encoder, crtc_state); - } + intel_prepare_dp_ddi_buffers(encoder, crtc_state); /* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the * mode set "sequence for CRT port" document: @@ -1194,9 +1238,9 @@ void hsw_fdi_link_train(struct intel_crtc *crtc, static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_digital_port *intel_dig_port = - enc_to_dig_port(&encoder->base); + enc_to_dig_port(encoder); intel_dp->DP = intel_dig_port->saved_port_bits | DDI_BUF_CTL_ENABLE | DDI_BUF_TRANS_SELECT(0); @@ -1411,11 +1455,30 @@ static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv, ref_clock = dev_priv->cdclk.hw.ref; - m1 = pll_state->mg_pll_div1 & MG_PLL_DIV1_FBPREDIV_MASK; - m2_int = pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; - m2_frac = (pll_state->mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ? 
- (pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >> - MG_PLL_DIV0_FBDIV_FRAC_SHIFT : 0; + if (INTEL_GEN(dev_priv) >= 12) { + m1 = pll_state->mg_pll_div0 & DKL_PLL_DIV0_FBPREDIV_MASK; + m1 = m1 >> DKL_PLL_DIV0_FBPREDIV_SHIFT; + m2_int = pll_state->mg_pll_div0 & DKL_PLL_DIV0_FBDIV_INT_MASK; + + if (pll_state->mg_pll_bias & DKL_PLL_BIAS_FRAC_EN_H) { + m2_frac = pll_state->mg_pll_bias & + DKL_PLL_BIAS_FBDIV_FRAC_MASK; + m2_frac = m2_frac >> DKL_PLL_BIAS_FBDIV_SHIFT; + } else { + m2_frac = 0; + } + } else { + m1 = pll_state->mg_pll_div1 & MG_PLL_DIV1_FBPREDIV_MASK; + m2_int = pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; + + if (pll_state->mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) { + m2_frac = pll_state->mg_pll_div0 & + MG_PLL_DIV0_FBDIV_FRAC_MASK; + m2_frac = m2_frac >> MG_PLL_DIV0_FBDIV_FRAC_SHIFT; + } else { + m2_frac = 0; + } + } switch (pll_state->mg_clktop2_hsclkctl & MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK) { @@ -1465,8 +1528,8 @@ static void ddi_dotclock_get(struct intel_crtc_state *pipe_config) else if (intel_crtc_has_dp_encoder(pipe_config)) dotclock = intel_dotclock_calculate(pipe_config->port_clock, &pipe_config->dp_m_n); - else if (pipe_config->has_hdmi_sink && pipe_config->pipe_bpp == 36) - dotclock = pipe_config->port_clock * 2 / 3; + else if (pipe_config->has_hdmi_sink && pipe_config->pipe_bpp > 24) + dotclock = pipe_config->port_clock * 24 / pipe_config->pipe_bpp; else dotclock = pipe_config->port_clock; @@ -1477,7 +1540,7 @@ static void ddi_dotclock_get(struct intel_crtc_state *pipe_config) if (pipe_config->pixel_multiplier) dotclock /= pipe_config->pixel_multiplier; - pipe_config->base.adjusted_mode.crtc_clock = dotclock; + pipe_config->hw.adjusted_mode.crtc_clock = dotclock; } static void icl_ddi_clock_get(struct intel_encoder *encoder, @@ -1486,9 +1549,10 @@ static void icl_ddi_clock_get(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dpll_hw_state *pll_state = &pipe_config->dpll_hw_state; enum port port = encoder->port; + enum phy phy = intel_port_to_phy(dev_priv, port); int link_clock; - if (intel_port_is_combophy(dev_priv, port)) { + if (intel_phy_is_combo(dev_priv, phy)) { link_clock = cnl_calc_wrpll_link(dev_priv, pll_state); } else { enum intel_dpll_id pll_id = intel_get_shared_dpll_id(dev_priv, @@ -1689,9 +1753,10 @@ static void intel_ddi_clock_get(struct intel_encoder *encoder, hsw_ddi_clock_get(encoder, pipe_config); } -void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) +void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 temp; @@ -1701,66 +1766,63 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) WARN_ON(transcoder_is_dsi(cpu_transcoder)); - temp = TRANS_MSA_SYNC_CLK; - - if (crtc_state->limited_color_range) - temp |= TRANS_MSA_CEA_RANGE; + temp = DP_MSA_MISC_SYNC_CLOCK; switch (crtc_state->pipe_bpp) { case 18: - temp |= TRANS_MSA_6_BPC; + temp |= DP_MSA_MISC_6_BPC; break; case 24: - temp |= TRANS_MSA_8_BPC; + temp |= DP_MSA_MISC_8_BPC; break; case 30: - temp |= TRANS_MSA_10_BPC; + temp |= DP_MSA_MISC_10_BPC; break; case 36: - temp |= TRANS_MSA_12_BPC; + temp |= DP_MSA_MISC_12_BPC; break; default: 
MISSING_CASE(crtc_state->pipe_bpp); break; } + /* nonsense combination */ + WARN_ON(crtc_state->limited_color_range && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); + + if (crtc_state->limited_color_range) + temp |= DP_MSA_MISC_COLOR_CEA_RGB; + /* * As per DP 1.2 spec section 2.3.4.3 while sending * YCBCR 444 signals we should program MSA MISC1/0 fields with - * colorspace information. The output colorspace encoding is BT601. + * colorspace information. */ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) - temp |= TRANS_MSA_SAMPLING_444 | TRANS_MSA_CLRSP_YCBCR; + temp |= DP_MSA_MISC_COLOR_YCBCR_444_BT709; + /* * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication * of Color Encoding Format and Content Color Gamut] while sending - * YCBCR 420 signals we should program MSA MISC1 fields which - * indicate VSC SDP for the Pixel Encoding/Colorimetry Format. + * YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields + * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format. */ - if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) - temp |= TRANS_MSA_USE_VSC_SDP; - I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); -} + if (intel_dp_needs_vsc_sdp(crtc_state, conn_state)) + temp |= DP_MSA_MISC_COLOR_VSC_SDP; -void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, - bool state) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - u32 temp; - - temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); - if (state == true) - temp |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC; - else - temp &= ~TRANS_DDI_DP_VC_PAYLOAD_ALLOC; - I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); + I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); } -void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) +/* + * Returns the TRANS_DDI_FUNC_CTL value based on CRTC state. + * + * Only intended to be used by intel_ddi_enable_transcoder_func() and + * intel_ddi_config_transcoder_func(). 
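
Stepping back to the ddi_dotclock_get() change earlier in this file: instead of special-casing 36 bpp HDMI, the pixel clock is now derived as port_clock * 24 / pipe_bpp for any deep-colour depth. A quick sanity check of that formula with illustrative numbers (the helper name below is invented):

        #include <stdio.h>

        static int hdmi_dotclock(int port_clock, int pipe_bpp)
        {
                /* deep colour scales the TMDS clock by pipe_bpp/24, so undo it */
                return pipe_bpp > 24 ? port_clock * 24 / pipe_bpp : port_clock;
        }

        int main(void)
        {
                /* 12 bpc (36 bpp): 2/3 of the port clock, the old special case */
                printf("%d\n", hdmi_dotclock(445500, 36)); /* 297000 */
                /* 10 bpc (30 bpp): 4/5 of the port clock, newly covered */
                printf("%d\n", hdmi_dotclock(371250, 30)); /* 297000 */
                return 0;
        }
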
+ */ +static u32 +intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; @@ -1770,7 +1832,10 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) /* Enable TRANS_DDI_FUNC_CTL for the pipe to work in HDMI mode */ temp = TRANS_DDI_FUNC_ENABLE; - temp |= TRANS_DDI_SELECT_PORT(port); + if (INTEL_GEN(dev_priv) >= 12) + temp |= TGL_TRANS_DDI_SELECT_PORT(port); + else + temp |= TRANS_DDI_SELECT_PORT(port); switch (crtc_state->pipe_bpp) { case 18: @@ -1789,9 +1854,9 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) BUG(); } - if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) + if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) temp |= TRANS_DDI_PVSYNC; - if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) + if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) temp |= TRANS_DDI_PHSYNC; if (cpu_transcoder == TRANSCODER_EDP) { @@ -1834,25 +1899,69 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; temp |= DDI_PORT_WIDTH(crtc_state->lane_count); + + if (INTEL_GEN(dev_priv) >= 12) { + enum transcoder master; + + master = crtc_state->mst_master_transcoder; + WARN_ON(master == INVALID_TRANSCODER); + temp |= TRANS_DDI_MST_TRANSPORT_SELECT(master); + } } else { temp |= TRANS_DDI_MODE_SELECT_DP_SST; temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } + return temp; +} + +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + u32 temp; + + temp = intel_ddi_transcoder_func_reg_val_get(crtc_state); + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) + temp |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC; + I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); +} + +/* + * Same as intel_ddi_enable_transcoder_func(), but it does not set the enable + * bit. 
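
intel_ddi_config_transcoder_func(), introduced here, reuses the same value computation as the enable path and only masks TRANS_DDI_FUNC_ENABLE back out before writing. The shape of that split, with placeholder names and a plain variable standing in for the register:

        #include <stdio.h>

        #define FUNC_ENABLE (1u << 31)

        static unsigned int func_ctl;   /* stands in for TRANS_DDI_FUNC_CTL */

        static unsigned int func_ctl_value(int lane_count)
        {
                /* common part: enable bit plus whatever mode/width bits apply */
                return FUNC_ENABLE | (unsigned int)(lane_count - 1) << 1;
        }

        static void config_transcoder_func(int lane_count)
        {
                /* program everything, but leave the transcoder disabled */
                func_ctl = func_ctl_value(lane_count) & ~FUNC_ENABLE;
        }

        static void enable_transcoder_func(int lane_count)
        {
                func_ctl = func_ctl_value(lane_count);
        }

        int main(void)
        {
                config_transcoder_func(4);
                printf("configured 0x%08x\n", func_ctl);
                enable_transcoder_func(4);
                printf("enabled    0x%08x\n", func_ctl);
                return 0;
        }

Keeping one helper for the value avoids the two paths drifting apart as new fields are added.
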
+ */ +static void +intel_ddi_config_transcoder_func(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + u32 temp; + + temp = intel_ddi_transcoder_func_reg_val_get(crtc_state); + temp &= ~TRANS_DDI_FUNC_ENABLE; I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - i915_reg_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder); - u32 val = I915_READ(reg); + u32 val; - val &= ~(TRANS_DDI_FUNC_ENABLE | TRANS_DDI_PORT_MASK | TRANS_DDI_DP_VC_PAYLOAD_ALLOC); - val |= TRANS_DDI_PORT_NONE; - I915_WRITE(reg, val); + val = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); + val &= ~TRANS_DDI_FUNC_ENABLE; + + if (INTEL_GEN(dev_priv) >= 12) { + if (!intel_dp_mst_is_master_trans(crtc_state)) + val &= ~TGL_TRANS_DDI_PORT_MASK; + } else { + val &= ~TRANS_DDI_PORT_MASK; + } + I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), val); if (dev_priv->quirks & QUIRK_INCREASE_DDI_DISABLED_TIME && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { @@ -2003,10 +2112,27 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, mst_pipe_mask = 0; for_each_pipe(dev_priv, p) { enum transcoder cpu_transcoder = (enum transcoder)p; + unsigned int port_mask, ddi_select; + intel_wakeref_t trans_wakeref; + + trans_wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_TRANSCODER(cpu_transcoder)); + if (!trans_wakeref) + continue; + + if (INTEL_GEN(dev_priv) >= 12) { + port_mask = TGL_TRANS_DDI_PORT_MASK; + ddi_select = TGL_TRANS_DDI_SELECT_PORT(port); + } else { + port_mask = TRANS_DDI_PORT_MASK; + ddi_select = TRANS_DDI_SELECT_PORT(port); + } tmp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); + intel_display_power_put(dev_priv, POWER_DOMAIN_TRANSCODER(cpu_transcoder), + trans_wakeref); - if ((tmp & TRANS_DDI_PORT_MASK) != TRANS_DDI_SELECT_PORT(port)) + if ((tmp & port_mask) != ddi_select) continue; if ((tmp & TRANS_DDI_MODE_SELECT_MASK) == @@ -2017,18 +2143,20 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, } if (!*pipe_mask) - DRM_DEBUG_KMS("No pipe for ddi port %c found\n", - port_name(port)); + DRM_DEBUG_KMS("No pipe for [ENCODER:%d:%s] found\n", + encoder->base.base.id, encoder->base.name); if (!mst_pipe_mask && hweight8(*pipe_mask) > 1) { - DRM_DEBUG_KMS("Multiple pipes for non DP-MST port %c (pipe_mask %02x)\n", - port_name(port), *pipe_mask); + DRM_DEBUG_KMS("Multiple pipes for [ENCODER:%d:%s] (pipe_mask %02x)\n", + encoder->base.base.id, encoder->base.name, + *pipe_mask); *pipe_mask = BIT(ffs(*pipe_mask) - 1); } if (mst_pipe_mask && mst_pipe_mask != *pipe_mask) - DRM_DEBUG_KMS("Conflicting MST and non-MST encoders for port %c (pipe_mask %02x mst_pipe_mask %02x)\n", - port_name(port), *pipe_mask, mst_pipe_mask); + DRM_DEBUG_KMS("Conflicting MST and non-MST state for [ENCODER:%d:%s] (pipe_mask %02x mst_pipe_mask %02x)\n", + encoder->base.base.id, encoder->base.name, + *pipe_mask, mst_pipe_mask); else *is_dp_mst = mst_pipe_mask; @@ -2038,8 +2166,9 @@ out: if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | BXT_PHY_LANE_POWERDOWN_ACK | BXT_PHY_LANE_ENABLED)) != 
BXT_PHY_LANE_ENABLED) - DRM_ERROR("Port %c enabled but PHY powered down? " - "(PHY_CTL %08x)\n", port_name(port), tmp); + DRM_ERROR("[ENCODER:%d:%s] enabled but PHY powered down? " + "(PHY_CTL %08x)\n", encoder->base.base.id, + encoder->base.name, tmp); } intel_display_power_put(dev_priv, encoder->power_domain, wakeref); @@ -2085,6 +2214,7 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port; + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); /* * TODO: Add support for MST encoders. Atm, the following should never @@ -2094,7 +2224,7 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder, if (WARN_ON(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))) return; - dig_port = enc_to_dig_port(&encoder->base); + dig_port = enc_to_dig_port(encoder); intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); /* @@ -2102,39 +2232,49 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder, * ports. */ if (intel_crtc_has_dp_encoder(crtc_state) || - intel_port_is_tc(dev_priv, encoder->port)) + intel_phy_is_tc(dev_priv, phy)) intel_display_power_get(dev_priv, intel_ddi_main_link_aux_domain(dig_port)); /* * VDSC power is needed when DSC is enabled */ - if (crtc_state->dsc_params.compression_enable) + if (crtc_state->dsc.compression_enable) intel_display_power_get(dev_priv, intel_dsc_power_domain(crtc_state)); } void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); enum port port = encoder->port; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - if (cpu_transcoder != TRANSCODER_EDP) - I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), - TRANS_CLK_SEL_PORT(port)); + if (cpu_transcoder != TRANSCODER_EDP) { + if (INTEL_GEN(dev_priv) >= 12) + I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), + TGL_TRANS_CLK_SEL_PORT(port)); + else + I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), + TRANS_CLK_SEL_PORT(port)); + } } void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - if (cpu_transcoder != TRANSCODER_EDP) - I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), - TRANS_CLK_SEL_DISABLED); + if (cpu_transcoder != TRANSCODER_EDP) { + if (INTEL_GEN(dev_priv) >= 12) + I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), + TGL_TRANS_CLK_SEL_DISABLED); + else + I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), + TRANS_CLK_SEL_DISABLED); + } } static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, @@ -2154,7 +2294,7 @@ static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, static void skl_ddi_set_iboost(struct intel_encoder *encoder, int level, enum intel_output_type type) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; u8 iboost; @@ -2225,13 +2365,20 @@ static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder, u8 
intel_ddi_dp_voltage_max(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; + enum phy phy = intel_port_to_phy(dev_priv, port); int n_entries; - if (INTEL_GEN(dev_priv) >= 11) { - if (intel_port_is_combophy(dev_priv, port)) - icl_get_combo_buf_trans(dev_priv, port, encoder->type, + if (INTEL_GEN(dev_priv) >= 12) { + if (intel_phy_is_combo(dev_priv, phy)) + icl_get_combo_buf_trans(dev_priv, encoder->type, + intel_dp->link_rate, &n_entries); + else + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); + } else if (INTEL_GEN(dev_priv) == 11) { + if (intel_phy_is_combo(dev_priv, phy)) + icl_get_combo_buf_trans(dev_priv, encoder->type, intel_dp->link_rate, &n_entries); else n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); @@ -2357,7 +2504,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, width = 4; rate = 0; /* Rate is always < than 6GHz for HDMI */ } else { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); width = intel_dp->lane_count; rate = intel_dp->link_rate; @@ -2413,15 +2560,15 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, } static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type, + u32 level, enum phy phy, int type, int rate) { const struct cnl_ddi_buf_trans *ddi_translations = NULL; u32 n_entries, val; int ln; - ddi_translations = icl_get_combo_buf_trans(dev_priv, port, type, - rate, &n_entries); + ddi_translations = icl_get_combo_buf_trans(dev_priv, type, rate, + &n_entries); if (!ddi_translations) return; @@ -2431,41 +2578,41 @@ static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, } /* Set PORT_TX_DW5 */ - val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val = I915_READ(ICL_PORT_TX_DW5_LN0(phy)); val &= ~(SCALING_MODE_SEL_MASK | RTERM_SELECT_MASK | TAP2_DISABLE | TAP3_DISABLE); val |= SCALING_MODE_SEL(0x2); val |= RTERM_SELECT(0x6); val |= TAP3_DISABLE; - I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + I915_WRITE(ICL_PORT_TX_DW5_GRP(phy), val); /* Program PORT_TX_DW2 */ - val = I915_READ(ICL_PORT_TX_DW2_LN0(port)); + val = I915_READ(ICL_PORT_TX_DW2_LN0(phy)); val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | RCOMP_SCALAR_MASK); val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_sel); val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_sel); /* Program Rcomp scalar for every table entry */ val |= RCOMP_SCALAR(0x98); - I915_WRITE(ICL_PORT_TX_DW2_GRP(port), val); + I915_WRITE(ICL_PORT_TX_DW2_GRP(phy), val); /* Program PORT_TX_DW4 */ /* We cannot write to GRP. It would overwrite individual loadgen. 
*/ for (ln = 0; ln <= 3; ln++) { - val = I915_READ(ICL_PORT_TX_DW4_LN(ln, port)); + val = I915_READ(ICL_PORT_TX_DW4_LN(ln, phy)); val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | CURSOR_COEFF_MASK); val |= POST_CURSOR_1(ddi_translations[level].dw4_post_cursor_1); val |= POST_CURSOR_2(ddi_translations[level].dw4_post_cursor_2); val |= CURSOR_COEFF(ddi_translations[level].dw4_cursor_coeff); - I915_WRITE(ICL_PORT_TX_DW4_LN(ln, port), val); + I915_WRITE(ICL_PORT_TX_DW4_LN(ln, phy), val); } /* Program PORT_TX_DW7 */ - val = I915_READ(ICL_PORT_TX_DW7_LN0(port)); + val = I915_READ(ICL_PORT_TX_DW7_LN0(phy)); val &= ~N_SCALAR_MASK; val |= N_SCALAR(ddi_translations[level].dw7_n_scalar); - I915_WRITE(ICL_PORT_TX_DW7_GRP(port), val); + I915_WRITE(ICL_PORT_TX_DW7_GRP(phy), val); } static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, @@ -2473,7 +2620,7 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, enum intel_output_type type) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); int width = 0; int rate = 0; u32 val; @@ -2483,7 +2630,7 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, width = 4; /* Rate is always < than 6GHz for HDMI */ } else { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); width = intel_dp->lane_count; rate = intel_dp->link_rate; @@ -2494,12 +2641,12 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, * set PORT_PCS_DW1 cmnkeeper_enable to 1b, * else clear to 0b. */ - val = I915_READ(ICL_PORT_PCS_DW1_LN0(port)); + val = I915_READ(ICL_PORT_PCS_DW1_LN0(phy)); if (type == INTEL_OUTPUT_HDMI) val &= ~COMMON_KEEPER_EN; else val |= COMMON_KEEPER_EN; - I915_WRITE(ICL_PORT_PCS_DW1_GRP(port), val); + I915_WRITE(ICL_PORT_PCS_DW1_GRP(phy), val); /* 2. Program loadgen select */ /* @@ -2509,33 +2656,33 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, * > 6 GHz (LN0=0, LN1=0, LN2=0, LN3=0) */ for (ln = 0; ln <= 3; ln++) { - val = I915_READ(ICL_PORT_TX_DW4_LN(ln, port)); + val = I915_READ(ICL_PORT_TX_DW4_LN(ln, phy)); val &= ~LOADGEN_SELECT; if ((rate <= 600000 && width == 4 && ln >= 1) || (rate <= 600000 && width < 4 && (ln == 1 || ln == 2))) { val |= LOADGEN_SELECT; } - I915_WRITE(ICL_PORT_TX_DW4_LN(ln, port), val); + I915_WRITE(ICL_PORT_TX_DW4_LN(ln, phy), val); } /* 3. Set PORT_CL_DW5 SUS Clock Config to 11b */ - val = I915_READ(ICL_PORT_CL_DW5(port)); + val = I915_READ(ICL_PORT_CL_DW5(phy)); val |= SUS_CLOCK_CONFIG; - I915_WRITE(ICL_PORT_CL_DW5(port), val); + I915_WRITE(ICL_PORT_CL_DW5(phy), val); /* 4. Clear training enable to change swing values */ - val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val = I915_READ(ICL_PORT_TX_DW5_LN0(phy)); val &= ~TX_TRAINING_EN; - I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + I915_WRITE(ICL_PORT_TX_DW5_GRP(phy), val); /* 5. Program swing and de-emphasis */ - icl_ddi_combo_vswing_program(dev_priv, level, port, type, rate); + icl_ddi_combo_vswing_program(dev_priv, level, phy, type, rate); /* 6. 
Set training enable to trigger update */ - val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val = I915_READ(ICL_PORT_TX_DW5_LN0(phy)); val |= TX_TRAINING_EN; - I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + I915_WRITE(ICL_PORT_TX_DW5_GRP(phy), val); } static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, @@ -2543,7 +2690,7 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); const struct icl_mg_phy_ddi_buf_trans *ddi_translations; u32 n_entries, val; int ln; @@ -2559,33 +2706,33 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, /* Set MG_TX_LINK_PARAMS cri_use_fs32 to 0. */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_LINK_PARAMS(ln, port)); + val = I915_READ(MG_TX1_LINK_PARAMS(ln, tc_port)); val &= ~CRI_USE_FS32; - I915_WRITE(MG_TX1_LINK_PARAMS(ln, port), val); + I915_WRITE(MG_TX1_LINK_PARAMS(ln, tc_port), val); - val = I915_READ(MG_TX2_LINK_PARAMS(ln, port)); + val = I915_READ(MG_TX2_LINK_PARAMS(ln, tc_port)); val &= ~CRI_USE_FS32; - I915_WRITE(MG_TX2_LINK_PARAMS(ln, port), val); + I915_WRITE(MG_TX2_LINK_PARAMS(ln, tc_port), val); } /* Program MG_TX_SWINGCTRL with values from vswing table */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_SWINGCTRL(ln, port)); + val = I915_READ(MG_TX1_SWINGCTRL(ln, tc_port)); val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK; val |= CRI_TXDEEMPH_OVERRIDE_17_12( ddi_translations[level].cri_txdeemph_override_17_12); - I915_WRITE(MG_TX1_SWINGCTRL(ln, port), val); + I915_WRITE(MG_TX1_SWINGCTRL(ln, tc_port), val); - val = I915_READ(MG_TX2_SWINGCTRL(ln, port)); + val = I915_READ(MG_TX2_SWINGCTRL(ln, tc_port)); val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK; val |= CRI_TXDEEMPH_OVERRIDE_17_12( ddi_translations[level].cri_txdeemph_override_17_12); - I915_WRITE(MG_TX2_SWINGCTRL(ln, port), val); + I915_WRITE(MG_TX2_SWINGCTRL(ln, tc_port), val); } /* Program MG_TX_DRVCTRL with values from vswing table */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_DRVCTRL(ln, port)); + val = I915_READ(MG_TX1_DRVCTRL(ln, tc_port)); val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK | CRI_TXDEEMPH_OVERRIDE_5_0_MASK); val |= CRI_TXDEEMPH_OVERRIDE_5_0( @@ -2593,9 +2740,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, CRI_TXDEEMPH_OVERRIDE_11_6( ddi_translations[level].cri_txdeemph_override_11_6) | CRI_TXDEEMPH_OVERRIDE_EN; - I915_WRITE(MG_TX1_DRVCTRL(ln, port), val); + I915_WRITE(MG_TX1_DRVCTRL(ln, tc_port), val); - val = I915_READ(MG_TX2_DRVCTRL(ln, port)); + val = I915_READ(MG_TX2_DRVCTRL(ln, tc_port)); val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK | CRI_TXDEEMPH_OVERRIDE_5_0_MASK); val |= CRI_TXDEEMPH_OVERRIDE_5_0( @@ -2603,7 +2750,7 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, CRI_TXDEEMPH_OVERRIDE_11_6( ddi_translations[level].cri_txdeemph_override_11_6) | CRI_TXDEEMPH_OVERRIDE_EN; - I915_WRITE(MG_TX2_DRVCTRL(ln, port), val); + I915_WRITE(MG_TX2_DRVCTRL(ln, tc_port), val); /* FIXME: Program CRI_LOADGEN_SEL after the spec is updated */ } @@ -2614,17 +2761,17 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, * values from table for which TX1 and TX2 enabled. 
*/ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_CLKHUB(ln, port)); + val = I915_READ(MG_CLKHUB(ln, tc_port)); if (link_clock < 300000) val |= CFG_LOW_RATE_LKREN_EN; else val &= ~CFG_LOW_RATE_LKREN_EN; - I915_WRITE(MG_CLKHUB(ln, port), val); + I915_WRITE(MG_CLKHUB(ln, tc_port), val); } /* Program the MG_TX_DCC<LN, port being used> based on the link frequency */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_DCC(ln, port)); + val = I915_READ(MG_TX1_DCC(ln, tc_port)); val &= ~CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK; if (link_clock <= 500000) { val &= ~CFG_AMI_CK_DIV_OVERRIDE_EN; @@ -2632,9 +2779,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val |= CFG_AMI_CK_DIV_OVERRIDE_EN | CFG_AMI_CK_DIV_OVERRIDE_VAL(1); } - I915_WRITE(MG_TX1_DCC(ln, port), val); + I915_WRITE(MG_TX1_DCC(ln, tc_port), val); - val = I915_READ(MG_TX2_DCC(ln, port)); + val = I915_READ(MG_TX2_DCC(ln, tc_port)); val &= ~CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK; if (link_clock <= 500000) { val &= ~CFG_AMI_CK_DIV_OVERRIDE_EN; @@ -2642,18 +2789,18 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val |= CFG_AMI_CK_DIV_OVERRIDE_EN | CFG_AMI_CK_DIV_OVERRIDE_VAL(1); } - I915_WRITE(MG_TX2_DCC(ln, port), val); + I915_WRITE(MG_TX2_DCC(ln, tc_port), val); } /* Program MG_TX_PISO_READLOAD with values from vswing table */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_PISO_READLOAD(ln, port)); + val = I915_READ(MG_TX1_PISO_READLOAD(ln, tc_port)); val |= CRI_CALCINIT; - I915_WRITE(MG_TX1_PISO_READLOAD(ln, port), val); + I915_WRITE(MG_TX1_PISO_READLOAD(ln, tc_port), val); - val = I915_READ(MG_TX2_PISO_READLOAD(ln, port)); + val = I915_READ(MG_TX2_PISO_READLOAD(ln, tc_port)); val |= CRI_CALCINIT; - I915_WRITE(MG_TX2_PISO_READLOAD(ln, port), val); + I915_WRITE(MG_TX2_PISO_READLOAD(ln, tc_port), val); } } @@ -2663,14 +2810,77 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, enum intel_output_type type) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - if (intel_port_is_combophy(dev_priv, port)) + if (intel_phy_is_combo(dev_priv, phy)) icl_combo_phy_ddi_vswing_sequence(encoder, level, type); else icl_mg_phy_ddi_vswing_sequence(encoder, link_clock, level); } +static void +tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, + u32 level) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); + const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; + u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; + + if (encoder->type == INTEL_OUTPUT_HDMI) { + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); + ddi_translations = tgl_dkl_phy_hdmi_ddi_trans; + } else { + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); + ddi_translations = tgl_dkl_phy_dp_ddi_trans; + } + + if (level >= n_entries) + level = n_entries - 1; + + dpcnt_mask = (DKL_TX_PRESHOOT_COEFF_MASK | + DKL_TX_DE_EMPAHSIS_COEFF_MASK | + DKL_TX_VSWING_CONTROL_MASK); + dpcnt_val = DKL_TX_VSWING_CONTROL(ddi_translations[level].dkl_vswing_control); + dpcnt_val |= DKL_TX_DE_EMPHASIS_COEFF(ddi_translations[level].dkl_de_emphasis_control); + dpcnt_val |= DKL_TX_PRESHOOT_COEFF(ddi_translations[level].dkl_preshoot_control); + + for (ln = 0; ln < 2; ln++) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); + + I915_WRITE(DKL_TX_PMD_LANE_SUS(tc_port), 0); + + /* All the registers are RMW */ 
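
Each Dekel lane is selected by writing HIP_INDEX_REG first, and every DKL_TX_DPCNTL* update in the loop that follows is a read-modify-write, as the comment above says: clear the field masks, then OR in the table values. The pattern in isolation, with rmw() standing in for the I915_READ()/I915_WRITE() pair and a plain variable for the register:

        #include <stdio.h>

        static unsigned int dpcntl;             /* fake register backing store */

        static void rmw(unsigned int clear_mask, unsigned int set_bits)
        {
                unsigned int val = dpcntl;      /* I915_READ()  */

                val &= ~clear_mask;             /* drop the old field contents  */
                val |= set_bits;                /* install the new table values */
                dpcntl = val;                   /* I915_WRITE() */
        }

        int main(void)
        {
                dpcntl = 0xffffffff;
                rmw(0x000000ff, 0x0000002a);    /* only the low byte changes */
                printf("0x%08x\n", dpcntl);     /* prints 0xffffff2a */
                return 0;
        }
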
+ val = I915_READ(DKL_TX_DPCNTL0(tc_port)); + val &= ~dpcnt_mask; + val |= dpcnt_val; + I915_WRITE(DKL_TX_DPCNTL0(tc_port), val); + + val = I915_READ(DKL_TX_DPCNTL1(tc_port)); + val &= ~dpcnt_mask; + val |= dpcnt_val; + I915_WRITE(DKL_TX_DPCNTL1(tc_port), val); + + val = I915_READ(DKL_TX_DPCNTL2(tc_port)); + val &= ~DKL_TX_DP20BITMODE; + I915_WRITE(DKL_TX_DPCNTL2(tc_port), val); + } +} + +static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder, + int link_clock, + u32 level, + enum intel_output_type type) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + + if (intel_phy_is_combo(dev_priv, phy)) + icl_combo_phy_ddi_vswing_sequence(encoder, level, type); + else + tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level); +} + static u32 translate_signal_level(int signal_levels) { int i; @@ -2702,7 +2912,10 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) struct intel_encoder *encoder = &dport->base; int level = intel_ddi_dp_level(intel_dp); - if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 12) + tgl_ddi_vswing_sequence(encoder, intel_dp->link_rate, + level, encoder->type); + else if (INTEL_GEN(dev_priv) >= 11) icl_ddi_vswing_sequence(encoder, intel_dp->link_rate, level, encoder->type); else if (IS_CANNONLAKE(dev_priv)) @@ -2728,12 +2941,13 @@ u32 ddi_signal_levels(struct intel_dp *intel_dp) static inline u32 icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv, - enum port port) + enum phy phy) { - if (intel_port_is_combophy(dev_priv, port)) { - return ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(port); - } else if (intel_port_is_tc(dev_priv, port)) { - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); + if (intel_phy_is_combo(dev_priv, phy)) { + return ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); + } else if (intel_phy_is_tc(dev_priv, phy)) { + enum tc_port tc_port = intel_port_to_tc(dev_priv, + (enum port)phy); return ICL_DPCLKA_CFGCR0_TC_CLK_OFF(tc_port); } @@ -2746,23 +2960,33 @@ static void icl_map_plls_to_ports(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_shared_dpll *pll = crtc_state->shared_dpll; - enum port port = encoder->port; + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); u32 val; mutex_lock(&dev_priv->dpll_lock); - val = I915_READ(DPCLKA_CFGCR0_ICL); - WARN_ON((val & icl_dpclka_cfgcr0_clk_off(dev_priv, port)) == 0); + val = I915_READ(ICL_DPCLKA_CFGCR0); + WARN_ON((val & icl_dpclka_cfgcr0_clk_off(dev_priv, phy)) == 0); - if (intel_port_is_combophy(dev_priv, port)) { - val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); - val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); - I915_WRITE(DPCLKA_CFGCR0_ICL, val); - POSTING_READ(DPCLKA_CFGCR0_ICL); + if (intel_phy_is_combo(dev_priv, phy)) { + /* + * Even though this register references DDIs, note that we + * want to pass the PHY rather than the port (DDI). For + * ICL, port=phy in all cases so it doesn't matter, but for + * EHL the bspec notes the following: + * + * "DDID clock tied to DDIA clock, so DPCLKA_CFGCR0 DDIA + * Clock Select chooses the PLL for both DDIA and DDID and + * drives port A in all cases." 
+ */ + val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + val |= ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy); + I915_WRITE(ICL_DPCLKA_CFGCR0, val); + POSTING_READ(ICL_DPCLKA_CFGCR0); } - val &= ~icl_dpclka_cfgcr0_clk_off(dev_priv, port); - I915_WRITE(DPCLKA_CFGCR0_ICL, val); + val &= ~icl_dpclka_cfgcr0_clk_off(dev_priv, phy); + I915_WRITE(ICL_DPCLKA_CFGCR0, val); mutex_unlock(&dev_priv->dpll_lock); } @@ -2770,23 +2994,50 @@ static void icl_map_plls_to_ports(struct intel_encoder *encoder, static void icl_unmap_plls_to_ports(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); u32 val; mutex_lock(&dev_priv->dpll_lock); - val = I915_READ(DPCLKA_CFGCR0_ICL); - val |= icl_dpclka_cfgcr0_clk_off(dev_priv, port); - I915_WRITE(DPCLKA_CFGCR0_ICL, val); + val = I915_READ(ICL_DPCLKA_CFGCR0); + val |= icl_dpclka_cfgcr0_clk_off(dev_priv, phy); + I915_WRITE(ICL_DPCLKA_CFGCR0, val); mutex_unlock(&dev_priv->dpll_lock); } +static void icl_sanitize_port_clk_off(struct drm_i915_private *dev_priv, + u32 port_mask, bool ddi_clk_needed) +{ + enum port port; + u32 val; + + val = I915_READ(ICL_DPCLKA_CFGCR0); + for_each_port_masked(port, port_mask) { + enum phy phy = intel_port_to_phy(dev_priv, port); + bool ddi_clk_off = val & icl_dpclka_cfgcr0_clk_off(dev_priv, + phy); + + if (ddi_clk_needed == !ddi_clk_off) + continue; + + /* + * Punt on the case now where clock is gated, but it would + * be needed by the port. Something else is really broken then. + */ + if (WARN_ON(ddi_clk_needed)) + continue; + + DRM_NOTE("PHY %c is disabled/in DSI mode with an ungated DDI clock, gate it\n", + phy_name(phy)); + val |= icl_dpclka_cfgcr0_clk_off(dev_priv, phy); + I915_WRITE(ICL_DPCLKA_CFGCR0, val); + } +} + void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 val; - enum port port; u32 port_mask; bool ddi_clk_needed; @@ -2835,27 +3086,7 @@ void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder) ddi_clk_needed = false; } - val = I915_READ(DPCLKA_CFGCR0_ICL); - for_each_port_masked(port, port_mask) { - bool ddi_clk_ungated = !(val & - icl_dpclka_cfgcr0_clk_off(dev_priv, - port)); - - if (ddi_clk_needed == ddi_clk_ungated) - continue; - - /* - * Punt on the case now where clock is gated, but it would - * be needed by the port. Something else is really broken then. 
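
The new icl_sanitize_port_clk_off() applies one rule per port: an ungated DDI clock that the port does not need gets gated; a clock that is gated but would be needed is only warned about, since something else is already broken. A sketch of that loop over a port mask; the bit layout and helper names below are invented for illustration:

        #include <stdbool.h>
        #include <stdio.h>

        static unsigned int cfgcr0;                     /* fake DPCLKA_CFGCR0 */

        static unsigned int clk_off_bit(int port)
        {
                return 1u << (10 + port);               /* made-up bit layout */
        }

        static void sanitize_port_clk_off(unsigned int port_mask, bool clk_needed)
        {
                int port;

                for (port = 0; port < 8; port++) {
                        bool clk_off;

                        if (!(port_mask & (1u << port)))
                                continue;

                        clk_off = cfgcr0 & clk_off_bit(port);
                        if (clk_needed == !clk_off)
                                continue;               /* state already matches */

                        if (clk_needed) {
                                fprintf(stderr, "port %d: clock gated but needed\n", port);
                                continue;               /* punt, as the driver does */
                        }

                        cfgcr0 |= clk_off_bit(port);    /* ungated but unused: gate it */
                }
        }

        int main(void)
        {
                sanitize_port_clk_off(0x3, false);
                printf("0x%08x\n", cfgcr0);             /* prints 0x00000c00 */
                return 0;
        }
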
- */ - if (WARN_ON(ddi_clk_needed)) - continue; - - DRM_NOTE("Port %c is disabled/in DSI mode with an ungated DDI clock, gate it\n", - port_name(port)); - val |= icl_dpclka_cfgcr0_clk_off(dev_priv, port); - I915_WRITE(DPCLKA_CFGCR0_ICL, val); - } + icl_sanitize_port_clk_off(dev_priv, port_mask, ddi_clk_needed); } static void intel_ddi_clk_select(struct intel_encoder *encoder, @@ -2863,6 +3094,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; + enum phy phy = intel_port_to_phy(dev_priv, port); u32 val; const struct intel_shared_dpll *pll = crtc_state->shared_dpll; @@ -2872,9 +3104,15 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, mutex_lock(&dev_priv->dpll_lock); if (INTEL_GEN(dev_priv) >= 11) { - if (!intel_port_is_combophy(dev_priv, port)) + if (!intel_phy_is_combo(dev_priv, phy)) I915_WRITE(DDI_CLK_SEL(port), icl_pll_to_ddi_clk_sel(encoder, crtc_state)); + else if (IS_ELKHARTLAKE(dev_priv) && port >= PORT_C) + /* + * MG does not exist but the programming is required + * to ungate DDIC and DDID + */ + I915_WRITE(DDI_CLK_SEL(port), DDI_CLK_SEL_MG); } else if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); @@ -2912,9 +3150,11 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; + enum phy phy = intel_port_to_phy(dev_priv, port); if (INTEL_GEN(dev_priv) >= 11) { - if (!intel_port_is_combophy(dev_priv, port)) + if (!intel_phy_is_combo(dev_priv, phy) || + (IS_ELKHARTLAKE(dev_priv) && port >= PORT_C)) I915_WRITE(DDI_CLK_SEL(port), DDI_CLK_SEL_NONE); } else if (IS_CANNONLAKE(dev_priv)) { I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | @@ -2927,133 +3167,90 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder) } } -static void icl_enable_phy_clock_gating(struct intel_digital_port *dig_port) -{ - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->base.port; - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - u32 val; - int ln; - - if (tc_port == PORT_TC_NONE) - return; - - for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_DP_MODE(ln, port)); - val |= MG_DP_MODE_CFG_TR2PWR_GATING | - MG_DP_MODE_CFG_TRPWR_GATING | - MG_DP_MODE_CFG_CLNPWR_GATING | - MG_DP_MODE_CFG_DIGPWR_GATING | - MG_DP_MODE_CFG_GAONPWR_GATING; - I915_WRITE(MG_DP_MODE(ln, port), val); - } - - val = I915_READ(MG_MISC_SUS0(tc_port)); - val |= MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(3) | - MG_MISC_SUS0_CFG_TR2PWR_GATING | - MG_MISC_SUS0_CFG_CL2PWR_GATING | - MG_MISC_SUS0_CFG_GAONPWR_GATING | - MG_MISC_SUS0_CFG_TRPWR_GATING | - MG_MISC_SUS0_CFG_CL1PWR_GATING | - MG_MISC_SUS0_CFG_DGPWR_GATING; - I915_WRITE(MG_MISC_SUS0(tc_port), val); -} - -static void icl_disable_phy_clock_gating(struct intel_digital_port *dig_port) -{ - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->base.port; - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - u32 val; - int ln; - - if (tc_port == PORT_TC_NONE) - return; - - for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_DP_MODE(ln, port)); - val &= ~(MG_DP_MODE_CFG_TR2PWR_GATING | - MG_DP_MODE_CFG_TRPWR_GATING | - MG_DP_MODE_CFG_CLNPWR_GATING | - MG_DP_MODE_CFG_DIGPWR_GATING | - MG_DP_MODE_CFG_GAONPWR_GATING); - I915_WRITE(MG_DP_MODE(ln, port), val); - } - - val = 
I915_READ(MG_MISC_SUS0(tc_port)); - val &= ~(MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK | - MG_MISC_SUS0_CFG_TR2PWR_GATING | - MG_MISC_SUS0_CFG_CL2PWR_GATING | - MG_MISC_SUS0_CFG_GAONPWR_GATING | - MG_MISC_SUS0_CFG_TRPWR_GATING | - MG_MISC_SUS0_CFG_CL1PWR_GATING | - MG_MISC_SUS0_CFG_DGPWR_GATING); - I915_WRITE(MG_MISC_SUS0(tc_port), val); -} - -static void icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port) +static void +icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - enum port port = intel_dig_port->base.port; - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - u32 ln0, ln1, lane_info; + enum tc_port tc_port = intel_port_to_tc(dev_priv, intel_dig_port->base.port); + u32 ln0, ln1, pin_assignment; + u8 width; - if (tc_port == PORT_TC_NONE || intel_dig_port->tc_type == TC_PORT_TBT) + if (intel_dig_port->tc_mode == TC_PORT_TBT_ALT) return; - ln0 = I915_READ(MG_DP_MODE(0, port)); - ln1 = I915_READ(MG_DP_MODE(1, port)); + if (INTEL_GEN(dev_priv) >= 12) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x0)); + ln0 = I915_READ(DKL_DP_MODE(tc_port)); + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x1)); + ln1 = I915_READ(DKL_DP_MODE(tc_port)); + } else { + ln0 = I915_READ(MG_DP_MODE(0, tc_port)); + ln1 = I915_READ(MG_DP_MODE(1, tc_port)); + } - switch (intel_dig_port->tc_type) { - case TC_PORT_TYPEC: - ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); - ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); + ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE); + ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); - lane_info = (I915_READ(PORT_TX_DFLEXDPSP) & - DP_LANE_ASSIGNMENT_MASK(tc_port)) >> - DP_LANE_ASSIGNMENT_SHIFT(tc_port); + /* DPPATC */ + pin_assignment = intel_tc_port_get_pin_assignment_mask(intel_dig_port); + width = crtc_state->lane_count; - switch (lane_info) { - case 0x1: - case 0x4: - break; - case 0x2: + switch (pin_assignment) { + case 0x0: + WARN_ON(intel_dig_port->tc_mode != TC_PORT_LEGACY); + if (width == 1) { + ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; + } else { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } + break; + case 0x1: + if (width == 4) { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } + break; + case 0x2: + if (width == 2) { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } + break; + case 0x3: + case 0x5: + if (width == 1) { ln0 |= MG_DP_MODE_CFG_DP_X1_MODE; - break; - case 0x3: - ln0 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - break; - case 0x8: ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; - break; - case 0xC: - ln1 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - break; - case 0xF: - ln0 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - ln1 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - break; - default: - MISSING_CASE(lane_info); + } else { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - - case TC_PORT_LEGACY: - ln0 |= MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE; - ln1 |= MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE; + case 0x4: + case 0x6: + if (width == 1) { + ln0 |= MG_DP_MODE_CFG_DP_X1_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; + } else { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } break; - default: - 
MISSING_CASE(intel_dig_port->tc_type); - return; + MISSING_CASE(pin_assignment); } - I915_WRITE(MG_DP_MODE(0, port), ln0); - I915_WRITE(MG_DP_MODE(1, port), ln1); + if (INTEL_GEN(dev_priv) >= 12) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x0)); + I915_WRITE(DKL_DP_MODE(tc_port), ln0); + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x1)); + I915_WRITE(DKL_DP_MODE(tc_port), ln1); + } else { + I915_WRITE(MG_DP_MODE(0, tc_port), ln0); + I915_WRITE(MG_DP_MODE(1, tc_port), ln1); + } } static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp, @@ -3070,20 +3267,19 @@ static void intel_ddi_enable_fec(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + struct intel_dp *intel_dp; u32 val; if (!crtc_state->fec_enable) return; - val = I915_READ(DP_TP_CTL(port)); + intel_dp = enc_to_intel_dp(encoder); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val |= DP_TP_CTL_FEC_ENABLE; - I915_WRITE(DP_TP_CTL(port), val); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); - if (intel_wait_for_register(&dev_priv->uncore, DP_TP_STATUS(port), - DP_TP_STATUS_FEC_ENABLE_LIVE, - DP_TP_STATUS_FEC_ENABLE_LIVE, - 1)) + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, + DP_TP_STATUS_FEC_ENABLE_LIVE, 1)) DRM_ERROR("Timed out waiting for FEC Enable Status\n"); } @@ -3091,42 +3287,273 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + struct intel_dp *intel_dp; u32 val; if (!crtc_state->fec_enable) return; - val = I915_READ(DP_TP_CTL(port)); + intel_dp = enc_to_intel_dp(encoder); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~DP_TP_CTL_FEC_ENABLE; - I915_WRITE(DP_TP_CTL(port), val); - POSTING_READ(DP_TP_CTL(port)); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); + POSTING_READ(intel_dp->regs.dp_tp_ctl); } -static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +static void +tgl_clear_psr2_transcoder_exitline(const struct intel_crtc_state *cstate) +{ + struct drm_i915_private *dev_priv = to_i915(cstate->uapi.crtc->dev); + u32 val; + + if (!cstate->dc3co_exitline) + return; + + val = I915_READ(EXITLINE(cstate->cpu_transcoder)); + val &= ~(EXITLINE_MASK | EXITLINE_ENABLE); + I915_WRITE(EXITLINE(cstate->cpu_transcoder), val); +} + +static void +tgl_set_psr2_transcoder_exitline(const struct intel_crtc_state *cstate) +{ + u32 val, exit_scanlines; + struct drm_i915_private *dev_priv = to_i915(cstate->uapi.crtc->dev); + + if (!cstate->dc3co_exitline) + return; + + exit_scanlines = cstate->dc3co_exitline; + exit_scanlines <<= EXITLINE_SHIFT; + val = I915_READ(EXITLINE(cstate->cpu_transcoder)); + val &= ~(EXITLINE_MASK | EXITLINE_ENABLE); + val |= exit_scanlines; + val |= EXITLINE_ENABLE; + I915_WRITE(EXITLINE(cstate->cpu_transcoder), val); +} + +static void tgl_dc3co_exitline_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *cstate) +{ + u32 exit_scanlines; + struct drm_i915_private *dev_priv = to_i915(cstate->uapi.crtc->dev); + u32 crtc_vdisplay = cstate->hw.adjusted_mode.crtc_vdisplay; + + cstate->dc3co_exitline = 0; + + if (!(dev_priv->csr.allowed_dc_mask & DC_STATE_EN_DC3CO)) + return; + + /* B.Specs:49196 DC3CO only works with pipeA and DDIA.*/ + if 
(to_intel_crtc(cstate->uapi.crtc)->pipe != PIPE_A || + encoder->port != PORT_A) + return; + + if (!cstate->has_psr2 || !cstate->hw.active) + return; + + /* + * DC3CO Exit time 200us B.Spec 49196 + * PSR2 transcoder Early Exit scanlines = ROUNDUP(200 / line time) + 1 + */ + exit_scanlines = + intel_usecs_to_scanlines(&cstate->hw.adjusted_mode, 200) + 1; + + if (WARN_ON(exit_scanlines > crtc_vdisplay)) + return; + + cstate->dc3co_exitline = crtc_vdisplay - exit_scanlines; + DRM_DEBUG_KMS("DC3CO exit scanlines %d\n", cstate->dc3co_exitline); +} + +static void tgl_dc3co_exitline_get_config(struct intel_crtc_state *crtc_state) +{ + u32 val; + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + + if (INTEL_GEN(dev_priv) < 12) + return; + + val = I915_READ(EXITLINE(crtc_state->cpu_transcoder)); + + if (val & EXITLINE_ENABLE) + crtc_state->dc3co_exitline = val & EXITLINE_MASK; +} + +static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + int level = intel_ddi_dp_level(intel_dp); + enum transcoder transcoder = crtc_state->cpu_transcoder; + + tgl_set_psr2_transcoder_exitline(crtc_state); + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, + crtc_state->lane_count, is_mst); + + intel_dp->regs.dp_tp_ctl = TGL_DP_TP_CTL(transcoder); + intel_dp->regs.dp_tp_status = TGL_DP_TP_STATUS(transcoder); + + /* + * 1. Enable Power Wells + * + * This was handled at the beginning of intel_atomic_commit_tail(), + * before we called down into this function. + */ + + /* 2. Enable Panel Power if PPS is required */ + intel_edp_panel_on(intel_dp); + + /* + * 3. For non-TBT Type-C ports, set FIA lane count + * (DFLEXDPSP.DPX4TXLATC) + * + * This was done before tgl_ddi_pre_enable_dp by + * hsw_crtc_enable()->intel_encoders_pre_pll_enable(). + */ + + /* + * 4. Enable the port PLL. + * + * The PLL enabling itself was already done before this function by + * hsw_crtc_enable()->intel_enable_shared_dpll(). We need only + * configure the PLL to port mapping here. + */ + intel_ddi_clk_select(encoder, crtc_state); + + /* 5. If IO power is controlled through PWR_WELL_CTL, Enable IO Power */ + if (!intel_phy_is_tc(dev_priv, phy) || + dig_port->tc_mode != TC_PORT_TBT_ALT) + intel_display_power_get(dev_priv, + dig_port->ddi_io_power_domain); + + /* 6. Program DP_MODE */ + icl_program_mg_dp_mode(dig_port, crtc_state); + + /* + * 7. The rest of the below are substeps under the bspec's "Enable and + * Train Display Port" step. Note that steps that are specific to + * MST will be handled by intel_mst_pre_enable_dp() before/after it + * calls into this function. Also intel_mst_pre_enable_dp() only calls + * us when active_mst_links==0, so any steps designated for "single + * stream or multi-stream master transcoder" can just be performed + * unconditionally here. + */ + + /* + * 7.a Configure Transcoder Clock Select to direct the Port clock to the + * Transcoder. 
+ */ + intel_ddi_enable_pipe_clock(crtc_state); + + /* + * 7.b Configure TRANS_DDI_FUNC_CTL DDI Select, DDI Mode Select & MST + * Transport Select + */ + intel_ddi_config_transcoder_func(crtc_state); + + /* + * 7.c Configure & enable DP_TP_CTL with link training pattern 1 + * selected + * + * This will be handled by the intel_dp_start_link_train() farther + * down this function. + */ + + /* 7.e Configure voltage swing and related IO settings */ + tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level, + encoder->type); + + /* + * 7.f Combo PHY: Configure PORT_CL_DW10 Static Power Down to power up + * the used lanes of the DDI. + */ + if (intel_phy_is_combo(dev_priv, phy)) { + bool lane_reversal = + dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; + + intel_combo_phy_power_up_lanes(dev_priv, phy, false, + crtc_state->lane_count, + lane_reversal); + } + + /* + * 7.g Configure and enable DDI_BUF_CTL + * 7.h Wait for DDI_BUF_CTL DDI Idle Status = 0b (Not Idle), timeout + * after 500 us. + * + * We only configure what the register value will be here. Actual + * enabling happens during link training farther down. + */ + intel_ddi_init_dp_buf_reg(encoder); + + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + + intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); + /* + * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit + * in the FEC_CONFIGURATION register to 1 before initiating link + * training + */ + intel_dp_sink_set_fec_ready(intel_dp, crtc_state); + + /* + * 7.i Follow DisplayPort specification training sequence (see notes for + * failure handling) + * 7.j If DisplayPort multi-stream - Set DP_TP_CTL link training to Idle + * Pattern, wait for 5 idle patterns (DP_TP_STATUS Min_Idles_Sent) + * (timeout after 800 us) + */ + intel_dp_start_link_train(intel_dp); + + /* 7.k Set DP_TP_CTL link training to Normal */ + if (!is_trans_port_sync_mode(crtc_state)) + intel_dp_stop_link_train(intel_dp); + + /* 7.l Configure and enable FEC if needed */ + intel_ddi_enable_fec(encoder, crtc_state); + intel_dsc_enable(encoder, crtc_state); +} + +static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + enum phy phy = intel_port_to_phy(dev_priv, port); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); int level = intel_ddi_dp_level(intel_dp); - WARN_ON(is_mst && (port == PORT_A || port == PORT_E)); + if (INTEL_GEN(dev_priv) < 11) + WARN_ON(is_mst && (port == PORT_A || port == PORT_E)); + else + WARN_ON(is_mst && port == PORT_A); intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count, is_mst); + intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port); + intel_dp->regs.dp_tp_status = DP_TP_STATUS(port); + intel_edp_panel_on(intel_dp); intel_ddi_clk_select(encoder, crtc_state); - intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + if (!intel_phy_is_tc(dev_priv, phy) || + dig_port->tc_mode != TC_PORT_TBT_ALT) + intel_display_power_get(dev_priv, + dig_port->ddi_io_power_domain); - icl_program_mg_dp_mode(dig_port); - icl_disable_phy_clock_gating(dig_port); + 
icl_program_mg_dp_mode(dig_port, crtc_state); if (INTEL_GEN(dev_priv) >= 11) icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, @@ -3138,11 +3565,11 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, else intel_prepare_dp_ddi_buffers(encoder, crtc_state); - if (intel_port_is_combophy(dev_priv, port)) { + if (intel_phy_is_combo(dev_priv, phy)) { bool lane_reversal = dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; - intel_combo_phy_power_up_lanes(dev_priv, port, false, + intel_combo_phy_power_up_lanes(dev_priv, phy, false, crtc_state->lane_count, lane_reversal); } @@ -3154,39 +3581,58 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, true); intel_dp_sink_set_fec_ready(intel_dp, crtc_state); intel_dp_start_link_train(intel_dp); - if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) + if ((port != PORT_A || INTEL_GEN(dev_priv) >= 9) && + !is_trans_port_sync_mode(crtc_state)) intel_dp_stop_link_train(intel_dp); intel_ddi_enable_fec(encoder, crtc_state); - icl_enable_phy_clock_gating(dig_port); - if (!is_mst) intel_ddi_enable_pipe_clock(crtc_state); intel_dsc_enable(encoder, crtc_state); } +static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (INTEL_GEN(dev_priv) >= 12) + tgl_ddi_pre_enable_dp(encoder, crtc_state, conn_state); + else + hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state); + + /* MST will call a setting of MSA after an allocating of Virtual Channel + * from MST encoder pre_enable callback. + */ + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) + intel_ddi_set_dp_msa(crtc_state, conn_state); +} + static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; int level = intel_ddi_hdmi_level(dev_priv, port); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); intel_ddi_clk_select(encoder, crtc_state); intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - icl_program_mg_dp_mode(dig_port); - icl_disable_phy_clock_gating(dig_port); + icl_program_mg_dp_mode(dig_port, crtc_state); - if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 12) + tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock, + level, INTEL_OUTPUT_HDMI); + else if (INTEL_GEN(dev_priv) == 11) icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level, INTEL_OUTPUT_HDMI); else if (IS_CANNONLAKE(dev_priv)) @@ -3196,8 +3642,6 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, else intel_prepare_hdmi_ddi_buffers(encoder, level); - icl_enable_phy_clock_gating(dig_port); - if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI); @@ -3212,7 +3656,7 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc 
*crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; @@ -3240,12 +3684,12 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder, intel_ddi_pre_enable_hdmi(encoder, crtc_state, conn_state); } else { struct intel_lspcon *lspcon = - enc_to_intel_lspcon(&encoder->base); + enc_to_intel_lspcon(encoder); intel_ddi_pre_enable_dp(encoder, crtc_state, conn_state); if (lspcon->active) { struct intel_digital_port *dig_port = - enc_to_dig_port(&encoder->base); + enc_to_dig_port(encoder); dig_port->set_infoframes(encoder, crtc_state->has_infoframe, @@ -3269,10 +3713,14 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder, wait = true; } - val = I915_READ(DP_TP_CTL(port)); - val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); - val |= DP_TP_CTL_LINK_TRAIN_PAT1; - I915_WRITE(DP_TP_CTL(port), val); + if (intel_crtc_has_dp_encoder(crtc_state)) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + val = I915_READ(intel_dp->regs.dp_tp_ctl); + val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); + val |= DP_TP_CTL_LINK_TRAIN_PAT1; + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); + } /* Disable FEC in DP Sink */ intel_ddi_disable_fec_state(encoder, crtc_state); @@ -3286,29 +3734,52 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_dp *intel_dp = &dig_port->dp; bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - if (!is_mst) { - intel_ddi_disable_pipe_clock(old_crtc_state); - /* - * Power down sink before disabling the port, otherwise we end - * up getting interrupts from the sink on detecting link loss. - */ - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + /* + * Power down sink before disabling the port, otherwise we end + * up getting interrupts from the sink on detecting link loss. 
+ */ + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + + if (INTEL_GEN(dev_priv) >= 12) { + if (is_mst) { + enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; + u32 val; + + val = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); + val &= ~TGL_TRANS_DDI_PORT_MASK; + I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), val); + } + } else { + if (!is_mst) + intel_ddi_disable_pipe_clock(old_crtc_state); } intel_disable_ddi_buf(encoder, old_crtc_state); + /* + * From TGL spec: "If single stream or multi-stream master transcoder: + * Configure Transcoder Clock select to direct no clock to the + * transcoder" + */ + if (INTEL_GEN(dev_priv) >= 12) + intel_ddi_disable_pipe_clock(old_crtc_state); + intel_edp_panel_vdd_on(intel_dp); intel_edp_panel_off(intel_dp); - intel_display_power_put_unchecked(dev_priv, - dig_port->ddi_io_power_domain); + if (!intel_phy_is_tc(dev_priv, phy) || + dig_port->tc_mode != TC_PORT_TBT_ALT) + intel_display_power_put_unchecked(dev_priv, + dig_port->ddi_io_power_domain); intel_ddi_clk_disable(encoder); + tgl_clear_psr2_transcoder_exitline(old_crtc_state); } static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, @@ -3316,7 +3787,7 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_hdmi *intel_hdmi = &dig_port->hdmi; dig_port->set_infoframes(encoder, false, @@ -3334,11 +3805,46 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, intel_dp_dual_mode_set_tmds_output(intel_hdmi, false); } +static void icl_disable_transcoder_port_sync(const struct intel_crtc_state *old_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (old_crtc_state->master_transcoder == INVALID_TRANSCODER) + return; + + DRM_DEBUG_KMS("Disabling Transcoder Port Sync on Slave Transcoder %s\n", + transcoder_name(old_crtc_state->cpu_transcoder)); + + I915_WRITE(TRANS_DDI_FUNC_CTL2(old_crtc_state->cpu_transcoder), 0); +} + static void intel_ddi_post_disable(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + bool is_tc_port = intel_phy_is_tc(dev_priv, phy); + + if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) { + intel_crtc_vblank_off(old_crtc_state); + + intel_disable_pipe(old_crtc_state); + + if (INTEL_GEN(dev_priv) >= 11) + icl_disable_transcoder_port_sync(old_crtc_state); + + intel_ddi_disable_transcoder_func(old_crtc_state); + + intel_dsc_disable(old_crtc_state); + + if (INTEL_GEN(dev_priv) >= 9) + skl_scaler_disable(old_crtc_state); + else + ilk_pfit_disable(old_crtc_state); + } /* * When called from DP MST code: @@ -3362,6 +3868,13 @@ static void intel_ddi_post_disable(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) >= 11) icl_unmap_plls_to_ports(encoder); + + if (intel_crtc_has_dp_encoder(old_crtc_state) || is_tc_port) + intel_display_power_put_unchecked(dev_priv, + intel_ddi_main_link_aux_domain(dig_port)); + + if (is_tc_port) + intel_tc_port_put_link(dig_port); } void 
intel_ddi_fdi_post_disable(struct intel_encoder *encoder, @@ -3403,7 +3916,7 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; if (port == PORT_A && INTEL_GEN(dev_priv) < 9) @@ -3411,7 +3924,8 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder, intel_edp_backlight_on(crtc_state, conn_state); intel_psr_enable(intel_dp, crtc_state); - intel_dp_ycbcr_420_enable(intel_dp, crtc_state); + intel_dp_vsc_enable(intel_dp, crtc_state, conn_state); + intel_dp_hdr_metadata_enable(intel_dp, crtc_state, conn_state); intel_edp_drrs_enable(intel_dp, crtc_state); if (crtc_state->has_audio) @@ -3422,12 +3936,12 @@ static i915_reg_t gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv, enum port port) { - static const i915_reg_t regs[] = { - [PORT_A] = CHICKEN_TRANS_EDP, - [PORT_B] = CHICKEN_TRANS_A, - [PORT_C] = CHICKEN_TRANS_B, - [PORT_D] = CHICKEN_TRANS_C, - [PORT_E] = CHICKEN_TRANS_A, + static const enum transcoder trans[] = { + [PORT_A] = TRANSCODER_EDP, + [PORT_B] = TRANSCODER_A, + [PORT_C] = TRANSCODER_B, + [PORT_D] = TRANSCODER_C, + [PORT_E] = TRANSCODER_A, }; WARN_ON(INTEL_GEN(dev_priv) < 9); @@ -3435,7 +3949,7 @@ gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv, if (WARN_ON(port < PORT_A || port > PORT_E)) port = PORT_A; - return regs[port]; + return CHICKEN_TRANS(trans[port]); } static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, @@ -3443,7 +3957,7 @@ static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct drm_connector *connector = conn_state->connector; enum port port = encoder->port; @@ -3511,14 +4025,16 @@ static void intel_enable_ddi(struct intel_encoder *encoder, /* Enable hdcp if it's desired */ if (conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) - intel_hdcp_enable(to_intel_connector(conn_state->connector)); + intel_hdcp_enable(to_intel_connector(conn_state->connector), + crtc_state->cpu_transcoder, + (u8)conn_state->hdcp_content_type); } static void intel_disable_ddi_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_dp->link_trained = false; @@ -3566,9 +4082,9 @@ static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - intel_ddi_set_pipe_settings(crtc_state); + intel_ddi_set_dp_msa(crtc_state, conn_state); intel_psr_update(intel_dp, crtc_state); intel_edp_drrs_enable(intel_dp, crtc_state); @@ -3580,44 +4096,68 @@ static void intel_ddi_update_pipe(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { + struct intel_connector *connector = + to_intel_connector(conn_state->connector); + 
struct intel_hdcp *hdcp = &connector->hdcp; + bool content_protection_type_changed = + (conn_state->hdcp_content_type != hdcp->content_type && + conn_state->content_protection != + DRM_MODE_CONTENT_PROTECTION_UNDESIRED); + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) intel_ddi_update_pipe_dp(encoder, crtc_state, conn_state); + /* + * During the HDCP encryption session if Type change is requested, + * disable the HDCP and reenable it with new TYPE value. + */ if (conn_state->content_protection == - DRM_MODE_CONTENT_PROTECTION_DESIRED) - intel_hdcp_enable(to_intel_connector(conn_state->connector)); - else if (conn_state->content_protection == - DRM_MODE_CONTENT_PROTECTION_UNDESIRED) - intel_hdcp_disable(to_intel_connector(conn_state->connector)); + DRM_MODE_CONTENT_PROTECTION_UNDESIRED || + content_protection_type_changed) + intel_hdcp_disable(connector); + + /* + * Mark the hdcp state as DESIRED after the hdcp disable of type + * change procedure. + */ + if (content_protection_type_changed) { + mutex_lock(&hdcp->mutex); + hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; + schedule_work(&hdcp->prop_work); + mutex_unlock(&hdcp->mutex); + } + + if (conn_state->content_protection == + DRM_MODE_CONTENT_PROTECTION_DESIRED || + content_protection_type_changed) + intel_hdcp_enable(connector, + crtc_state->cpu_transcoder, + (u8)conn_state->hdcp_content_type); } -static void intel_ddi_set_fia_lane_count(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, - enum port port) +static void +intel_ddi_update_prepare(struct intel_atomic_state *state, + struct intel_encoder *encoder, + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - u32 val = I915_READ(PORT_TX_DFLEXDPMLE1); - bool lane_reversal = dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; - - val &= ~DFLEXDPMLE1_DPMLETC_MASK(tc_port); - switch (pipe_config->lane_count) { - case 1: - val |= (lane_reversal) ? DFLEXDPMLE1_DPMLETC_ML3(tc_port) : - DFLEXDPMLE1_DPMLETC_ML0(tc_port); - break; - case 2: - val |= (lane_reversal) ? DFLEXDPMLE1_DPMLETC_ML3_2(tc_port) : - DFLEXDPMLE1_DPMLETC_ML1_0(tc_port); - break; - case 4: - val |= DFLEXDPMLE1_DPMLETC_ML3_0(tc_port); - break; - default: - MISSING_CASE(pipe_config->lane_count); - } - I915_WRITE(PORT_TX_DFLEXDPMLE1, val); + struct intel_crtc_state *crtc_state = + crtc ? intel_atomic_get_new_crtc_state(state, crtc) : NULL; + int required_lanes = crtc_state ? 
crtc_state->lane_count : 1; + + WARN_ON(crtc && crtc->active); + + intel_tc_port_get_link(enc_to_dig_port(encoder), + required_lanes); + if (crtc_state && crtc_state->hw.active) + intel_update_active_dpll(state, crtc, encoder); +} + +static void +intel_ddi_update_complete(struct intel_atomic_state *state, + struct intel_encoder *encoder, + struct intel_crtc *crtc) +{ + intel_tc_port_put_link(enc_to_dig_port(encoder)); } static void @@ -3626,41 +4166,26 @@ intel_ddi_pre_pll_enable(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); - enum port port = encoder->port; + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + bool is_tc_port = intel_phy_is_tc(dev_priv, phy); - if (intel_crtc_has_dp_encoder(crtc_state) || - intel_port_is_tc(dev_priv, encoder->port)) + if (is_tc_port) + intel_tc_port_get_link(dig_port, crtc_state->lane_count); + + if (intel_crtc_has_dp_encoder(crtc_state) || is_tc_port) intel_display_power_get(dev_priv, intel_ddi_main_link_aux_domain(dig_port)); - if (IS_GEN9_LP(dev_priv)) + if (is_tc_port && dig_port->tc_mode != TC_PORT_TBT_ALT) + /* + * Program the lane count for static/dynamic connections on + * Type-C ports. Skip this step for TBT. + */ + intel_tc_port_set_fia_lane_count(dig_port, crtc_state->lane_count); + else if (IS_GEN9_LP(dev_priv)) bxt_ddi_phy_set_lane_optim_mask(encoder, crtc_state->lane_lat_optim_mask); - - /* - * Program the lane count for static/dynamic connections on Type-C ports. - * Skip this step for TBT. - */ - if (dig_port->tc_type == TC_PORT_UNKNOWN || - dig_port->tc_type == TC_PORT_TBT) - return; - - intel_ddi_set_fia_lane_count(encoder, crtc_state, port); -} - -static void -intel_ddi_post_pll_disable(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); - - if (intel_crtc_has_dp_encoder(crtc_state) || - intel_port_is_tc(dev_priv, encoder->port)) - intel_display_power_put_unchecked(dev_priv, - intel_ddi_main_link_aux_domain(dig_port)); } static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) @@ -3669,38 +4194,39 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum port port = intel_dig_port->base.port; - u32 val; + u32 dp_tp_ctl, ddi_buf_ctl; bool wait = false; - if (I915_READ(DP_TP_CTL(port)) & DP_TP_CTL_ENABLE) { - val = I915_READ(DDI_BUF_CTL(port)); - if (val & DDI_BUF_CTL_ENABLE) { - val &= ~DDI_BUF_CTL_ENABLE; - I915_WRITE(DDI_BUF_CTL(port), val); + dp_tp_ctl = I915_READ(intel_dp->regs.dp_tp_ctl); + + if (dp_tp_ctl & DP_TP_CTL_ENABLE) { + ddi_buf_ctl = I915_READ(DDI_BUF_CTL(port)); + if (ddi_buf_ctl & DDI_BUF_CTL_ENABLE) { + I915_WRITE(DDI_BUF_CTL(port), + ddi_buf_ctl & ~DDI_BUF_CTL_ENABLE); wait = true; } - val = I915_READ(DP_TP_CTL(port)); - val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); - val |= DP_TP_CTL_LINK_TRAIN_PAT1; - I915_WRITE(DP_TP_CTL(port), val); - POSTING_READ(DP_TP_CTL(port)); + dp_tp_ctl &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); + dp_tp_ctl |= DP_TP_CTL_LINK_TRAIN_PAT1; + I915_WRITE(intel_dp->regs.dp_tp_ctl, dp_tp_ctl); + 
POSTING_READ(intel_dp->regs.dp_tp_ctl); if (wait) intel_wait_ddi_buf_idle(dev_priv, port); } - val = DP_TP_CTL_ENABLE | - DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE; + dp_tp_ctl = DP_TP_CTL_ENABLE | + DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE; if (intel_dp->link_mst) - val |= DP_TP_CTL_MODE_MST; + dp_tp_ctl |= DP_TP_CTL_MODE_MST; else { - val |= DP_TP_CTL_MODE_SST; + dp_tp_ctl |= DP_TP_CTL_MODE_SST; if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) - val |= DP_TP_CTL_ENHANCED_FRAME_ENABLE; + dp_tp_ctl |= DP_TP_CTL_ENHANCED_FRAME_ENABLE; } - I915_WRITE(DP_TP_CTL(port), val); - POSTING_READ(DP_TP_CTL(port)); + I915_WRITE(intel_dp->regs.dp_tp_ctl, dp_tp_ctl); + POSTING_READ(intel_dp->regs.dp_tp_ctl); intel_dp->DP |= DDI_BUF_CTL_ENABLE; I915_WRITE(DDI_BUF_CTL(port), intel_dp->DP); @@ -3735,15 +4261,16 @@ void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; - struct intel_digital_port *intel_dig_port; u32 temp, flags = 0; /* XXX: DSI transcoder paranoia */ if (WARN_ON(transcoder_is_dsi(cpu_transcoder))) return; + intel_dsc_get_config(encoder, pipe_config); + temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (temp & TRANS_DDI_PHSYNC) flags |= DRM_MODE_FLAG_PHSYNC; @@ -3754,7 +4281,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder, else flags |= DRM_MODE_FLAG_NVSYNC; - pipe_config->base.adjusted_mode.flags |= flags; + pipe_config->hw.adjusted_mode.flags |= flags; switch (temp & TRANS_DDI_BPC_MASK) { case TRANS_DDI_BPC_6: @@ -3776,7 +4303,6 @@ void intel_ddi_get_config(struct intel_encoder *encoder, switch (temp & TRANS_DDI_MODE_SELECT_MASK) { case TRANS_DDI_MODE_SELECT_HDMI: pipe_config->has_hdmi_sink = true; - intel_dig_port = enc_to_dig_port(&encoder->base); pipe_config->infoframes.enable |= intel_hdmi_infoframes_enabled(encoder, pipe_config); @@ -3804,17 +4330,42 @@ void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->lane_count = ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; intel_dp_get_m_n(intel_crtc, pipe_config); + + if (INTEL_GEN(dev_priv) >= 11) { + i915_reg_t dp_tp_ctl; + + if (IS_GEN(dev_priv, 11)) + dp_tp_ctl = DP_TP_CTL(encoder->port); + else + dp_tp_ctl = TGL_DP_TP_CTL(pipe_config->cpu_transcoder); + + pipe_config->fec_enable = + I915_READ(dp_tp_ctl) & DP_TP_CTL_FEC_ENABLE; + + DRM_DEBUG_KMS("[ENCODER:%d:%s] Fec status: %u\n", + encoder->base.base.id, encoder->base.name, + pipe_config->fec_enable); + } + break; case TRANS_DDI_MODE_SELECT_DP_MST: pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); pipe_config->lane_count = ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; + + if (INTEL_GEN(dev_priv) >= 12) + pipe_config->mst_master_transcoder = + REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, temp); + intel_dp_get_m_n(intel_crtc, pipe_config); break; default: break; } + if (encoder->type == INTEL_OUTPUT_EDP) + tgl_dc3co_exitline_get_config(pipe_config); + pipe_config->has_audio = intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder); @@ -3884,7 +4435,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc 
*crtc = to_intel_crtc(pipe_config->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; int ret; @@ -3892,10 +4443,13 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, if (HAS_TRANSCODER_EDP(dev_priv) && port == PORT_A) pipe_config->cpu_transcoder = TRANSCODER_EDP; - if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) + if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) { ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state); - else + } else { ret = intel_dp_compute_config(encoder, pipe_config, conn_state); + tgl_dc3co_exitline_compute_config(encoder, pipe_config); + } + if (ret) return ret; @@ -3914,49 +4468,18 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, return 0; } -static void intel_ddi_encoder_suspend(struct intel_encoder *encoder) -{ - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - - intel_dp_encoder_suspend(encoder); - - /* - * TODO: disconnect also from USB DP alternate mode once we have a - * way to handle the modeset restore in that mode during resume - * even if the sink has disappeared while being suspended. - */ - if (dig_port->tc_legacy_port) - icl_tc_phy_disconnect(i915, dig_port); -} - -static void intel_ddi_encoder_reset(struct drm_encoder *drm_encoder) -{ - struct intel_digital_port *dig_port = enc_to_dig_port(drm_encoder); - struct drm_i915_private *i915 = to_i915(drm_encoder->dev); - - if (intel_port_is_tc(i915, dig_port->base.port)) - intel_digital_port_connected(&dig_port->base); - - intel_dp_encoder_reset(drm_encoder); -} - static void intel_ddi_encoder_destroy(struct drm_encoder *encoder) { - struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - struct drm_i915_private *i915 = to_i915(encoder->dev); + struct intel_digital_port *dig_port = enc_to_dig_port(to_intel_encoder(encoder)); intel_dp_encoder_flush_work(encoder); - if (intel_port_is_tc(i915, dig_port->base.port)) - icl_tc_phy_disconnect(i915, dig_port); - drm_encoder_cleanup(encoder); kfree(dig_port); } static const struct drm_encoder_funcs intel_ddi_funcs = { - .reset = intel_ddi_encoder_reset, + .reset = intel_dp_encoder_reset, .destroy = intel_ddi_encoder_destroy, }; @@ -4014,7 +4537,7 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_hdmi *hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *hdmi = enc_to_intel_hdmi(encoder); struct intel_connector *connector = hdmi->attached_connector; struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus); @@ -4046,7 +4569,7 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder, WARN_ON(!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)); - if (!crtc_state->base.active) + if (!crtc_state->hw.active) return 0; if (!crtc_state->hdmi_high_tmds_clock_ratio && @@ -4081,14 +4604,17 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder, return modeset_pipe(&crtc->base, ctx); } -static bool intel_ddi_hotplug(struct intel_encoder *encoder, - struct intel_connector *connector) +static enum intel_hotplug_state +intel_ddi_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector, + bool irq_received) { + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct drm_modeset_acquire_ctx ctx; - bool changed; + enum intel_hotplug_state state; int 
ret; - changed = intel_encoder_hotplug(encoder, connector); + state = intel_encoder_hotplug(encoder, connector, irq_received); drm_modeset_acquire_init(&ctx, 0); @@ -4110,7 +4636,27 @@ static bool intel_ddi_hotplug(struct intel_encoder *encoder, drm_modeset_acquire_fini(&ctx); WARN(ret, "Acquiring modeset locks failed with %i\n", ret); - return changed; + /* + * Unpowered type-c dongles can take some time to boot and be + * responsible, so here giving some time to those dongles to power up + * and then retrying the probe. + * + * On many platforms the HDMI live state signal is known to be + * unreliable, so we can't use it to detect if a sink is connected or + * not. Instead we detect if it's connected based on whether we can + * read the EDID or not. That in turn has a problem during disconnect, + * since the HPD interrupt may be raised before the DDC lines get + * disconnected (due to how the required length of DDC vs. HPD + * connector pins are specified) and so we'll still be able to get a + * valid EDID. To solve this schedule another detection cycle if this + * time around we didn't detect any change in the sink's connection + * status. + */ + if (state == INTEL_HOTPLUG_UNCHANGED && irq_received && + !dig_port->dp.is_mst) + state = INTEL_HOTPLUG_RETRY; + + return state; } static struct intel_connector * @@ -4194,10 +4740,9 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) struct ddi_vbt_port_info *port_info = &dev_priv->vbt.ddi_port_info[port]; struct intel_digital_port *intel_dig_port; - struct intel_encoder *intel_encoder; - struct drm_encoder *encoder; + struct intel_encoder *encoder; bool init_hdmi, init_dp, init_lspcon = false; - enum pipe pipe; + enum phy phy = intel_port_to_phy(dev_priv, port); init_hdmi = port_info->supports_dvi || port_info->supports_hdmi; init_dp = port_info->supports_dp; @@ -4224,32 +4769,30 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) if (!intel_dig_port) return; - intel_encoder = &intel_dig_port->base; - encoder = &intel_encoder->base; + encoder = &intel_dig_port->base; - drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs, + drm_encoder_init(&dev_priv->drm, &encoder->base, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port)); - intel_encoder->hotplug = intel_ddi_hotplug; - intel_encoder->compute_output_type = intel_ddi_compute_output_type; - intel_encoder->compute_config = intel_ddi_compute_config; - intel_encoder->enable = intel_enable_ddi; - intel_encoder->pre_pll_enable = intel_ddi_pre_pll_enable; - intel_encoder->post_pll_disable = intel_ddi_post_pll_disable; - intel_encoder->pre_enable = intel_ddi_pre_enable; - intel_encoder->disable = intel_disable_ddi; - intel_encoder->post_disable = intel_ddi_post_disable; - intel_encoder->update_pipe = intel_ddi_update_pipe; - intel_encoder->get_hw_state = intel_ddi_get_hw_state; - intel_encoder->get_config = intel_ddi_get_config; - intel_encoder->suspend = intel_ddi_encoder_suspend; - intel_encoder->get_power_domains = intel_ddi_get_power_domains; - intel_encoder->type = INTEL_OUTPUT_DDI; - intel_encoder->power_domain = intel_port_to_power_domain(port); - intel_encoder->port = port; - intel_encoder->cloneable = 0; - for_each_pipe(dev_priv, pipe) - intel_encoder->crtc_mask |= BIT(pipe); + encoder->hotplug = intel_ddi_hotplug; + encoder->compute_output_type = intel_ddi_compute_output_type; + encoder->compute_config = intel_ddi_compute_config; + encoder->enable = intel_enable_ddi; + encoder->pre_pll_enable = intel_ddi_pre_pll_enable; + 
encoder->pre_enable = intel_ddi_pre_enable; + encoder->disable = intel_disable_ddi; + encoder->post_disable = intel_ddi_post_disable; + encoder->update_pipe = intel_ddi_update_pipe; + encoder->get_hw_state = intel_ddi_get_hw_state; + encoder->get_config = intel_ddi_get_config; + encoder->suspend = intel_dp_encoder_suspend; + encoder->get_power_domains = intel_ddi_get_power_domains; + + encoder->type = INTEL_OUTPUT_DDI; + encoder->power_domain = intel_port_to_power_domain(port); + encoder->port = port; + encoder->cloneable = 0; + encoder->pipe_mask = ~0; if (INTEL_GEN(dev_priv) >= 11) intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & @@ -4257,43 +4800,25 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) else intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); + intel_dig_port->dp.output_reg = INVALID_MMIO_REG; intel_dig_port->max_lanes = intel_ddi_max_lanes(intel_dig_port); intel_dig_port->aux_ch = intel_bios_port_aux_ch(dev_priv, port); - intel_dig_port->tc_legacy_port = intel_port_is_tc(dev_priv, port) && - !port_info->supports_typec_usb && - !port_info->supports_tbt; + if (intel_phy_is_tc(dev_priv, phy)) { + bool is_legacy = !port_info->supports_typec_usb && + !port_info->supports_tbt; - switch (port) { - case PORT_A: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_A_IO; - break; - case PORT_B: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_B_IO; - break; - case PORT_C: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_C_IO; - break; - case PORT_D: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_D_IO; - break; - case PORT_E: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_E_IO; - break; - case PORT_F: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_F_IO; - break; - default: - MISSING_CASE(port); + intel_tc_port_init(intel_dig_port, is_legacy); + + encoder->update_prepare = intel_ddi_update_prepare; + encoder->update_complete = intel_ddi_update_complete; } + WARN_ON(port > PORT_I); + intel_dig_port->ddi_io_power_domain = POWER_DOMAIN_PORT_DDI_A_IO + + port - PORT_A; + if (init_dp) { if (!intel_ddi_init_dp_connector(intel_dig_port)) goto err; @@ -4303,7 +4828,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) /* In theory we don't need the encoder->type check, but leave it just in * case we have some really bad VBTs... 
*/ - if (intel_encoder->type != INTEL_OUTPUT_EDP && init_hdmi) { + if (encoder->type != INTEL_OUTPUT_EDP && init_hdmi) { if (!intel_ddi_init_hdmi_connector(intel_dig_port)) goto err; } @@ -4324,12 +4849,9 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_infoframe_init(intel_dig_port); - if (intel_port_is_tc(dev_priv, port)) - intel_digital_port_connected(intel_encoder); - return; err: - drm_encoder_cleanup(encoder); + drm_encoder_cleanup(&encoder->base); kfree(intel_dig_port); } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index a08365da2643..167c6579d972 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -22,7 +22,7 @@ struct intel_encoder; void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state); -void hsw_fdi_link_train(struct intel_crtc *crtc, +void hsw_fdi_link_train(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); @@ -30,7 +30,8 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state); void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); -void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); +void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 592b92782fab..064dd99bbc49 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -29,9 +29,8 @@ #include <linux/intel-iommu.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include <linux/slab.h> -#include <linux/vgaarb.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> @@ -47,6 +46,7 @@ #include "display/intel_crt.h" #include "display/intel_ddi.h" #include "display/intel_dp.h" +#include "display/intel_dp_mst.h" #include "display/intel_dsi.h" #include "display/intel_dvo.h" #include "display/intel_gmbus.h" @@ -56,15 +56,18 @@ #include "display/intel_tv.h" #include "display/intel_vdsc.h" +#include "gt/intel_rps.h" + #include "i915_drv.h" #include "i915_trace.h" #include "intel_acpi.h" #include "intel_atomic.h" #include "intel_atomic_plane.h" #include "intel_bw.h" -#include "intel_color.h" #include "intel_cdclk.h" -#include "intel_drv.h" +#include "intel_color.h" +#include "intel_display_types.h" +#include "intel_dp_link_training.h" #include "intel_fbc.h" #include "intel_fbdev.h" #include "intel_fifo_underrun.h" @@ -78,16 +81,28 @@ #include "intel_quirks.h" #include "intel_sideband.h" #include "intel_sprite.h" +#include "intel_tc.h" +#include "intel_vga.h" /* Primary plane formats for gen <= 3 */ static const u32 i8xx_primary_formats[] = { DRM_FORMAT_C8, - DRM_FORMAT_RGB565, DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGB565, + 
DRM_FORMAT_XRGB8888, +}; + +/* Primary plane formats for ivb (no fp16 due to hw issue) */ +static const u32 ivb_primary_formats[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, }; -/* Primary plane formats for gen >= 4 */ +/* Primary plane formats for gen >= 4, except ivb */ static const u32 i965_primary_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, @@ -95,6 +110,22 @@ static const u32 i965_primary_formats[] = { DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_XBGR16161616F, +}; + +/* Primary plane formats for vlv/chv */ +static const u32 vlv_primary_formats[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_ABGR2101010, + DRM_FORMAT_XBGR16161616F, }; static const u64 i9xx_format_modifiers[] = { @@ -115,8 +146,8 @@ static const u64 cursor_format_modifiers[] = { static void i9xx_crtc_clock_get(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); -static void ironlake_pch_clock_get(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config); +static void ilk_pch_clock_get(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config); static int intel_framebuffer_init(struct intel_framebuffer *ifb, struct drm_i915_gem_object *obj, @@ -127,23 +158,18 @@ static void intel_cpu_transcoder_set_m_n(const struct intel_crtc_state *crtc_sta const struct intel_link_m_n *m_n, const struct intel_link_m_n *m2_n2); static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state); -static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state); -static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state); +static void ilk_set_pipeconf(const struct intel_crtc_state *crtc_state); +static void hsw_set_pipeconf(const struct intel_crtc_state *crtc_state); static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state); static void vlv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); static void chv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); -static void intel_begin_crtc_commit(struct intel_atomic_state *, struct intel_crtc *); -static void intel_finish_crtc_commit(struct intel_atomic_state *, struct intel_crtc *); -static void intel_crtc_init_scalers(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state); -static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state); -static void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state); -static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state); +static void skl_pfit_enable(const struct intel_crtc_state *crtc_state); +static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state); static void intel_modeset_setup_hw_state(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx); -static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); +static struct intel_crtc_state *intel_crtc_state_alloc(struct intel_crtc *crtc); struct intel_limit { struct { @@ -344,7 +370,7 @@ static const struct intel_limit intel_limits_g4x_dual_channel_lvds = { }, }; -static const struct intel_limit intel_limits_pineview_sdvo = { +static const struct intel_limit pnv_limits_sdvo = { .dot = { .min = 20000, .max = 400000}, .vco = { .min = 1700000, .max = 3500000 }, /* Pineview's 
Ncounter is a ring counter */ @@ -359,7 +385,7 @@ static const struct intel_limit intel_limits_pineview_sdvo = { .p2_slow = 10, .p2_fast = 5 }, }; -static const struct intel_limit intel_limits_pineview_lvds = { +static const struct intel_limit pnv_limits_lvds = { .dot = { .min = 20000, .max = 400000 }, .vco = { .min = 1700000, .max = 3500000 }, .n = { .min = 3, .max = 6 }, @@ -377,7 +403,7 @@ static const struct intel_limit intel_limits_pineview_lvds = { * We calculate clock using (register_value + 2) for N/M1/M2, so here * the range value for them is (actual_value - 2). */ -static const struct intel_limit intel_limits_ironlake_dac = { +static const struct intel_limit ilk_limits_dac = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 5 }, @@ -390,7 +416,7 @@ static const struct intel_limit intel_limits_ironlake_dac = { .p2_slow = 10, .p2_fast = 5 }, }; -static const struct intel_limit intel_limits_ironlake_single_lvds = { +static const struct intel_limit ilk_limits_single_lvds = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 3 }, @@ -403,7 +429,7 @@ static const struct intel_limit intel_limits_ironlake_single_lvds = { .p2_slow = 14, .p2_fast = 14 }, }; -static const struct intel_limit intel_limits_ironlake_dual_lvds = { +static const struct intel_limit ilk_limits_dual_lvds = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 3 }, @@ -417,7 +443,7 @@ static const struct intel_limit intel_limits_ironlake_dual_lvds = { }; /* LVDS 100mhz refclk limits. */ -static const struct intel_limit intel_limits_ironlake_single_lvds_100m = { +static const struct intel_limit ilk_limits_single_lvds_100m = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 2 }, @@ -430,7 +456,7 @@ static const struct intel_limit intel_limits_ironlake_single_lvds_100m = { .p2_slow = 14, .p2_fast = 14 }, }; -static const struct intel_limit intel_limits_ironlake_dual_lvds_100m = { +static const struct intel_limit ilk_limits_dual_lvds_100m = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 3 }, @@ -489,7 +515,7 @@ static const struct intel_limit intel_limits_bxt = { /* WA Display #0827: Gen9:all */ static void -skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable) +skl_wa_827(struct drm_i915_private *dev_priv, enum pipe pipe, bool enable) { if (enable) I915_WRITE(CLKGATE_DIS_PSL(pipe), @@ -515,9 +541,22 @@ icl_wa_scalerclkgating(struct drm_i915_private *dev_priv, enum pipe pipe, } static bool -needs_modeset(const struct drm_crtc_state *state) +needs_modeset(const struct intel_crtc_state *state) { - return drm_atomic_crtc_needs_modeset(state); + return drm_atomic_crtc_needs_modeset(&state->uapi); +} + +bool +is_trans_port_sync_mode(const struct intel_crtc_state *crtc_state) +{ + return (crtc_state->master_transcoder != INVALID_TRANSCODER || + crtc_state->sync_mode_slaves_mask); +} + +static bool +is_trans_port_sync_slave(const struct intel_crtc_state *crtc_state) +{ + return crtc_state->master_transcoder != INVALID_TRANSCODER; } /* @@ -631,7 +670,7 @@ i9xx_select_p2_div(const struct intel_limit *limit, const struct intel_crtc_state *crtc_state, int target) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); if 
(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS)) { /* @@ -667,7 +706,7 @@ i9xx_find_best_dpll(const struct intel_limit *limit, int target, int refclk, struct dpll *match_clock, struct dpll *best_clock) { - struct drm_device *dev = crtc_state->base.crtc->dev; + struct drm_device *dev = crtc_state->uapi.crtc->dev; struct dpll clock; int err = target; @@ -725,7 +764,7 @@ pnv_find_best_dpll(const struct intel_limit *limit, int target, int refclk, struct dpll *match_clock, struct dpll *best_clock) { - struct drm_device *dev = crtc_state->base.crtc->dev; + struct drm_device *dev = crtc_state->uapi.crtc->dev; struct dpll clock; int err = target; @@ -781,7 +820,7 @@ g4x_find_best_dpll(const struct intel_limit *limit, int target, int refclk, struct dpll *match_clock, struct dpll *best_clock) { - struct drm_device *dev = crtc_state->base.crtc->dev; + struct drm_device *dev = crtc_state->uapi.crtc->dev; struct dpll clock; int max_n; bool found = false; @@ -875,7 +914,7 @@ vlv_find_best_dpll(const struct intel_limit *limit, int target, int refclk, struct dpll *match_clock, struct dpll *best_clock) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_device *dev = crtc->base.dev; struct dpll clock; unsigned int bestppm = 1000000; @@ -935,7 +974,7 @@ chv_find_best_dpll(const struct intel_limit *limit, int target, int refclk, struct dpll *match_clock, struct dpll *best_clock) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_device *dev = crtc->base.dev; unsigned int best_error_ppm; struct dpll clock; @@ -998,33 +1037,6 @@ bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, NULL, best_clock); } -bool intel_crtc_active(struct intel_crtc *crtc) -{ - /* Be paranoid as we can arrive here with only partial - * state retrieved from the hardware during setup. - * - * We can ditch the adjusted_mode.crtc_clock check as soon - * as Haswell has gained clock readout/fastboot support. - * - * We can ditch the crtc->primary->state->fb check as soon as we can - * properly reconstruct framebuffers. - * - * FIXME: The intel_crtc->active here should be switched to - * crtc->state->active once we have proper CRTC states wired up - * for atomic. 
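For readers unfamiliar with the *_find_best_dpll() helpers touched above: they brute-force the PLL divider ranges from the limit tables and keep the combination whose resulting dot clock lands closest to the target. A standalone sketch of that search pattern follows; the limit values and the clock formula in it are simplified placeholders, not the driver's per-platform tables or its real M/N/VCO math.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for struct intel_limit / struct dpll. */
struct range { int min, max; };
struct limits { struct range n, m1, m2, p1; int p2; };
struct dividers { int n, m1, m2, p1, p2, dot; };

/* Placeholder clock model: dot = refclk * (m1 * m2) / (n * p1 * p2). */
static int calc_dot(int refclk, const struct dividers *d)
{
	return refclk * d->m1 * d->m2 / (d->n * d->p1 * d->p2);
}

/* Walk the divider ranges exhaustively and keep the combination whose dot
 * clock is closest to the target, as the find_best_dpll helpers do (they
 * additionally validate the vco/m/p ranges against the limit table). */
static int find_best(const struct limits *lim, int refclk, int target,
		     struct dividers *best)
{
	int err = target;	/* worst acceptable deviation */
	int found = 0;
	struct dividers d = { .p2 = lim->p2 };

	for (d.n = lim->n.min; d.n <= lim->n.max; d.n++)
		for (d.m1 = lim->m1.min; d.m1 <= lim->m1.max; d.m1++)
			for (d.m2 = lim->m2.min; d.m2 <= lim->m2.max; d.m2++)
				for (d.p1 = lim->p1.min; d.p1 <= lim->p1.max; d.p1++) {
					int this_err = abs(calc_dot(refclk, &d) - target);

					if (this_err < err) {
						*best = d;
						best->dot = calc_dot(refclk, &d);
						err = this_err;
						found = 1;
					}
				}
	return found;
}

int main(void)
{
	/* Purely illustrative ranges and clocks (kHz). */
	struct limits lim = {
		.n = { 1, 5 }, .m1 = { 10, 22 }, .m2 = { 5, 9 },
		.p1 = { 1, 8 }, .p2 = 5,
	};
	struct dividers best;

	if (find_best(&lim, 96000, 148500, &best))
		printf("n=%d m1=%d m2=%d p1=%d p2=%d -> %d kHz\n",
		       best.n, best.m1, best.m2, best.p1, best.p2, best.dot);
	return 0;
}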
- */ - return crtc->active && crtc->base.primary->state->fb && - crtc->config->base.adjusted_mode.crtc_clock; -} - -enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv, - enum pipe pipe) -{ - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - - return crtc->config->cpu_transcoder; -} - static bool pipe_scanline_is_moving(struct drm_i915_private *dev_priv, enum pipe pipe) { @@ -1068,7 +1080,7 @@ static void intel_wait_for_pipe_scanline_moving(struct intel_crtc *crtc) static void intel_wait_for_pipe_off(const struct intel_crtc_state *old_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); if (INTEL_GEN(dev_priv) >= 4) { @@ -1076,9 +1088,8 @@ intel_wait_for_pipe_off(const struct intel_crtc_state *old_crtc_state) i915_reg_t reg = PIPECONF(cpu_transcoder); /* Wait for the Pipe State to go off */ - if (intel_wait_for_register(&dev_priv->uncore, - reg, I965_PIPECONF_ACTIVE, 0, - 100)) + if (intel_de_wait_for_clear(dev_priv, reg, + I965_PIPECONF_ACTIVE, 100)) WARN(1, "pipe_off wait timed out\n"); } else { intel_wait_for_pipe_scanline_stopped(crtc); @@ -1119,11 +1130,15 @@ static void assert_fdi_tx(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) { bool cur_state; - enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, - pipe); if (HAS_DDI(dev_priv)) { - /* DDI does not have a specific FDI_TX register */ + /* + * DDI does not have a specific FDI_TX register. + * + * FDI is never fed from EDP transcoder + * so pipe->transcoder cast is fine here. + */ + enum transcoder cpu_transcoder = (enum transcoder)pipe; u32 val = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); cur_state = !!(val & TRANS_DDI_FUNC_ENABLE); } else { @@ -1240,11 +1255,9 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe) } void assert_pipe(struct drm_i915_private *dev_priv, - enum pipe pipe, bool state) + enum transcoder cpu_transcoder, bool state) { bool cur_state; - enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, - pipe); enum intel_display_power_domain power_domain; intel_wakeref_t wakeref; @@ -1264,8 +1277,9 @@ void assert_pipe(struct drm_i915_private *dev_priv, } I915_STATE_WARN(cur_state != state, - "pipe %c assertion failure (expected %s, current %s)\n", - pipe_name(pipe), onoff(state), onoff(cur_state)); + "transcoder %s assertion failure (expected %s, current %s)\n", + transcoder_name(cpu_transcoder), + onoff(state), onoff(cur_state)); } static void assert_plane(struct intel_plane *plane, bool state) @@ -1382,11 +1396,7 @@ static void _vlv_enable_pll(struct intel_crtc *crtc, POSTING_READ(DPLL(pipe)); udelay(150); - if (intel_wait_for_register(&dev_priv->uncore, - DPLL(pipe), - DPLL_LOCK_VLV, - DPLL_LOCK_VLV, - 1)) + if (intel_de_wait_for_set(dev_priv, DPLL(pipe), DPLL_LOCK_VLV, 1)) DRM_ERROR("DPLL %d failed to lock\n", pipe); } @@ -1396,7 +1406,7 @@ static void vlv_enable_pll(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - assert_pipe_disabled(dev_priv, pipe); + assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder); /* PLL is protected by panel, make sure we can write it */ assert_panel_unlocked(dev_priv, pipe); @@ -1435,9 +1445,7 @@ static void _chv_enable_pll(struct intel_crtc *crtc, I915_WRITE(DPLL(pipe), pipe_config->dpll_hw_state.dpll); /* Check PLL is locked 
*/ - if (intel_wait_for_register(&dev_priv->uncore, - DPLL(pipe), DPLL_LOCK_VLV, DPLL_LOCK_VLV, - 1)) + if (intel_de_wait_for_set(dev_priv, DPLL(pipe), DPLL_LOCK_VLV, 1)) DRM_ERROR("PLL %d failed to lock\n", pipe); } @@ -1447,7 +1455,7 @@ static void chv_enable_pll(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - assert_pipe_disabled(dev_priv, pipe); + assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder); /* PLL is protected by panel, make sure we can write it */ assert_panel_unlocked(dev_priv, pipe); @@ -1494,7 +1502,7 @@ static void i9xx_enable_pll(struct intel_crtc *crtc, u32 dpll = crtc_state->dpll_hw_state.dpll; int i; - assert_pipe_disabled(dev_priv, crtc->pipe); + assert_pipe_disabled(dev_priv, crtc_state->cpu_transcoder); /* PLL is protected by panel, make sure we can write it */ if (i9xx_has_pps(dev_priv)) @@ -1534,7 +1542,7 @@ static void i9xx_enable_pll(struct intel_crtc *crtc, static void i9xx_disable_pll(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; @@ -1543,7 +1551,7 @@ static void i9xx_disable_pll(const struct intel_crtc_state *crtc_state) return; /* Make sure the pipe isn't still relying on us */ - assert_pipe_disabled(dev_priv, pipe); + assert_pipe_disabled(dev_priv, crtc_state->cpu_transcoder); I915_WRITE(DPLL(pipe), DPLL_VGA_MODE_DIS); POSTING_READ(DPLL(pipe)); @@ -1554,7 +1562,7 @@ static void vlv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) u32 val; /* Make sure the pipe isn't still relying on us */ - assert_pipe_disabled(dev_priv, pipe); + assert_pipe_disabled(dev_priv, (enum transcoder)pipe); val = DPLL_INTEGRATED_REF_CLK_VLV | DPLL_REF_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS; @@ -1571,7 +1579,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) u32 val; /* Make sure the pipe isn't still relying on us */ - assert_pipe_disabled(dev_priv, pipe); + assert_pipe_disabled(dev_priv, (enum transcoder)pipe); val = DPLL_SSC_REF_CLK_CHV | DPLL_REF_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS; @@ -1616,17 +1624,16 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, BUG(); } - if (intel_wait_for_register(&dev_priv->uncore, - dpll_reg, port_mask, expected_mask, - 1000)) - WARN(1, "timed out waiting for port %c ready: got 0x%x, expected 0x%x\n", - port_name(dport->base.port), + if (intel_de_wait_for_register(dev_priv, dpll_reg, + port_mask, expected_mask, 1000)) + WARN(1, "timed out waiting for [ENCODER:%d:%s] port ready: got 0x%x, expected 0x%x\n", + dport->base.base.base.id, dport->base.base.name, I915_READ(dpll_reg) & port_mask, expected_mask); } -static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) +static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t reg; @@ -1640,11 +1647,16 @@ static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_s assert_fdi_rx_enabled(dev_priv, pipe); if (HAS_PCH_CPT(dev_priv)) { - /* Workaround: Set the timing override bit before enabling the - * pch transcoder. 
*/ reg = TRANS_CHICKEN2(pipe); val = I915_READ(reg); + /* + * Workaround: Set the timing override bit + * before enabling the pch transcoder. + */ val |= TRANS_CHICKEN2_TIMING_OVERRIDE; + /* Configure frame start delay to match the CPU */ + val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; + val |= TRANS_CHICKEN2_FRAME_START_DELAY(0); I915_WRITE(reg, val); } @@ -1653,6 +1665,10 @@ static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_s pipeconf_val = I915_READ(PIPECONF(pipe)); if (HAS_PCH_IBX(dev_priv)) { + /* Configure frame start delay to match the CPU */ + val &= ~TRANS_FRAME_START_DELAY_MASK; + val |= TRANS_FRAME_START_DELAY(0); + /* * Make the BPC in transcoder be consistent with * that in pipeconf reg. For HDMI we must use 8bpc @@ -1677,9 +1693,7 @@ static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_s } I915_WRITE(reg, val | TRANS_ENABLE); - if (intel_wait_for_register(&dev_priv->uncore, - reg, TRANS_STATE_ENABLE, TRANS_STATE_ENABLE, - 100)) + if (intel_de_wait_for_set(dev_priv, reg, TRANS_STATE_ENABLE, 100)) DRM_ERROR("failed to enable transcoder %c\n", pipe_name(pipe)); } @@ -1692,9 +1706,12 @@ static void lpt_enable_pch_transcoder(struct drm_i915_private *dev_priv, assert_fdi_tx_enabled(dev_priv, (enum pipe) cpu_transcoder); assert_fdi_rx_enabled(dev_priv, PIPE_A); - /* Workaround: set timing override bit. */ val = I915_READ(TRANS_CHICKEN2(PIPE_A)); + /* Workaround: set timing override bit. */ val |= TRANS_CHICKEN2_TIMING_OVERRIDE; + /* Configure frame start delay to match the CPU */ + val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; + val |= TRANS_CHICKEN2_FRAME_START_DELAY(0); I915_WRITE(TRANS_CHICKEN2(PIPE_A), val); val = TRANS_ENABLE; @@ -1707,16 +1724,13 @@ static void lpt_enable_pch_transcoder(struct drm_i915_private *dev_priv, val |= TRANS_PROGRESSIVE; I915_WRITE(LPT_TRANSCONF, val); - if (intel_wait_for_register(&dev_priv->uncore, - LPT_TRANSCONF, - TRANS_STATE_ENABLE, - TRANS_STATE_ENABLE, - 100)) + if (intel_de_wait_for_set(dev_priv, LPT_TRANSCONF, + TRANS_STATE_ENABLE, 100)) DRM_ERROR("Failed to enable PCH transcoder\n"); } -static void ironlake_disable_pch_transcoder(struct drm_i915_private *dev_priv, - enum pipe pipe) +static void ilk_disable_pch_transcoder(struct drm_i915_private *dev_priv, + enum pipe pipe) { i915_reg_t reg; u32 val; @@ -1733,9 +1747,7 @@ static void ironlake_disable_pch_transcoder(struct drm_i915_private *dev_priv, val &= ~TRANS_ENABLE; I915_WRITE(reg, val); /* wait for PCH transcoder off, transcoder state */ - if (intel_wait_for_register(&dev_priv->uncore, - reg, TRANS_STATE_ENABLE, 0, - 50)) + if (intel_de_wait_for_clear(dev_priv, reg, TRANS_STATE_ENABLE, 50)) DRM_ERROR("failed to disable transcoder %c\n", pipe_name(pipe)); if (HAS_PCH_CPT(dev_priv)) { @@ -1755,9 +1767,8 @@ void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv) val &= ~TRANS_ENABLE; I915_WRITE(LPT_TRANSCONF, val); /* wait for PCH transcoder off, transcoder state */ - if (intel_wait_for_register(&dev_priv->uncore, - LPT_TRANSCONF, TRANS_STATE_ENABLE, 0, - 50)) + if (intel_de_wait_for_clear(dev_priv, LPT_TRANSCONF, + TRANS_STATE_ENABLE, 50)) DRM_ERROR("Failed to disable PCH transcoder\n"); /* Workaround: clear timing override bit. 
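The intel_de_wait_for_set()/intel_de_wait_for_clear() conversions above are the two common special cases of a masked register poll, i.e. wait until (reg & mask) == value. A minimal self-contained sketch of that relationship, with a simulated register in place of real MMIO:

#include <stdint.h>
#include <stdio.h>

/* Simulated register: the status bit becomes visible after a few reads. */
static uint32_t fake_reg;
static int reads;

static uint32_t read_reg(void)
{
	if (++reads >= 3)
		fake_reg |= 0x1;	/* think TRANS_STATE_ENABLE-style bit */
	return fake_reg;
}

/* Poll until (reg & mask) == value or the poll budget runs out.
 * Returns 0 on success, -1 (think -ETIMEDOUT) otherwise. */
static int wait_for_register(uint32_t mask, uint32_t value, unsigned int tries)
{
	while (tries--) {
		if ((read_reg() & mask) == value)
			return 0;
	}
	return -1;
}

/* "Set" waits for the mask bits to become 1, "clear" waits for them to
 * become 0; these are exactly the two patterns that the replaced
 * intel_wait_for_register(..., mask, mask-or-0, timeout) calls expressed. */
static int wait_for_set(uint32_t mask, unsigned int tries)
{
	return wait_for_register(mask, mask, tries);
}

static int wait_for_clear(uint32_t mask, unsigned int tries)
{
	return wait_for_register(mask, 0, tries);
}

int main(void)
{
	printf("set:   %d\n", wait_for_set(0x1, 10));	/* succeeds: 0 */

	reads = 0;
	fake_reg = 0x1;					/* bit never clears */
	printf("clear: %d\n", wait_for_clear(0x1, 10));	/* times out: -1 */
	return 0;
}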
*/ @@ -1778,7 +1789,7 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc) static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); /* * On i965gm the hardware frame counter reads @@ -1798,16 +1809,25 @@ static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state static void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + assert_vblank_disabled(&crtc->base); drm_crtc_set_max_vblank_count(&crtc->base, intel_crtc_max_vblank_count(crtc_state)); drm_crtc_vblank_on(&crtc->base); } +void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + + drm_crtc_vblank_off(&crtc->base); + assert_vblank_disabled(&crtc->base); +} + static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; enum pipe pipe = crtc->pipe; @@ -1863,9 +1883,9 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) intel_wait_for_pipe_scanline_moving(crtc); } -static void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state) +void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; enum pipe pipe = crtc->pipe; @@ -1908,6 +1928,74 @@ static unsigned int intel_tile_size(const struct drm_i915_private *dev_priv) return IS_GEN(dev_priv, 2) ? 
2048 : 4096; } +static bool is_ccs_plane(const struct drm_framebuffer *fb, int plane) +{ + if (!is_ccs_modifier(fb->modifier)) + return false; + + return plane >= fb->format->num_planes / 2; +} + +static bool is_gen12_ccs_modifier(u64 modifier) +{ + return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS || + modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS; + +} + +static bool is_gen12_ccs_plane(const struct drm_framebuffer *fb, int plane) +{ + return is_gen12_ccs_modifier(fb->modifier) && is_ccs_plane(fb, plane); +} + +static bool is_aux_plane(const struct drm_framebuffer *fb, int plane) +{ + if (is_ccs_modifier(fb->modifier)) + return is_ccs_plane(fb, plane); + + return plane == 1; +} + +static int main_to_ccs_plane(const struct drm_framebuffer *fb, int main_plane) +{ + WARN_ON(!is_ccs_modifier(fb->modifier) || + (main_plane && main_plane >= fb->format->num_planes / 2)); + + return fb->format->num_planes / 2 + main_plane; +} + +static int ccs_to_main_plane(const struct drm_framebuffer *fb, int ccs_plane) +{ + WARN_ON(!is_ccs_modifier(fb->modifier) || + ccs_plane < fb->format->num_planes / 2); + + return ccs_plane - fb->format->num_planes / 2; +} + +/* Return either the main plane's CCS or - if not a CCS FB - UV plane */ +int intel_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane) +{ + if (is_ccs_modifier(fb->modifier)) + return main_to_ccs_plane(fb, main_plane); + + return 1; +} + +bool +intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info, + uint64_t modifier) +{ + return info->is_yuv && + info->num_planes == (is_ccs_modifier(modifier) ? 4 : 2); +} + +static bool is_semiplanar_uv_plane(const struct drm_framebuffer *fb, + int color_plane) +{ + return intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) && + color_plane == 1; +} + static unsigned int intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) { @@ -1923,16 +2011,21 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) else return 512; case I915_FORMAT_MOD_Y_TILED_CCS: - if (color_plane == 1) + if (is_ccs_plane(fb, color_plane)) return 128; /* fall through */ + case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: + if (is_ccs_plane(fb, color_plane)) + return 64; + /* fall through */ case I915_FORMAT_MOD_Y_TILED: if (IS_GEN(dev_priv, 2) || HAS_128_BYTE_Y_TILING(dev_priv)) return 128; else return 512; case I915_FORMAT_MOD_Yf_TILED_CCS: - if (color_plane == 1) + if (is_ccs_plane(fb, color_plane)) return 128; /* fall through */ case I915_FORMAT_MOD_Yf_TILED: @@ -1959,6 +2052,9 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) static unsigned int intel_tile_height(const struct drm_framebuffer *fb, int color_plane) { + if (is_gen12_ccs_plane(fb, color_plane)) + return 1; + return intel_tile_size(to_i915(fb->dev)) / intel_tile_width_bytes(fb, color_plane); } @@ -1972,7 +2068,17 @@ static void intel_tile_dims(const struct drm_framebuffer *fb, int color_plane, unsigned int cpp = fb->format->cpp[color_plane]; *tile_width = tile_width_bytes / cpp; - *tile_height = intel_tile_size(to_i915(fb->dev)) / tile_width_bytes; + *tile_height = intel_tile_height(fb, color_plane); +} + +static unsigned int intel_tile_row_size(const struct drm_framebuffer *fb, + int color_plane) +{ + unsigned int tile_width, tile_height; + + intel_tile_dims(fb, color_plane, &tile_width, &tile_height); + + return fb->pitches[color_plane] * tile_height; } unsigned int @@ -2049,7 +2155,8 @@ static unsigned int 
intel_surf_alignment(const struct drm_framebuffer *fb, struct drm_i915_private *dev_priv = to_i915(fb->dev); /* AUX_DIST needs only 4K alignment */ - if (color_plane == 1) + if ((INTEL_GEN(dev_priv) < 12 && is_aux_plane(fb, color_plane)) || + is_ccs_plane(fb, color_plane)) return 4096; switch (fb->modifier) { @@ -2059,9 +2166,19 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, if (INTEL_GEN(dev_priv) >= 9) return 256 * 1024; return 0; + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: + if (is_semiplanar_uv_plane(fb, color_plane)) + return intel_tile_row_size(fb, color_plane); + /* Fall-through */ + case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: + return 16 * 1024; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: case I915_FORMAT_MOD_Y_TILED: + if (INTEL_GEN(dev_priv) >= 12 && + is_semiplanar_uv_plane(fb, color_plane)) + return intel_tile_row_size(fb, color_plane); + /* Fall-through */ case I915_FORMAT_MOD_Yf_TILED: return 1 * 1024 * 1024; default: @@ -2072,7 +2189,7 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, static bool intel_plane_uses_fence(const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); return INTEL_GEN(dev_priv) < 4 || @@ -2094,9 +2211,12 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int pinctl; u32 alignment; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + if (WARN_ON(!i915_gem_object_is_framebuffer(obj))) + return ERR_PTR(-EINVAL); alignment = intel_surf_alignment(fb, 0); + if (WARN_ON(alignment && !is_power_of_2(alignment))) + return ERR_PTR(-EINVAL); /* Note that the w/a also requires 64 PTE of padding following the * bo. We currently fill all unused PTE with the shadow page and so @@ -2114,19 +2234,18 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, * pin/unpin/fence and not more. */ wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - i915_gem_object_lock(obj); atomic_inc(&dev_priv->gpu_error.pending_fb_pin); - pinctl = 0; - - /* Valleyview is definitely limited to scanning out the first + /* + * Valleyview is definitely limited to scanning out the first * 512MiB. Lets presume this behaviour was inherited from the * g4x display engine and that all earlier gen are similarly * limited. Testing suggests that it is a little more * complicated than this. For example, Cherryview appears quite * happy to scanout from anywhere within its global aperture. */ + pinctl = 0; if (HAS_GMCH(dev_priv)) pinctl |= PIN_MAPPABLE; @@ -2138,7 +2257,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, if (uses_fence && i915_vma_is_map_and_fenceable(vma)) { int ret; - /* Install a fence for tiled scan-out. Pre-i965 always needs a + /* + * Install a fence for tiled scan-out. Pre-i965 always needs a * fence, whereas 965+ only requires a fence if using * framebuffer compression. For simplicity, we always, when * possible, install a fence as the cost is not that onerous. 
@@ -2168,16 +2288,12 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, i915_vma_get(vma); err: atomic_dec(&dev_priv->gpu_error.pending_fb_pin); - - i915_gem_object_unlock(obj); intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return vma; } void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - i915_gem_object_lock(vma->obj); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); @@ -2206,7 +2322,7 @@ u32 intel_fb_xy_to_linear(int x, int y, const struct intel_plane_state *state, int color_plane) { - const struct drm_framebuffer *fb = state->base.fb; + const struct drm_framebuffer *fb = state->hw.fb; unsigned int cpp = fb->format->cpp[color_plane]; unsigned int pitch = state->color_plane[color_plane].stride; @@ -2254,9 +2370,10 @@ static u32 intel_adjust_tile_offset(int *x, int *y, return new_offset; } -static bool is_surface_linear(u64 modifier, int color_plane) +static bool is_surface_linear(const struct drm_framebuffer *fb, int color_plane) { - return modifier == DRM_FORMAT_MOD_LINEAR; + return fb->modifier == DRM_FORMAT_MOD_LINEAR || + is_gen12_ccs_plane(fb, color_plane); } static u32 intel_adjust_aligned_offset(int *x, int *y, @@ -2271,7 +2388,7 @@ static u32 intel_adjust_aligned_offset(int *x, int *y, WARN_ON(new_offset > old_offset); - if (!is_surface_linear(fb->modifier, color_plane)) { + if (!is_surface_linear(fb, color_plane)) { unsigned int tile_size, tile_width, tile_height; unsigned int pitch_tiles; @@ -2307,8 +2424,8 @@ static u32 intel_plane_adjust_aligned_offset(int *x, int *y, int color_plane, u32 old_offset, u32 new_offset) { - return intel_adjust_aligned_offset(x, y, state->base.fb, color_plane, - state->base.rotation, + return intel_adjust_aligned_offset(x, y, state->hw.fb, color_plane, + state->hw.rotation, state->color_plane[color_plane].stride, old_offset, new_offset); } @@ -2338,10 +2455,7 @@ static u32 intel_compute_aligned_offset(struct drm_i915_private *dev_priv, unsigned int cpp = fb->format->cpp[color_plane]; u32 offset, offset_aligned; - if (alignment) - alignment--; - - if (!is_surface_linear(fb->modifier, color_plane)) { + if (!is_surface_linear(fb, color_plane)) { unsigned int tile_size, tile_width, tile_height; unsigned int tile_rows, tiles, pitch_tiles; @@ -2362,17 +2476,24 @@ static u32 intel_compute_aligned_offset(struct drm_i915_private *dev_priv, *x %= tile_width; offset = (tile_rows * pitch_tiles + tiles) * tile_size; - offset_aligned = offset & ~alignment; + + offset_aligned = offset; + if (alignment) + offset_aligned = rounddown(offset_aligned, alignment); intel_adjust_tile_offset(x, y, tile_width, tile_height, tile_size, pitch_tiles, offset, offset_aligned); } else { offset = *y * pitch + *x * cpp; - offset_aligned = offset & ~alignment; - - *y = (offset & alignment) / pitch; - *x = ((offset & alignment) - *y * pitch) / cpp; + offset_aligned = offset; + if (alignment) { + offset_aligned = rounddown(offset_aligned, alignment); + *y = (offset % alignment) / pitch; + *x = ((offset % alignment) - *y * pitch) / cpp; + } else { + *y = *x = 0; + } } return offset_aligned; @@ -2382,10 +2503,10 @@ static u32 intel_plane_compute_aligned_offset(int *x, int *y, const struct intel_plane_state *state, int color_plane) { - struct intel_plane *intel_plane = to_intel_plane(state->base.plane); + struct intel_plane *intel_plane = to_intel_plane(state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev); - const struct drm_framebuffer *fb = state->base.fb; 
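The change above from the "alignment--; offset & ~alignment" mask trick to rounddown() matters because the mask form only rounds correctly when the alignment is a power of two, while the new gen12 semiplanar/CCS paths can hand back a tile-row-sized alignment that need not be one. A small standalone illustration (4800 is just an arbitrary non-power-of-two alignment):

#include <assert.h>
#include <stdint.h>

/* Mask-based round-down: only valid for power-of-two alignments. */
static uint32_t rounddown_mask(uint32_t x, uint32_t align)
{
	return x & ~(align - 1);
}

/* Division-based round-down, in the spirit of the kernel's rounddown()
 * macro: works for any non-zero alignment. */
static uint32_t rounddown_div(uint32_t x, uint32_t align)
{
	return x - (x % align);
}

int main(void)
{
	/* Power of two: both forms agree. */
	assert(rounddown_mask(10000, 4096) == 8192);
	assert(rounddown_div(10000, 4096) == 8192);

	/* Non-power-of-two (think pitch * tile_height style values): the
	 * division form gives the correct multiple, the mask form does not. */
	assert(rounddown_div(10000, 4800) == 9600);
	assert(rounddown_mask(10000, 4800) == 9472);	/* not a multiple of 4800 */
	return 0;
}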
- unsigned int rotation = state->base.rotation; + const struct drm_framebuffer *fb = state->hw.fb; + unsigned int rotation = state->hw.rotation; int pitch = state->color_plane[color_plane].stride; u32 alignment; @@ -2405,9 +2526,17 @@ static int intel_fb_offset_to_xy(int *x, int *y, { struct drm_i915_private *dev_priv = to_i915(fb->dev); unsigned int height; + u32 alignment; - if (fb->modifier != DRM_FORMAT_MOD_LINEAR && - fb->offsets[color_plane] % intel_tile_size(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 12 && + is_semiplanar_uv_plane(fb, color_plane)) + alignment = intel_tile_row_size(fb, color_plane); + else if (fb->modifier != DRM_FORMAT_MOD_LINEAR) + alignment = intel_tile_size(dev_priv); + else + alignment = 0; + + if (alignment != 0 && fb->offsets[color_plane] % alignment) { DRM_DEBUG_KMS("Misaligned offset 0x%08x for color plane %d\n", fb->offsets[color_plane], color_plane); return -EINVAL; @@ -2443,6 +2572,8 @@ static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) return I915_TILING_X; case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: return I915_TILING_Y; default: return I915_TILING_NONE; @@ -2463,7 +2594,7 @@ static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) * us a ratio of one byte in the CCS for each 8x16 pixels in the * main surface. */ -static const struct drm_format_info ccs_formats[] = { +static const struct drm_format_info skl_ccs_formats[] = { { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, @@ -2474,6 +2605,52 @@ static const struct drm_format_info ccs_formats[] = { .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, .has_alpha = true, }, }; +/* + * Gen-12 compression uses 4 bits of CCS data for each cache line pair in the + * main surface. And each 64B CCS cache line represents an area of 4x1 Y-tiles + * in the main surface. With 4 byte pixels and each Y-tile having dimensions of + * 32x32 pixels, the ratio turns out to 1B in the CCS for every 2x32 pixels in + * the main surface. 
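The Gen-12 CCS ratio described in the comment above can be sanity-checked with a few lines of arithmetic. The constants below (64-byte cache lines, 4096-byte Y-tiles, 4 bytes per pixel) come from that comment; the rest is the math spelled out:

#include <assert.h>

int main(void)
{
	const int bpp = 4;		/* bytes per pixel (XRGB8888) */
	const int cacheline = 64;	/* bytes */
	const int ytile = 4096;		/* bytes: 32x32 pixels at 4 bpp */

	/* 4 bits of CCS per cache-line pair => 1 CCS byte covers two pairs,
	 * i.e. 4 * 64 = 256 bytes of main surface. */
	const int main_bytes_per_ccs_byte = (8 / 4) * (2 * cacheline);
	assert(main_bytes_per_ccs_byte == 256);

	/* 256 bytes at 4 bpp is 64 pixels: the 2x32-pixel block from the
	 * comment (2 pixels wide, one 32-row tile column high), matching the
	 * block_w = 2 / vsub = 32 treatment of the gen12 CCS planes. */
	assert(main_bytes_per_ccs_byte / bpp == 2 * 32);

	/* A 64-byte CCS cache line therefore maps 64 * 256 = 16 KiB of main
	 * surface, i.e. four Y-tiles (the "4x1 Y-tiles" in the comment). */
	assert(cacheline * main_bytes_per_ccs_byte == 4 * ytile);

	/* Consistent with gen12_ccs_aux_stride(): 64 bytes of CCS pitch for
	 * every 512 bytes of main pitch is the same 1:8 per-row byte ratio
	 * once the 32-line vertical coverage is factored out. */
	assert(512 / 64 == main_bytes_per_ccs_byte / 32);
	return 0;
}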
+ */ +static const struct drm_format_info gen12_ccs_formats[] = { + { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, + .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, + .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, + .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 1, .vsub = 1, .has_alpha = true }, + { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, + .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 1, .vsub = 1, .has_alpha = true }, + { .format = DRM_FORMAT_YUYV, .num_planes = 2, + .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_YVYU, .num_planes = 2, + .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_UYVY, .num_planes = 2, + .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_VYUY, .num_planes = 2, + .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_NV12, .num_planes = 4, + .char_per_block = { 1, 2, 1, 1 }, .block_w = { 1, 1, 4, 4 }, .block_h = { 1, 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true }, + { .format = DRM_FORMAT_P010, .num_planes = 4, + .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true }, + { .format = DRM_FORMAT_P012, .num_planes = 4, + .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true }, + { .format = DRM_FORMAT_P016, .num_planes = 4, + .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true }, +}; + static const struct drm_format_info * lookup_format_info(const struct drm_format_info formats[], int num_formats, u32 format) @@ -2494,8 +2671,13 @@ intel_get_format_info(const struct drm_mode_fb_cmd2 *cmd) switch (cmd->modifier[0]) { case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: - return lookup_format_info(ccs_formats, - ARRAY_SIZE(ccs_formats), + return lookup_format_info(skl_ccs_formats, + ARRAY_SIZE(skl_ccs_formats), + cmd->pixel_format); + case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: + return lookup_format_info(gen12_ccs_formats, + ARRAY_SIZE(gen12_ccs_formats), cmd->pixel_format); default: return NULL; @@ -2504,10 +2686,18 @@ intel_get_format_info(const struct drm_mode_fb_cmd2 *cmd) bool is_ccs_modifier(u64 modifier) { - return modifier == I915_FORMAT_MOD_Y_TILED_CCS || + return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS || + modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS || + modifier == I915_FORMAT_MOD_Y_TILED_CCS || modifier == I915_FORMAT_MOD_Yf_TILED_CCS; } +static int gen12_ccs_aux_stride(struct drm_framebuffer *fb, int ccs_plane) +{ + return DIV_ROUND_UP(fb->pitches[ccs_to_main_plane(fb, ccs_plane)], + 512) * 64; +} + u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, u32 pixel_format, u64 modifier) { @@ -2519,6 +2709,9 @@ u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, * the highest 
stride limits of them all. */ crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); + if (!crtc) + return 0; + plane = to_intel_plane(crtc->base.primary); return plane->max_stride(plane, pixel_format, modifier, @@ -2549,8 +2742,9 @@ static u32 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane) { struct drm_i915_private *dev_priv = to_i915(fb->dev); + u32 tile_width; - if (fb->modifier == DRM_FORMAT_MOD_LINEAR) { + if (is_surface_linear(fb, color_plane)) { u32 max_stride = intel_plane_fb_max_stride(dev_priv, fb->format->format, fb->modifier); @@ -2559,20 +2753,41 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane) * To make remapping with linear generally feasible * we need the stride to be page aligned. */ - if (fb->pitches[color_plane] > max_stride) + if (fb->pitches[color_plane] > max_stride && + !is_ccs_modifier(fb->modifier)) return intel_tile_size(dev_priv); else return 64; - } else { - return intel_tile_width_bytes(fb, color_plane); } + + tile_width = intel_tile_width_bytes(fb, color_plane); + if (is_ccs_modifier(fb->modifier)) { + /* + * Display WA #0531: skl,bxt,kbl,glk + * + * Render decompression and plane width > 3840 + * combined with horizontal panning requires the + * plane stride to be a multiple of 4. We'll just + * require the entire fb to accommodate that to avoid + * potential runtime errors at plane configuration time. + */ + if (IS_GEN(dev_priv, 9) && color_plane == 0 && fb->width > 3840) + tile_width *= 4; + /* + * The main surface pitch must be padded to a multiple of four + * tile widths. + */ + else if (INTEL_GEN(dev_priv) >= 12) + tile_width *= 4; + } + return tile_width; } bool intel_plane_can_remap(const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_framebuffer *fb = plane_state->hw.fb; int i; /* We don't want to deal with remapping with cursors */ @@ -2610,16 +2825,16 @@ bool intel_plane_can_remap(const struct intel_plane_state *plane_state) static bool intel_plane_needs_remap(const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; u32 stride, max_stride; /* * No remapping for invisible planes since we don't have * an actual source viewport to remap. */ - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) return false; if (!intel_plane_can_remap(plane_state)) @@ -2636,12 +2851,171 @@ static bool intel_plane_needs_remap(const struct intel_plane_state *plane_state) return stride > max_stride; } +static void +intel_fb_plane_get_subsampling(int *hsub, int *vsub, + const struct drm_framebuffer *fb, + int color_plane) +{ + int main_plane; + + if (color_plane == 0) { + *hsub = 1; + *vsub = 1; + + return; + } + + /* + * TODO: Deduct the subsampling from the char block for all CCS + * formats and planes. 
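A compact illustration of the plane indexing convention behind the is_ccs_plane()/main_to_ccs_plane()/ccs_to_main_plane() helpers added earlier in this diff: for the CCS modifiers the first half of the planes are the ordinary color planes and the second half are their CCS planes, so the 4-plane gen12 MC_CCS NV12 layout is Y, UV, Y-CCS, UV-CCS. The snippet re-derives that mapping outside the driver with plain ints:

#include <assert.h>

/* num_planes/2 color planes followed by their CCS planes, matching the
 * gen12_ccs_formats[] entries (XRGB8888: 2 planes, NV12/P01x: 4 planes). */
static int main_to_ccs(int num_planes, int main_plane)
{
	return num_planes / 2 + main_plane;
}

static int ccs_to_main(int num_planes, int ccs_plane)
{
	return ccs_plane - num_planes / 2;
}

static int is_ccs(int num_planes, int plane)
{
	return plane >= num_planes / 2;
}

int main(void)
{
	/* NV12 + MC_CCS: plane 0 = Y, 1 = UV, 2 = CCS of Y, 3 = CCS of UV. */
	assert(main_to_ccs(4, 0) == 2 && main_to_ccs(4, 1) == 3);
	assert(ccs_to_main(4, 2) == 0 && ccs_to_main(4, 3) == 1);
	assert(!is_ccs(4, 1) && is_ccs(4, 2));

	/* XRGB8888 + RC_CCS: plane 0 = color, plane 1 = its CCS. */
	assert(main_to_ccs(2, 0) == 1 && ccs_to_main(2, 1) == 0);
	return 0;
}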
+ */ + if (!is_gen12_ccs_plane(fb, color_plane)) { + *hsub = fb->format->hsub; + *vsub = fb->format->vsub; + + return; + } + + main_plane = ccs_to_main_plane(fb, color_plane); + *hsub = drm_format_info_block_width(fb->format, color_plane) / + drm_format_info_block_width(fb->format, main_plane); + + /* + * The min stride check in the core framebuffer_check() function + * assumes that format->hsub applies to every plane except for the + * first plane. That's incorrect for the CCS AUX plane of the first + * plane, but for the above check to pass we must define the block + * width with that subsampling applied to it. Adjust the width here + * accordingly, so we can calculate the actual subsampling factor. + */ + if (main_plane == 0) + *hsub *= fb->format->hsub; + + *vsub = 32; +} +static int +intel_fb_check_ccs_xy(struct drm_framebuffer *fb, int ccs_plane, int x, int y) +{ + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + int main_plane; + int hsub, vsub; + int tile_width, tile_height; + int ccs_x, ccs_y; + int main_x, main_y; + + if (!is_ccs_plane(fb, ccs_plane)) + return 0; + + intel_tile_dims(fb, ccs_plane, &tile_width, &tile_height); + intel_fb_plane_get_subsampling(&hsub, &vsub, fb, ccs_plane); + + tile_width *= hsub; + tile_height *= vsub; + + ccs_x = (x * hsub) % tile_width; + ccs_y = (y * vsub) % tile_height; + + main_plane = ccs_to_main_plane(fb, ccs_plane); + main_x = intel_fb->normal[main_plane].x % tile_width; + main_y = intel_fb->normal[main_plane].y % tile_height; + + /* + * CCS doesn't have its own x/y offset register, so the intra CCS tile + * x/y offsets must match between CCS and the main surface. + */ + if (main_x != ccs_x || main_y != ccs_y) { + DRM_DEBUG_KMS("Bad CCS x/y (main %d,%d ccs %d,%d) full (main %d,%d ccs %d,%d)\n", + main_x, main_y, + ccs_x, ccs_y, + intel_fb->normal[main_plane].x, + intel_fb->normal[main_plane].y, + x, y); + return -EINVAL; + } + + return 0; +} + +static void +intel_fb_plane_dims(int *w, int *h, struct drm_framebuffer *fb, int color_plane) +{ + int main_plane = is_ccs_plane(fb, color_plane) ? + ccs_to_main_plane(fb, color_plane) : 0; + int main_hsub, main_vsub; + int hsub, vsub; + + intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, fb, main_plane); + intel_fb_plane_get_subsampling(&hsub, &vsub, fb, color_plane); + *w = fb->width / main_hsub / hsub; + *h = fb->height / main_vsub / vsub; +} + +/* + * Setup the rotated view for an FB plane and return the size the GTT mapping + * requires for this view. 
+ */ +static u32 +setup_fb_rotation(int plane, const struct intel_remapped_plane_info *plane_info, + u32 gtt_offset_rotated, int x, int y, + unsigned int width, unsigned int height, + unsigned int tile_size, + unsigned int tile_width, unsigned int tile_height, + struct drm_framebuffer *fb) +{ + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + struct intel_rotation_info *rot_info = &intel_fb->rot_info; + unsigned int pitch_tiles; + struct drm_rect r; + + /* Y or Yf modifiers required for 90/270 rotation */ + if (fb->modifier != I915_FORMAT_MOD_Y_TILED && + fb->modifier != I915_FORMAT_MOD_Yf_TILED) + return 0; + + if (WARN_ON(plane >= ARRAY_SIZE(rot_info->plane))) + return 0; + + rot_info->plane[plane] = *plane_info; + + intel_fb->rotated[plane].pitch = plane_info->height * tile_height; + + /* rotate the x/y offsets to match the GTT view */ + drm_rect_init(&r, x, y, width, height); + drm_rect_rotate(&r, + plane_info->width * tile_width, + plane_info->height * tile_height, + DRM_MODE_ROTATE_270); + x = r.x1; + y = r.y1; + + /* rotate the tile dimensions to match the GTT view */ + pitch_tiles = intel_fb->rotated[plane].pitch / tile_height; + swap(tile_width, tile_height); + + /* + * We only keep the x/y offsets, so push all of the + * gtt offset into the x/y offsets. + */ + intel_adjust_tile_offset(&x, &y, + tile_width, tile_height, + tile_size, pitch_tiles, + gtt_offset_rotated * tile_size, 0); + + /* + * First pixel of the framebuffer from + * the start of the rotated gtt mapping. + */ + intel_fb->rotated[plane].x = x; + intel_fb->rotated[plane].y = y; + + return plane_info->width * plane_info->height; +} + static int intel_fill_fb_info(struct drm_i915_private *dev_priv, struct drm_framebuffer *fb) { struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - struct intel_rotation_info *rot_info = &intel_fb->rot_info; struct drm_i915_gem_object *obj = intel_fb_obj(fb); u32 gtt_offset_rotated = 0; unsigned int max_size = 0; @@ -2656,8 +3030,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, int ret; cpp = fb->format->cpp[i]; - width = drm_framebuffer_plane_width(fb->width, fb, i); - height = drm_framebuffer_plane_height(fb->height, fb, i); + intel_fb_plane_dims(&width, &height, fb, i); ret = intel_fb_offset_to_xy(&x, &y, fb, i); if (ret) { @@ -2666,36 +3039,9 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, return ret; } - if (is_ccs_modifier(fb->modifier) && i == 1) { - int hsub = fb->format->hsub; - int vsub = fb->format->vsub; - int tile_width, tile_height; - int main_x, main_y; - int ccs_x, ccs_y; - - intel_tile_dims(fb, i, &tile_width, &tile_height); - tile_width *= hsub; - tile_height *= vsub; - - ccs_x = (x * hsub) % tile_width; - ccs_y = (y * vsub) % tile_height; - main_x = intel_fb->normal[0].x % tile_width; - main_y = intel_fb->normal[0].y % tile_height; - - /* - * CCS doesn't have its own x/y offset register, so the intra CCS tile - * x/y offsets must match between CCS and the main surface. 
- */ - if (main_x != ccs_x || main_y != ccs_y) { - DRM_DEBUG_KMS("Bad CCS x/y (main %d,%d ccs %d,%d) full (main %d,%d ccs %d,%d)\n", - main_x, main_y, - ccs_x, ccs_y, - intel_fb->normal[0].x, - intel_fb->normal[0].y, - x, y); - return -EINVAL; - } - } + ret = intel_fb_check_ccs_xy(fb, i, x, y); + if (ret) + return ret; /* * The fence (if used) is aligned to the start of the object @@ -2726,23 +3072,21 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, tile_size); offset /= tile_size; - if (!is_surface_linear(fb->modifier, i)) { + if (!is_surface_linear(fb, i)) { + struct intel_remapped_plane_info plane_info; unsigned int tile_width, tile_height; - unsigned int pitch_tiles; - struct drm_rect r; intel_tile_dims(fb, i, &tile_width, &tile_height); - rot_info->plane[i].offset = offset; - rot_info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp); - rot_info->plane[i].width = DIV_ROUND_UP(x + width, tile_width); - rot_info->plane[i].height = DIV_ROUND_UP(y + height, tile_height); - - intel_fb->rotated[i].pitch = - rot_info->plane[i].height * tile_height; + plane_info.offset = offset; + plane_info.stride = DIV_ROUND_UP(fb->pitches[i], + tile_width * cpp); + plane_info.width = DIV_ROUND_UP(x + width, tile_width); + plane_info.height = DIV_ROUND_UP(y + height, + tile_height); /* how many tiles does this plane need */ - size = rot_info->plane[i].stride * rot_info->plane[i].height; + size = plane_info.stride * plane_info.height; /* * If the plane isn't horizontally tile aligned, * we need one more tile. @@ -2750,39 +3094,13 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, if (x != 0) size++; - /* rotate the x/y offsets to match the GTT view */ - r.x1 = x; - r.y1 = y; - r.x2 = x + width; - r.y2 = y + height; - drm_rect_rotate(&r, - rot_info->plane[i].width * tile_width, - rot_info->plane[i].height * tile_height, - DRM_MODE_ROTATE_270); - x = r.x1; - y = r.y1; - - /* rotate the tile dimensions to match the GTT view */ - pitch_tiles = intel_fb->rotated[i].pitch / tile_height; - swap(tile_width, tile_height); - - /* - * We only keep the x/y offsets, so push all of the - * gtt offset into the x/y offsets. - */ - intel_adjust_tile_offset(&x, &y, - tile_width, tile_height, - tile_size, pitch_tiles, - gtt_offset_rotated * tile_size, 0); - - gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height; - - /* - * First pixel of the framebuffer from - * the start of the rotated gtt mapping. 
- */ - intel_fb->rotated[i].x = x; - intel_fb->rotated[i].y = y; + gtt_offset_rotated += + setup_fb_rotation(i, &plane_info, + gtt_offset_rotated, + x, y, width, height, + tile_size, + tile_width, tile_height, + fb); } else { size = DIV_ROUND_UP((y + height) * fb->pitches[i] + x * cpp, tile_size); @@ -2805,11 +3123,11 @@ static void intel_plane_remap_gtt(struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - struct drm_framebuffer *fb = plane_state->base.fb; + to_i915(plane_state->uapi.plane->dev); + struct drm_framebuffer *fb = plane_state->hw.fb; struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); struct intel_rotation_info *info = &plane_state->view.rotated; - unsigned int rotation = plane_state->base.rotation; + unsigned int rotation = plane_state->hw.rotation; int i, num_planes = fb->format->num_planes; unsigned int tile_size = intel_tile_size(dev_priv); unsigned int src_x, src_y; @@ -2820,20 +3138,20 @@ intel_plane_remap_gtt(struct intel_plane_state *plane_state) plane_state->view.type = drm_rotation_90_or_270(rotation) ? I915_GGTT_VIEW_ROTATED : I915_GGTT_VIEW_REMAPPED; - src_x = plane_state->base.src.x1 >> 16; - src_y = plane_state->base.src.y1 >> 16; - src_w = drm_rect_width(&plane_state->base.src) >> 16; - src_h = drm_rect_height(&plane_state->base.src) >> 16; + src_x = plane_state->uapi.src.x1 >> 16; + src_y = plane_state->uapi.src.y1 >> 16; + src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + src_h = drm_rect_height(&plane_state->uapi.src) >> 16; WARN_ON(is_ccs_modifier(fb->modifier)); /* Make src coordinates relative to the viewport */ - drm_rect_translate(&plane_state->base.src, + drm_rect_translate(&plane_state->uapi.src, -(src_x << 16), -(src_y << 16)); /* Rotate src coordinates to match rotated GTT view */ if (drm_rotation_90_or_270(rotation)) - drm_rect_rotate(&plane_state->base.src, + drm_rect_rotate(&plane_state->uapi.src, src_w << 16, src_h << 16, DRM_MODE_ROTATE_270); @@ -2866,6 +3184,7 @@ intel_plane_remap_gtt(struct intel_plane_state *plane_state) DRM_MODE_ROTATE_0, tile_size); offset /= tile_size; + WARN_ON(i >= ARRAY_SIZE(info->plane)); info->plane[i].offset = offset; info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp); @@ -2876,10 +3195,7 @@ intel_plane_remap_gtt(struct intel_plane_state *plane_state) struct drm_rect r; /* rotate the x/y offsets to match the GTT view */ - r.x1 = x; - r.y1 = y; - r.x2 = x + width; - r.y2 = y + height; + drm_rect_init(&r, x, y, width, height); drm_rect_rotate(&r, info->plane[i].width * tile_width, info->plane[i].height * tile_height, @@ -2918,8 +3234,8 @@ static int intel_plane_compute_gtt(struct intel_plane_state *plane_state) { const struct intel_framebuffer *fb = - to_intel_framebuffer(plane_state->base.fb); - unsigned int rotation = plane_state->base.rotation; + to_intel_framebuffer(plane_state->hw.fb); + unsigned int rotation = plane_state->hw.rotation; int i, num_planes; if (!fb) @@ -2956,7 +3272,7 @@ intel_plane_compute_gtt(struct intel_plane_state *plane_state) /* Rotate src coordinates to match rotated GTT view */ if (drm_rotation_90_or_270(rotation)) - drm_rect_rotate(&plane_state->base.src, + drm_rect_rotate(&plane_state->uapi.src, fb->base.width << 16, fb->base.height << 16, DRM_MODE_ROTATE_270); @@ -2968,6 +3284,8 @@ static int i9xx_format_to_fourcc(int format) switch (format) { case DISPPLANE_8BPP: return DRM_FORMAT_C8; + case DISPPLANE_BGRA555: + return DRM_FORMAT_ARGB1555; case DISPPLANE_BGRX555: return 
DRM_FORMAT_XRGB1555; case DISPPLANE_BGRX565: @@ -2977,10 +3295,20 @@ static int i9xx_format_to_fourcc(int format) return DRM_FORMAT_XRGB8888; case DISPPLANE_RGBX888: return DRM_FORMAT_XBGR8888; + case DISPPLANE_BGRA888: + return DRM_FORMAT_ARGB8888; + case DISPPLANE_RGBA888: + return DRM_FORMAT_ABGR8888; case DISPPLANE_BGRX101010: return DRM_FORMAT_XRGB2101010; case DISPPLANE_RGBX101010: return DRM_FORMAT_XBGR2101010; + case DISPPLANE_BGRA101010: + return DRM_FORMAT_ARGB2101010; + case DISPPLANE_RGBA101010: + return DRM_FORMAT_ABGR2101010; + case DISPPLANE_RGBX161616: + return DRM_FORMAT_XBGR16161616F; } } @@ -3023,10 +3351,17 @@ int skl_format_to_fourcc(int format, bool rgb_order, bool alpha) return DRM_FORMAT_XRGB8888; } case PLANE_CTL_FORMAT_XRGB_2101010: - if (rgb_order) - return DRM_FORMAT_XBGR2101010; - else - return DRM_FORMAT_XRGB2101010; + if (rgb_order) { + if (alpha) + return DRM_FORMAT_ABGR2101010; + else + return DRM_FORMAT_XBGR2101010; + } else { + if (alpha) + return DRM_FORMAT_ARGB2101010; + else + return DRM_FORMAT_XRGB2101010; + } case PLANE_CTL_FORMAT_XRGB_16161616F: if (rgb_order) { if (alpha) @@ -3048,12 +3383,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj = NULL; struct drm_mode_fb_cmd2 mode_cmd = { 0 }; struct drm_framebuffer *fb = &plane_config->fb->base; u32 base_aligned = round_down(plane_config->base, PAGE_SIZE); u32 size_aligned = round_up(plane_config->base + plane_config->size, PAGE_SIZE); + struct drm_i915_gem_object *obj; + bool ret = false; size_aligned -= base_aligned; @@ -3077,13 +3413,11 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; } - mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen_for_preallocated(dev_priv, base_aligned, base_aligned, size_aligned); - mutex_unlock(&dev->struct_mutex); - if (!obj) + if (IS_ERR(obj)) return false; switch (plane_config->tiling) { @@ -3095,7 +3429,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, break; default: MISSING_CASE(plane_config->tiling); - return false; + goto out; } mode_cmd.pixel_format = fb->format->format; @@ -3107,16 +3441,15 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, if (intel_framebuffer_init(to_intel_framebuffer(fb), obj, &mode_cmd)) { DRM_DEBUG_KMS("intel fb init failed\n"); - goto out_unref_obj; + goto out; } DRM_DEBUG_KMS("initial plane fb obj %p\n", obj); - return true; - -out_unref_obj: + ret = true; +out: i915_gem_object_put(obj); - return false; + return ret; } static void @@ -3124,19 +3457,19 @@ intel_set_plane_visible(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state, bool visible) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - plane_state->base.visible = visible; + plane_state->uapi.visible = visible; if (visible) - crtc_state->base.plane_mask |= drm_plane_mask(&plane->base); + crtc_state->uapi.plane_mask |= drm_plane_mask(&plane->base); else - crtc_state->base.plane_mask &= ~drm_plane_mask(&plane->base); + crtc_state->uapi.plane_mask &= ~drm_plane_mask(&plane->base); } static void fixup_active_planes(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); struct drm_plane *plane; /* @@ -3147,13 +3480,14 @@ static void 
fixup_active_planes(struct intel_crtc_state *crtc_state) crtc_state->active_planes = 0; drm_for_each_plane_mask(plane, &dev_priv->drm, - crtc_state->base.plane_mask) + crtc_state->uapi.plane_mask) crtc_state->active_planes |= BIT(to_intel_plane(plane)->id); } static void intel_plane_disable_noatomic(struct intel_crtc *crtc, struct intel_plane *plane) { + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane_state *plane_state = @@ -3166,13 +3500,40 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc, intel_set_plane_visible(crtc_state, plane_state, false); fixup_active_planes(crtc_state); crtc_state->data_rate[plane->id] = 0; + crtc_state->min_cdclk[plane->id] = 0; if (plane->id == PLANE_PRIMARY) - intel_pre_disable_primary_noatomic(&crtc->base); + hsw_disable_ips(crtc_state); + + /* + * Vblank time updates from the shadow to live plane control register + * are blocked if the memory self-refresh mode is active at that + * moment. So to make sure the plane gets truly disabled, disable + * first the self-refresh mode. The self-refresh enable bit in turn + * will be checked/applied by the HW only at the next frame start + * event which is after the vblank start event, so we need to have a + * wait-for-vblank between disabling the plane and the pipe. + */ + if (HAS_GMCH(dev_priv) && + intel_set_memory_cxsr(dev_priv, false)) + intel_wait_for_vblank(dev_priv, crtc->pipe); + + /* + * Gen2 reports pipe underruns whenever all planes are disabled. + * So disable underrun reporting before all the planes get disabled. + */ + if (IS_GEN(dev_priv, 2) && !crtc_state->active_planes) + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); intel_disable_plane(plane, crtc_state); } +static struct intel_frontbuffer * +to_intel_frontbuffer(struct drm_framebuffer *fb) +{ + return fb ? 
to_intel_framebuffer(fb)->frontbuffer : NULL; +} + static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) @@ -3180,7 +3541,6 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *c; - struct drm_i915_gem_object *obj; struct drm_plane *primary = intel_crtc->base.primary; struct drm_plane_state *plane_state = primary->state; struct intel_plane *intel_plane = to_intel_plane(primary); @@ -3216,7 +3576,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, continue; if (intel_plane_ggtt_offset(state) == plane_config->base) { - fb = state->base.fb; + fb = state->hw.fb; drm_framebuffer_get(fb); goto valid_fb; } @@ -3234,19 +3594,17 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, return; valid_fb: - intel_state->base.rotation = plane_config->rotation; + intel_state->hw.rotation = plane_config->rotation; intel_fill_fb_ggtt_view(&intel_state->view, fb, - intel_state->base.rotation); + intel_state->hw.rotation); intel_state->color_plane[0].stride = - intel_fb_pitch(fb, 0, intel_state->base.rotation); + intel_fb_pitch(fb, 0, intel_state->hw.rotation); - mutex_lock(&dev->struct_mutex); intel_state->vma = intel_pin_and_fence_fb_obj(fb, &intel_state->view, intel_plane_uses_fence(intel_state), &intel_state->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(intel_state->vma)) { DRM_ERROR("failed to pin boot fb on pipe %d: %li\n", intel_crtc->pipe, PTR_ERR(intel_state->vma)); @@ -3256,8 +3614,7 @@ valid_fb: return; } - obj = intel_fb_obj(fb); - intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); + intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB); plane_state->src_x = 0; plane_state->src_y = 0; @@ -3269,17 +3626,18 @@ valid_fb: plane_state->crtc_w = fb->width; plane_state->crtc_h = fb->height; - intel_state->base.src = drm_plane_state_src(plane_state); - intel_state->base.dst = drm_plane_state_dest(plane_state); + intel_state->uapi.src = drm_plane_state_src(plane_state); + intel_state->uapi.dst = drm_plane_state_dest(plane_state); - if (i915_gem_object_is_tiled(obj)) + if (plane_config->tiling) dev_priv->preserve_bios_swizzle = true; plane_state->fb = fb; plane_state->crtc = &intel_crtc->base; + intel_plane_copy_uapi_to_hw_state(intel_state, intel_state); atomic_or(to_intel_plane(primary)->frontbuffer_bit, - &obj->frontbuffer_bits); + &to_intel_frontbuffer(fb)->bits); } static int skl_max_plane_width(const struct drm_framebuffer *fb, @@ -3291,9 +3649,23 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, switch (fb->modifier) { case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: - return 4096; + /* + * Validated limit is 4k, but has 5k should + * work apart from the following features: + * - Ytile (already limited to 4k) + * - FP16 (already limited to 4k) + * - render compression (already limited to 4k) + * - KVMR sprite and cursor (don't care) + * - horizontal panning (TODO verify this) + * - pipe and plane scaling (TODO verify this) + */ + if (cpp == 8) + return 4096; + else + return 5120; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: /* FIXME AUX plane? 
*/ case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: @@ -3342,17 +3714,30 @@ static int icl_max_plane_width(const struct drm_framebuffer *fb, return 5120; } -static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, - int main_x, int main_y, u32 main_offset) +static int skl_max_plane_height(void) +{ + return 4096; +} + +static int icl_max_plane_height(void) { - const struct drm_framebuffer *fb = plane_state->base.fb; - int hsub = fb->format->hsub; - int vsub = fb->format->vsub; - int aux_x = plane_state->color_plane[1].x; - int aux_y = plane_state->color_plane[1].y; - u32 aux_offset = plane_state->color_plane[1].offset; - u32 alignment = intel_surf_alignment(fb, 1); + return 4320; +} +static bool +skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, + int main_x, int main_y, u32 main_offset, + int ccs_plane) +{ + const struct drm_framebuffer *fb = plane_state->hw.fb; + int aux_x = plane_state->color_plane[ccs_plane].x; + int aux_y = plane_state->color_plane[ccs_plane].y; + u32 aux_offset = plane_state->color_plane[ccs_plane].offset; + u32 alignment = intel_surf_alignment(fb, ccs_plane); + int hsub; + int vsub; + + intel_fb_plane_get_subsampling(&hsub, &vsub, fb, ccs_plane); while (aux_offset >= main_offset && aux_y <= main_y) { int x, y; @@ -3364,8 +3749,12 @@ static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state x = aux_x / hsub; y = aux_y / vsub; - aux_offset = intel_plane_adjust_aligned_offset(&x, &y, plane_state, 1, - aux_offset, aux_offset - alignment); + aux_offset = intel_plane_adjust_aligned_offset(&x, &y, + plane_state, + ccs_plane, + aux_offset, + aux_offset - + alignment); aux_x = x * hsub + aux_x % hsub; aux_y = y * vsub + aux_y % vsub; } @@ -3373,25 +3762,28 @@ static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state if (aux_x != main_x || aux_y != main_y) return false; - plane_state->color_plane[1].offset = aux_offset; - plane_state->color_plane[1].x = aux_x; - plane_state->color_plane[1].y = aux_y; + plane_state->color_plane[ccs_plane].offset = aux_offset; + plane_state->color_plane[ccs_plane].x = aux_x; + plane_state->color_plane[ccs_plane].y = aux_y; return true; } static int skl_check_main_surface(struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; - int x = plane_state->base.src.x1 >> 16; - int y = plane_state->base.src.y1 >> 16; - int w = drm_rect_width(&plane_state->base.src) >> 16; - int h = drm_rect_height(&plane_state->base.src) >> 16; + struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; + int x = plane_state->uapi.src.x1 >> 16; + int y = plane_state->uapi.src.y1 >> 16; + int w = drm_rect_width(&plane_state->uapi.src) >> 16; + int h = drm_rect_height(&plane_state->uapi.src) >> 16; int max_width; - int max_height = 4096; - u32 alignment, offset, aux_offset = plane_state->color_plane[1].offset; + int max_height; + u32 alignment; + u32 offset; + int aux_plane = intel_main_to_aux_plane(fb, 0); + u32 aux_offset = plane_state->color_plane[aux_plane].offset; if (INTEL_GEN(dev_priv) >= 11) max_width = icl_max_plane_width(fb, 0, rotation); @@ -3400,6 +3792,11 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) else max_width = 
skl_max_plane_width(fb, 0, rotation); + if (INTEL_GEN(dev_priv) >= 11) + max_height = icl_max_plane_height(); + else + max_height = skl_max_plane_height(); + if (w > max_width || h > max_height) { DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n", w, h, max_width, max_height); @@ -3409,6 +3806,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) intel_add_fb_offsets(&x, &y, plane_state, 0); offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 0); alignment = intel_surf_alignment(fb, 0); + if (WARN_ON(alignment && !is_power_of_2(alignment))) + return -EINVAL; /* * AUX surface offset is specified as the distance from the @@ -3444,7 +3843,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) * they match with the main surface x/y offsets. */ if (is_ccs_modifier(fb->modifier)) { - while (!skl_check_main_ccs_coordinates(plane_state, x, y, offset)) { + while (!skl_check_main_ccs_coordinates(plane_state, x, y, + offset, aux_plane)) { if (offset == 0) break; @@ -3452,7 +3852,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) offset, offset - alignment); } - if (x != plane_state->color_plane[1].x || y != plane_state->color_plane[1].y) { + if (x != plane_state->color_plane[aux_plane].x || + y != plane_state->color_plane[aux_plane].y) { DRM_DEBUG_KMS("Unable to find suitable display surface offset due to CCS\n"); return -EINVAL; } @@ -3466,27 +3867,28 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) * Put the final coordinates back so that the src * coordinate checks will see the right values. */ - drm_rect_translate(&plane_state->base.src, - (x << 16) - plane_state->base.src.x1, - (y << 16) - plane_state->base.src.y1); + drm_rect_translate_to(&plane_state->uapi.src, + x << 16, y << 16); return 0; } static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; - int max_width = skl_max_plane_width(fb, 1, rotation); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; + int uv_plane = 1; + int max_width = skl_max_plane_width(fb, uv_plane, rotation); int max_height = 4096; - int x = plane_state->base.src.x1 >> 17; - int y = plane_state->base.src.y1 >> 17; - int w = drm_rect_width(&plane_state->base.src) >> 17; - int h = drm_rect_height(&plane_state->base.src) >> 17; + int x = plane_state->uapi.src.x1 >> 17; + int y = plane_state->uapi.src.y1 >> 17; + int w = drm_rect_width(&plane_state->uapi.src) >> 17; + int h = drm_rect_height(&plane_state->uapi.src) >> 17; u32 offset; - intel_add_fb_offsets(&x, &y, plane_state, 1); - offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 1); + intel_add_fb_offsets(&x, &y, plane_state, uv_plane); + offset = intel_plane_compute_aligned_offset(&x, &y, + plane_state, uv_plane); /* FIXME not quite sure how/if these apply to the chroma plane */ if (w > max_width || h > max_height) { @@ -3495,62 +3897,126 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) return -EINVAL; } - plane_state->color_plane[1].offset = offset; - plane_state->color_plane[1].x = x; - plane_state->color_plane[1].y = y; + if (is_ccs_modifier(fb->modifier)) { + int ccs_plane = main_to_ccs_plane(fb, uv_plane); + int aux_offset = plane_state->color_plane[ccs_plane].offset; + int alignment = intel_surf_alignment(fb, uv_plane); + + if (offset > 
aux_offset) + offset = intel_plane_adjust_aligned_offset(&x, &y, + plane_state, + uv_plane, + offset, + aux_offset & ~(alignment - 1)); + + while (!skl_check_main_ccs_coordinates(plane_state, x, y, + offset, ccs_plane)) { + if (offset == 0) + break; + + offset = intel_plane_adjust_aligned_offset(&x, &y, + plane_state, + uv_plane, + offset, offset - alignment); + } + + if (x != plane_state->color_plane[ccs_plane].x || + y != plane_state->color_plane[ccs_plane].y) { + DRM_DEBUG_KMS("Unable to find suitable display surface offset due to CCS\n"); + return -EINVAL; + } + } + + plane_state->color_plane[uv_plane].offset = offset; + plane_state->color_plane[uv_plane].x = x; + plane_state->color_plane[uv_plane].y = y; return 0; } static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; - int src_x = plane_state->base.src.x1 >> 16; - int src_y = plane_state->base.src.y1 >> 16; - int hsub = fb->format->hsub; - int vsub = fb->format->vsub; - int x = src_x / hsub; - int y = src_y / vsub; + const struct drm_framebuffer *fb = plane_state->hw.fb; + int src_x = plane_state->uapi.src.x1 >> 16; + int src_y = plane_state->uapi.src.y1 >> 16; u32 offset; + int ccs_plane; + + for (ccs_plane = 0; ccs_plane < fb->format->num_planes; ccs_plane++) { + int main_hsub, main_vsub; + int hsub, vsub; + int x, y; + + if (!is_ccs_plane(fb, ccs_plane)) + continue; + + intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, fb, + ccs_to_main_plane(fb, ccs_plane)); + intel_fb_plane_get_subsampling(&hsub, &vsub, fb, ccs_plane); - intel_add_fb_offsets(&x, &y, plane_state, 1); - offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 1); + hsub *= main_hsub; + vsub *= main_vsub; + x = src_x / hsub; + y = src_y / vsub; + + intel_add_fb_offsets(&x, &y, plane_state, ccs_plane); + + offset = intel_plane_compute_aligned_offset(&x, &y, + plane_state, + ccs_plane); - plane_state->color_plane[1].offset = offset; - plane_state->color_plane[1].x = x * hsub + src_x % hsub; - plane_state->color_plane[1].y = y * vsub + src_y % vsub; + plane_state->color_plane[ccs_plane].offset = offset; + plane_state->color_plane[ccs_plane].x = (x * hsub + + src_x % hsub) / + main_hsub; + plane_state->color_plane[ccs_plane].y = (y * vsub + + src_y % vsub) / + main_vsub; + } return 0; } int skl_check_plane_surface(struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_framebuffer *fb = plane_state->hw.fb; int ret; + bool needs_aux = false; ret = intel_plane_compute_gtt(plane_state); if (ret) return ret; - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) return 0; /* - * Handle the AUX surface first since - * the main surface setup depends on it. + * Handle the AUX surface first since the main surface setup depends on + * it. 
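Referring back to the coordinate math in skl_check_ccs_aux_surface() above, a small self-contained illustration; the subsampling factors are invented for the example, the kernel helpers are not called, and in the driver x/y may additionally be re-aligned to a tile boundary between the two steps (which is why the sub-unit remainder is re-added explicitly):

#include <stdio.h>

int main(void)
{
        /* hypothetical factors: main plane subsampled 2x, CCS 8x relative to it */
        int main_hsub = 2, ccs_hsub = 8;
        int hsub = ccs_hsub * main_hsub;        /* combined: 16 */
        int src_x = 101;                        /* arbitrary source x */

        int x = src_x / hsub;                   /* in CCS units: 6 */
        /* back to main-plane units, keeping the remainder below one CCS unit */
        int ccs_x = (x * hsub + src_x % hsub) / main_hsub;

        printf("color_plane[ccs].x = %d\n", ccs_x);     /* 50 */
        return 0;
}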
*/ - if (is_planar_yuv_format(fb->format->format)) { - ret = skl_check_nv12_aux_surface(plane_state); + if (is_ccs_modifier(fb->modifier)) { + needs_aux = true; + ret = skl_check_ccs_aux_surface(plane_state); if (ret) return ret; - } else if (is_ccs_modifier(fb->modifier)) { - ret = skl_check_ccs_aux_surface(plane_state); + } + + if (intel_format_info_is_yuv_semiplanar(fb->format, + fb->modifier)) { + needs_aux = true; + ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; - } else { - plane_state->color_plane[1].offset = ~0xfff; - plane_state->color_plane[1].x = 0; - plane_state->color_plane[1].y = 0; + } + + if (!needs_aux) { + int i; + + for (i = 1; i < fb->format->num_planes; i++) { + plane_state->color_plane[i].offset = ~0xfff; + plane_state->color_plane[i].x = 0; + plane_state->color_plane[i].y = 0; + } } ret = skl_check_main_surface(plane_state); @@ -3560,6 +4026,53 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) return 0; } +static void i9xx_plane_ratio(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int cpp = fb->format->cpp[0]; + + /* + * g4x bspec says 64bpp pixel rate can't exceed 80% + * of cdclk when the sprite plane is enabled on the + * same pipe. ilk/snb bspec says 64bpp pixel rate is + * never allowed to exceed 80% of cdclk. Let's just go + * with the ilk/snb limit always. + */ + if (cpp == 8) { + *num = 10; + *den = 8; + } else { + *num = 1; + *den = 1; + } +} + +static int i9xx_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int pixel_rate; + unsigned int num, den; + + /* + * Note that crtc_state->pixel_rate accounts for both + * horizontal and vertical panel fitter downscaling factors. + * Pre-HSW bspec tells us to only consider the horizontal + * downscaling factor here. We ignore that and just consider + * both for simplicity. 
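As a rough standalone illustration of the 64bpp rule described in the i9xx_plane_ratio()/i9xx_plane_min_cdclk() comments here (not part of the patch; DIV_ROUND_UP is redefined locally and the 148.5 MHz pixel rate is an arbitrary example):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned int pixel_rate = 148500;       /* kHz, example mode */
        unsigned int num = 10, den = 8;         /* cpp == 8: pixel rate <= 80% of cdclk */

        /* 148500 * 10 / 8 = 185625 kHz minimum cdclk */
        printf("min cdclk: %u kHz\n", DIV_ROUND_UP(pixel_rate * num, den));

        /* a double wide pipe pushes two pixels per clock, halving the requirement */
        den *= 2;
        printf("min cdclk, double wide: %u kHz\n", DIV_ROUND_UP(pixel_rate * num, den));
        return 0;
}

With num/den = 10/8 the minimum cdclk is 125% of the pixel rate, which is the same thing as capping the pixel rate at 80% of cdclk.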
+ */ + pixel_rate = crtc_state->pixel_rate; + + i9xx_plane_ratio(crtc_state, plane_state, &num, &den); + + /* two pixels per clock with double wide pipe */ + if (crtc_state->double_wide) + den *= 2; + + return DIV_ROUND_UP(pixel_rate * num, den); +} + unsigned int i9xx_plane_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, @@ -3589,7 +4102,7 @@ i9xx_plane_max_stride(struct intel_plane *plane, static u32 i9xx_plane_ctl_crtc(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 dspcntr = 0; @@ -3609,9 +4122,9 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; + to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; u32 dspcntr; dspcntr = DISPLAY_PLANE_ENABLE; @@ -3627,6 +4140,9 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_XRGB1555: dspcntr |= DISPPLANE_BGRX555; break; + case DRM_FORMAT_ARGB1555: + dspcntr |= DISPPLANE_BGRA555; + break; case DRM_FORMAT_RGB565: dspcntr |= DISPPLANE_BGRX565; break; @@ -3636,12 +4152,27 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_XBGR8888: dspcntr |= DISPPLANE_RGBX888; break; + case DRM_FORMAT_ARGB8888: + dspcntr |= DISPPLANE_BGRA888; + break; + case DRM_FORMAT_ABGR8888: + dspcntr |= DISPPLANE_RGBA888; + break; case DRM_FORMAT_XRGB2101010: dspcntr |= DISPPLANE_BGRX101010; break; case DRM_FORMAT_XBGR2101010: dspcntr |= DISPPLANE_RGBX101010; break; + case DRM_FORMAT_ARGB2101010: + dspcntr |= DISPPLANE_BGRA101010; + break; + case DRM_FORMAT_ABGR2101010: + dspcntr |= DISPPLANE_RGBA101010; + break; + case DRM_FORMAT_XBGR16161616F: + dspcntr |= DISPPLANE_RGBX161616; + break; default: MISSING_CASE(fb->format->format); return 0; @@ -3663,8 +4194,9 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, int i9xx_check_plane_surface(struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - int src_x, src_y; + to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + int src_x, src_y, src_w; u32 offset; int ret; @@ -3672,11 +4204,16 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) if (ret) return ret; - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) return 0; - src_x = plane_state->base.src.x1 >> 16; - src_y = plane_state->base.src.y1 >> 16; + src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + src_x = plane_state->uapi.src.x1 >> 16; + src_y = plane_state->uapi.src.y1 >> 16; + + /* Undocumented hardware limit on i965/g4x/vlv/chv */ + if (HAS_GMCH(dev_priv) && fb->format->cpp[0] == 8 && src_w > 2048) + return -EINVAL; intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); @@ -3690,15 +4227,14 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) * Put the final coordinates back so that the src * coordinate checks will see the right values. 
*/ - drm_rect_translate(&plane_state->base.src, - (src_x << 16) - plane_state->base.src.x1, - (src_y << 16) - plane_state->base.src.y1); + drm_rect_translate_to(&plane_state->uapi.src, + src_x << 16, src_y << 16); /* HSW/BDW do this automagically in hardware */ if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) { - unsigned int rotation = plane_state->base.rotation; - int src_w = drm_rect_width(&plane_state->base.src) >> 16; - int src_h = drm_rect_height(&plane_state->base.src) >> 16; + unsigned int rotation = plane_state->hw.rotation; + int src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + int src_h = drm_rect_height(&plane_state->uapi.src) >> 16; if (rotation & DRM_MODE_ROTATE_180) { src_x += src_w - 1; @@ -3715,21 +4251,39 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) return 0; } +static bool i9xx_plane_has_windowing(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; + + if (IS_CHERRYVIEW(dev_priv)) + return i9xx_plane == PLANE_B; + else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) + return false; + else if (IS_GEN(dev_priv, 4)) + return i9xx_plane == PLANE_C; + else + return i9xx_plane == PLANE_B || + i9xx_plane == PLANE_C; +} + static int i9xx_plane_check(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); int ret; ret = chv_plane_check_rotation(plane_state); if (ret) return ret; - ret = drm_atomic_helper_check_plane_state(&plane_state->base, - &crtc_state->base, + ret = drm_atomic_helper_check_plane_state(&plane_state->uapi, + &crtc_state->uapi, DRM_PLANE_HELPER_NO_SCALING, DRM_PLANE_HELPER_NO_SCALING, - false, true); + i9xx_plane_has_windowing(plane), + true); if (ret) return ret; @@ -3737,7 +4291,7 @@ i9xx_plane_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) return 0; ret = intel_plane_check_src_coordinates(plane_state); @@ -3758,6 +4312,10 @@ static void i9xx_update_plane(struct intel_plane *plane, u32 linear_offset; int x = plane_state->color_plane[0].x; int y = plane_state->color_plane[0].y; + int crtc_x = plane_state->uapi.dst.x1; + int crtc_y = plane_state->uapi.dst.y1; + int crtc_w = drm_rect_width(&plane_state->uapi.dst); + int crtc_h = drm_rect_height(&plane_state->uapi.dst); unsigned long irqflags; u32 dspaddr_offset; u32 dspcntr; @@ -3776,18 +4334,18 @@ static void i9xx_update_plane(struct intel_plane *plane, I915_WRITE_FW(DSPSTRIDE(i9xx_plane), plane_state->color_plane[0].stride); if (INTEL_GEN(dev_priv) < 4) { - /* pipesrc and dspsize control the size that is scaled from, - * which should always be the user's requested size. + /* + * PLANE_A doesn't actually have a full window + * generator but let's assume we still need to + * program whatever is there. 
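A minimal sketch of the position/size packing used by the DSPPOS/DSPSIZE (and PRIMPOS/PRIMSIZE) writes just below, with arbitrary example plane geometry:

#include <stdio.h>

int main(void)
{
        int crtc_x = 64, crtc_y = 32;           /* plane position in pixels */
        int crtc_w = 1920, crtc_h = 1080;       /* plane size in pixels */

        /* position register: y in bits 31:16, x in bits 15:0 */
        unsigned int pos = ((unsigned int)crtc_y << 16) | (unsigned int)crtc_x;
        /* size register: (height - 1) in bits 31:16, (width - 1) in bits 15:0 */
        unsigned int size = ((unsigned int)(crtc_h - 1) << 16) | (unsigned int)(crtc_w - 1);

        printf("pos  = 0x%08x\n", pos);         /* 0x00200040 */
        printf("size = 0x%08x\n", size);        /* 0x0437077f */
        return 0;
}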
*/ - I915_WRITE_FW(DSPPOS(i9xx_plane), 0); + I915_WRITE_FW(DSPPOS(i9xx_plane), (crtc_y << 16) | crtc_x); I915_WRITE_FW(DSPSIZE(i9xx_plane), - ((crtc_state->pipe_src_h - 1) << 16) | - (crtc_state->pipe_src_w - 1)); + ((crtc_h - 1) << 16) | (crtc_w - 1)); } else if (IS_CHERRYVIEW(dev_priv) && i9xx_plane == PLANE_B) { - I915_WRITE_FW(PRIMPOS(i9xx_plane), 0); + I915_WRITE_FW(PRIMPOS(i9xx_plane), (crtc_y << 16) | crtc_x); I915_WRITE_FW(PRIMSIZE(i9xx_plane), - ((crtc_state->pipe_src_h - 1) << 16) | - (crtc_state->pipe_src_w - 1)); + ((crtc_h - 1) << 16) | (crtc_w - 1)); I915_WRITE_FW(PRIMCNSTALPHA(i9xx_plane), 0); } @@ -3897,7 +4455,7 @@ static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) */ static void skl_detach_scalers(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; int i; @@ -3916,7 +4474,7 @@ static unsigned int skl_plane_stride_mult(const struct drm_framebuffer *fb, * The stride is either expressed as a multiple of 64 bytes chunks for * linear buffers or in number of tiles for tiled buffers. */ - if (fb->modifier == DRM_FORMAT_MOD_LINEAR) + if (is_surface_linear(fb, color_plane)) return 64; else if (drm_rotation_90_or_270(rotation)) return intel_tile_height(fb, color_plane); @@ -3927,8 +4485,8 @@ static unsigned int skl_plane_stride_mult(const struct drm_framebuffer *fb, u32 skl_plane_stride(const struct intel_plane_state *plane_state, int color_plane) { - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; u32 stride = plane_state->color_plane[color_plane].stride; if (color_plane >= fb->format->num_planes) @@ -3950,10 +4508,12 @@ static u32 skl_plane_ctl_format(u32 pixel_format) case DRM_FORMAT_XRGB8888: case DRM_FORMAT_ARGB8888: return PLANE_CTL_FORMAT_XRGB_8888; + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ABGR2101010: + return PLANE_CTL_FORMAT_XRGB_2101010 | PLANE_CTL_ORDER_RGBX; case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_ARGB2101010: return PLANE_CTL_FORMAT_XRGB_2101010; - case DRM_FORMAT_XBGR2101010: - return PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010; case DRM_FORMAT_XBGR16161616F: case DRM_FORMAT_ABGR16161616F: return PLANE_CTL_FORMAT_XRGB_16161616F | PLANE_CTL_ORDER_RGBX; @@ -3997,10 +4557,10 @@ static u32 skl_plane_ctl_format(u32 pixel_format) static u32 skl_plane_ctl_alpha(const struct intel_plane_state *plane_state) { - if (!plane_state->base.fb->format->has_alpha) + if (!plane_state->hw.fb->format->has_alpha) return PLANE_CTL_ALPHA_DISABLE; - switch (plane_state->base.pixel_blend_mode) { + switch (plane_state->hw.pixel_blend_mode) { case DRM_MODE_BLEND_PIXEL_NONE: return PLANE_CTL_ALPHA_DISABLE; case DRM_MODE_BLEND_PREMULTI: @@ -4008,17 +4568,17 @@ static u32 skl_plane_ctl_alpha(const struct intel_plane_state *plane_state) case DRM_MODE_BLEND_COVERAGE: return PLANE_CTL_ALPHA_HW_PREMULTIPLY; default: - MISSING_CASE(plane_state->base.pixel_blend_mode); + MISSING_CASE(plane_state->hw.pixel_blend_mode); return PLANE_CTL_ALPHA_DISABLE; } } static u32 glk_plane_color_ctl_alpha(const struct intel_plane_state *plane_state) { - if (!plane_state->base.fb->format->has_alpha) + if (!plane_state->hw.fb->format->has_alpha) return PLANE_COLOR_ALPHA_DISABLE; - switch 
(plane_state->base.pixel_blend_mode) { + switch (plane_state->hw.pixel_blend_mode) { case DRM_MODE_BLEND_PIXEL_NONE: return PLANE_COLOR_ALPHA_DISABLE; case DRM_MODE_BLEND_PREMULTI: @@ -4026,7 +4586,7 @@ static u32 glk_plane_color_ctl_alpha(const struct intel_plane_state *plane_state case DRM_MODE_BLEND_COVERAGE: return PLANE_COLOR_ALPHA_HW_PREMULTIPLY; default: - MISSING_CASE(plane_state->base.pixel_blend_mode); + MISSING_CASE(plane_state->hw.pixel_blend_mode); return PLANE_COLOR_ALPHA_DISABLE; } } @@ -4042,6 +4602,12 @@ static u32 skl_plane_ctl_tiling(u64 fb_modifier) return PLANE_CTL_TILED_Y; case I915_FORMAT_MOD_Y_TILED_CCS: return PLANE_CTL_TILED_Y | PLANE_CTL_RENDER_DECOMPRESSION_ENABLE; + case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: + return PLANE_CTL_TILED_Y | + PLANE_CTL_RENDER_DECOMPRESSION_ENABLE | + PLANE_CTL_CLEAR_COLOR_DISABLE; + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: + return PLANE_CTL_TILED_Y | PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE; case I915_FORMAT_MOD_Yf_TILED: return PLANE_CTL_TILED_YF; case I915_FORMAT_MOD_Yf_TILED_CCS: @@ -4092,7 +4658,7 @@ static u32 cnl_plane_ctl_flip(unsigned int reflect) u32 skl_plane_ctl_crtc(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); u32 plane_ctl = 0; if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) @@ -4111,9 +4677,9 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; + to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 plane_ctl; @@ -4123,10 +4689,10 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, plane_ctl |= skl_plane_ctl_alpha(plane_state); plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; - if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) + if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709) plane_ctl |= PLANE_CTL_YUV_TO_RGB_CSC_FORMAT_BT709; - if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE) + if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE) plane_ctl |= PLANE_CTL_YUV_RANGE_CORRECTION_DISABLE; } @@ -4148,7 +4714,7 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); u32 plane_color_ctl = 0; if (INTEL_GEN(dev_priv) >= 11) @@ -4167,21 +4733,21 @@ u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); u32 plane_color_ctl = 0; plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; plane_color_ctl |= glk_plane_color_ctl_alpha(plane_state); if (fb->format->is_yuv 
&& !icl_is_hdr_plane(dev_priv, plane->id)) { - if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) + if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709) plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709; else plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709; - if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE) + if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE) plane_color_ctl |= PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE; } else if (fb->format->is_yuv) { plane_color_ctl |= PLANE_COLOR_INPUT_CSC_ENABLE; @@ -4200,7 +4766,7 @@ __intel_display_resume(struct drm_device *dev, int i, ret; intel_modeset_setup_hw_state(dev, ctx); - i915_redisable_vga(to_i915(dev)); + intel_vga_redisable(to_i915(dev)); if (!state) return 0; @@ -4232,7 +4798,7 @@ __intel_display_resume(struct drm_device *dev, static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv) { return (INTEL_INFO(dev_priv)->gpu_reset_clobbers_display && - intel_has_gpu_reset(dev_priv)); + intel_has_gpu_reset(&dev_priv->gt)); } void intel_prepare_reset(struct drm_i915_private *dev_priv) @@ -4248,12 +4814,13 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) return; /* We have a modeset vs reset deadlock, defensively unbreak it. */ - set_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags); - wake_up_all(&dev_priv->gpu_error.wait_queue); + set_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags); + smp_mb__after_atomic(); + wake_up_bit(&dev_priv->gt.reset.flags, I915_RESET_MODESET); if (atomic_read(&dev_priv->gpu_error.pending_fb_pin)) { DRM_DEBUG_KMS("Modeset potentially stuck, unbreaking through wedging\n"); - i915_gem_set_wedged(dev_priv); + intel_gt_set_wedged(&dev_priv->gt); } /* @@ -4299,7 +4866,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) int ret; /* reset doesn't touch the display */ - if (!test_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags)) + if (!test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags)) return; state = fetch_and_zero(&dev_priv->modeset_restore_state); @@ -4318,7 +4885,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) * so need a full re-initialization. */ intel_pps_unlock_regs_wa(dev_priv); - intel_modeset_init_hw(dev); + intel_modeset_init_hw(dev_priv); intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); @@ -4339,7 +4906,7 @@ unlock: drm_modeset_acquire_fini(ctx); mutex_unlock(&dev->mode_config.mutex); - clear_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags); + clear_bit_unlock(I915_RESET_MODESET, &dev_priv->gt.reset.flags); } static void icl_set_pipe_chicken(struct intel_crtc *crtc) @@ -4366,50 +4933,41 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc) I915_WRITE(PIPE_CHICKEN(pipe), tmp); } -static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_state, - const struct intel_crtc_state *new_crtc_state) +static void icl_enable_trans_port_sync(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - - /* drm_atomic_helper_update_legacy_modeset_state might not be called. 
*/ - crtc->base.mode = new_crtc_state->base.mode; + u32 trans_ddi_func_ctl2_val; + u8 master_select; /* - * Update pipe size and adjust fitter if needed: the reason for this is - * that in compute_mode_changes we check the native mode (not the pfit - * mode) to see if we can flip rather than do a full mode set. In the - * fastboot case, we'll flip, but if we don't update the pipesrc and - * pfit state, we'll end up with a big fb scanned out into the wrong - * sized surface. + * Configure the master select and enable Transcoder Port Sync for + * Slave CRTCs transcoder. */ + if (crtc_state->master_transcoder == INVALID_TRANSCODER) + return; - I915_WRITE(PIPESRC(crtc->pipe), - ((new_crtc_state->pipe_src_w - 1) << 16) | - (new_crtc_state->pipe_src_h - 1)); + if (crtc_state->master_transcoder == TRANSCODER_EDP) + master_select = 0; + else + master_select = crtc_state->master_transcoder + 1; - /* on skylake this is done by detaching scalers */ - if (INTEL_GEN(dev_priv) >= 9) { - skl_detach_scalers(new_crtc_state); + /* Set the master select bits for Transcoder Port Sync */ + trans_ddi_func_ctl2_val = (PORT_SYNC_MODE_MASTER_SELECT(master_select) & + PORT_SYNC_MODE_MASTER_SELECT_MASK) << + PORT_SYNC_MODE_MASTER_SELECT_SHIFT; + /* Enable Transcoder Port Sync */ + trans_ddi_func_ctl2_val |= PORT_SYNC_MODE_ENABLE; - if (new_crtc_state->pch_pfit.enabled) - skylake_pfit_enable(new_crtc_state); - } else if (HAS_PCH_SPLIT(dev_priv)) { - if (new_crtc_state->pch_pfit.enabled) - ironlake_pfit_enable(new_crtc_state); - else if (old_crtc_state->pch_pfit.enabled) - ironlake_pfit_disable(old_crtc_state); - } - - if (INTEL_GEN(dev_priv) >= 11) - icl_set_pipe_chicken(crtc); + I915_WRITE(TRANS_DDI_FUNC_CTL2(crtc_state->cpu_transcoder), + trans_ddi_func_ctl2_val); } static void intel_fdi_normal_train(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -4447,17 +5005,17 @@ static void intel_fdi_normal_train(struct intel_crtc *crtc) } /* The FDI link training functions for ILK/Ibexpeak. 
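For reference, a minimal sketch of the master-select encoding programmed by icl_enable_trans_port_sync() above; the stand-in enum and its ordering (TRANSCODER_A through TRANSCODER_D consecutive from zero, EDP last) are assumptions made for the example, and only the EDP-is-zero / otherwise-transcoder-plus-one rule comes from the code:

#include <stdio.h>

/* stand-in for the driver's enum transcoder, ordering assumed */
enum transcoder {
        TRANSCODER_A,
        TRANSCODER_B,
        TRANSCODER_C,
        TRANSCODER_D,
        TRANSCODER_EDP,
};

/* EDP is encoded as 0; pipe transcoders as A=1, B=2, C=3, D=4 */
static unsigned int master_select(enum transcoder master)
{
        if (master == TRANSCODER_EDP)
                return 0;
        return (unsigned int)master + 1;
}

int main(void)
{
        printf("EDP->%u A->%u D->%u\n",
               master_select(TRANSCODER_EDP),
               master_select(TRANSCODER_A),
               master_select(TRANSCODER_D));
        return 0;
}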
*/ -static void ironlake_fdi_link_train(struct intel_crtc *crtc, - const struct intel_crtc_state *crtc_state) +static void ilk_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, tries; /* FDI needs bits from pipe first */ - assert_pipe_enabled(dev_priv, pipe); + assert_pipe_enabled(dev_priv, crtc_state->cpu_transcoder); /* Train 1: umask FDI RX Interrupt symbol_lock and bit_lock bit for train result */ @@ -4553,7 +5111,7 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, retry; @@ -4686,7 +5244,7 @@ static void ivb_manual_fdi_link_train(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, j; @@ -4800,11 +5358,11 @@ train_done: DRM_DEBUG_KMS("FDI train done.\n"); } -static void ironlake_fdi_pll_enable(const struct intel_crtc_state *crtc_state) +static void ilk_fdi_pll_enable(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; i915_reg_t reg; u32 temp; @@ -4837,11 +5395,11 @@ static void ironlake_fdi_pll_enable(const struct intel_crtc_state *crtc_state) } } -static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc) +static void ilk_fdi_pll_disable(struct intel_crtc *intel_crtc) { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; i915_reg_t reg; u32 temp; @@ -4867,12 +5425,10 @@ static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc) udelay(100); } -static void ironlake_fdi_disable(struct drm_crtc *crtc) +static void ilk_fdi_disable(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -4963,9 +5519,9 @@ void lpt_disable_iclkip(struct drm_i915_private *dev_priv) /* Program iCLKIP clock to the desired frequency */ static void lpt_program_iclkip(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int clock = crtc_state->base.adjusted_mode.crtc_clock; + int clock = crtc_state->hw.adjusted_mode.crtc_clock; u32 divsel, phaseinc, auxdiv, phasedir = 0; u32 temp; @@ -5076,10 +5632,10 @@ int lpt_get_iclkip(struct drm_i915_private *dev_priv) desired_divisor << auxdiv); } -static void ironlake_pch_transcoder_set_timings(const struct intel_crtc_state *crtc_state, - enum pipe pch_transcoder) +static void ilk_pch_transcoder_set_timings(const struct intel_crtc_state *crtc_state, + enum pipe pch_transcoder) { - struct 
intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; @@ -5120,9 +5676,9 @@ static void cpt_set_fdi_bc_bifurcation(struct drm_i915_private *dev_priv, bool e POSTING_READ(SOUTH_CHICKEN1); } -static void ivybridge_update_fdi_bc_bifurcation(const struct intel_crtc_state *crtc_state) +static void ivb_update_fdi_bc_bifurcation(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); switch (crtc->pipe) { @@ -5152,7 +5708,7 @@ static struct intel_encoder * intel_get_crtc_new_encoder(const struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct drm_connector_state *connector_state; const struct drm_connector *connector; struct intel_encoder *encoder = NULL; @@ -5181,19 +5737,19 @@ intel_get_crtc_new_encoder(const struct intel_atomic_state *state, * - DP transcoding bits * - transcoder */ -static void ironlake_pch_enable(const struct intel_atomic_state *state, - const struct intel_crtc_state *crtc_state) +static void ilk_pch_enable(const struct intel_atomic_state *state, + const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; u32 temp; assert_pch_transcoder_disabled(dev_priv, pipe); if (IS_IVYBRIDGE(dev_priv)) - ivybridge_update_fdi_bc_bifurcation(crtc_state); + ivb_update_fdi_bc_bifurcation(crtc_state); /* Write the TU size bits before fdi link training, so that error * detection works. 
*/ @@ -5230,7 +5786,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state, /* set transcoder timing, panel must allow it */ assert_panel_unlocked(dev_priv, pipe); - ironlake_pch_transcoder_set_timings(crtc_state, pipe); + ilk_pch_transcoder_set_timings(crtc_state, pipe); intel_fdi_normal_train(crtc); @@ -5238,7 +5794,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state, if (HAS_PCH_CPT(dev_priv) && intel_crtc_has_dp_encoder(crtc_state)) { const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPECONF_BPC_MASK) >> 5; i915_reg_t reg = TRANS_DP_CTL(pipe); enum port port; @@ -5262,13 +5818,13 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state, I915_WRITE(reg, temp); } - ironlake_enable_pch_transcoder(crtc_state); + ilk_enable_pch_transcoder(crtc_state); } static void lpt_pch_enable(const struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; @@ -5277,14 +5833,14 @@ static void lpt_pch_enable(const struct intel_atomic_state *state, lpt_program_iclkip(crtc_state); /* Set transcoder timing. */ - ironlake_pch_transcoder_set_timings(crtc_state, PIPE_A); + ilk_pch_transcoder_set_timings(crtc_state, PIPE_A); lpt_enable_pch_transcoder(dev_priv, cpu_transcoder); } -static void cpt_verify_modeset(struct drm_device *dev, int pipe) +static void cpt_verify_modeset(struct drm_i915_private *dev_priv, + enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(dev); i915_reg_t dslreg = PIPEDSL(pipe); u32 temp; @@ -5380,15 +5936,16 @@ static int skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, unsigned int scaler_user, int *scaler_id, int src_w, int src_h, int dst_w, int dst_h, - const struct drm_format_info *format, bool need_scaler) + const struct drm_format_info *format, + u64 modifier, bool need_scaler) { struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; struct intel_crtc *intel_crtc = - to_intel_crtc(crtc_state->base.crtc); + to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; /* * Src coordinates are already rotated by 270 degrees for @@ -5404,7 +5961,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, * Once NV12 is enabled, handle it here while allocating scaler * for NV12. 
*/ - if (INTEL_GEN(dev_priv) >= 9 && crtc_state->base.enable && + if (INTEL_GEN(dev_priv) >= 9 && crtc_state->hw.enable && need_scaler && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { DRM_DEBUG_KMS("Pipe/Plane scaling not supported with IF-ID mode\n"); return -EINVAL; @@ -5434,7 +5991,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, return 0; } - if (format && is_planar_yuv_format(format->format) && + if (format && intel_format_info_is_yuv_semiplanar(format, modifier) && (src_h < SKL_MIN_YUV_420_SRC_H || src_w < SKL_MIN_YUV_420_SRC_W)) { DRM_DEBUG_KMS("Planar YUV: src dimensions not met\n"); return -EINVAL; @@ -5476,17 +6033,18 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, */ int skl_update_scaler_crtc(struct intel_crtc_state *state) { - const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &state->hw.adjusted_mode; bool need_scaler = false; if (state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) need_scaler = true; - return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX, + return skl_update_scaler(state, !state->hw.active, SKL_CRTC_INDEX, &state->scaler_state.scaler_id, state->pipe_src_w, state->pipe_src_h, adjusted_mode->crtc_hdisplay, - adjusted_mode->crtc_vdisplay, NULL, need_scaler); + adjusted_mode->crtc_vdisplay, NULL, 0, + need_scaler); } /** @@ -5502,26 +6060,28 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { struct intel_plane *intel_plane = - to_intel_plane(plane_state->base.plane); + to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev); - struct drm_framebuffer *fb = plane_state->base.fb; + struct drm_framebuffer *fb = plane_state->hw.fb; int ret; - bool force_detach = !fb || !plane_state->base.visible; + bool force_detach = !fb || !plane_state->uapi.visible; bool need_scaler = false; /* Pre-gen11 and SDR planes always need a scaler for planar formats. */ if (!icl_is_hdr_plane(dev_priv, intel_plane->id) && - fb && is_planar_yuv_format(fb->format->format)) + fb && intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier)) need_scaler = true; ret = skl_update_scaler(crtc_state, force_detach, drm_plane_index(&intel_plane->base), &plane_state->scaler_id, - drm_rect_width(&plane_state->base.src) >> 16, - drm_rect_height(&plane_state->base.src) >> 16, - drm_rect_width(&plane_state->base.dst), - drm_rect_height(&plane_state->base.dst), - fb ? fb->format : NULL, need_scaler); + drm_rect_width(&plane_state->uapi.src) >> 16, + drm_rect_height(&plane_state->uapi.src) >> 16, + drm_rect_width(&plane_state->uapi.dst), + drm_rect_height(&plane_state->uapi.dst), + fb ? fb->format : NULL, + fb ? 
fb->modifier : 0, + need_scaler); if (ret || plane_state->scaler_id < 0) return ret; @@ -5543,10 +6103,8 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, case DRM_FORMAT_ARGB8888: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ABGR16161616F: - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_ARGB2101010: + case DRM_FORMAT_ABGR2101010: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -5562,6 +6120,13 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, case DRM_FORMAT_XVYU12_16161616: case DRM_FORMAT_XVYU16161616: break; + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + if (INTEL_GEN(dev_priv) >= 11) + break; + /* fall through */ default: DRM_DEBUG_KMS("[PLANE:%d:%s] FB:%d unsupported scaling format 0x%x\n", intel_plane->base.base.id, intel_plane->base.name, @@ -5572,17 +6137,18 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, return 0; } -static void skylake_scaler_disable(struct intel_crtc *crtc) +void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); int i; for (i = 0; i < crtc->num_scalers; i++) skl_detach_scaler(crtc, i); } -static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state) +static void skl_pfit_enable(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; const struct intel_crtc_scaler_state *scaler_state = @@ -5617,11 +6183,11 @@ static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state) } } -static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state) +static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; if (crtc_state->pch_pfit.enabled) { /* Force use of hard-coded filter coefficients @@ -5640,7 +6206,7 @@ static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state) void hsw_enable_ips(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -5669,16 +6235,14 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state) * and don't wait for vblanks until the end of crtc_enable, then * the HW state readout code will complain that the expected * IPS_CTL value is not the one we read. 
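A rough, self-contained sketch of the poll-with-timeout pattern behind the intel_de_wait_for_set()/intel_de_wait_for_clear() calls below; the helpers here are hypothetical stand-ins (the driver polls an MMIO register and sleeps between reads), shown only to make the "wait for bits, give up after N ms" shape explicit:

#include <stdio.h>

static unsigned int read_reg_stub(void)
{
        return 0;       /* stand-in for an MMIO read */
}

static void sleep_1ms_stub(void)
{
        /* stand-in for a short sleep between polls */
}

/* return 0 once (reg & mask) == value, or -1 after roughly timeout_ms milliseconds */
static int wait_for_bits(unsigned int mask, unsigned int value, unsigned int timeout_ms)
{
        unsigned int elapsed;

        for (elapsed = 0; elapsed <= timeout_ms; elapsed++) {
                if ((read_reg_stub() & mask) == value)
                        return 0;
                sleep_1ms_stub();
        }
        return -1;
}

int main(void)
{
        /* "wait for set": value == mask; "wait for clear": value == 0 */
        if (wait_for_bits(0x80000000u, 0x80000000u, 50))
                printf("timed out waiting for bits\n");
        return 0;
}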
*/ - if (intel_wait_for_register(&dev_priv->uncore, - IPS_CTL, IPS_ENABLE, IPS_ENABLE, - 50)) + if (intel_de_wait_for_set(dev_priv, IPS_CTL, IPS_ENABLE, 50)) DRM_ERROR("Timed out waiting for IPS enable\n"); } } void hsw_disable_ips(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -5692,9 +6256,7 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) * 42ms timeout value leads to occasional timeouts so use 100ms * instead. */ - if (intel_wait_for_register(&dev_priv->uncore, - IPS_CTL, IPS_ENABLE, 0, - 100)) + if (intel_de_wait_for_clear(dev_priv, IPS_CTL, IPS_ENABLE, 100)) DRM_ERROR("Timed out waiting for IPS disable\n"); } else { I915_WRITE(IPS_CTL, 0); @@ -5707,96 +6269,24 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) { - if (intel_crtc->overlay) { - struct drm_device *dev = intel_crtc->base.dev; - - mutex_lock(&dev->struct_mutex); + if (intel_crtc->overlay) (void) intel_overlay_switch_off(intel_crtc->overlay); - mutex_unlock(&dev->struct_mutex); - } /* Let userspace switch the overlay on again. In most cases userspace * has to recompute where to put it anyway. */ } -/** - * intel_post_enable_primary - Perform operations after enabling primary plane - * @crtc: the CRTC whose primary plane was just enabled - * @new_crtc_state: the enabling state - * - * Performs potentially sleeping operations that must be done after the primary - * plane is enabled, such as updating FBC and IPS. Note that this may be - * called due to an explicit primary plane update, or due to an implicit - * re-enable that is caused when a sprite plane is updated to no longer - * completely hide the primary plane. - */ -static void -intel_post_enable_primary(struct drm_crtc *crtc, - const struct intel_crtc_state *new_crtc_state) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - - /* - * Gen2 reports pipe underruns whenever all planes are disabled. - * So don't enable underrun reporting before at least some planes - * are enabled. - * FIXME: Need to fix the logic to work when we turn off all planes - * but leave the pipe running. - */ - if (IS_GEN(dev_priv, 2)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - - /* Underruns don't always raise interrupts, so check manually. */ - intel_check_cpu_fifo_underruns(dev_priv); - intel_check_pch_fifo_underruns(dev_priv); -} - -/* FIXME get rid of this and use pre_plane_update */ -static void -intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - - /* - * Gen2 reports pipe underruns whenever all planes are disabled. - * So disable underrun reporting before all the planes get disabled. - */ - if (IS_GEN(dev_priv, 2)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); - - hsw_disable_ips(to_intel_crtc_state(crtc->state)); - - /* - * Vblank time updates from the shadow to live plane control register - * are blocked if the memory self-refresh mode is active at that - * moment. 
So to make sure the plane gets truly disabled, disable - * first the self-refresh mode. The self-refresh enable bit in turn - * will be checked/applied by the HW only at the next frame start - * event which is after the vblank start event, so we need to have a - * wait-for-vblank between disabling the plane and the pipe. - */ - if (HAS_GMCH(dev_priv) && - intel_set_memory_cxsr(dev_priv, false)) - intel_wait_for_vblank(dev_priv, pipe); -} - static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); if (!old_crtc_state->ips_enabled) return false; - if (needs_modeset(&new_crtc_state->base)) + if (needs_modeset(new_crtc_state)) return true; /* @@ -5806,7 +6296,7 @@ static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_s * Disable IPS before we program the LUT. */ if (IS_HASWELL(dev_priv) && - (new_crtc_state->base.color_mgmt_changed || + (new_crtc_state->uapi.color_mgmt_changed || new_crtc_state->update_pipe) && new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) return true; @@ -5817,13 +6307,13 @@ static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_s static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); if (!new_crtc_state->ips_enabled) return false; - if (needs_modeset(&new_crtc_state->base)) + if (needs_modeset(new_crtc_state)) return true; /* @@ -5833,7 +6323,7 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s * Re-enable IPS after the LUT has been programmed. */ if (IS_HASWELL(dev_priv) && - (new_crtc_state->base.color_mgmt_changed || + (new_crtc_state->uapi.color_mgmt_changed || new_crtc_state->update_pipe) && new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) return true; @@ -5843,15 +6333,16 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s * forcibly enable IPS on the first fastset. 
*/ if (new_crtc_state->update_pipe && - old_crtc_state->base.adjusted_mode.private_flags & I915_MODE_FLAG_INHERITED) + old_crtc_state->hw.adjusted_mode.private_flags & I915_MODE_FLAG_INHERITED) return true; return !old_crtc_state->ips_enabled; } -static bool needs_nv12_wa(struct drm_i915_private *dev_priv, - const struct intel_crtc_state *crtc_state) +static bool needs_nv12_wa(const struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + if (!crtc_state->nv12_planes) return false; @@ -5862,9 +6353,10 @@ static bool needs_nv12_wa(struct drm_i915_private *dev_priv, return false; } -static bool needs_scalerclk_wa(struct drm_i915_private *dev_priv, - const struct intel_crtc_state *crtc_state) +static bool needs_scalerclk_wa(const struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + /* Wa_2006604312:icl */ if (crtc_state->scaler_state.scaler_users > 0 && IS_ICELAKE(dev_priv)) return true; @@ -5872,89 +6364,82 @@ static bool needs_scalerclk_wa(struct drm_i915_private *dev_priv, return false; } -static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) +static bool planes_enabling(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_atomic_state *old_state = old_crtc_state->base.state; - struct intel_crtc_state *pipe_config = - intel_atomic_get_new_crtc_state(to_intel_atomic_state(old_state), - crtc); - struct drm_plane *primary = crtc->base.primary; - struct drm_plane_state *old_primary_state = - drm_atomic_get_old_plane_state(old_state, primary); + return (!old_crtc_state->active_planes || needs_modeset(new_crtc_state)) && + new_crtc_state->active_planes; +} - intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); +static bool planes_disabling(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + return old_crtc_state->active_planes && + (!new_crtc_state->active_planes || needs_modeset(new_crtc_state)); +} - if (pipe_config->update_wm_post && pipe_config->base.active) - intel_update_watermarks(crtc); +static void intel_post_plane_update(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_plane *primary = to_intel_plane(crtc->base.primary); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_plane_state *new_primary_state = + intel_atomic_get_new_plane_state(state, primary); + enum pipe pipe = crtc->pipe; - if (hsw_post_update_enable_ips(old_crtc_state, pipe_config)) - hsw_enable_ips(pipe_config); + intel_frontbuffer_flip(dev_priv, new_crtc_state->fb_bits); - if (old_primary_state) { - struct drm_plane_state *new_primary_state = - drm_atomic_get_new_plane_state(old_state, primary); + if (new_crtc_state->update_wm_post && new_crtc_state->hw.active) + intel_update_watermarks(crtc); - intel_fbc_post_update(crtc); + if (hsw_post_update_enable_ips(old_crtc_state, new_crtc_state)) + hsw_enable_ips(new_crtc_state); - if (new_primary_state->visible && - (needs_modeset(&pipe_config->base) || - !old_primary_state->visible)) - 
intel_post_enable_primary(&crtc->base, pipe_config); - } + if (new_primary_state) + intel_fbc_post_update(crtc); - if (needs_nv12_wa(dev_priv, old_crtc_state) && - !needs_nv12_wa(dev_priv, pipe_config)) - skl_wa_827(dev_priv, crtc->pipe, false); + if (needs_nv12_wa(old_crtc_state) && + !needs_nv12_wa(new_crtc_state)) + skl_wa_827(dev_priv, pipe, false); - if (needs_scalerclk_wa(dev_priv, old_crtc_state) && - !needs_scalerclk_wa(dev_priv, pipe_config)) - icl_wa_scalerclkgating(dev_priv, crtc->pipe, false); + if (needs_scalerclk_wa(old_crtc_state) && + !needs_scalerclk_wa(new_crtc_state)) + icl_wa_scalerclkgating(dev_priv, pipe, false); } -static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, - struct intel_crtc_state *pipe_config) +static void intel_pre_plane_update(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_atomic_state *old_state = old_crtc_state->base.state; - struct drm_plane *primary = crtc->base.primary; - struct drm_plane_state *old_primary_state = - drm_atomic_get_old_plane_state(old_state, primary); - bool modeset = needs_modeset(&pipe_config->base); - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - - if (hsw_pre_update_disable_ips(old_crtc_state, pipe_config)) - hsw_disable_ips(old_crtc_state); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_plane *primary = to_intel_plane(crtc->base.primary); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_plane_state *new_primary_state = + intel_atomic_get_new_plane_state(state, primary); + enum pipe pipe = crtc->pipe; - if (old_primary_state) { - struct intel_plane_state *new_primary_state = - intel_atomic_get_new_plane_state(old_intel_state, - to_intel_plane(primary)); + if (hsw_pre_update_disable_ips(old_crtc_state, new_crtc_state)) + hsw_disable_ips(old_crtc_state); - intel_fbc_pre_update(crtc, pipe_config, new_primary_state); - /* - * Gen2 reports pipe underruns whenever all planes are disabled. - * So disable underrun reporting before all the planes get disabled. 
- */ - if (IS_GEN(dev_priv, 2) && old_primary_state->visible && - (modeset || !new_primary_state->base.visible)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); - } + if (new_primary_state && + intel_fbc_pre_update(crtc, new_crtc_state, new_primary_state)) + intel_wait_for_vblank(dev_priv, pipe); /* Display WA 827 */ - if (!needs_nv12_wa(dev_priv, old_crtc_state) && - needs_nv12_wa(dev_priv, pipe_config)) - skl_wa_827(dev_priv, crtc->pipe, true); + if (!needs_nv12_wa(old_crtc_state) && + needs_nv12_wa(new_crtc_state)) + skl_wa_827(dev_priv, pipe, true); /* Wa_2006604312:icl */ - if (!needs_scalerclk_wa(dev_priv, old_crtc_state) && - needs_scalerclk_wa(dev_priv, pipe_config)) - icl_wa_scalerclkgating(dev_priv, crtc->pipe, true); + if (!needs_scalerclk_wa(old_crtc_state) && + needs_scalerclk_wa(new_crtc_state)) + icl_wa_scalerclkgating(dev_priv, pipe, true); /* * Vblank time updates from the shadow to live plane control register @@ -5965,9 +6450,9 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, * event which is after the vblank start event, so we need to have a * wait-for-vblank between disabling the plane and the pipe. */ - if (HAS_GMCH(dev_priv) && old_crtc_state->base.active && - pipe_config->disable_cxsr && intel_set_memory_cxsr(dev_priv, false)) - intel_wait_for_vblank(dev_priv, crtc->pipe); + if (HAS_GMCH(dev_priv) && old_crtc_state->hw.active && + new_crtc_state->disable_cxsr && intel_set_memory_cxsr(dev_priv, false)) + intel_wait_for_vblank(dev_priv, pipe); /* * IVB workaround: must disable low power watermarks for at least @@ -5976,36 +6461,45 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, * * WaCxSRDisabledForSpriteScaling:ivb */ - if (pipe_config->disable_lp_wm && ilk_disable_lp_wm(dev) && - old_crtc_state->base.active) - intel_wait_for_vblank(dev_priv, crtc->pipe); + if (old_crtc_state->hw.active && + new_crtc_state->disable_lp_wm && ilk_disable_lp_wm(dev_priv)) + intel_wait_for_vblank(dev_priv, pipe); /* - * If we're doing a modeset, we're done. No need to do any pre-vblank - * watermark programming here. + * If we're doing a modeset we don't need to do any + * pre-vblank watermark programming here. */ - if (needs_modeset(&pipe_config->base)) - return; + if (!needs_modeset(new_crtc_state)) { + /* + * For platforms that support atomic watermarks, program the + * 'intermediate' watermarks immediately. On pre-gen9 platforms, these + * will be the intermediate values that are safe for both pre- and + * post- vblank; when vblank happens, the 'active' values will be set + * to the final 'target' values and we'll do this again to get the + * optimal watermarks. For gen9+ platforms, the values we program here + * will be the final target values which will get automatically latched + * at vblank time; no further programming will be necessary. + * + * If a platform hasn't been transitioned to atomic watermarks yet, + * we'll continue to update watermarks the old way, if flags tell + * us to. + */ + if (dev_priv->display.initial_watermarks) + dev_priv->display.initial_watermarks(state, crtc); + else if (new_crtc_state->update_wm_pre) + intel_update_watermarks(crtc); + } /* - * For platforms that support atomic watermarks, program the - * 'intermediate' watermarks immediately. 
On pre-gen9 platforms, these - * will be the intermediate values that are safe for both pre- and - * post- vblank; when vblank happens, the 'active' values will be set - * to the final 'target' values and we'll do this again to get the - * optimal watermarks. For gen9+ platforms, the values we program here - * will be the final target values which will get automatically latched - * at vblank time; no further programming will be necessary. + * Gen2 reports pipe underruns whenever all planes are disabled. + * So disable underrun reporting before all the planes get disabled. * - * If a platform hasn't been transitioned to atomic watermarks yet, - * we'll continue to update watermarks the old way, if flags tell - * us to. + * We do this after .initial_watermarks() so that we have a + * chance of catching underruns with the intermediate watermarks + * vs. the old plane configuration. */ - if (dev_priv->display.initial_watermarks != NULL) - dev_priv->display.initial_watermarks(old_intel_state, - pipe_config); - else if (pipe_config->update_wm_pre) - intel_update_watermarks(crtc); + if (IS_GEN(dev_priv, 2) && planes_disabling(old_crtc_state, new_crtc_state)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); } static void intel_crtc_disable_planes(struct intel_atomic_state *state, @@ -6029,26 +6523,102 @@ static void intel_crtc_disable_planes(struct intel_atomic_state *state, intel_disable_plane(plane, new_crtc_state); - if (old_plane_state->base.visible) + if (old_plane_state->uapi.visible) fb_bits |= plane->frontbuffer_bit; } intel_frontbuffer_flip(dev_priv, fb_bits); } -static void intel_encoders_pre_pll_enable(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct drm_atomic_state *old_state) +/* + * intel_connector_primary_encoder - get the primary encoder for a connector + * @connector: connector for which to return the encoder + * + * Returns the primary encoder for a connector. There is a 1:1 mapping from + * all connectors to their encoder, except for DP-MST connectors which have + * both a virtual and a primary encoder. These DP-MST primary encoders can be + * pointed to by as many DP-MST connectors as there are pipes. + */ +static struct intel_encoder * +intel_connector_primary_encoder(struct intel_connector *connector) { - struct drm_connector_state *conn_state; + struct intel_encoder *encoder; + + if (connector->mst_port) + return &dp_to_dig_port(connector->mst_port)->base; + + encoder = intel_attached_encoder(connector); + WARN_ON(!encoder); + + return encoder; +} + +static void intel_encoders_update_prepare(struct intel_atomic_state *state) +{ + struct drm_connector_state *new_conn_state; + struct drm_connector *connector; + int i; + + for_each_new_connector_in_state(&state->base, connector, new_conn_state, + i) { + struct intel_connector *intel_connector; + struct intel_encoder *encoder; + struct intel_crtc *crtc; + + if (!intel_connector_needs_modeset(state, connector)) + continue; + + intel_connector = to_intel_connector(connector); + encoder = intel_connector_primary_encoder(intel_connector); + if (!encoder->update_prepare) + continue; + + crtc = new_conn_state->crtc ? 
+ to_intel_crtc(new_conn_state->crtc) : NULL; + encoder->update_prepare(state, encoder, crtc); + } +} + +static void intel_encoders_update_complete(struct intel_atomic_state *state) +{ + struct drm_connector_state *new_conn_state; + struct drm_connector *connector; + int i; + + for_each_new_connector_in_state(&state->base, connector, new_conn_state, + i) { + struct intel_connector *intel_connector; + struct intel_encoder *encoder; + struct intel_crtc *crtc; + + if (!intel_connector_needs_modeset(state, connector)) + continue; + + intel_connector = to_intel_connector(connector); + encoder = intel_connector_primary_encoder(intel_connector); + if (!encoder->update_complete) + continue; + + crtc = new_conn_state->crtc ? + to_intel_crtc(new_conn_state->crtc) : NULL; + encoder->update_complete(state, encoder, crtc); + } +} + +static void intel_encoders_pre_pll_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct drm_connector_state *conn_state; struct drm_connector *conn; int i; - for_each_new_connector_in_state(old_state, conn, conn_state, i) { + for_each_new_connector_in_state(&state->base, conn, conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(conn_state->best_encoder); - if (conn_state->crtc != crtc) + if (conn_state->crtc != &crtc->base) continue; if (encoder->pre_pll_enable) @@ -6056,19 +6626,20 @@ static void intel_encoders_pre_pll_enable(struct drm_crtc *crtc, } } -static void intel_encoders_pre_enable(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct drm_atomic_state *old_state) +static void intel_encoders_pre_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_connector_state *conn_state; + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct drm_connector_state *conn_state; struct drm_connector *conn; int i; - for_each_new_connector_in_state(old_state, conn, conn_state, i) { + for_each_new_connector_in_state(&state->base, conn, conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(conn_state->best_encoder); - if (conn_state->crtc != crtc) + if (conn_state->crtc != &crtc->base) continue; if (encoder->pre_enable) @@ -6076,19 +6647,20 @@ static void intel_encoders_pre_enable(struct drm_crtc *crtc, } } -static void intel_encoders_enable(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct drm_atomic_state *old_state) +static void intel_encoders_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_connector_state *conn_state; + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct drm_connector_state *conn_state; struct drm_connector *conn; int i; - for_each_new_connector_in_state(old_state, conn, conn_state, i) { + for_each_new_connector_in_state(&state->base, conn, conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(conn_state->best_encoder); - if (conn_state->crtc != crtc) + if (conn_state->crtc != &crtc->base) continue; if (encoder->enable) @@ -6097,19 +6669,20 @@ static void intel_encoders_enable(struct drm_crtc *crtc, } } -static void intel_encoders_disable(struct drm_crtc *crtc, - struct intel_crtc_state *old_crtc_state, - struct drm_atomic_state *old_state) +static void intel_encoders_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_connector_state *old_conn_state; + const 
struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct drm_connector_state *old_conn_state; struct drm_connector *conn; int i; - for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { + for_each_old_connector_in_state(&state->base, conn, old_conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(old_conn_state->best_encoder); - if (old_conn_state->crtc != crtc) + if (old_conn_state->crtc != &crtc->base) continue; intel_opregion_notify_encoder(encoder, false); @@ -6118,19 +6691,20 @@ static void intel_encoders_disable(struct drm_crtc *crtc, } } -static void intel_encoders_post_disable(struct drm_crtc *crtc, - struct intel_crtc_state *old_crtc_state, - struct drm_atomic_state *old_state) +static void intel_encoders_post_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_connector_state *old_conn_state; + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct drm_connector_state *old_conn_state; struct drm_connector *conn; int i; - for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { + for_each_old_connector_in_state(&state->base, conn, old_conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(old_conn_state->best_encoder); - if (old_conn_state->crtc != crtc) + if (old_conn_state->crtc != &crtc->base) continue; if (encoder->post_disable) @@ -6138,19 +6712,20 @@ static void intel_encoders_post_disable(struct drm_crtc *crtc, } } -static void intel_encoders_post_pll_disable(struct drm_crtc *crtc, - struct intel_crtc_state *old_crtc_state, - struct drm_atomic_state *old_state) +static void intel_encoders_post_pll_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_connector_state *old_conn_state; + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct drm_connector_state *old_conn_state; struct drm_connector *conn; int i; - for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { + for_each_old_connector_in_state(&state->base, conn, old_conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(old_conn_state->best_encoder); - if (old_conn_state->crtc != crtc) + if (old_conn_state->crtc != &crtc->base) continue; if (encoder->post_pll_disable) @@ -6158,19 +6733,20 @@ static void intel_encoders_post_pll_disable(struct drm_crtc *crtc, } } -static void intel_encoders_update_pipe(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct drm_atomic_state *old_state) +static void intel_encoders_update_pipe(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_connector_state *conn_state; + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct drm_connector_state *conn_state; struct drm_connector *conn; int i; - for_each_new_connector_in_state(old_state, conn, conn_state, i) { + for_each_new_connector_in_state(&state->base, conn, conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(conn_state->best_encoder); - if (conn_state->crtc != crtc) + if (conn_state->crtc != &crtc->base) continue; if (encoder->update_pipe) @@ -6180,24 +6756,21 @@ static void intel_encoders_update_pipe(struct drm_crtc *crtc, static void intel_disable_primary_plane(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = 
to_intel_crtc(crtc_state->uapi.crtc); struct intel_plane *plane = to_intel_plane(crtc->base.primary); plane->disable_plane(plane, crtc_state); } -static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, - struct drm_atomic_state *old_state) +static void ilk_crtc_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; - if (WARN_ON(intel_crtc->active)) + if (WARN_ON(crtc->active)) return; /* @@ -6213,61 +6786,59 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false); - if (pipe_config->has_pch_encoder) - intel_prepare_shared_dpll(pipe_config); + if (new_crtc_state->has_pch_encoder) + intel_prepare_shared_dpll(new_crtc_state); - if (intel_crtc_has_dp_encoder(pipe_config)) - intel_dp_set_m_n(pipe_config, M1_N1); + if (intel_crtc_has_dp_encoder(new_crtc_state)) + intel_dp_set_m_n(new_crtc_state, M1_N1); - intel_set_pipe_timings(pipe_config); - intel_set_pipe_src_size(pipe_config); + intel_set_pipe_timings(new_crtc_state); + intel_set_pipe_src_size(new_crtc_state); - if (pipe_config->has_pch_encoder) { - intel_cpu_transcoder_set_m_n(pipe_config, - &pipe_config->fdi_m_n, NULL); - } + if (new_crtc_state->has_pch_encoder) + intel_cpu_transcoder_set_m_n(new_crtc_state, + &new_crtc_state->fdi_m_n, NULL); - ironlake_set_pipeconf(pipe_config); + ilk_set_pipeconf(new_crtc_state); - intel_crtc->active = true; + crtc->active = true; - intel_encoders_pre_enable(crtc, pipe_config, old_state); + intel_encoders_pre_enable(state, crtc); - if (pipe_config->has_pch_encoder) { + if (new_crtc_state->has_pch_encoder) { /* Note: FDI PLL enabling _must_ be done before we enable the * cpu pipes, hence this is separate from all the other fdi/pch * enabling. 
*/ - ironlake_fdi_pll_enable(pipe_config); + ilk_fdi_pll_enable(new_crtc_state); } else { assert_fdi_tx_disabled(dev_priv, pipe); assert_fdi_rx_disabled(dev_priv, pipe); } - ironlake_pfit_enable(pipe_config); + ilk_pfit_enable(new_crtc_state); /* * On ILK+ LUT must be loaded before the pipe is running but with * clocks enabled */ - intel_color_load_luts(pipe_config); - intel_color_commit(pipe_config); + intel_color_load_luts(new_crtc_state); + intel_color_commit(new_crtc_state); /* update DSPCNTR to configure gamma for pipe bottom color */ - intel_disable_primary_plane(pipe_config); + intel_disable_primary_plane(new_crtc_state); - if (dev_priv->display.initial_watermarks != NULL) - dev_priv->display.initial_watermarks(old_intel_state, pipe_config); - intel_enable_pipe(pipe_config); + if (dev_priv->display.initial_watermarks) + dev_priv->display.initial_watermarks(state, crtc); + intel_enable_pipe(new_crtc_state); - if (pipe_config->has_pch_encoder) - ironlake_pch_enable(old_intel_state, pipe_config); + if (new_crtc_state->has_pch_encoder) + ilk_pch_enable(state, new_crtc_state); - assert_vblank_disabled(crtc); - intel_crtc_vblank_on(pipe_config); + intel_crtc_vblank_on(new_crtc_state); - intel_encoders_enable(crtc, pipe_config, old_state); + intel_encoders_enable(state, crtc); if (HAS_PCH_CPT(dev_priv)) - cpt_verify_modeset(dev, intel_crtc->pipe); + cpt_verify_modeset(dev_priv, pipe); /* * Must wait for vblank to avoid spurious PCH FIFO underruns. @@ -6275,7 +6846,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, * some interlaced HDMI modes. Let's do the double wait always * in case there are more corner cases we don't know about. */ - if (pipe_config->has_pch_encoder) { + if (new_crtc_state->has_pch_encoder) { intel_wait_for_vblank(dev_priv, pipe); intel_wait_for_vblank(dev_priv, pipe); } @@ -6310,109 +6881,124 @@ static void icl_pipe_mbus_enable(struct intel_crtc *crtc) u32 val; val = MBUS_DBOX_A_CREDIT(2); - val |= MBUS_DBOX_BW_CREDIT(1); - val |= MBUS_DBOX_B_CREDIT(8); + + if (INTEL_GEN(dev_priv) >= 12) { + val |= MBUS_DBOX_BW_CREDIT(2); + val |= MBUS_DBOX_B_CREDIT(12); + } else { + val |= MBUS_DBOX_BW_CREDIT(1); + val |= MBUS_DBOX_B_CREDIT(8); + } I915_WRITE(PIPE_MBUS_DBOX_CTL(pipe), val); } -static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, - struct drm_atomic_state *old_state) +static void hsw_set_frame_start_delay(const struct intel_crtc_state *crtc_state) { - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe, hsw_workaround_pipe; - enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + i915_reg_t reg = CHICKEN_TRANS(crtc_state->cpu_transcoder); + u32 val; + + val = I915_READ(reg); + val &= ~HSW_FRAME_START_DELAY_MASK; + val |= HSW_FRAME_START_DELAY(0); + I915_WRITE(reg, val); +} + +static void hsw_crtc_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe, hsw_workaround_pipe; + enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; bool psl_clkgate_wa; - if 
(WARN_ON(intel_crtc->active)) + if (WARN_ON(crtc->active)) return; - intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); + intel_encoders_pre_pll_enable(state, crtc); - if (pipe_config->shared_dpll) - intel_enable_shared_dpll(pipe_config); + if (new_crtc_state->shared_dpll) + intel_enable_shared_dpll(new_crtc_state); - intel_encoders_pre_enable(crtc, pipe_config, old_state); + intel_encoders_pre_enable(state, crtc); - if (intel_crtc_has_dp_encoder(pipe_config)) - intel_dp_set_m_n(pipe_config, M1_N1); + if (intel_crtc_has_dp_encoder(new_crtc_state)) + intel_dp_set_m_n(new_crtc_state, M1_N1); if (!transcoder_is_dsi(cpu_transcoder)) - intel_set_pipe_timings(pipe_config); + intel_set_pipe_timings(new_crtc_state); - intel_set_pipe_src_size(pipe_config); + if (INTEL_GEN(dev_priv) >= 11) + icl_enable_trans_port_sync(new_crtc_state); + + intel_set_pipe_src_size(new_crtc_state); if (cpu_transcoder != TRANSCODER_EDP && - !transcoder_is_dsi(cpu_transcoder)) { + !transcoder_is_dsi(cpu_transcoder)) I915_WRITE(PIPE_MULT(cpu_transcoder), - pipe_config->pixel_multiplier - 1); - } + new_crtc_state->pixel_multiplier - 1); - if (pipe_config->has_pch_encoder) { - intel_cpu_transcoder_set_m_n(pipe_config, - &pipe_config->fdi_m_n, NULL); - } + if (new_crtc_state->has_pch_encoder) + intel_cpu_transcoder_set_m_n(new_crtc_state, + &new_crtc_state->fdi_m_n, NULL); - if (!transcoder_is_dsi(cpu_transcoder)) - haswell_set_pipeconf(pipe_config); + if (!transcoder_is_dsi(cpu_transcoder)) { + hsw_set_frame_start_delay(new_crtc_state); + hsw_set_pipeconf(new_crtc_state); + } if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - bdw_set_pipemisc(pipe_config); + bdw_set_pipemisc(new_crtc_state); - intel_crtc->active = true; + crtc->active = true; /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */ psl_clkgate_wa = (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) && - pipe_config->pch_pfit.enabled; + new_crtc_state->pch_pfit.enabled; if (psl_clkgate_wa) glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true); if (INTEL_GEN(dev_priv) >= 9) - skylake_pfit_enable(pipe_config); + skl_pfit_enable(new_crtc_state); else - ironlake_pfit_enable(pipe_config); + ilk_pfit_enable(new_crtc_state); /* * On ILK+ LUT must be loaded before the pipe is running but with * clocks enabled */ - intel_color_load_luts(pipe_config); - intel_color_commit(pipe_config); + intel_color_load_luts(new_crtc_state); + intel_color_commit(new_crtc_state); /* update DSPCNTR to configure gamma/csc for pipe bottom color */ if (INTEL_GEN(dev_priv) < 9) - intel_disable_primary_plane(pipe_config); + intel_disable_primary_plane(new_crtc_state); if (INTEL_GEN(dev_priv) >= 11) - icl_set_pipe_chicken(intel_crtc); + icl_set_pipe_chicken(crtc); - intel_ddi_set_pipe_settings(pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_transcoder_func(pipe_config); + intel_ddi_enable_transcoder_func(new_crtc_state); - if (dev_priv->display.initial_watermarks != NULL) - dev_priv->display.initial_watermarks(old_intel_state, pipe_config); + if (dev_priv->display.initial_watermarks) + dev_priv->display.initial_watermarks(state, crtc); if (INTEL_GEN(dev_priv) >= 11) - icl_pipe_mbus_enable(intel_crtc); + icl_pipe_mbus_enable(crtc); /* XXX: Do the pipe assertions at the right place for BXT DSI. 
*/ if (!transcoder_is_dsi(cpu_transcoder)) - intel_enable_pipe(pipe_config); - - if (pipe_config->has_pch_encoder) - lpt_pch_enable(old_intel_state, pipe_config); + intel_enable_pipe(new_crtc_state); - if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(pipe_config, true); + if (new_crtc_state->has_pch_encoder) + lpt_pch_enable(state, new_crtc_state); - assert_vblank_disabled(crtc); - intel_crtc_vblank_on(pipe_config); + intel_crtc_vblank_on(new_crtc_state); - intel_encoders_enable(crtc, pipe_config, old_state); + intel_encoders_enable(state, crtc); if (psl_clkgate_wa) { intel_wait_for_vblank(dev_priv, pipe); @@ -6421,16 +7007,16 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, /* If we change the relative order between pipe/planes enabling, we need * to change the workaround. */ - hsw_workaround_pipe = pipe_config->hsw_workaround_pipe; + hsw_workaround_pipe = new_crtc_state->hsw_workaround_pipe; if (IS_HASWELL(dev_priv) && hsw_workaround_pipe != INVALID_PIPE) { intel_wait_for_vblank(dev_priv, hsw_workaround_pipe); intel_wait_for_vblank(dev_priv, hsw_workaround_pipe); } } -static void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state) +void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; @@ -6443,14 +7029,13 @@ static void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state) } } -static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state, - struct drm_atomic_state *old_state) +static void ilk_crtc_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_crtc *crtc = old_crtc_state->base.crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; /* * Sometimes spurious CPU pipe underruns happen when the @@ -6460,22 +7045,21 @@ static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false); - intel_encoders_disable(crtc, old_crtc_state, old_state); + intel_encoders_disable(state, crtc); - drm_crtc_vblank_off(crtc); - assert_vblank_disabled(crtc); + intel_crtc_vblank_off(old_crtc_state); intel_disable_pipe(old_crtc_state); - ironlake_pfit_disable(old_crtc_state); + ilk_pfit_disable(old_crtc_state); if (old_crtc_state->has_pch_encoder) - ironlake_fdi_disable(crtc); + ilk_fdi_disable(crtc); - intel_encoders_post_disable(crtc, old_crtc_state, old_state); + intel_encoders_post_disable(state, crtc); if (old_crtc_state->has_pch_encoder) { - ironlake_disable_pch_transcoder(dev_priv, pipe); + ilk_disable_pch_transcoder(dev_priv, pipe); if (HAS_PCH_CPT(dev_priv)) { i915_reg_t reg; @@ -6495,51 +7079,27 @@ static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state, I915_WRITE(PCH_DPLL_SEL, temp); } - ironlake_fdi_pll_disable(intel_crtc); + ilk_fdi_pll_disable(crtc); } intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); 
intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true); } -static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, - struct drm_atomic_state *old_state) +static void hsw_crtc_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_crtc *crtc = old_crtc_state->base.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; - - intel_encoders_disable(crtc, old_crtc_state, old_state); - - drm_crtc_vblank_off(crtc); - assert_vblank_disabled(crtc); - - /* XXX: Do the pipe assertions at the right place for BXT DSI. */ - if (!transcoder_is_dsi(cpu_transcoder)) - intel_disable_pipe(old_crtc_state); - - if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(old_crtc_state, false); - - if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_disable_transcoder_func(old_crtc_state); - - intel_dsc_disable(old_crtc_state); - - if (INTEL_GEN(dev_priv) >= 9) - skylake_scaler_disable(intel_crtc); - else - ironlake_pfit_disable(old_crtc_state); - - intel_encoders_post_disable(crtc, old_crtc_state, old_state); - - intel_encoders_post_pll_disable(crtc, old_crtc_state, old_state); + /* + * FIXME collapse everything to one hook. + * Need care with mst->ddi interactions. + */ + intel_encoders_disable(state, crtc); + intel_encoders_post_disable(state, crtc); } static void i9xx_pfit_enable(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); if (!crtc_state->gmch_pfit.control) @@ -6550,7 +7110,7 @@ static void i9xx_pfit_enable(const struct intel_crtc_state *crtc_state) * according to register description and PRM. 
*/ WARN_ON(I915_READ(PFIT_CONTROL) & PFIT_ENABLE); - assert_pipe_disabled(dev_priv, crtc->pipe); + assert_pipe_disabled(dev_priv, crtc_state->cpu_transcoder); I915_WRITE(PFIT_PGM_RATIOS, crtc_state->gmch_pfit.pgm_ratios); I915_WRITE(PFIT_CONTROL, crtc_state->gmch_pfit.control); @@ -6560,33 +7120,47 @@ static void i9xx_pfit_enable(const struct intel_crtc_state *crtc_state) I915_WRITE(BCLRPAT(crtc->pipe), 0); } -bool intel_port_is_combophy(struct drm_i915_private *dev_priv, enum port port) +bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy) { - if (port == PORT_NONE) + if (phy == PHY_NONE) return false; if (IS_ELKHARTLAKE(dev_priv)) - return port <= PORT_C; + return phy <= PHY_C; if (INTEL_GEN(dev_priv) >= 11) - return port <= PORT_B; + return phy <= PHY_B; return false; } -bool intel_port_is_tc(struct drm_i915_private *dev_priv, enum port port) +bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy) { + if (INTEL_GEN(dev_priv) >= 12) + return phy >= PHY_D && phy <= PHY_I; + if (INTEL_GEN(dev_priv) >= 11 && !IS_ELKHARTLAKE(dev_priv)) - return port >= PORT_C && port <= PORT_F; + return phy >= PHY_C && phy <= PHY_F; return false; } +enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port) +{ + if (IS_ELKHARTLAKE(i915) && port == PORT_D) + return PHY_A; + + return (enum phy)port; +} + enum tc_port intel_port_to_tc(struct drm_i915_private *dev_priv, enum port port) { - if (!intel_port_is_tc(dev_priv, port)) + if (!intel_phy_is_tc(dev_priv, intel_port_to_phy(dev_priv, port))) return PORT_TC_NONE; + if (INTEL_GEN(dev_priv) >= 12) + return port - PORT_D; + return port - PORT_C; } @@ -6605,6 +7179,8 @@ enum intel_display_power_domain intel_port_to_power_domain(enum port port) return POWER_DOMAIN_PORT_DDI_E_LANES; case PORT_F: return POWER_DOMAIN_PORT_DDI_F_LANES; + case PORT_G: + return POWER_DOMAIN_PORT_DDI_G_LANES; default: MISSING_CASE(port); return POWER_DOMAIN_PORT_OTHER; @@ -6614,6 +7190,28 @@ enum intel_display_power_domain intel_port_to_power_domain(enum port port) enum intel_display_power_domain intel_aux_power_domain(struct intel_digital_port *dig_port) { + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port); + + if (intel_phy_is_tc(dev_priv, phy) && + dig_port->tc_mode == TC_PORT_TBT_ALT) { + switch (dig_port->aux_ch) { + case AUX_CH_C: + return POWER_DOMAIN_AUX_C_TBT; + case AUX_CH_D: + return POWER_DOMAIN_AUX_D_TBT; + case AUX_CH_E: + return POWER_DOMAIN_AUX_E_TBT; + case AUX_CH_F: + return POWER_DOMAIN_AUX_F_TBT; + case AUX_CH_G: + return POWER_DOMAIN_AUX_G_TBT; + default: + MISSING_CASE(dig_port->aux_ch); + return POWER_DOMAIN_AUX_C_TBT; + } + } + switch (dig_port->aux_ch) { case AUX_CH_A: return POWER_DOMAIN_AUX_A; @@ -6627,24 +7225,24 @@ intel_aux_power_domain(struct intel_digital_port *dig_port) return POWER_DOMAIN_AUX_E; case AUX_CH_F: return POWER_DOMAIN_AUX_F; + case AUX_CH_G: + return POWER_DOMAIN_AUX_G; default: MISSING_CASE(dig_port->aux_ch); return POWER_DOMAIN_AUX_A; } } -static u64 get_crtc_power_domains(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state) +static u64 get_crtc_power_domains(struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct drm_encoder *encoder; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - 
enum pipe pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; u64 mask; enum transcoder transcoder = crtc_state->cpu_transcoder; - if (!crtc_state->base.active) + if (!crtc_state->hw.active) return 0; mask = BIT_ULL(POWER_DOMAIN_PIPE(pipe)); @@ -6653,7 +7251,8 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, crtc_state->pch_pfit.force_thru) mask |= BIT_ULL(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); - drm_for_each_encoder_mask(encoder, dev, crtc_state->base.encoder_mask) { + drm_for_each_encoder_mask(encoder, &dev_priv->drm, + crtc_state->uapi.encoder_mask) { struct intel_encoder *intel_encoder = to_intel_encoder(encoder); mask |= BIT_ULL(intel_encoder->power_domain); @@ -6669,17 +7268,16 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, } static u64 -modeset_get_crtc_power_domains(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state) +modeset_get_crtc_power_domains(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain domain; u64 domains, new_domains, old_domains; - old_domains = intel_crtc->enabled_power_domains; - intel_crtc->enabled_power_domains = new_domains = - get_crtc_power_domains(crtc, crtc_state); + old_domains = crtc->enabled_power_domains; + crtc->enabled_power_domains = new_domains = + get_crtc_power_domains(crtc_state); domains = new_domains & ~old_domains; @@ -6698,151 +7296,140 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv, intel_display_power_put_unchecked(dev_priv, domain); } -static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, - struct drm_atomic_state *old_state) +static void valleyview_crtc_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; - if (WARN_ON(intel_crtc->active)) + if (WARN_ON(crtc->active)) return; - if (intel_crtc_has_dp_encoder(pipe_config)) - intel_dp_set_m_n(pipe_config, M1_N1); + if (intel_crtc_has_dp_encoder(new_crtc_state)) + intel_dp_set_m_n(new_crtc_state, M1_N1); - intel_set_pipe_timings(pipe_config); - intel_set_pipe_src_size(pipe_config); + intel_set_pipe_timings(new_crtc_state); + intel_set_pipe_src_size(new_crtc_state); if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) { I915_WRITE(CHV_BLEND(pipe), CHV_BLEND_LEGACY); I915_WRITE(CHV_CANVAS(pipe), 0); } - i9xx_set_pipeconf(pipe_config); + i9xx_set_pipeconf(new_crtc_state); - intel_crtc->active = true; + crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); + intel_encoders_pre_pll_enable(state, crtc); if (IS_CHERRYVIEW(dev_priv)) { - chv_prepare_pll(intel_crtc, pipe_config); - chv_enable_pll(intel_crtc, pipe_config); + chv_prepare_pll(crtc, new_crtc_state); + chv_enable_pll(crtc, new_crtc_state); } else { - vlv_prepare_pll(intel_crtc, pipe_config); - 
vlv_enable_pll(intel_crtc, pipe_config); + vlv_prepare_pll(crtc, new_crtc_state); + vlv_enable_pll(crtc, new_crtc_state); } - intel_encoders_pre_enable(crtc, pipe_config, old_state); + intel_encoders_pre_enable(state, crtc); - i9xx_pfit_enable(pipe_config); + i9xx_pfit_enable(new_crtc_state); - intel_color_load_luts(pipe_config); - intel_color_commit(pipe_config); + intel_color_load_luts(new_crtc_state); + intel_color_commit(new_crtc_state); /* update DSPCNTR to configure gamma for pipe bottom color */ - intel_disable_primary_plane(pipe_config); + intel_disable_primary_plane(new_crtc_state); - dev_priv->display.initial_watermarks(old_intel_state, - pipe_config); - intel_enable_pipe(pipe_config); + dev_priv->display.initial_watermarks(state, crtc); + intel_enable_pipe(new_crtc_state); - assert_vblank_disabled(crtc); - intel_crtc_vblank_on(pipe_config); + intel_crtc_vblank_on(new_crtc_state); - intel_encoders_enable(crtc, pipe_config, old_state); + intel_encoders_enable(state, crtc); } static void i9xx_set_pll_dividers(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); I915_WRITE(FP0(crtc->pipe), crtc_state->dpll_hw_state.fp0); I915_WRITE(FP1(crtc->pipe), crtc_state->dpll_hw_state.fp1); } -static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, - struct drm_atomic_state *old_state) +static void i9xx_crtc_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; - if (WARN_ON(intel_crtc->active)) + if (WARN_ON(crtc->active)) return; - i9xx_set_pll_dividers(pipe_config); + i9xx_set_pll_dividers(new_crtc_state); - if (intel_crtc_has_dp_encoder(pipe_config)) - intel_dp_set_m_n(pipe_config, M1_N1); + if (intel_crtc_has_dp_encoder(new_crtc_state)) + intel_dp_set_m_n(new_crtc_state, M1_N1); - intel_set_pipe_timings(pipe_config); - intel_set_pipe_src_size(pipe_config); + intel_set_pipe_timings(new_crtc_state); + intel_set_pipe_src_size(new_crtc_state); - i9xx_set_pipeconf(pipe_config); + i9xx_set_pipeconf(new_crtc_state); - intel_crtc->active = true; + crtc->active = true; if (!IS_GEN(dev_priv, 2)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - intel_encoders_pre_enable(crtc, pipe_config, old_state); + intel_encoders_pre_enable(state, crtc); - i9xx_enable_pll(intel_crtc, pipe_config); + i9xx_enable_pll(crtc, new_crtc_state); - i9xx_pfit_enable(pipe_config); + i9xx_pfit_enable(new_crtc_state); - intel_color_load_luts(pipe_config); - intel_color_commit(pipe_config); + intel_color_load_luts(new_crtc_state); + intel_color_commit(new_crtc_state); /* update DSPCNTR to configure gamma for pipe bottom color */ - intel_disable_primary_plane(pipe_config); + intel_disable_primary_plane(new_crtc_state); - if (dev_priv->display.initial_watermarks != NULL) - dev_priv->display.initial_watermarks(old_intel_state, - pipe_config); + if (dev_priv->display.initial_watermarks) + 
dev_priv->display.initial_watermarks(state, crtc); else - intel_update_watermarks(intel_crtc); - intel_enable_pipe(pipe_config); + intel_update_watermarks(crtc); + intel_enable_pipe(new_crtc_state); - assert_vblank_disabled(crtc); - intel_crtc_vblank_on(pipe_config); + intel_crtc_vblank_on(new_crtc_state); - intel_encoders_enable(crtc, pipe_config, old_state); + intel_encoders_enable(state, crtc); } static void i9xx_pfit_disable(const struct intel_crtc_state *old_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); if (!old_crtc_state->gmch_pfit.control) return; - assert_pipe_disabled(dev_priv, crtc->pipe); + assert_pipe_disabled(dev_priv, old_crtc_state->cpu_transcoder); DRM_DEBUG_KMS("disabling pfit, current: 0x%08x\n", I915_READ(PFIT_CONTROL)); I915_WRITE(PFIT_CONTROL, 0); } -static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, - struct drm_atomic_state *old_state) +static void i9xx_crtc_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_crtc *crtc = old_crtc_state->base.crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; /* * On gen2 planes are double buffered but the pipe isn't, so we must @@ -6851,16 +7438,15 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, if (IS_GEN(dev_priv, 2)) intel_wait_for_vblank(dev_priv, pipe); - intel_encoders_disable(crtc, old_crtc_state, old_state); + intel_encoders_disable(state, crtc); - drm_crtc_vblank_off(crtc); - assert_vblank_disabled(crtc); + intel_crtc_vblank_off(old_crtc_state); intel_disable_pipe(old_crtc_state); i9xx_pfit_disable(old_crtc_state); - intel_encoders_post_disable(crtc, old_crtc_state, old_state); + intel_encoders_post_disable(state, crtc); if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DSI)) { if (IS_CHERRYVIEW(dev_priv)) @@ -6871,92 +7457,97 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, i9xx_disable_pll(old_crtc_state); } - intel_encoders_post_pll_disable(crtc, old_crtc_state, old_state); + intel_encoders_post_pll_disable(state, crtc); if (!IS_GEN(dev_priv, 2)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); if (!dev_priv->display.initial_watermarks) - intel_update_watermarks(intel_crtc); + intel_update_watermarks(crtc); /* clock the pipe down to 640x480@60 to potentially save power */ if (IS_I830(dev_priv)) i830_enable_pipe(dev_priv, pipe); } -static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, +static void intel_crtc_disable_noatomic(struct intel_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { struct intel_encoder *encoder; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_bw_state *bw_state = to_intel_bw_state(dev_priv->bw_obj.state); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); enum intel_display_power_domain domain; struct intel_plane *plane; - u64 domains; struct drm_atomic_state *state; - struct intel_crtc_state *crtc_state; + struct 
intel_crtc_state *temp_crtc_state; + enum pipe pipe = crtc->pipe; + u64 domains; int ret; - if (!intel_crtc->active) + if (!crtc_state->hw.active) return; - for_each_intel_plane_on_crtc(&dev_priv->drm, intel_crtc, plane) { + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); - if (plane_state->base.visible) - intel_plane_disable_noatomic(intel_crtc, plane); + if (plane_state->uapi.visible) + intel_plane_disable_noatomic(crtc, plane); } - state = drm_atomic_state_alloc(crtc->dev); + state = drm_atomic_state_alloc(&dev_priv->drm); if (!state) { DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory", - crtc->base.id, crtc->name); + crtc->base.base.id, crtc->base.name); return; } state->acquire_ctx = ctx; /* Everything's already locked, -EDEADLK can't happen. */ - crtc_state = intel_atomic_get_crtc_state(state, intel_crtc); - ret = drm_atomic_add_affected_connectors(state, crtc); + temp_crtc_state = intel_atomic_get_crtc_state(state, crtc); + ret = drm_atomic_add_affected_connectors(state, &crtc->base); - WARN_ON(IS_ERR(crtc_state) || ret); + WARN_ON(IS_ERR(temp_crtc_state) || ret); - dev_priv->display.crtc_disable(crtc_state, state); + dev_priv->display.crtc_disable(to_intel_atomic_state(state), crtc); drm_atomic_state_put(state); DRM_DEBUG_KMS("[CRTC:%d:%s] hw state adjusted, was enabled, now disabled\n", - crtc->base.id, crtc->name); + crtc->base.base.id, crtc->base.name); - WARN_ON(drm_atomic_set_mode_for_crtc(crtc->state, NULL) < 0); - crtc->state->active = false; - intel_crtc->active = false; - crtc->enabled = false; - crtc->state->connector_mask = 0; - crtc->state->encoder_mask = 0; + crtc->active = false; + crtc->base.enabled = false; - for_each_encoder_on_crtc(crtc->dev, crtc, encoder) + WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->uapi, NULL) < 0); + crtc_state->uapi.active = false; + crtc_state->uapi.connector_mask = 0; + crtc_state->uapi.encoder_mask = 0; + intel_crtc_free_hw_state(crtc_state); + memset(&crtc_state->hw, 0, sizeof(crtc_state->hw)); + + for_each_encoder_on_crtc(&dev_priv->drm, &crtc->base, encoder) encoder->base.crtc = NULL; - intel_fbc_disable(intel_crtc); - intel_update_watermarks(intel_crtc); - intel_disable_shared_dpll(to_intel_crtc_state(crtc->state)); + intel_fbc_disable(crtc); + intel_update_watermarks(crtc); + intel_disable_shared_dpll(crtc_state); - domains = intel_crtc->enabled_power_domains; + domains = crtc->enabled_power_domains; for_each_power_domain(domain, domains) intel_display_power_put_unchecked(dev_priv, domain); - intel_crtc->enabled_power_domains = 0; + crtc->enabled_power_domains = 0; - dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe); - dev_priv->min_cdclk[intel_crtc->pipe] = 0; - dev_priv->min_voltage_level[intel_crtc->pipe] = 0; + dev_priv->active_pipes &= ~BIT(pipe); + dev_priv->min_cdclk[pipe] = 0; + dev_priv->min_voltage_level[pipe] = 0; - bw_state->data_rate[intel_crtc->pipe] = 0; - bw_state->num_active_planes[intel_crtc->pipe] = 0; + bw_state->data_rate[pipe] = 0; + bw_state->num_active_planes[pipe] = 0; } /* @@ -6988,7 +7579,7 @@ void intel_encoder_destroy(struct drm_encoder *encoder) /* Cross check the actual hw state with our own modeset state tracking (and it's * internal consistency). 
*/ -static void intel_connector_verify_state(struct drm_crtc_state *crtc_state, +static void intel_connector_verify_state(struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { struct intel_connector *connector = to_intel_connector(conn_state->connector); @@ -7006,8 +7597,8 @@ static void intel_connector_verify_state(struct drm_crtc_state *crtc_state, if (!crtc_state) return; - I915_STATE_WARN(!crtc_state->active, - "connector is active, but attached crtc isn't\n"); + I915_STATE_WARN(!crtc_state->hw.active, + "connector is active, but attached crtc isn't\n"); if (!encoder || encoder->type == INTEL_OUTPUT_DP_MST) return; @@ -7018,8 +7609,8 @@ static void intel_connector_verify_state(struct drm_crtc_state *crtc_state, I915_STATE_WARN(conn_state->crtc != encoder->base.crtc, "attached encoder crtc differs from connector crtc\n"); } else { - I915_STATE_WARN(crtc_state && crtc_state->active, - "attached crtc is active, but connector isn't\n"); + I915_STATE_WARN(crtc_state && crtc_state->hw.active, + "attached crtc is active, but connector isn't\n"); I915_STATE_WARN(!crtc_state && conn_state->best_encoder, "best encoder set without crtc!\n"); } @@ -7027,17 +7618,17 @@ static void intel_connector_verify_state(struct drm_crtc_state *crtc_state, static int pipe_required_fdi_lanes(struct intel_crtc_state *crtc_state) { - if (crtc_state->base.enable && crtc_state->has_pch_encoder) + if (crtc_state->hw.enable && crtc_state->has_pch_encoder) return crtc_state->fdi_lanes; return 0; } -static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, - struct intel_crtc_state *pipe_config) +static int ilk_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_atomic_state *state = pipe_config->base.state; + struct drm_atomic_state *state = pipe_config->uapi.state; struct intel_crtc *other_crtc; struct intel_crtc_state *other_crtc_state; @@ -7059,7 +7650,7 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, } } - if (INTEL_INFO(dev_priv)->num_pipes == 2) + if (INTEL_NUM_PIPES(dev_priv) == 2) return 0; /* Ivybridge 3 pipe is really complicated */ @@ -7106,11 +7697,11 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, } #define RETRY 1 -static int ironlake_fdi_compute_config(struct intel_crtc *intel_crtc, - struct intel_crtc_state *pipe_config) +static int ilk_fdi_compute_config(struct intel_crtc *intel_crtc, + struct intel_crtc_state *pipe_config) { struct drm_device *dev = intel_crtc->base.dev; - const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; int lane, link_bw, fdi_dotclock, ret; bool needs_recompute = false; @@ -7126,15 +7717,15 @@ retry: fdi_dotclock = adjusted_mode->crtc_clock; - lane = ironlake_get_lanes_required(fdi_dotclock, link_bw, - pipe_config->pipe_bpp); + lane = ilk_get_lanes_required(fdi_dotclock, link_bw, + pipe_config->pipe_bpp); pipe_config->fdi_lanes = lane; intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, - link_bw, &pipe_config->fdi_m_n, false); + link_bw, &pipe_config->fdi_m_n, false, false); - ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config); + ret = ilk_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config); if (ret == -EDEADLK) return ret; @@ -7156,7 +7747,7 @@ retry: bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state) { - 
struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); /* IPS only exists on ULT machines and is tied to pipe A. */ @@ -7186,9 +7777,9 @@ bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state) static bool hsw_compute_ips_config(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = - to_i915(crtc_state->base.crtc->dev); + to_i915(crtc_state->uapi.crtc->dev); struct intel_atomic_state *intel_state = - to_intel_atomic_state(crtc_state->base.state); + to_intel_atomic_state(crtc_state->uapi.state); if (!hsw_crtc_state_ips_capable(crtc_state)) return false; @@ -7227,7 +7818,7 @@ static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) { u32 pixel_rate; - pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; + pixel_rate = pipe_config->hw.adjusted_mode.crtc_clock; /* * We only use IF-ID interlacing. If we ever use @@ -7260,12 +7851,12 @@ static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); if (HAS_GMCH(dev_priv)) /* FIXME calculate proper pipe pixel rate for GMCH pfit */ crtc_state->pixel_rate = - crtc_state->base.adjusted_mode.crtc_clock; + crtc_state->hw.adjusted_mode.crtc_clock; else crtc_state->pixel_rate = ilk_pipe_pixel_rate(crtc_state); @@ -7275,7 +7866,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; int clock_limit = dev_priv->max_dotclk_freq; if (INTEL_GEN(dev_priv) < 4) { @@ -7301,7 +7892,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, if ((pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 || pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) && - pipe_config->base.ctm) { + pipe_config->hw.ctm) { /* * There is only one pipe CSC unit per pipe, and we need that * for output conversion from RGB->YCBCR. So if CTM is already @@ -7340,7 +7931,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, intel_crtc_compute_pixel_rate(pipe_config); if (pipe_config->has_pch_encoder) - return ironlake_fdi_compute_config(crtc, pipe_config); + return ilk_fdi_compute_config(crtc, pipe_config); return 0; } @@ -7379,11 +7970,15 @@ void intel_link_compute_m_n(u16 bits_per_pixel, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, - bool constant_n) + bool constant_n, bool fec_enable) { - m_n->tu = 64; + u32 data_clock = bits_per_pixel * pixel_clock; - compute_m_n(bits_per_pixel * pixel_clock, + if (fec_enable) + data_clock = intel_dp_mode_to_fec_clock(data_clock); + + m_n->tu = 64; + compute_m_n(data_clock, link_clock * nlanes * 8, &m_n->gmch_m, &m_n->gmch_n, constant_n); @@ -7393,6 +7988,27 @@ intel_link_compute_m_n(u16 bits_per_pixel, int nlanes, constant_n); } +static void intel_panel_sanitize_ssc(struct drm_i915_private *dev_priv) +{ + /* + * There may be no VBT; and if the BIOS enabled SSC we can + * just keep using it to avoid unnecessary flicker. 
Whereas if the + * BIOS isn't using it, don't assume it will work even if the VBT + * indicates as much. + */ + if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) { + bool bios_lvds_use_ssc = I915_READ(PCH_DREF_CONTROL) & + DREF_SSC1_ENABLE; + + if (dev_priv->vbt.lvds_use_ssc != bios_lvds_use_ssc) { + DRM_DEBUG_KMS("SSC %s by BIOS, overriding VBT which says %s\n", + enableddisabled(bios_lvds_use_ssc), + enableddisabled(dev_priv->vbt.lvds_use_ssc)); + dev_priv->vbt.lvds_use_ssc = bios_lvds_use_ssc; + } + } +} + static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) { if (i915_modparams.panel_use_ssc >= 0) @@ -7470,7 +8086,7 @@ static void vlv_pllb_recal_opamp(struct drm_i915_private *dev_priv, enum pipe static void intel_pch_transcoder_set_m_n(const struct intel_crtc_state *crtc_state, const struct intel_link_m_n *m_n) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; @@ -7497,7 +8113,7 @@ static void intel_cpu_transcoder_set_m_n(const struct intel_crtc_state *crtc_sta const struct intel_link_m_n *m_n, const struct intel_link_m_n *m2_n2) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; enum transcoder transcoder = crtc_state->cpu_transcoder; @@ -7806,11 +8422,11 @@ int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe, struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); struct intel_crtc_state *pipe_config; - pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL); + pipe_config = intel_crtc_state_alloc(crtc); if (!pipe_config) return -ENOMEM; - pipe_config->base.crtc = &crtc->base; + pipe_config->cpu_transcoder = (enum transcoder)pipe; pipe_config->pixel_multiplier = 1; pipe_config->dpll = *dpll; @@ -7970,11 +8586,11 @@ static void i8xx_compute_dpll(struct intel_crtc *crtc, static void intel_set_pipe_timings(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; u32 crtc_vtotal, crtc_vblank_end; int vsyncshift = 0; @@ -8032,7 +8648,7 @@ static void intel_set_pipe_timings(const struct intel_crtc_state *crtc_state) static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; @@ -8044,6 +8660,21 @@ static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state) (crtc_state->pipe_src_h - 1)); } +static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + + if (IS_GEN(dev_priv, 2)) + return false; + + if (INTEL_GEN(dev_priv) >= 9 || + IS_BROADWELL(dev_priv) || 
IS_HASWELL(dev_priv)) + return I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK_HSW; + else + return I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK; +} + static void intel_get_pipe_timings(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -8053,39 +8684,39 @@ static void intel_get_pipe_timings(struct intel_crtc *crtc, u32 tmp; tmp = I915_READ(HTOTAL(cpu_transcoder)); - pipe_config->base.adjusted_mode.crtc_hdisplay = (tmp & 0xffff) + 1; - pipe_config->base.adjusted_mode.crtc_htotal = ((tmp >> 16) & 0xffff) + 1; + pipe_config->hw.adjusted_mode.crtc_hdisplay = (tmp & 0xffff) + 1; + pipe_config->hw.adjusted_mode.crtc_htotal = ((tmp >> 16) & 0xffff) + 1; if (!transcoder_is_dsi(cpu_transcoder)) { tmp = I915_READ(HBLANK(cpu_transcoder)); - pipe_config->base.adjusted_mode.crtc_hblank_start = + pipe_config->hw.adjusted_mode.crtc_hblank_start = (tmp & 0xffff) + 1; - pipe_config->base.adjusted_mode.crtc_hblank_end = + pipe_config->hw.adjusted_mode.crtc_hblank_end = ((tmp >> 16) & 0xffff) + 1; } tmp = I915_READ(HSYNC(cpu_transcoder)); - pipe_config->base.adjusted_mode.crtc_hsync_start = (tmp & 0xffff) + 1; - pipe_config->base.adjusted_mode.crtc_hsync_end = ((tmp >> 16) & 0xffff) + 1; + pipe_config->hw.adjusted_mode.crtc_hsync_start = (tmp & 0xffff) + 1; + pipe_config->hw.adjusted_mode.crtc_hsync_end = ((tmp >> 16) & 0xffff) + 1; tmp = I915_READ(VTOTAL(cpu_transcoder)); - pipe_config->base.adjusted_mode.crtc_vdisplay = (tmp & 0xffff) + 1; - pipe_config->base.adjusted_mode.crtc_vtotal = ((tmp >> 16) & 0xffff) + 1; + pipe_config->hw.adjusted_mode.crtc_vdisplay = (tmp & 0xffff) + 1; + pipe_config->hw.adjusted_mode.crtc_vtotal = ((tmp >> 16) & 0xffff) + 1; if (!transcoder_is_dsi(cpu_transcoder)) { tmp = I915_READ(VBLANK(cpu_transcoder)); - pipe_config->base.adjusted_mode.crtc_vblank_start = + pipe_config->hw.adjusted_mode.crtc_vblank_start = (tmp & 0xffff) + 1; - pipe_config->base.adjusted_mode.crtc_vblank_end = + pipe_config->hw.adjusted_mode.crtc_vblank_end = ((tmp >> 16) & 0xffff) + 1; } tmp = I915_READ(VSYNC(cpu_transcoder)); - pipe_config->base.adjusted_mode.crtc_vsync_start = (tmp & 0xffff) + 1; - pipe_config->base.adjusted_mode.crtc_vsync_end = ((tmp >> 16) & 0xffff) + 1; + pipe_config->hw.adjusted_mode.crtc_vsync_start = (tmp & 0xffff) + 1; + pipe_config->hw.adjusted_mode.crtc_vsync_end = ((tmp >> 16) & 0xffff) + 1; - if (I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK) { - pipe_config->base.adjusted_mode.flags |= DRM_MODE_FLAG_INTERLACE; - pipe_config->base.adjusted_mode.crtc_vtotal += 1; - pipe_config->base.adjusted_mode.crtc_vblank_end += 1; + if (intel_pipe_is_interlaced(pipe_config)) { + pipe_config->hw.adjusted_mode.flags |= DRM_MODE_FLAG_INTERLACE; + pipe_config->hw.adjusted_mode.crtc_vtotal += 1; + pipe_config->hw.adjusted_mode.crtc_vblank_end += 1; } } @@ -8100,27 +8731,27 @@ static void intel_get_pipe_src_size(struct intel_crtc *crtc, pipe_config->pipe_src_h = (tmp & 0xffff) + 1; pipe_config->pipe_src_w = ((tmp >> 16) & 0xffff) + 1; - pipe_config->base.mode.vdisplay = pipe_config->pipe_src_h; - pipe_config->base.mode.hdisplay = pipe_config->pipe_src_w; + pipe_config->hw.mode.vdisplay = pipe_config->pipe_src_h; + pipe_config->hw.mode.hdisplay = pipe_config->pipe_src_w; } void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config) { - mode->hdisplay = pipe_config->base.adjusted_mode.crtc_hdisplay; - mode->htotal = pipe_config->base.adjusted_mode.crtc_htotal; - mode->hsync_start = 
pipe_config->base.adjusted_mode.crtc_hsync_start; - mode->hsync_end = pipe_config->base.adjusted_mode.crtc_hsync_end; + mode->hdisplay = pipe_config->hw.adjusted_mode.crtc_hdisplay; + mode->htotal = pipe_config->hw.adjusted_mode.crtc_htotal; + mode->hsync_start = pipe_config->hw.adjusted_mode.crtc_hsync_start; + mode->hsync_end = pipe_config->hw.adjusted_mode.crtc_hsync_end; - mode->vdisplay = pipe_config->base.adjusted_mode.crtc_vdisplay; - mode->vtotal = pipe_config->base.adjusted_mode.crtc_vtotal; - mode->vsync_start = pipe_config->base.adjusted_mode.crtc_vsync_start; - mode->vsync_end = pipe_config->base.adjusted_mode.crtc_vsync_end; + mode->vdisplay = pipe_config->hw.adjusted_mode.crtc_vdisplay; + mode->vtotal = pipe_config->hw.adjusted_mode.crtc_vtotal; + mode->vsync_start = pipe_config->hw.adjusted_mode.crtc_vsync_start; + mode->vsync_end = pipe_config->hw.adjusted_mode.crtc_vsync_end; - mode->flags = pipe_config->base.adjusted_mode.flags; + mode->flags = pipe_config->hw.adjusted_mode.flags; mode->type = DRM_MODE_TYPE_DRIVER; - mode->clock = pipe_config->base.adjusted_mode.crtc_clock; + mode->clock = pipe_config->hw.adjusted_mode.crtc_clock; mode->hsync = drm_mode_hsync(mode); mode->vrefresh = drm_mode_vrefresh(mode); @@ -8129,7 +8760,7 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 pipeconf; @@ -8166,7 +8797,7 @@ static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state) } } - if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) { + if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) { if (INTEL_GEN(dev_priv) < 4 || intel_crtc_has_type(crtc_state, INTEL_OUTPUT_SDVO)) pipeconf |= PIPECONF_INTERLACE_W_FIELD_INDICATION; @@ -8182,6 +8813,8 @@ static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state) pipeconf |= PIPECONF_GAMMA_MODE(crtc_state->gamma_mode); + pipeconf |= PIPECONF_FRAME_START_DELAY(0); + I915_WRITE(PIPECONF(crtc->pipe), pipeconf); POSTING_READ(PIPECONF(crtc->pipe)); } @@ -8281,9 +8914,9 @@ static int pnv_crtc_compute_clock(struct intel_crtc *crtc, DRM_DEBUG_KMS("using SSC reference clock of %d kHz\n", refclk); } - limit = &intel_limits_pineview_lvds; + limit = &pnv_limits_lvds; } else { - limit = &intel_limits_pineview_sdvo; + limit = &pnv_limits_sdvo; } if (!crtc_state->clock_set && @@ -8414,7 +9047,7 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = pipe_config->cpu_transcoder; + enum pipe pipe = crtc->pipe; struct dpll clock; u32 mdiv; int refclk = 100000; @@ -8524,7 +9157,7 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = pipe_config->cpu_transcoder; + enum pipe pipe = crtc->pipe; enum dpio_channel port = vlv_pipe_to_channel(pipe); struct dpll clock; u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3; @@ -8553,52 +9186,29 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, pipe_config->port_clock = chv_calc_dpll_params(refclk, &clock); } -static void intel_get_crtc_ycbcr_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static enum intel_output_format 
+bdw_get_pipemisc_output_format(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum intel_output_format output = INTEL_OUTPUT_FORMAT_RGB; - - pipe_config->lspcon_downsampling = false; + u32 tmp; - if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) { - u32 tmp = I915_READ(PIPEMISC(crtc->pipe)); + tmp = I915_READ(PIPEMISC(crtc->pipe)); - if (tmp & PIPEMISC_OUTPUT_COLORSPACE_YUV) { - bool ycbcr420_enabled = tmp & PIPEMISC_YUV420_ENABLE; - bool blend = tmp & PIPEMISC_YUV420_MODE_FULL_BLEND; + if (tmp & PIPEMISC_YUV420_ENABLE) { + /* We support 4:2:0 in full blend mode only */ + WARN_ON((tmp & PIPEMISC_YUV420_MODE_FULL_BLEND) == 0); - if (ycbcr420_enabled) { - /* We support 4:2:0 in full blend mode only */ - if (!blend) - output = INTEL_OUTPUT_FORMAT_INVALID; - else if (!(IS_GEMINILAKE(dev_priv) || - INTEL_GEN(dev_priv) >= 10)) - output = INTEL_OUTPUT_FORMAT_INVALID; - else - output = INTEL_OUTPUT_FORMAT_YCBCR420; - } else { - /* - * Currently there is no interface defined to - * check user preference between RGB/YCBCR444 - * or YCBCR420. So the only possible case for - * YCBCR444 usage is driving YCBCR420 output - * with LSPCON, when pipe is configured for - * YCBCR444 output and LSPCON takes care of - * downsampling it. - */ - pipe_config->lspcon_downsampling = true; - output = INTEL_OUTPUT_FORMAT_YCBCR444; - } - } + return INTEL_OUTPUT_FORMAT_YCBCR420; + } else if (tmp & PIPEMISC_OUTPUT_COLORSPACE_YUV) { + return INTEL_OUTPUT_FORMAT_YCBCR444; + } else { + return INTEL_OUTPUT_FORMAT_RGB; } - - pipe_config->output_format = output; } static void i9xx_get_pipe_color_config(struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_plane *plane = to_intel_plane(crtc->base.primary); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; @@ -8631,6 +9241,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe; pipe_config->shared_dpll = NULL; + pipe_config->master_transcoder = INVALID_TRANSCODER; ret = false; @@ -8721,7 +9332,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, * but in case the pipe is enabled w/o any ports we need a sane * default. */ - pipe_config->base.adjusted_mode.crtc_clock = + pipe_config->hw.adjusted_mode.crtc_clock = pipe_config->port_clock / pipe_config->pixel_multiplier; ret = true; @@ -8732,7 +9343,7 @@ out: return ret; } -static void ironlake_init_pch_refclk(struct drm_i915_private *dev_priv) +static void ilk_init_pch_refclk(struct drm_i915_private *dev_priv) { struct intel_encoder *encoder; int i; @@ -9169,7 +9780,6 @@ static bool wrpll_uses_pch_ssc(struct drm_i915_private *dev_priv, static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv) { struct intel_encoder *encoder; - bool pch_ssc_in_use = false; bool has_fdi = false; for_each_intel_encoder(&dev_priv->drm, encoder) { @@ -9197,22 +9807,24 @@ static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv) * clock hierarchy. That would also allow us to do * clock bending finally. 
*/ + dev_priv->pch_ssc_use = 0; + if (spll_uses_pch_ssc(dev_priv)) { DRM_DEBUG_KMS("SPLL using PCH SSC\n"); - pch_ssc_in_use = true; + dev_priv->pch_ssc_use |= BIT(DPLL_ID_SPLL); } if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL1)) { DRM_DEBUG_KMS("WRPLL1 using PCH SSC\n"); - pch_ssc_in_use = true; + dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL1); } if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL2)) { DRM_DEBUG_KMS("WRPLL2 using PCH SSC\n"); - pch_ssc_in_use = true; + dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL2); } - if (pch_ssc_in_use) + if (dev_priv->pch_ssc_use) return; if (has_fdi) { @@ -9229,14 +9841,14 @@ static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv) void intel_init_pch_refclk(struct drm_i915_private *dev_priv) { if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) - ironlake_init_pch_refclk(dev_priv); + ilk_init_pch_refclk(dev_priv); else if (HAS_PCH_LPT(dev_priv)) lpt_init_pch_refclk(dev_priv); } -static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state) +static void ilk_set_pipeconf(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; u32 val; @@ -9264,23 +9876,35 @@ static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state) if (crtc_state->dither) val |= (PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_SP); - if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) + if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) val |= PIPECONF_INTERLACED_ILK; else val |= PIPECONF_PROGRESSIVE; + /* + * This would end up with an odd purple hue over + * the entire display. Make sure we don't do it. 
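For context on the WARN_ON added right below: "limited color range" means the pipe compresses full-range RGB into the broadcast 16-235 range before output, and applying that scaling to a signal that is already YCbCr (whose chroma components are centred on 128) shifts every channel, which is the purple tint the comment warns about. A standalone sketch of the standard limited-range compression (general BT.601/709 background, not taken from this patch):

#include <stdio.h>

/* map a full-range 8-bit value (0-255) to limited range (16-235) */
static int full_to_limited(int v)
{
        return 16 + (v * 219 + 127) / 255;
}

int main(void)
{
        printf("0 -> %d, 128 -> %d, 255 -> %d\n",
               full_to_limited(0), full_to_limited(128), full_to_limited(255));
        return 0;
}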
+ */ + WARN_ON(crtc_state->limited_color_range && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); + if (crtc_state->limited_color_range) val |= PIPECONF_COLOR_RANGE_SELECT; + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + val |= PIPECONF_OUTPUT_COLORSPACE_YUV709; + val |= PIPECONF_GAMMA_MODE(crtc_state->gamma_mode); + val |= PIPECONF_FRAME_START_DELAY(0); + I915_WRITE(PIPECONF(pipe), val); POSTING_READ(PIPECONF(pipe)); } -static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state) +static void hsw_set_pipeconf(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 val = 0; @@ -9288,18 +9912,22 @@ static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state) if (IS_HASWELL(dev_priv) && crtc_state->dither) val |= (PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_SP); - if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) + if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) val |= PIPECONF_INTERLACED_ILK; else val |= PIPECONF_PROGRESSIVE; + if (IS_HASWELL(dev_priv) && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + val |= PIPECONF_OUTPUT_COLORSPACE_YUV_HSW; + I915_WRITE(PIPECONF(cpu_transcoder), val); POSTING_READ(PIPECONF(cpu_transcoder)); } static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 val = 0; @@ -9362,7 +9990,7 @@ int bdw_get_pipemisc_bpp(struct intel_crtc *crtc) } } -int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp) +int ilk_get_lanes_required(int target_clock, int link_bw, int bpp) { /* * Account for spread spectrum to avoid @@ -9373,14 +10001,14 @@ int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp) return DIV_ROUND_UP(bps, link_bw * 8); } -static bool ironlake_needs_fb_cb_tune(struct dpll *dpll, int factor) +static bool ilk_needs_fb_cb_tune(struct dpll *dpll, int factor) { return i9xx_dpll_compute_m(dpll) < factor * dpll->n; } -static void ironlake_compute_dpll(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct dpll *reduced_clock) +static void ilk_compute_dpll(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct dpll *reduced_clock) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 dpll, fp, fp2; @@ -9400,7 +10028,7 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc, fp = i9xx_dpll_compute_fp(&crtc_state->dpll); - if (ironlake_needs_fb_cb_tune(&crtc_state->dpll, factor)) + if (ilk_needs_fb_cb_tune(&crtc_state->dpll, factor)) fp |= FP_CB_TUNE; if (reduced_clock) { @@ -9443,7 +10071,7 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc, * clear if it''s a win or loss power wise. No point in doing * this on ILK at all since it has a fixed DPLL<->pipe mapping. 
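The ilk_get_lanes_required() helper renamed above sizes the FDI link as roughly ceil(payload bits per second / (link_bw * 8)). The exact spread-spectrum padding is outside this excerpt, so the ~5% overhead and the 270 MHz link figure in the sketch below are illustrative assumptions only:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned long target_clock = 148500;    /* kHz, 1920x1080@60 example */
        unsigned long bpp = 24;
        unsigned long link_bw = 270000;         /* kHz, assumed FDI link clock */

        /* pad the payload a little for spread spectrum (assumed ~5%) */
        unsigned long bps = target_clock * bpp * 21 / 20;
        unsigned long lanes = DIV_ROUND_UP(bps, link_bw * 8);

        printf("%lu kbps over a %lu kHz link -> %lu lane(s)\n",
               bps, link_bw, lanes);
        return 0;
}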
*/ - if (INTEL_INFO(dev_priv)->num_pipes == 3 && + if (INTEL_NUM_PIPES(dev_priv) == 3 && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) dpll |= DPLL_SDVO_HIGH_SPEED; @@ -9480,10 +10108,12 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc, crtc_state->dpll_hw_state.fp1 = fp2; } -static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) +static int ilk_crtc_compute_clock(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_atomic_state *state = + to_intel_atomic_state(crtc_state->uapi.state); const struct intel_limit *limit; int refclk = 120000; @@ -9503,17 +10133,17 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, if (intel_is_dual_link_lvds(dev_priv)) { if (refclk == 100000) - limit = &intel_limits_ironlake_dual_lvds_100m; + limit = &ilk_limits_dual_lvds_100m; else - limit = &intel_limits_ironlake_dual_lvds; + limit = &ilk_limits_dual_lvds; } else { if (refclk == 100000) - limit = &intel_limits_ironlake_single_lvds_100m; + limit = &ilk_limits_single_lvds_100m; else - limit = &intel_limits_ironlake_single_lvds; + limit = &ilk_limits_single_lvds; } } else { - limit = &intel_limits_ironlake_dac; + limit = &ilk_limits_dac; } if (!crtc_state->clock_set && @@ -9523,9 +10153,9 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, return -EINVAL; } - ironlake_compute_dpll(crtc, crtc_state, NULL); + ilk_compute_dpll(crtc, crtc_state, NULL); - if (!intel_get_shared_dpll(crtc_state, NULL)) { + if (!intel_reserve_shared_dplls(state, crtc, NULL)) { DRM_DEBUG_KMS("failed to find PLL for pipe %c\n", pipe_name(crtc->pipe)); return -EINVAL; @@ -9598,15 +10228,15 @@ void intel_dp_get_m_n(struct intel_crtc *crtc, &pipe_config->dp_m2_n2); } -static void ironlake_get_fdi_m_n_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void ilk_get_fdi_m_n_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { intel_cpu_transcoder_get_m_n(crtc, pipe_config->cpu_transcoder, &pipe_config->fdi_m_n, NULL); } -static void skylake_get_pfit_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void skl_get_pfit_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -9637,8 +10267,8 @@ static void skylake_get_pfit_config(struct intel_crtc *crtc, } static void -skylake_get_initial_plane_config(struct intel_crtc *crtc, - struct intel_initial_plane_config *plane_config) +skl_get_initial_plane_config(struct intel_crtc *crtc, + struct intel_initial_plane_config *plane_config) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -9696,7 +10326,11 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, case PLANE_CTL_TILED_Y: plane_config->tiling = I915_TILING_Y; if (val & PLANE_CTL_RENDER_DECOMPRESSION_ENABLE) - fb->modifier = I915_FORMAT_MOD_Y_TILED_CCS; + fb->modifier = INTEL_GEN(dev_priv) >= 12 ? 
+ I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS : + I915_FORMAT_MOD_Y_TILED_CCS; + else if (val & PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE) + fb->modifier = I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS; else fb->modifier = I915_FORMAT_MOD_Y_TILED; break; @@ -9740,8 +10374,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, offset = I915_READ(PLANE_OFFSET(pipe, plane_id)); val = I915_READ(PLANE_SIZE(pipe, plane_id)); - fb->height = ((val >> 16) & 0xfff) + 1; - fb->width = ((val >> 0) & 0x1fff) + 1; + fb->height = ((val >> 16) & 0xffff) + 1; + fb->width = ((val >> 0) & 0xffff) + 1; val = I915_READ(PLANE_STRIDE(pipe, plane_id)); stride_mult = skl_plane_stride_mult(fb, 0, DRM_MODE_ROTATE_0); @@ -9763,8 +10397,8 @@ error: kfree(intel_fb); } -static void ironlake_get_pfit_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void ilk_get_pfit_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -9787,8 +10421,8 @@ static void ironlake_get_pfit_config(struct intel_crtc *crtc, } } -static bool ironlake_get_pipe_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static bool ilk_get_pipe_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -9802,9 +10436,9 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, if (!wakeref) return false; - pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe; pipe_config->shared_dpll = NULL; + pipe_config->master_transcoder = INVALID_TRANSCODER; ret = false; tmp = I915_READ(PIPECONF(crtc->pipe)); @@ -9831,6 +10465,16 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, if (tmp & PIPECONF_COLOR_RANGE_SELECT) pipe_config->limited_color_range = true; + switch (tmp & PIPECONF_OUTPUT_COLORSPACE_MASK) { + case PIPECONF_OUTPUT_COLORSPACE_YUV601: + case PIPECONF_OUTPUT_COLORSPACE_YUV709: + pipe_config->output_format = INTEL_OUTPUT_FORMAT_YCBCR444; + break; + default: + pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + break; + } + pipe_config->gamma_mode = (tmp & PIPECONF_GAMMA_MODE_MASK_ILK) >> PIPECONF_GAMMA_MODE_SHIFT; @@ -9849,7 +10493,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, pipe_config->fdi_lanes = ((FDI_DP_PORT_WIDTH_MASK & tmp) >> FDI_DP_PORT_WIDTH_SHIFT) + 1; - ironlake_get_fdi_m_n_config(crtc, pipe_config); + ilk_get_fdi_m_n_config(crtc, pipe_config); if (HAS_PCH_IBX(dev_priv)) { /* @@ -9877,7 +10521,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, ((tmp & PLL_REF_SDVO_HDMI_MULTIPLIER_MASK) >> PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT) + 1; - ironlake_pch_clock_get(crtc, pipe_config); + ilk_pch_clock_get(crtc, pipe_config); } else { pipe_config->pixel_multiplier = 1; } @@ -9885,7 +10529,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, intel_get_pipe_timings(crtc, pipe_config); intel_get_pipe_src_size(crtc, pipe_config); - ironlake_get_pfit_config(crtc, pipe_config); + ilk_get_pfit_config(crtc, pipe_config); ret = true; @@ -9894,19 +10538,20 @@ out: return ret; } -static int haswell_crtc_compute_clock(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) + +static int hsw_crtc_compute_clock(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_atomic_state *state = - 
to_intel_atomic_state(crtc_state->base.state); + to_intel_atomic_state(crtc_state->uapi.state); if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) || INTEL_GEN(dev_priv) >= 11) { struct intel_encoder *encoder = intel_get_crtc_new_encoder(state, crtc_state); - if (!intel_get_shared_dpll(crtc_state, encoder)) { + if (!intel_reserve_shared_dplls(state, crtc, encoder)) { DRM_DEBUG_KMS("failed to find PLL for pipe %c\n", pipe_name(crtc->pipe)); return -EINVAL; @@ -9916,9 +10561,8 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc, return 0; } -static void cannonlake_get_ddi_pll(struct drm_i915_private *dev_priv, - enum port port, - struct intel_crtc_state *pipe_config) +static void cnl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, + struct intel_crtc_state *pipe_config) { enum intel_dpll_id id; u32 temp; @@ -9932,26 +10576,40 @@ static void cannonlake_get_ddi_pll(struct drm_i915_private *dev_priv, pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id); } -static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv, - enum port port, - struct intel_crtc_state *pipe_config) +static void icl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, + struct intel_crtc_state *pipe_config) { + enum phy phy = intel_port_to_phy(dev_priv, port); + enum icl_port_dpll_id port_dpll_id; enum intel_dpll_id id; u32 temp; - /* TODO: TBT pll not implemented. */ - if (intel_port_is_combophy(dev_priv, port)) { - temp = I915_READ(DPCLKA_CFGCR0_ICL) & - DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); - id = temp >> DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port); - } else if (intel_port_is_tc(dev_priv, port)) { - id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, port)); + if (intel_phy_is_combo(dev_priv, phy)) { + temp = I915_READ(ICL_DPCLKA_CFGCR0) & + ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + id = temp >> ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); + port_dpll_id = ICL_PORT_DPLL_DEFAULT; + } else if (intel_phy_is_tc(dev_priv, phy)) { + u32 clk_sel = I915_READ(DDI_CLK_SEL(port)) & DDI_CLK_SEL_MASK; + + if (clk_sel == DDI_CLK_SEL_MG) { + id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, + port)); + port_dpll_id = ICL_PORT_DPLL_MG_PHY; + } else { + WARN_ON(clk_sel < DDI_CLK_SEL_TBT_162); + id = DPLL_ID_ICL_TBTPLL; + port_dpll_id = ICL_PORT_DPLL_DEFAULT; + } } else { WARN(1, "Invalid port %x\n", port); return; } - pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id); + pipe_config->icl_port_dplls[port_dpll_id].pll = + intel_get_shared_dpll_by_id(dev_priv, id); + + icl_set_active_port_dpll(pipe_config, port_dpll_id); } static void bxt_get_ddi_pll(struct drm_i915_private *dev_priv, @@ -9978,9 +10636,8 @@ static void bxt_get_ddi_pll(struct drm_i915_private *dev_priv, pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id); } -static void skylake_get_ddi_pll(struct drm_i915_private *dev_priv, - enum port port, - struct intel_crtc_state *pipe_config) +static void skl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, + struct intel_crtc_state *pipe_config) { enum intel_dpll_id id; u32 temp; @@ -9994,9 +10651,8 @@ static void skylake_get_ddi_pll(struct drm_i915_private *dev_priv, pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id); } -static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, - enum port port, - struct intel_crtc_state *pipe_config) +static void hsw_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, + struct intel_crtc_state *pipe_config) { enum intel_dpll_id id; u32 ddi_pll_sel = 
I915_READ(PORT_CLK_SEL(port)); @@ -10097,6 +10753,9 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, case TRANS_DDI_EDP_INPUT_C_ONOFF: trans_pipe = PIPE_C; break; + case TRANS_DDI_EDP_INPUT_D_ONOFF: + trans_pipe = PIPE_D; + break; } if (trans_pipe == crtc->pipe) { @@ -10181,28 +10840,36 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, return transcoder_is_dsi(pipe_config->cpu_transcoder); } -static void haswell_get_ddi_port_state(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void hsw_get_ddi_port_state(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; struct intel_shared_dpll *pll; enum port port; u32 tmp; - tmp = I915_READ(TRANS_DDI_FUNC_CTL(pipe_config->cpu_transcoder)); - - port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT; + if (transcoder_is_dsi(cpu_transcoder)) { + port = (cpu_transcoder == TRANSCODER_DSI_A) ? + PORT_A : PORT_B; + } else { + tmp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); + if (INTEL_GEN(dev_priv) >= 12) + port = TGL_TRANS_DDI_FUNC_CTL_VAL_TO_PORT(tmp); + else + port = TRANS_DDI_FUNC_CTL_VAL_TO_PORT(tmp); + } if (INTEL_GEN(dev_priv) >= 11) - icelake_get_ddi_pll(dev_priv, port, pipe_config); + icl_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_CANNONLAKE(dev_priv)) - cannonlake_get_ddi_pll(dev_priv, port, pipe_config); + cnl_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_GEN9_BC(dev_priv)) - skylake_get_ddi_pll(dev_priv, port, pipe_config); + skl_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_get_ddi_pll(dev_priv, port, pipe_config); else - haswell_get_ddi_pll(dev_priv, port, pipe_config); + hsw_get_ddi_pll(dev_priv, port, pipe_config); pll = pipe_config->shared_dpll; if (pll) { @@ -10223,12 +10890,65 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, pipe_config->fdi_lanes = ((FDI_DP_PORT_WIDTH_MASK & tmp) >> FDI_DP_PORT_WIDTH_SHIFT) + 1; - ironlake_get_fdi_m_n_config(crtc, pipe_config); + ilk_get_fdi_m_n_config(crtc, pipe_config); } } -static bool haswell_get_pipe_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static enum transcoder transcoder_master_readout(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder) +{ + u32 trans_port_sync, master_select; + + trans_port_sync = I915_READ(TRANS_DDI_FUNC_CTL2(cpu_transcoder)); + + if ((trans_port_sync & PORT_SYNC_MODE_ENABLE) == 0) + return INVALID_TRANSCODER; + + master_select = trans_port_sync & + PORT_SYNC_MODE_MASTER_SELECT_MASK; + if (master_select == 0) + return TRANSCODER_EDP; + else + return master_select - 1; +} + +static void icl_get_trans_port_sync_config(struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + u32 transcoders; + enum transcoder cpu_transcoder; + + crtc_state->master_transcoder = transcoder_master_readout(dev_priv, + crtc_state->cpu_transcoder); + + transcoders = BIT(TRANSCODER_A) | + BIT(TRANSCODER_B) | + BIT(TRANSCODER_C) | + BIT(TRANSCODER_D); + for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { + enum intel_display_power_domain power_domain; + intel_wakeref_t trans_wakeref; + + power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); + trans_wakeref = intel_display_power_get_if_enabled(dev_priv, + power_domain); + + if (!trans_wakeref) + continue; + + if (transcoder_master_readout(dev_priv, 
cpu_transcoder) == + crtc_state->cpu_transcoder) + crtc_state->sync_mode_slaves_mask |= BIT(cpu_transcoder); + + intel_display_power_put(dev_priv, power_domain, trans_wakeref); + } + + WARN_ON(crtc_state->master_transcoder != INVALID_TRANSCODER && + crtc_state->sync_mode_slaves_mask); +} + +static bool hsw_get_pipe_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); intel_wakeref_t wakerefs[POWER_DOMAIN_NUM], wf; @@ -10236,7 +10956,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, u64 power_domain_mask; bool active; - intel_crtc_init_scalers(crtc, pipe_config); + pipe_config->master_transcoder = INVALID_TRANSCODER; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); wf = intel_display_power_get_if_enabled(dev_priv, power_domain); @@ -10263,12 +10983,35 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, if (!transcoder_is_dsi(pipe_config->cpu_transcoder) || INTEL_GEN(dev_priv) >= 11) { - haswell_get_ddi_port_state(crtc, pipe_config); + hsw_get_ddi_port_state(crtc, pipe_config); intel_get_pipe_timings(crtc, pipe_config); } intel_get_pipe_src_size(crtc, pipe_config); - intel_get_crtc_ycbcr_config(crtc, pipe_config); + + if (IS_HASWELL(dev_priv)) { + u32 tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder)); + + if (tmp & PIPECONF_OUTPUT_COLORSPACE_YUV_HSW) + pipe_config->output_format = INTEL_OUTPUT_FORMAT_YCBCR444; + else + pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + } else { + pipe_config->output_format = + bdw_get_pipemisc_output_format(crtc); + + /* + * Currently there is no interface defined to + * check user preference between RGB/YCBCR444 + * or YCBCR420. So the only possible case for + * YCBCR444 usage is driving YCBCR420 output + * with LSPCON, when pipe is configured for + * YCBCR444 output and LSPCON takes care of + * downsampling it. 
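The readout path here distinguishes native YCbCr 4:2:0 output from 4:4:4 that an LSPCON later downsamples. As plain subsampling arithmetic (general background, not taken from the driver), 4:2:0 carries one Cb/Cr pair per 2x2 pixel block, so at 8 bits per component it averages 12 bits per pixel against 24 for 4:4:4:

#include <stdio.h>

int main(void)
{
        unsigned long w = 3840, h = 2160, bpc = 8;

        unsigned long bits_444 = w * h * 3 * bpc;             /* Y + Cb + Cr per pixel */
        unsigned long bits_420 = w * h * bpc +                /* full-res luma */
                                 (w / 2) * (h / 2) * 2 * bpc; /* quarter-res chroma */

        printf("4:4:4 %lu bits/frame, 4:2:0 %lu bits/frame\n", bits_444, bits_420);
        return 0;
}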
+ */ + pipe_config->lspcon_downsampling = + pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR444; + } pipe_config->gamma_mode = I915_READ(GAMMA_MODE(crtc->pipe)); @@ -10297,9 +11040,9 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, power_domain_mask |= BIT_ULL(power_domain); if (INTEL_GEN(dev_priv) >= 9) - skylake_get_pfit_config(crtc, pipe_config); + skl_get_pfit_config(crtc, pipe_config); else - ironlake_get_pfit_config(crtc, pipe_config); + ilk_get_pfit_config(crtc, pipe_config); } if (hsw_crtc_supports_ips(crtc)) { @@ -10323,6 +11066,10 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, pipe_config->pixel_multiplier = 1; } + if (INTEL_GEN(dev_priv) >= 11 && + !transcoder_is_dsi(pipe_config->cpu_transcoder)) + icl_get_trans_port_sync_config(pipe_config); + out: for_each_power_domain(power_domain, power_domain_mask) intel_display_power_put(dev_priv, @@ -10334,8 +11081,8 @@ out: static u32 intel_cursor_base(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; + to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; const struct drm_i915_gem_object *obj = intel_fb_obj(fb); u32 base; @@ -10344,21 +11091,13 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) else base = intel_plane_ggtt_offset(plane_state); - base += plane_state->color_plane[0].offset; - - /* ILK+ do this automagically */ - if (HAS_GMCH(dev_priv) && - plane_state->base.rotation & DRM_MODE_ROTATE_180) - base += (plane_state->base.crtc_h * - plane_state->base.crtc_w - 1) * fb->format->cpp[0]; - - return base; + return base + plane_state->color_plane[0].offset; } static u32 intel_cursor_position(const struct intel_plane_state *plane_state) { - int x = plane_state->base.crtc_x; - int y = plane_state->base.crtc_y; + int x = plane_state->uapi.dst.x1; + int y = plane_state->uapi.dst.y1; u32 pos = 0; if (x < 0) { @@ -10379,9 +11118,9 @@ static u32 intel_cursor_position(const struct intel_plane_state *plane_state) static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state) { const struct drm_mode_config *config = - &plane_state->base.plane->dev->mode_config; - int width = plane_state->base.crtc_w; - int height = plane_state->base.crtc_h; + &plane_state->uapi.plane->dev->mode_config; + int width = drm_rect_width(&plane_state->uapi.dst); + int height = drm_rect_height(&plane_state->uapi.dst); return width > 0 && width <= config->cursor_width && height > 0 && height <= config->cursor_height; @@ -10389,6 +11128,9 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state) static int intel_cursor_check_surface(struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = + to_i915(plane_state->uapi.plane->dev); + unsigned int rotation = plane_state->hw.rotation; int src_x, src_y; u32 offset; int ret; @@ -10397,11 +11139,11 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state) if (ret) return ret; - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) return 0; - src_x = plane_state->base.src_x >> 16; - src_y = plane_state->base.src_y >> 16; + src_x = plane_state->uapi.src.x1 >> 16; + src_y = plane_state->uapi.src.y1 >> 16; intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); offset = intel_plane_compute_aligned_offset(&src_x, &src_y, @@ -10412,7 +11154,25 @@ static int intel_cursor_check_surface(struct 
intel_plane_state *plane_state) return -EINVAL; } + /* + * Put the final coordinates back so that the src + * coordinate checks will see the right values. + */ + drm_rect_translate_to(&plane_state->uapi.src, + src_x << 16, src_y << 16); + + /* ILK+ do this automagically in hardware */ + if (HAS_GMCH(dev_priv) && rotation & DRM_MODE_ROTATE_180) { + const struct drm_framebuffer *fb = plane_state->hw.fb; + int src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + int src_h = drm_rect_height(&plane_state->uapi.src) >> 16; + + offset += (src_h * src_w - 1) * fb->format->cpp[0]; + } + plane_state->color_plane[0].offset = offset; + plane_state->color_plane[0].x = src_x; + plane_state->color_plane[0].y = src_y; return 0; } @@ -10420,7 +11180,7 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state) static int intel_check_cursor(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_framebuffer *fb = plane_state->hw.fb; int ret; if (fb && fb->modifier != DRM_FORMAT_MOD_LINEAR) { @@ -10428,19 +11188,23 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, return -EINVAL; } - ret = drm_atomic_helper_check_plane_state(&plane_state->base, - &crtc_state->base, + ret = drm_atomic_helper_check_plane_state(&plane_state->uapi, + &crtc_state->uapi, DRM_PLANE_HELPER_NO_SCALING, DRM_PLANE_HELPER_NO_SCALING, true, true); if (ret) return ret; + /* Use the unclipped src/dst rectangles, which we program to hw */ + plane_state->uapi.src = drm_plane_state_src(&plane_state->uapi); + plane_state->uapi.dst = drm_plane_state_dest(&plane_state->uapi); + ret = intel_cursor_check_surface(plane_state); if (ret) return ret; - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) return 0; ret = intel_plane_check_src_coordinates(plane_state); @@ -10478,7 +11242,7 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state) { - int width = plane_state->base.crtc_w; + int width = drm_rect_width(&plane_state->uapi.dst); /* * 845g/865g are only limited by the width of their cursors, @@ -10490,7 +11254,7 @@ static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state) static int i845_check_cursor(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_framebuffer *fb = plane_state->hw.fb; int ret; ret = intel_check_cursor(crtc_state, plane_state); @@ -10504,12 +11268,12 @@ static int i845_check_cursor(struct intel_crtc_state *crtc_state, /* Check for which cursor types we support */ if (!i845_cursor_size_ok(plane_state)) { DRM_DEBUG("Cursor dimension %dx%d not supported\n", - plane_state->base.crtc_w, - plane_state->base.crtc_h); + drm_rect_width(&plane_state->uapi.dst), + drm_rect_height(&plane_state->uapi.dst)); return -EINVAL; } - WARN_ON(plane_state->base.visible && + WARN_ON(plane_state->uapi.visible && plane_state->color_plane[0].stride != fb->pitches[0]); switch (fb->pitches[0]) { @@ -10537,9 +11301,9 @@ static void i845_update_cursor(struct intel_plane *plane, u32 cntl = 0, base = 0, pos = 0, size = 0; unsigned long irqflags; - if (plane_state && plane_state->base.visible) { - unsigned int width = plane_state->base.crtc_w; - unsigned int height = plane_state->base.crtc_h; + if (plane_state && plane_state->uapi.visible) { + unsigned int width = 
drm_rect_width(&plane_state->uapi.dst); + unsigned int height = drm_rect_height(&plane_state->uapi.dst); cntl = plane_state->ctl | i845_cursor_ctl_crtc(crtc_state); @@ -10612,7 +11376,7 @@ i9xx_cursor_max_stride(struct intel_plane *plane, static u32 i9xx_cursor_ctl_crtc(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 cntl = 0; @@ -10635,13 +11399,13 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); + to_i915(plane_state->uapi.plane->dev); u32 cntl = 0; if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) cntl |= MCURSOR_TRICKLE_FEED_DISABLE; - switch (plane_state->base.crtc_w) { + switch (drm_rect_width(&plane_state->uapi.dst)) { case 64: cntl |= MCURSOR_MODE_64_ARGB_AX; break; @@ -10652,11 +11416,11 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, cntl |= MCURSOR_MODE_256_ARGB_AX; break; default: - MISSING_CASE(plane_state->base.crtc_w); + MISSING_CASE(drm_rect_width(&plane_state->uapi.dst)); return 0; } - if (plane_state->base.rotation & DRM_MODE_ROTATE_180) + if (plane_state->hw.rotation & DRM_MODE_ROTATE_180) cntl |= MCURSOR_ROTATE_180; return cntl; @@ -10665,9 +11429,9 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - int width = plane_state->base.crtc_w; - int height = plane_state->base.crtc_h; + to_i915(plane_state->uapi.plane->dev); + int width = drm_rect_width(&plane_state->uapi.dst); + int height = drm_rect_height(&plane_state->uapi.dst); if (!intel_cursor_size_ok(plane_state)) return false; @@ -10689,7 +11453,7 @@ static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) * cursors. 
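The cursor checks in this area combine a few rules: widths come only in a handful of power-of-two sizes, an arbitrary height (8 up to the width) is allowed only when cursor FBC is available and the cursor is unrotated, and everything else must be square. A condensed restatement as a standalone predicate (has_fbc and rotated stand in for the HAS_CUR_FBC() and rotation tests):

#include <stdbool.h>
#include <stdio.h>

static bool cursor_size_ok(int width, int height, bool has_fbc, bool rotated)
{
        if (width != 64 && width != 128 && width != 256)
                return false;

        if (has_fbc && !rotated)
                return height >= 8 && height <= width;  /* CUR_FBC_CTL case */

        return height == width;                         /* square only */
}

int main(void)
{
        printf("%d %d %d\n",
               cursor_size_ok(256, 64, true, false),
               cursor_size_ok(256, 64, false, false),
               cursor_size_ok(64, 64, false, true));
        return 0;
}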
*/ if (HAS_CUR_FBC(dev_priv) && - plane_state->base.rotation & DRM_MODE_ROTATE_0) { + plane_state->hw.rotation & DRM_MODE_ROTATE_0) { if (height < 8 || height > width) return false; } else { @@ -10703,9 +11467,9 @@ static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; int ret; @@ -10720,17 +11484,19 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, /* Check for which cursor types we support */ if (!i9xx_cursor_size_ok(plane_state)) { DRM_DEBUG("Cursor dimension %dx%d not supported\n", - plane_state->base.crtc_w, - plane_state->base.crtc_h); + drm_rect_width(&plane_state->uapi.dst), + drm_rect_height(&plane_state->uapi.dst)); return -EINVAL; } - WARN_ON(plane_state->base.visible && + WARN_ON(plane_state->uapi.visible && plane_state->color_plane[0].stride != fb->pitches[0]); - if (fb->pitches[0] != plane_state->base.crtc_w * fb->format->cpp[0]) { + if (fb->pitches[0] != + drm_rect_width(&plane_state->uapi.dst) * fb->format->cpp[0]) { DRM_DEBUG_KMS("Invalid cursor stride (%u) (cursor width %d)\n", - fb->pitches[0], plane_state->base.crtc_w); + fb->pitches[0], + drm_rect_width(&plane_state->uapi.dst)); return -EINVAL; } @@ -10745,7 +11511,7 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, * Refuse the put the cursor into that compromised position. 
*/ if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C && - plane_state->base.visible && plane_state->base.crtc_x < 0) { + plane_state->uapi.visible && plane_state->uapi.dst.x1 < 0) { DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); return -EINVAL; } @@ -10764,12 +11530,15 @@ static void i9xx_update_cursor(struct intel_plane *plane, u32 cntl = 0, base = 0, pos = 0, fbc_ctl = 0; unsigned long irqflags; - if (plane_state && plane_state->base.visible) { + if (plane_state && plane_state->uapi.visible) { + unsigned width = drm_rect_width(&plane_state->uapi.dst); + unsigned height = drm_rect_height(&plane_state->uapi.dst); + cntl = plane_state->ctl | i9xx_cursor_ctl_crtc(crtc_state); - if (plane_state->base.crtc_h != plane_state->base.crtc_w) - fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1); + if (width != height) + fbc_ctl = CUR_FBC_CTL_EN | (height - 1); base = intel_cursor_base(plane_state); pos = intel_cursor_position(plane_state); @@ -10914,13 +11683,12 @@ static int intel_modeset_disable_planes(struct drm_atomic_state *state, } int intel_get_load_detect_pipe(struct drm_connector *connector, - const struct drm_display_mode *mode, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx) { struct intel_crtc *intel_crtc; struct intel_encoder *intel_encoder = - intel_attached_encoder(connector); + intel_attached_encoder(to_intel_connector(connector)); struct drm_crtc *possible_crtc; struct drm_encoder *encoder = &intel_encoder->base; struct drm_crtc *crtc = NULL; @@ -11019,12 +11787,10 @@ found: goto fail; } - crtc_state->base.active = crtc_state->base.enable = true; - - if (!mode) - mode = &load_detect_mode; + crtc_state->uapi.active = true; - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); + ret = drm_atomic_set_mode_for_crtc(&crtc_state->uapi, + &load_detect_mode); if (ret) goto fail; @@ -11076,7 +11842,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx) { struct intel_encoder *intel_encoder = - intel_attached_encoder(connector); + intel_attached_encoder(to_intel_connector(connector)); struct drm_encoder *encoder = &intel_encoder->base; struct drm_atomic_state *state = old->restore_state; int ret; @@ -11116,7 +11882,7 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = pipe_config->cpu_transcoder; + enum pipe pipe = crtc->pipe; u32 dpll = pipe_config->dpll_hw_state.dpll; u32 fp; struct dpll clock; @@ -11219,8 +11985,8 @@ int intel_dotclock_calculate(int link_freq, return div_u64(mul_u32_u32(m_n->link_m, link_freq), m_n->link_n); } -static void ironlake_pch_clock_get(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void ilk_pch_clock_get(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -11232,11 +11998,38 @@ static void ironlake_pch_clock_get(struct intel_crtc *crtc, * we may need some idea for the dotclock anyway. * Calculate one based on the FDI configuration. 
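Right after this comment the code falls back to deriving a nominal dotclock from the FDI configuration with intel_dotclock_calculate(), which as shown above is simply link_freq * M / N. A standalone version of that ratio (the 270 MHz link and the M/N pair below are made-up example numbers):

#include <stdint.h>
#include <stdio.h>

static uint32_t dotclock(uint32_t link_freq_khz, uint32_t link_m, uint32_t link_n)
{
        if (!link_n)
                return 0;
        return (uint32_t)(((uint64_t)link_m * link_freq_khz) / link_n);
}

int main(void)
{
        /* e.g. an M/N ratio of 22/40 on a 270 MHz link gives 148.5 MHz */
        printf("%u kHz\n", dotclock(270000, 22, 40));
        return 0;
}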
*/ - pipe_config->base.adjusted_mode.crtc_clock = + pipe_config->hw.adjusted_mode.crtc_clock = intel_dotclock_calculate(intel_fdi_link_freq(dev_priv, pipe_config), &pipe_config->fdi_m_n); } +static void intel_crtc_state_reset(struct intel_crtc_state *crtc_state, + struct intel_crtc *crtc) +{ + memset(crtc_state, 0, sizeof(*crtc_state)); + + __drm_atomic_helper_crtc_state_reset(&crtc_state->uapi, &crtc->base); + + crtc_state->cpu_transcoder = INVALID_TRANSCODER; + crtc_state->master_transcoder = INVALID_TRANSCODER; + crtc_state->hsw_workaround_pipe = INVALID_PIPE; + crtc_state->output_format = INTEL_OUTPUT_FORMAT_INVALID; + crtc_state->scaler_state.scaler_id = -1; + crtc_state->mst_master_transcoder = INVALID_TRANSCODER; +} + +static struct intel_crtc_state *intel_crtc_state_alloc(struct intel_crtc *crtc) +{ + struct intel_crtc_state *crtc_state; + + crtc_state = kmalloc(sizeof(*crtc_state), GFP_KERNEL); + + if (crtc_state) + intel_crtc_state_reset(crtc_state, crtc); + + return crtc_state; +} + /* Returns the currently programmed mode of the given encoder. */ struct drm_display_mode * intel_encoder_current_mode(struct intel_encoder *encoder) @@ -11256,14 +12049,12 @@ intel_encoder_current_mode(struct intel_encoder *encoder) if (!mode) return NULL; - crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL); + crtc_state = intel_crtc_state_alloc(crtc); if (!crtc_state) { kfree(mode); return NULL; } - crtc_state->base.crtc = &crtc->base; - if (!dev_priv->display.get_pipe_config(crtc, crtc_state)) { kfree(crtc_state); kfree(mode); @@ -11297,22 +12088,22 @@ static void intel_crtc_destroy(struct drm_crtc *crtc) * * Returns true or false. */ -static bool intel_wm_need_update(struct intel_plane_state *cur, +static bool intel_wm_need_update(const struct intel_plane_state *cur, struct intel_plane_state *new) { /* Update watermarks on tiling or size changes. 
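The intel_crtc_state_alloc()/intel_crtc_state_reset() pair added a little above replaces bare kzalloc() use: several fields have non-zero "unset" values (INVALID_TRANSCODER, scaler_id == -1), so a freshly allocated state must be reset explicitly rather than relying on zero-fill. A stripped-down illustration of that pattern (the struct and sentinel values are simplified stand-ins):

#include <stdlib.h>
#include <string.h>

/* a couple of fields whose "unset" value is not zero, so plain zeroing
 * would leave them subtly wrong */
struct crtc_state {
        int cpu_transcoder;     /* -1 == invalid */
        int master_transcoder;  /* -1 == invalid */
        int scaler_id;          /* -1 == no scaler */
};

static void crtc_state_reset(struct crtc_state *s)
{
        memset(s, 0, sizeof(*s));
        s->cpu_transcoder = -1;
        s->master_transcoder = -1;
        s->scaler_id = -1;
}

static struct crtc_state *crtc_state_alloc(void)
{
        struct crtc_state *s = malloc(sizeof(*s));

        if (s)
                crtc_state_reset(s);
        return s;
}

int main(void)
{
        struct crtc_state *s = crtc_state_alloc();

        free(s);
        return 0;
}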
*/ - if (new->base.visible != cur->base.visible) + if (new->uapi.visible != cur->uapi.visible) return true; - if (!cur->base.fb || !new->base.fb) + if (!cur->hw.fb || !new->hw.fb) return false; - if (cur->base.fb->modifier != new->base.fb->modifier || - cur->base.rotation != new->base.rotation || - drm_rect_width(&new->base.src) != drm_rect_width(&cur->base.src) || - drm_rect_height(&new->base.src) != drm_rect_height(&cur->base.src) || - drm_rect_width(&new->base.dst) != drm_rect_width(&cur->base.dst) || - drm_rect_height(&new->base.dst) != drm_rect_height(&cur->base.dst)) + if (cur->hw.fb->modifier != new->hw.fb->modifier || + cur->hw.rotation != new->hw.rotation || + drm_rect_width(&new->uapi.src) != drm_rect_width(&cur->uapi.src) || + drm_rect_height(&new->uapi.src) != drm_rect_height(&cur->uapi.src) || + drm_rect_width(&new->uapi.dst) != drm_rect_width(&cur->uapi.dst) || + drm_rect_height(&new->uapi.dst) != drm_rect_height(&cur->uapi.dst)) return true; return false; @@ -11320,42 +12111,36 @@ static bool intel_wm_need_update(struct intel_plane_state *cur, static bool needs_scaling(const struct intel_plane_state *state) { - int src_w = drm_rect_width(&state->base.src) >> 16; - int src_h = drm_rect_height(&state->base.src) >> 16; - int dst_w = drm_rect_width(&state->base.dst); - int dst_h = drm_rect_height(&state->base.dst); + int src_w = drm_rect_width(&state->uapi.src) >> 16; + int src_h = drm_rect_height(&state->uapi.src) >> 16; + int dst_w = drm_rect_width(&state->uapi.dst); + int dst_h = drm_rect_height(&state->uapi.dst); return (src_w != dst_w || src_h != dst_h); } int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state, - struct drm_crtc_state *crtc_state, + struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, - struct drm_plane_state *plane_state) + struct intel_plane_state *plane_state) { - struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc_state); - struct drm_crtc *crtc = crtc_state->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_plane *plane = to_intel_plane(plane_state->plane); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); bool mode_changed = needs_modeset(crtc_state); - bool was_crtc_enabled = old_crtc_state->base.active; - bool is_crtc_enabled = crtc_state->active; + bool was_crtc_enabled = old_crtc_state->hw.active; + bool is_crtc_enabled = crtc_state->hw.active; bool turn_off, turn_on, visible, was_visible; - struct drm_framebuffer *fb = plane_state->fb; int ret; if (INTEL_GEN(dev_priv) >= 9 && plane->id != PLANE_CURSOR) { - ret = skl_update_scaler_plane( - to_intel_crtc_state(crtc_state), - to_intel_plane_state(plane_state)); + ret = skl_update_scaler_plane(crtc_state, plane_state); if (ret) return ret; } - was_visible = old_plane_state->base.visible; - visible = plane_state->visible; + was_visible = old_plane_state->uapi.visible; + visible = plane_state->uapi.visible; if (!was_crtc_enabled && WARN_ON(was_visible)) was_visible = false; @@ -11371,55 +12156,48 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat * only combine the results from all planes in the current place? 
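A little further down, the plane pre/post-update bookkeeping reduces to two booleans: turn_off = was_visible && (!visible || mode_changed) and turn_on = visible && (!was_visible || mode_changed). A runnable table of the combinations:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
        bool mode_changed = false;

        for (int was = 0; was <= 1; was++) {
                for (int now = 0; now <= 1; now++) {
                        bool turn_off = was && (!now || mode_changed);
                        bool turn_on = now && (!was || mode_changed);

                        printf("was %d now %d -> off %d on %d\n",
                               was, now, turn_off, turn_on);
                }
        }
        return 0;
}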
*/ if (!is_crtc_enabled) { - plane_state->visible = visible = false; - to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id); - to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0; + plane_state->uapi.visible = visible = false; + crtc_state->active_planes &= ~BIT(plane->id); + crtc_state->data_rate[plane->id] = 0; + crtc_state->min_cdclk[plane->id] = 0; } if (!was_visible && !visible) return 0; - if (fb != old_plane_state->base.fb) - pipe_config->fb_changed = true; - turn_off = was_visible && (!visible || mode_changed); turn_on = visible && (!was_visible || mode_changed); - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] has [PLANE:%d:%s] with fb %i\n", - intel_crtc->base.base.id, intel_crtc->base.name, - plane->base.base.id, plane->base.name, - fb ? fb->base.id : -1); - - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n", + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] with [PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n", + crtc->base.base.id, crtc->base.name, plane->base.base.id, plane->base.name, was_visible, visible, turn_off, turn_on, mode_changed); if (turn_on) { if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) - pipe_config->update_wm_pre = true; + crtc_state->update_wm_pre = true; /* must disable cxsr around plane enable/disable */ if (plane->id != PLANE_CURSOR) - pipe_config->disable_cxsr = true; + crtc_state->disable_cxsr = true; } else if (turn_off) { if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) - pipe_config->update_wm_post = true; + crtc_state->update_wm_post = true; /* must disable cxsr around plane enable/disable */ if (plane->id != PLANE_CURSOR) - pipe_config->disable_cxsr = true; - } else if (intel_wm_need_update(to_intel_plane_state(plane->base.state), - to_intel_plane_state(plane_state))) { + crtc_state->disable_cxsr = true; + } else if (intel_wm_need_update(old_plane_state, plane_state)) { if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) { /* FIXME bollocks */ - pipe_config->update_wm_pre = true; - pipe_config->update_wm_post = true; + crtc_state->update_wm_pre = true; + crtc_state->update_wm_post = true; } } if (visible || was_visible) - pipe_config->fb_bits |= plane->frontbuffer_bit; + crtc_state->fb_bits |= plane->frontbuffer_bit; /* * ILK/SNB DVSACNTR/Sprite Enable @@ -11458,8 +12236,8 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat (IS_GEN_RANGE(dev_priv, 5, 6) || IS_IVYBRIDGE(dev_priv)) && (turn_on || (!needs_scaling(old_plane_state) && - needs_scaling(to_intel_plane_state(plane_state))))) - pipe_config->disable_lp_wm = true; + needs_scaling(plane_state)))) + crtc_state->disable_lp_wm = true; return 0; } @@ -11501,7 +12279,7 @@ static int icl_add_linked_planes(struct intel_atomic_state *state) int i; for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - linked = plane_state->linked_plane; + linked = plane_state->planar_linked_plane; if (!linked) continue; @@ -11510,8 +12288,8 @@ static int icl_add_linked_planes(struct intel_atomic_state *state) if (IS_ERR(linked_plane_state)) return PTR_ERR(linked_plane_state); - WARN_ON(linked_plane_state->linked_plane != plane); - WARN_ON(linked_plane_state->slave == plane_state->slave); + WARN_ON(linked_plane_state->planar_linked_plane != plane); + WARN_ON(linked_plane_state->planar_slave == plane_state->planar_slave); } return 0; @@ -11519,9 +12297,9 @@ static int icl_add_linked_planes(struct intel_atomic_state *state) static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = 
to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->base.state); + struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); struct intel_plane *plane, *linked; struct intel_plane_state *plane_state; int i; @@ -11534,16 +12312,16 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) * in the crtc_state->active_planes mask. */ for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - if (plane->pipe != crtc->pipe || !plane_state->linked_plane) + if (plane->pipe != crtc->pipe || !plane_state->planar_linked_plane) continue; - plane_state->linked_plane = NULL; - if (plane_state->slave && !plane_state->base.visible) { + plane_state->planar_linked_plane = NULL; + if (plane_state->planar_slave && !plane_state->uapi.visible) { crtc_state->active_planes &= ~BIT(plane->id); crtc_state->update_planes |= BIT(plane->id); } - plane_state->slave = false; + plane_state->planar_slave = false; } if (!crtc_state->nv12_planes) @@ -11577,13 +12355,33 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) return -EINVAL; } - plane_state->linked_plane = linked; + plane_state->planar_linked_plane = linked; - linked_state->slave = true; - linked_state->linked_plane = plane; + linked_state->planar_slave = true; + linked_state->planar_linked_plane = plane; crtc_state->active_planes |= BIT(linked->id); crtc_state->update_planes |= BIT(linked->id); DRM_DEBUG_KMS("Using %s as Y plane for %s\n", linked->base.name, plane->base.name); + + /* Copy parameters to slave plane */ + linked_state->ctl = plane_state->ctl | PLANE_CTL_YUV420_Y_PLANE; + linked_state->color_ctl = plane_state->color_ctl; + linked_state->view = plane_state->view; + memcpy(linked_state->color_plane, plane_state->color_plane, + sizeof(linked_state->color_plane)); + + intel_plane_copy_uapi_to_hw_state(linked_state, plane_state); + linked_state->uapi.src = plane_state->uapi.src; + linked_state->uapi.dst = plane_state->uapi.dst; + + if (icl_is_hdr_plane(dev_priv, plane->id)) { + if (linked->id == PLANE_SPRITE5) + plane_state->cus_ctl |= PLANE_CUS_PLANE_7; + else if (linked->id == PLANE_SPRITE4) + plane_state->cus_ctl |= PLANE_CUS_PLANE_6; + else + MISSING_CASE(linked->id); + } } return 0; @@ -11591,34 +12389,150 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) static bool c8_planes_changed(const struct intel_crtc_state *new_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct intel_atomic_state *state = - to_intel_atomic_state(new_crtc_state->base.state); + to_intel_atomic_state(new_crtc_state->uapi.state); const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); return !old_crtc_state->c8_planes != !new_crtc_state->c8_planes; } -static int intel_crtc_atomic_check(struct drm_crtc *crtc, - struct drm_crtc_state *crtc_state) +static bool +intel_atomic_is_master_connector(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc_state); - int ret; + struct drm_crtc *crtc = crtc_state->uapi.crtc; + struct drm_atomic_state *state = crtc_state->uapi.state; + struct drm_connector 
*connector; + struct drm_connector_state *connector_state; + int i; + + for_each_new_connector_in_state(state, connector, connector_state, i) { + if (connector_state->crtc != crtc) + continue; + if (connector->has_tile && + connector->tile_h_loc == connector->num_h_tile - 1 && + connector->tile_v_loc == connector->num_v_tile - 1) + return true; + } + + return false; +} + +static void reset_port_sync_mode_state(struct intel_crtc_state *crtc_state) +{ + crtc_state->master_transcoder = INVALID_TRANSCODER; + crtc_state->sync_mode_slaves_mask = 0; +} + +static int icl_compute_port_sync_crtc_state(struct drm_connector *connector, + struct intel_crtc_state *crtc_state, + int num_tiled_conns) +{ + struct drm_crtc *crtc = crtc_state->uapi.crtc; + struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct drm_connector *master_connector; + struct drm_connector_list_iter conn_iter; + struct drm_crtc *master_crtc = NULL; + struct drm_crtc_state *master_crtc_state; + struct intel_crtc_state *master_pipe_config; + + if (INTEL_GEN(dev_priv) < 11) + return 0; + + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP)) + return 0; + + /* + * In case of tiled displays there could be one or more slaves but there is + * only one master. Lets make the CRTC used by the connector corresponding + * to the last horizonal and last vertical tile a master/genlock CRTC. + * All the other CRTCs corresponding to other tiles of the same Tile group + * are the slave CRTCs and hold a pointer to their genlock CRTC. + * If all tiles not present do not make master slave assignments. + */ + if (!connector->has_tile || + crtc_state->hw.mode.hdisplay != connector->tile_h_size || + crtc_state->hw.mode.vdisplay != connector->tile_v_size || + num_tiled_conns < connector->num_h_tile * connector->num_v_tile) { + reset_port_sync_mode_state(crtc_state); + return 0; + } + /* Last Horizontal and last vertical tile connector is a master + * Master's crtc state is already populated in slave for port sync + */ + if (connector->tile_h_loc == connector->num_h_tile - 1 && + connector->tile_v_loc == connector->num_v_tile - 1) + return 0; + + /* Loop through all connectors and configure the Slave crtc_state + * to point to the correct master. 
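As the comments above describe, the CRTC driving the connector at the last horizontal and last vertical tile position becomes the port-sync master and the remaining tiles become slaves, and no assignment is made unless every tile of the group is present. A compact restatement of that selection rule (standalone, with simplified fields):

#include <stdbool.h>
#include <stdio.h>

struct tile_conn {
        int h_loc, v_loc;       /* position within the tile group */
        int num_h, num_v;       /* group dimensions */
};

static bool is_port_sync_master(const struct tile_conn *c, int tiles_present)
{
        if (tiles_present < c->num_h * c->num_v)
                return false;   /* incomplete group: no master/slave setup */

        return c->h_loc == c->num_h - 1 && c->v_loc == c->num_v - 1;
}

int main(void)
{
        struct tile_conn left  = { 0, 0, 2, 1 };
        struct tile_conn right = { 1, 0, 2, 1 };

        printf("left master: %d, right master: %d\n",
               is_port_sync_master(&left, 2), is_port_sync_master(&right, 2));
        return 0;
}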
+ */ + drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_for_each_connector_iter(master_connector, &conn_iter) { + struct drm_connector_state *master_conn_state = NULL; + + if (!(master_connector->has_tile && + master_connector->tile_group->id == connector->tile_group->id)) + continue; + if (master_connector->tile_h_loc != master_connector->num_h_tile - 1 || + master_connector->tile_v_loc != master_connector->num_v_tile - 1) + continue; + + master_conn_state = drm_atomic_get_connector_state(&state->base, + master_connector); + if (IS_ERR(master_conn_state)) { + drm_connector_list_iter_end(&conn_iter); + return PTR_ERR(master_conn_state); + } + if (master_conn_state->crtc) { + master_crtc = master_conn_state->crtc; + break; + } + } + drm_connector_list_iter_end(&conn_iter); + + if (!master_crtc) { + DRM_DEBUG_KMS("Could not find Master CRTC for Slave CRTC %d\n", + crtc->base.id); + return -EINVAL; + } + + master_crtc_state = drm_atomic_get_crtc_state(&state->base, + master_crtc); + if (IS_ERR(master_crtc_state)) + return PTR_ERR(master_crtc_state); + + master_pipe_config = to_intel_crtc_state(master_crtc_state); + crtc_state->master_transcoder = master_pipe_config->cpu_transcoder; + master_pipe_config->sync_mode_slaves_mask |= + BIT(crtc_state->cpu_transcoder); + DRM_DEBUG_KMS("Master Transcoder = %s added for Slave CRTC = %d, slave transcoder bitmask = %d\n", + transcoder_name(crtc_state->master_transcoder), + crtc->base.id, + master_pipe_config->sync_mode_slaves_mask); + + return 0; +} + +static int intel_crtc_atomic_check(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); bool mode_changed = needs_modeset(crtc_state); + int ret; if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv) && - mode_changed && !crtc_state->active) - pipe_config->update_wm_post = true; + mode_changed && !crtc_state->hw.active) + crtc_state->update_wm_post = true; - if (mode_changed && crtc_state->enable && + if (mode_changed && crtc_state->hw.enable && dev_priv->display.crtc_compute_clock && - !WARN_ON(pipe_config->shared_dpll)) { - ret = dev_priv->display.crtc_compute_clock(intel_crtc, - pipe_config); + !WARN_ON(crtc_state->shared_dpll)) { + ret = dev_priv->display.crtc_compute_clock(crtc, crtc_state); if (ret) return ret; } @@ -11627,19 +12541,19 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, * May need to update pipe gamma enable bits * when C8 planes are getting enabled/disabled. */ - if (c8_planes_changed(pipe_config)) - crtc_state->color_mgmt_changed = true; + if (c8_planes_changed(crtc_state)) + crtc_state->uapi.color_mgmt_changed = true; - if (mode_changed || pipe_config->update_pipe || - crtc_state->color_mgmt_changed) { - ret = intel_color_check(pipe_config); + if (mode_changed || crtc_state->update_pipe || + crtc_state->uapi.color_mgmt_changed) { + ret = intel_color_check(crtc_state); if (ret) return ret; } ret = 0; if (dev_priv->display.compute_pipe_wm) { - ret = dev_priv->display.compute_pipe_wm(pipe_config); + ret = dev_priv->display.compute_pipe_wm(crtc_state); if (ret) { DRM_DEBUG_KMS("Target pipe watermarks are invalid\n"); return ret; @@ -11655,7 +12569,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, * old state and the new state. We can program these * immediately. 
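The comment above concerns "intermediate" watermarks that must stay valid for both the old and the new configuration so they can be programmed immediately. How the driver merges them is not part of this excerpt; one common way to express the idea (illustrative only) is to keep, per level, the more conservative of the two values:

#include <stdio.h>

#define LEVELS 5

static unsigned int max_u(unsigned int a, unsigned int b)
{
        return a > b ? a : b;
}

int main(void)
{
        unsigned int old_wm[LEVELS] = { 4, 8, 12, 20, 32 };
        unsigned int new_wm[LEVELS] = { 6, 7, 14, 18, 40 };
        unsigned int mid_wm[LEVELS];

        /* per level, keep a value that satisfies both configurations */
        for (int i = 0; i < LEVELS; i++) {
                mid_wm[i] = max_u(old_wm[i], new_wm[i]);
                printf("level %d: %u\n", i, mid_wm[i]);
        }
        return 0;
}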
*/ - ret = dev_priv->display.compute_intermediate_wm(pipe_config); + ret = dev_priv->display.compute_intermediate_wm(crtc_state); if (ret) { DRM_DEBUG_KMS("No valid intermediate pipe watermarks are possible\n"); return ret; @@ -11663,29 +12577,19 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, } if (INTEL_GEN(dev_priv) >= 9) { - if (mode_changed || pipe_config->update_pipe) - ret = skl_update_scaler_crtc(pipe_config); - - if (!ret) - ret = icl_check_nv12_planes(pipe_config); - if (!ret) - ret = skl_check_pipe_max_pixel_rate(intel_crtc, - pipe_config); + if (mode_changed || crtc_state->update_pipe) + ret = skl_update_scaler_crtc(crtc_state); if (!ret) - ret = intel_atomic_setup_scalers(dev_priv, intel_crtc, - pipe_config); + ret = intel_atomic_setup_scalers(dev_priv, crtc, + crtc_state); } if (HAS_IPS(dev_priv)) - pipe_config->ips_enabled = hsw_compute_ips_config(pipe_config); + crtc_state->ips_enabled = hsw_compute_ips_config(crtc_state); return ret; } -static const struct drm_crtc_helper_funcs intel_helper_funcs = { - .atomic_check = intel_crtc_atomic_check, -}; - static void intel_modeset_update_connector_atomic_state(struct drm_device *dev) { struct intel_connector *connector; @@ -11754,7 +12658,7 @@ compute_baseline_pipe_bpp(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct drm_atomic_state *state = pipe_config->base.state; + struct drm_atomic_state *state = pipe_config->uapi.state; struct drm_connector *connector; struct drm_connector_state *connector_state; int bpp, i; @@ -11811,7 +12715,7 @@ static void intel_dump_infoframe(struct drm_i915_private *dev_priv, const union hdmi_infoframe *frame) { - if ((drm_debug & DRM_UT_KMS) == 0) + if (!drm_debug_enabled(DRM_UT_KMS)) return; hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, frame); @@ -11879,14 +12783,14 @@ static const char *output_formats(enum intel_output_format format) static void intel_dump_plane_state(const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); - const struct drm_framebuffer *fb = plane_state->base.fb; + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + const struct drm_framebuffer *fb = plane_state->hw.fb; struct drm_format_name_buf format_name; if (!fb) { DRM_DEBUG_KMS("[PLANE:%d:%s] fb: [NOFB], visible: %s\n", plane->base.base.id, plane->base.name, - yesno(plane_state->base.visible)); + yesno(plane_state->uapi.visible)); return; } @@ -11894,20 +12798,20 @@ static void intel_dump_plane_state(const struct intel_plane_state *plane_state) plane->base.base.id, plane->base.name, fb->base.id, fb->width, fb->height, drm_get_format_name(fb->format->format, &format_name), - yesno(plane_state->base.visible)); + yesno(plane_state->uapi.visible)); DRM_DEBUG_KMS("\trotation: 0x%x, scaler: %d\n", - plane_state->base.rotation, plane_state->scaler_id); - if (plane_state->base.visible) + plane_state->hw.rotation, plane_state->scaler_id); + if (plane_state->uapi.visible) DRM_DEBUG_KMS("\tsrc: " DRM_RECT_FP_FMT " dst: " DRM_RECT_FMT "\n", - DRM_RECT_FP_ARG(&plane_state->base.src), - DRM_RECT_ARG(&plane_state->base.dst)); + DRM_RECT_FP_ARG(&plane_state->uapi.src), + DRM_RECT_ARG(&plane_state->uapi.dst)); } static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, struct intel_atomic_state *state, const char *context) { - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc *crtc = 
to_intel_crtc(pipe_config->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct intel_plane_state *plane_state; struct intel_plane *plane; @@ -11916,14 +12820,14 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, DRM_DEBUG_KMS("[CRTC:%d:%s] enable: %s %s\n", crtc->base.base.id, crtc->base.name, - yesno(pipe_config->base.enable), context); + yesno(pipe_config->hw.enable), context); - if (!pipe_config->base.enable) + if (!pipe_config->hw.enable) goto dump_planes; snprintf_output_types(buf, sizeof(buf), pipe_config->output_types); DRM_DEBUG_KMS("active: %s, output_types: %s (0x%x), output format: %s\n", - yesno(pipe_config->base.active), + yesno(pipe_config->hw.active), buf, pipe_config->output_types, output_formats(pipe_config->output_format)); @@ -11963,10 +12867,10 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, intel_dump_infoframe(dev_priv, &pipe_config->infoframes.hdmi); DRM_DEBUG_KMS("requested mode:\n"); - drm_mode_debug_printmodeline(&pipe_config->base.mode); + drm_mode_debug_printmodeline(&pipe_config->hw.mode); DRM_DEBUG_KMS("adjusted mode:\n"); - drm_mode_debug_printmodeline(&pipe_config->base.adjusted_mode); - intel_dump_crtc_timings(&pipe_config->base.adjusted_mode); + drm_mode_debug_printmodeline(&pipe_config->hw.adjusted_mode); + intel_dump_crtc_timings(&pipe_config->hw.adjusted_mode); DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d, pixel rate %d\n", pipe_config->port_clock, pipe_config->pipe_src_w, pipe_config->pipe_src_h, @@ -11995,6 +12899,18 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, intel_dpll_dump_hw_state(dev_priv, &pipe_config->dpll_hw_state); + if (IS_CHERRYVIEW(dev_priv)) + DRM_DEBUG_KMS("cgm_mode: 0x%x gamma_mode: 0x%x gamma_enable: %d csc_enable: %d\n", + pipe_config->cgm_mode, pipe_config->gamma_mode, + pipe_config->gamma_enable, pipe_config->csc_enable); + else + DRM_DEBUG_KMS("csc_mode: 0x%x gamma_mode: 0x%x gamma_enable: %d csc_enable: %d\n", + pipe_config->csc_mode, pipe_config->gamma_mode, + pipe_config->gamma_enable, pipe_config->csc_enable); + + DRM_DEBUG_KMS("MST master transcoder: %s\n", + transcoder_name(pipe_config->mst_master_transcoder)); + dump_planes: if (!state) return; @@ -12015,6 +12931,12 @@ static bool check_digital_port_conflicts(struct intel_atomic_state *state) bool ret = true; /* + * We're going to peek into connector->state, + * hence connection_mutex must be held. + */ + drm_modeset_lock_assert_held(&dev->mode_config.connection_mutex); + + /* * Walk the connector list instead of the encoder * list to detect the problem on ddi platforms * where there's just one encoder per digital port. 
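The helpers introduced in the next hunk formalize the split running through this whole patch: crtc_state->uapi holds what userspace requested, crtc_state->hw holds what is (or was) actually programmed, and the two are synchronized explicitly, uapi to hw when computing a modeset and hw to uapi after hardware readout. A miniature of that mirroring (field names trimmed down, purely illustrative):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct mode { int hdisplay, vdisplay, clock; };

struct crtc_state {
        struct { bool active; struct mode mode; } uapi; /* what userspace asked for */
        struct { bool active; struct mode mode; } hw;   /* what gets programmed */
};

static void copy_uapi_to_hw(struct crtc_state *s)
{
        s->hw.active = s->uapi.active;
        s->hw.mode = s->uapi.mode;
}

static void copy_hw_to_uapi(struct crtc_state *s)      /* e.g. after readout */
{
        s->uapi.active = s->hw.active;
        s->uapi.mode = s->hw.mode;
}

int main(void)
{
        struct crtc_state s;

        memset(&s, 0, sizeof(s));
        s.uapi.active = true;
        s.uapi.mode = (struct mode){ 1920, 1080, 148500 };

        copy_uapi_to_hw(&s);
        printf("hw: %dx%d@%dkHz active=%d\n",
               s.hw.mode.hdisplay, s.hw.mode.vdisplay, s.hw.mode.clock, s.hw.active);

        copy_hw_to_uapi(&s);    /* the readout direction, shown for symmetry */
        return 0;
}

The point of keeping two copies is that readout and internal adjustments can update hw without silently rewriting the uapi values userspace asked for.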
@@ -12071,55 +12993,98 @@ static bool check_digital_port_conflicts(struct intel_atomic_state *state) return ret; } +static void +intel_crtc_copy_uapi_to_hw_state_nomodeset(struct intel_crtc_state *crtc_state) +{ + intel_crtc_copy_color_blobs(crtc_state); +} + +static void +intel_crtc_copy_uapi_to_hw_state(struct intel_crtc_state *crtc_state) +{ + crtc_state->hw.enable = crtc_state->uapi.enable; + crtc_state->hw.active = crtc_state->uapi.active; + crtc_state->hw.mode = crtc_state->uapi.mode; + crtc_state->hw.adjusted_mode = crtc_state->uapi.adjusted_mode; + intel_crtc_copy_uapi_to_hw_state_nomodeset(crtc_state); +} + +static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state) +{ + crtc_state->uapi.enable = crtc_state->hw.enable; + crtc_state->uapi.active = crtc_state->hw.active; + WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->uapi, &crtc_state->hw.mode) < 0); + + crtc_state->uapi.adjusted_mode = crtc_state->hw.adjusted_mode; + + /* copy color blobs to uapi */ + drm_property_replace_blob(&crtc_state->uapi.degamma_lut, + crtc_state->hw.degamma_lut); + drm_property_replace_blob(&crtc_state->uapi.gamma_lut, + crtc_state->hw.gamma_lut); + drm_property_replace_blob(&crtc_state->uapi.ctm, + crtc_state->hw.ctm); +} + static int -clear_intel_crtc_state(struct intel_crtc_state *crtc_state) +intel_crtc_prepare_cleared_state(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = - to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_crtc_state *saved_state; - saved_state = kzalloc(sizeof(*saved_state), GFP_KERNEL); + saved_state = intel_crtc_state_alloc(crtc); if (!saved_state) return -ENOMEM; + /* free the old crtc_state->hw members */ + intel_crtc_free_hw_state(crtc_state); + /* FIXME: before the switch to atomic started, a new pipe_config was * kzalloc'd. Code that depends on any field being zero should be * fixed, so that the crtc_state can be safely duplicated. For now, * only fields that are know to not cause problems are preserved. */ + saved_state->uapi = crtc_state->uapi; saved_state->scaler_state = crtc_state->scaler_state; saved_state->shared_dpll = crtc_state->shared_dpll; saved_state->dpll_hw_state = crtc_state->dpll_hw_state; + memcpy(saved_state->icl_port_dplls, crtc_state->icl_port_dplls, + sizeof(saved_state->icl_port_dplls)); saved_state->crc_enabled = crtc_state->crc_enabled; if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) saved_state->wm = crtc_state->wm; + /* + * Save the slave bitmask which gets filled for master crtc state during + * slave atomic check call. 
For all other CRTCs reset the port sync variables + * crtc_state->master_transcoder needs to be set to INVALID + */ + reset_port_sync_mode_state(saved_state); + if (intel_atomic_is_master_connector(crtc_state)) + saved_state->sync_mode_slaves_mask = + crtc_state->sync_mode_slaves_mask; - /* Keep base drm_crtc_state intact, only clear our extended struct */ - BUILD_BUG_ON(offsetof(struct intel_crtc_state, base)); - memcpy(&crtc_state->base + 1, &saved_state->base + 1, - sizeof(*crtc_state) - sizeof(crtc_state->base)); - + memcpy(crtc_state, saved_state, sizeof(*crtc_state)); kfree(saved_state); + + intel_crtc_copy_uapi_to_hw_state(crtc_state); + return 0; } static int intel_modeset_pipe_config(struct intel_crtc_state *pipe_config) { - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_atomic_state *state = pipe_config->base.state; + struct drm_crtc *crtc = pipe_config->uapi.crtc; + struct drm_atomic_state *state = pipe_config->uapi.state; struct intel_encoder *encoder; struct drm_connector *connector; struct drm_connector_state *connector_state; int base_bpp, ret; - int i; + int i, tile_group_id = -1, num_tiled_conns = 0; bool retry = true; - ret = clear_intel_crtc_state(pipe_config); - if (ret) - return ret; - pipe_config->cpu_transcoder = (enum transcoder) to_intel_crtc(crtc)->pipe; @@ -12128,13 +13093,13 @@ intel_modeset_pipe_config(struct intel_crtc_state *pipe_config) * positive or negative polarity is requested, treat this as meaning * negative polarity. */ - if (!(pipe_config->base.adjusted_mode.flags & + if (!(pipe_config->hw.adjusted_mode.flags & (DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NHSYNC))) - pipe_config->base.adjusted_mode.flags |= DRM_MODE_FLAG_NHSYNC; + pipe_config->hw.adjusted_mode.flags |= DRM_MODE_FLAG_NHSYNC; - if (!(pipe_config->base.adjusted_mode.flags & + if (!(pipe_config->hw.adjusted_mode.flags & (DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_NVSYNC))) - pipe_config->base.adjusted_mode.flags |= DRM_MODE_FLAG_NVSYNC; + pipe_config->hw.adjusted_mode.flags |= DRM_MODE_FLAG_NVSYNC; ret = compute_baseline_pipe_bpp(to_intel_crtc(crtc), pipe_config); @@ -12151,7 +13116,7 @@ intel_modeset_pipe_config(struct intel_crtc_state *pipe_config) * computation to clearly distinguish it from the adjusted mode, which * can be changed by the connectors in the below retry loop. */ - drm_mode_get_hv_timing(&pipe_config->base.mode, + drm_mode_get_hv_timing(&pipe_config->hw.mode, &pipe_config->pipe_src_w, &pipe_config->pipe_src_h); @@ -12184,9 +13149,27 @@ encoder_retry: pipe_config->pixel_multiplier = 1; /* Fill in default crtc timings, allow encoders to overwrite them. */ - drm_mode_set_crtcinfo(&pipe_config->base.adjusted_mode, + drm_mode_set_crtcinfo(&pipe_config->hw.adjusted_mode, CRTC_STEREO_DOUBLE); + /* Get tile_group_id of tiled connector */ + for_each_new_connector_in_state(state, connector, connector_state, i) { + if (connector_state->crtc == crtc && + connector->has_tile) { + tile_group_id = connector->tile_group->id; + break; + } + } + + /* Get total number of tiled connectors in state that belong to + * this tile group. + */ + for_each_new_connector_in_state(state, connector, connector_state, i) { + if (connector->has_tile && + connector->tile_group->id == tile_group_id) + num_tiled_conns++; + } + /* Pass our mode to the connectors and the CRTC to give them a chance to * adjust it according to limitations or connector properties, and also * a chance to reject the mode entirely. 
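
The intel_crtc_prepare_cleared_state() hunk above follows a "reset, but preserve a whitelist" pattern: allocate a fresh zeroed state, copy over only the fields that are known to survive a modeset computation (the uapi view, scaler state, shared DPLL, watermarks, ...), overwrite the old state wholesale, and then rebuild the hw view from uapi. A rough, self-contained user-space C sketch of just that pattern follows; toy_crtc_state and toy_prepare_cleared_state are invented names, not the i915 code, and error handling is trimmed to the essentials.

/* Sketch of the "clear, but preserve a whitelist of fields" pattern.
 * Plain user-space C with invented names; not the kernel implementation. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_crtc_state {
	int uapi_mode;        /* stands in for the whole uapi sub-struct */
	int shared_dpll;      /* must survive the reset */
	int wm;               /* must survive the reset */
	int derived_stuff;    /* everything else is recomputed from scratch */
};

static int toy_prepare_cleared_state(struct toy_crtc_state *state)
{
	struct toy_crtc_state *saved;

	saved = calloc(1, sizeof(*saved));   /* fresh, fully zeroed state */
	if (!saved)
		return -1;

	/* Preserve only the fields known to be safe to carry over. */
	saved->uapi_mode = state->uapi_mode;
	saved->shared_dpll = state->shared_dpll;
	saved->wm = state->wm;

	/* Replace the old state wholesale; anything not whitelisted is now 0. */
	memcpy(state, saved, sizeof(*state));
	free(saved);
	return 0;
}

int main(void)
{
	struct toy_crtc_state s = { .uapi_mode = 1080, .shared_dpll = 3,
				    .wm = 7, .derived_stuff = 99 };

	toy_prepare_cleared_state(&s);
	/* derived_stuff comes back as 0, the whitelisted fields survive */
	printf("mode=%d dpll=%d wm=%d derived=%d\n",
	       s.uapi_mode, s.shared_dpll, s.wm, s.derived_stuff);
	return 0;
}
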
@@ -12195,6 +13178,14 @@ encoder_retry: if (connector_state->crtc != crtc) continue; + ret = icl_compute_port_sync_crtc_state(connector, pipe_config, + num_tiled_conns); + if (ret) { + DRM_DEBUG_KMS("Cannot assign Sync Mode CRTCs: %d\n", + ret); + return ret; + } + encoder = to_intel_encoder(connector_state->best_encoder); ret = encoder->compute_config(encoder, pipe_config, connector_state); @@ -12209,7 +13200,7 @@ encoder_retry: /* Set default port clock if not overwritten by the encoder. Needs to be * done afterwards in case the encoder adjusts the mode. */ if (!pipe_config->port_clock) - pipe_config->port_clock = pipe_config->base.adjusted_mode.crtc_clock + pipe_config->port_clock = pipe_config->hw.adjusted_mode.crtc_clock * pipe_config->pixel_multiplier; ret = intel_crtc_compute_config(to_intel_crtc(crtc), pipe_config); @@ -12238,6 +13229,12 @@ encoder_retry: DRM_DEBUG_KMS("hw max bpp: %i, pipe bpp: %i, dithering: %i\n", base_bpp, pipe_config->pipe_bpp, pipe_config->dither); + /* + * Make drm_calc_timestamping_constants in + * drm_atomic_helper_update_legacy_modeset_state() happy + */ + pipe_config->uapi.adjusted_mode = pipe_config->hw.adjusted_mode; + return 0; } @@ -12316,25 +13313,26 @@ pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv, const union hdmi_infoframe *b) { if (fastset) { - if ((drm_debug & DRM_UT_KMS) == 0) + if (!drm_debug_enabled(DRM_UT_KMS)) return; - drm_dbg(DRM_UT_KMS, "fastset mismatch in %s infoframe", name); - drm_dbg(DRM_UT_KMS, "expected:"); + DRM_DEBUG_KMS("fastset mismatch in %s infoframe\n", name); + DRM_DEBUG_KMS("expected:\n"); hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, a); - drm_dbg(DRM_UT_KMS, "found"); + DRM_DEBUG_KMS("found:\n"); hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, b); } else { - drm_err("mismatch in %s infoframe", name); - drm_err("expected:"); + DRM_ERROR("mismatch in %s infoframe\n", name); + DRM_ERROR("expected:\n"); hdmi_infoframe_log(KERN_ERR, dev_priv->drm.dev, a); - drm_err("found"); + DRM_ERROR("found:\n"); hdmi_infoframe_log(KERN_ERR, dev_priv->drm.dev, b); } } -static void __printf(3, 4) -pipe_config_mismatch(bool fastset, const char *name, const char *format, ...) +static void __printf(4, 5) +pipe_config_mismatch(bool fastset, const struct intel_crtc *crtc, + const char *name, const char *format, ...) { struct va_format vaf; va_list args; @@ -12344,9 +13342,11 @@ pipe_config_mismatch(bool fastset, const char *name, const char *format, ...) 
vaf.va = &args; if (fastset) - drm_dbg(DRM_UT_KMS, "fastset mismatch in %s %pV", name, &vaf); + DRM_DEBUG_KMS("[CRTC:%d:%s] fastset mismatch in %s %pV\n", + crtc->base.base.id, crtc->base.name, name, &vaf); else - drm_err("mismatch in %s %pV", name, &vaf); + DRM_ERROR("[CRTC:%d:%s] mismatch in %s %pV\n", + crtc->base.base.id, crtc->base.name, name, &vaf); va_end(args); } @@ -12373,11 +13373,13 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, const struct intel_crtc_state *pipe_config, bool fastset) { - struct drm_i915_private *dev_priv = to_i915(current_config->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(current_config->uapi.crtc->dev); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); bool ret = true; + u32 bp_gamma = 0; bool fixup_inherited = fastset && - (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && - !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); + (current_config->hw.mode.private_flags & I915_MODE_FLAG_INHERITED) && + !(pipe_config->hw.mode.private_flags & I915_MODE_FLAG_INHERITED); if (fixup_inherited && !fastboot_enabled(dev_priv)) { DRM_DEBUG_KMS("initial modeset and fastboot not set\n"); @@ -12386,8 +13388,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(expected 0x%08x, found 0x%08x)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(expected 0x%08x, found 0x%08x)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12396,8 +13398,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_I(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %i, found %i)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(expected %i, found %i)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12406,8 +13408,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_BOOL(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %s, found %s)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(expected %s, found %s)", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ @@ -12423,8 +13425,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ PIPE_CONF_CHECK_BOOL(name); \ } else { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ @@ -12433,8 +13435,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_P(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %p, found %p)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(expected %p, found %p)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ 
-12445,9 +13447,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, if (!intel_compare_link_m_n(¤t_config->name, \ &pipe_config->name,\ !fastset)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected tu %i gmch %i/%i link %i/%i, " \ - "found tu %i, gmch %i/%i link %i/%i)\n", \ + "found tu %i, gmch %i/%i link %i/%i)", \ current_config->name.tu, \ current_config->name.gmch_m, \ current_config->name.gmch_n, \ @@ -12472,10 +13474,10 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, &pipe_config->name, !fastset) && \ !intel_compare_link_m_n(¤t_config->alt_name, \ &pipe_config->name, !fastset)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected tu %i gmch %i/%i link %i/%i, " \ "or tu %i gmch %i/%i link %i/%i, " \ - "found tu %i, gmch %i/%i link %i/%i)\n", \ + "found tu %i, gmch %i/%i link %i/%i)", \ current_config->name.tu, \ current_config->name.gmch_m, \ current_config->name.gmch_n, \ @@ -12497,8 +13499,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_FLAGS(name, mask) do { \ if ((current_config->name ^ pipe_config->name) & (mask)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(%x) (expected %i, found %i)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(%x) (expected %i, found %i)", \ (mask), \ current_config->name & (mask), \ pipe_config->name & (mask)); \ @@ -12508,8 +13510,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \ if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %i, found %i)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(expected %i, found %i)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12526,6 +13528,24 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, } \ } while (0) +#define PIPE_CONF_CHECK_COLOR_LUT(name1, name2, bit_precision) do { \ + if (current_config->name1 != pipe_config->name1) { \ + pipe_config_mismatch(fastset, crtc, __stringify(name1), \ + "(expected %i, found %i, won't compare lut values)", \ + current_config->name1, \ + pipe_config->name1); \ + ret = false;\ + } else { \ + if (!intel_color_lut_equal(current_config->name2, \ + pipe_config->name2, pipe_config->name1, \ + bit_precision)) { \ + pipe_config_mismatch(fastset, crtc, __stringify(name2), \ + "hw_state doesn't match sw_state"); \ + ret = false; \ + } \ + } \ +} while (0) + #define PIPE_CONF_QUIRK(quirk) \ ((current_config->quirks | pipe_config->quirks) & (quirk)) @@ -12548,22 +13568,23 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_X(output_types); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hdisplay); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_htotal); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hblank_start); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hblank_end); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hsync_start); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hsync_end); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_hdisplay); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_htotal); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_hblank_start); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_hblank_end); + 
PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_hsync_start); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_hsync_end); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vdisplay); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vtotal); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vblank_start); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vblank_end); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vsync_start); - PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vsync_end); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vdisplay); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vtotal); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vblank_start); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vblank_end); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vsync_start); + PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vsync_end); PIPE_CONF_CHECK_I(pixel_multiplier); PIPE_CONF_CHECK_I(output_format); + PIPE_CONF_CHECK_I(dc3co_exitline); PIPE_CONF_CHECK_BOOL(has_hdmi_sink); if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -12572,20 +13593,21 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_BOOL(hdmi_scrambling); PIPE_CONF_CHECK_BOOL(hdmi_high_tmds_clock_ratio); PIPE_CONF_CHECK_BOOL(has_infoframe); + PIPE_CONF_CHECK_BOOL(fec_enable); PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_audio); - PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags, + PIPE_CONF_CHECK_FLAGS(hw.adjusted_mode.flags, DRM_MODE_FLAG_INTERLACE); if (!PIPE_CONF_QUIRK(PIPE_CONFIG_QUIRK_MODE_SYNC_FLAGS)) { - PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags, + PIPE_CONF_CHECK_FLAGS(hw.adjusted_mode.flags, DRM_MODE_FLAG_PHSYNC); - PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags, + PIPE_CONF_CHECK_FLAGS(hw.adjusted_mode.flags, DRM_MODE_FLAG_NHSYNC); - PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags, + PIPE_CONF_CHECK_FLAGS(hw.adjusted_mode.flags, DRM_MODE_FLAG_PVSYNC); - PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags, + PIPE_CONF_CHECK_FLAGS(hw.adjusted_mode.flags, DRM_MODE_FLAG_NVSYNC); } @@ -12621,6 +13643,11 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_X(csc_mode); PIPE_CONF_CHECK_BOOL(gamma_enable); PIPE_CONF_CHECK_BOOL(csc_enable); + + bp_gamma = intel_color_get_gamma_bit_precision(pipe_config); + if (bp_gamma) + PIPE_CONF_CHECK_COLOR_LUT(gamma_mode, hw.gamma_lut, bp_gamma); + } PIPE_CONF_CHECK_BOOL(double_wide); @@ -12664,7 +13691,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, if (IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) PIPE_CONF_CHECK_I(pipe_bpp); - PIPE_CONF_CHECK_CLOCK_FUZZY(base.adjusted_mode.crtc_clock); + PIPE_CONF_CHECK_CLOCK_FUZZY(hw.adjusted_mode.crtc_clock); PIPE_CONF_CHECK_CLOCK_FUZZY(port_clock); PIPE_CONF_CHECK_I(min_voltage_level); @@ -12676,6 +13703,15 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_INFOFRAME(hdmi); PIPE_CONF_CHECK_INFOFRAME(drm); + PIPE_CONF_CHECK_I(sync_mode_slaves_mask); + PIPE_CONF_CHECK_I(master_transcoder); + + PIPE_CONF_CHECK_I(dsc.compression_enable); + PIPE_CONF_CHECK_I(dsc.dsc_split); + PIPE_CONF_CHECK_I(dsc.compressed_bpp); + + PIPE_CONF_CHECK_I(mst_master_transcoder); + #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I #undef PIPE_CONF_CHECK_BOOL @@ -12683,6 +13719,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #undef PIPE_CONF_CHECK_P #undef PIPE_CONF_CHECK_FLAGS #undef PIPE_CONF_CHECK_CLOCK_FUZZY +#undef PIPE_CONF_CHECK_COLOR_LUT #undef PIPE_CONF_QUIRK return ret; @@ -12694,7 +13731,7 @@ static void 
intel_pipe_config_sanity_check(struct drm_i915_private *dev_priv, if (pipe_config->has_pch_encoder) { int fdi_dotclock = intel_dotclock_calculate(intel_fdi_link_freq(dev_priv, pipe_config), &pipe_config->fdi_m_n); - int dotclock = pipe_config->base.adjusted_mode.crtc_clock; + int dotclock = pipe_config->hw.adjusted_mode.crtc_clock; /* * FDI already provided one idea for the dotclock. @@ -12706,10 +13743,10 @@ static void intel_pipe_config_sanity_check(struct drm_i915_private *dev_priv, } } -static void verify_wm_state(struct drm_crtc *crtc, - struct drm_crtc_state *new_state) +static void verify_wm_state(struct intel_crtc *crtc, + struct intel_crtc_state *new_crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct skl_hw_state { struct skl_ddb_entry ddb_y[I915_MAX_PLANES]; struct skl_ddb_entry ddb_uv[I915_MAX_PLANES]; @@ -12719,21 +13756,20 @@ static void verify_wm_state(struct drm_crtc *crtc, struct skl_ddb_allocation *sw_ddb; struct skl_pipe_wm *sw_wm; struct skl_ddb_entry *hw_ddb_entry, *sw_ddb_entry; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - const enum pipe pipe = intel_crtc->pipe; + const enum pipe pipe = crtc->pipe; int plane, level, max_level = ilk_wm_max_level(dev_priv); - if (INTEL_GEN(dev_priv) < 9 || !new_state->active) + if (INTEL_GEN(dev_priv) < 9 || !new_crtc_state->hw.active) return; hw = kzalloc(sizeof(*hw), GFP_KERNEL); if (!hw) return; - skl_pipe_wm_get_hw_state(intel_crtc, &hw->wm); - sw_wm = &to_intel_crtc_state(new_state)->wm.skl.optimal; + skl_pipe_wm_get_hw_state(crtc, &hw->wm); + sw_wm = &new_crtc_state->wm.skl.optimal; - skl_pipe_ddb_get_hw_state(intel_crtc, hw->ddb_y, hw->ddb_uv); + skl_pipe_ddb_get_hw_state(crtc, hw->ddb_y, hw->ddb_uv); skl_ddb_get_hw_state(dev_priv, &hw->ddb); sw_ddb = &dev_priv->wm.skl_hw.ddb; @@ -12781,7 +13817,7 @@ static void verify_wm_state(struct drm_crtc *crtc, /* DDB */ hw_ddb_entry = &hw->ddb_y[plane]; - sw_ddb_entry = &to_intel_crtc_state(new_state)->wm.skl.plane_ddb_y[plane]; + sw_ddb_entry = &new_crtc_state->wm.skl.plane_ddb_y[plane]; if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) { DRM_ERROR("mismatch in DDB state pipe %c plane %d (expected (%u,%u), found (%u,%u))\n", @@ -12833,7 +13869,7 @@ static void verify_wm_state(struct drm_crtc *crtc, /* DDB */ hw_ddb_entry = &hw->ddb_y[PLANE_CURSOR]; - sw_ddb_entry = &to_intel_crtc_state(new_state)->wm.skl.plane_ddb_y[PLANE_CURSOR]; + sw_ddb_entry = &new_crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR]; if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) { DRM_ERROR("mismatch in DDB state pipe %c cursor (expected (%u,%u), found (%u,%u))\n", @@ -12847,23 +13883,22 @@ static void verify_wm_state(struct drm_crtc *crtc, } static void -verify_connector_state(struct drm_device *dev, - struct drm_atomic_state *state, - struct drm_crtc *crtc) +verify_connector_state(struct intel_atomic_state *state, + struct intel_crtc *crtc) { struct drm_connector *connector; struct drm_connector_state *new_conn_state; int i; - for_each_new_connector_in_state(state, connector, new_conn_state, i) { + for_each_new_connector_in_state(&state->base, connector, new_conn_state, i) { struct drm_encoder *encoder = connector->encoder; - struct drm_crtc_state *crtc_state = NULL; + struct intel_crtc_state *crtc_state = NULL; - if (new_conn_state->crtc != crtc) + if (new_conn_state->crtc != &crtc->base) continue; if (crtc) - crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc); + crtc_state = 
intel_atomic_get_new_crtc_state(state, crtc); intel_connector_verify_state(crtc_state, new_conn_state); @@ -12873,14 +13908,14 @@ verify_connector_state(struct drm_device *dev, } static void -verify_encoder_state(struct drm_device *dev, struct drm_atomic_state *state) +verify_encoder_state(struct drm_i915_private *dev_priv, struct intel_atomic_state *state) { struct intel_encoder *encoder; struct drm_connector *connector; struct drm_connector_state *old_conn_state, *new_conn_state; int i; - for_each_intel_encoder(dev, encoder) { + for_each_intel_encoder(&dev_priv->drm, encoder) { bool enabled = false, found = false; enum pipe pipe; @@ -12888,7 +13923,7 @@ verify_encoder_state(struct drm_device *dev, struct drm_atomic_state *state) encoder->base.base.id, encoder->base.name); - for_each_oldnew_connector_in_state(state, connector, old_conn_state, + for_each_oldnew_connector_in_state(&state->base, connector, old_conn_state, new_conn_state, i) { if (old_conn_state->best_encoder == &encoder->base) found = true; @@ -12922,50 +13957,50 @@ verify_encoder_state(struct drm_device *dev, struct drm_atomic_state *state) } static void -verify_crtc_state(struct drm_crtc *crtc, - struct drm_crtc_state *old_crtc_state, - struct drm_crtc_state *new_crtc_state) +verify_crtc_state(struct intel_crtc *crtc, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_encoder *encoder; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_crtc_state *pipe_config, *sw_config; - struct drm_atomic_state *old_state; + struct intel_crtc_state *pipe_config = old_crtc_state; + struct drm_atomic_state *state = old_crtc_state->uapi.state; bool active; - old_state = old_crtc_state->state; - __drm_atomic_helper_crtc_destroy_state(old_crtc_state); - pipe_config = to_intel_crtc_state(old_crtc_state); - memset(pipe_config, 0, sizeof(*pipe_config)); - pipe_config->base.crtc = crtc; - pipe_config->base.state = old_state; + __drm_atomic_helper_crtc_destroy_state(&old_crtc_state->uapi); + intel_crtc_free_hw_state(old_crtc_state); + intel_crtc_state_reset(old_crtc_state, crtc); + old_crtc_state->uapi.state = state; - DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name); + DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.base.id, crtc->base.name); - active = dev_priv->display.get_pipe_config(intel_crtc, pipe_config); + active = dev_priv->display.get_pipe_config(crtc, pipe_config); /* we keep both pipes enabled on 830 */ if (IS_I830(dev_priv)) - active = new_crtc_state->active; + active = new_crtc_state->hw.active; - I915_STATE_WARN(new_crtc_state->active != active, - "crtc active state doesn't match with hw state " - "(expected %i, found %i)\n", new_crtc_state->active, active); + I915_STATE_WARN(new_crtc_state->hw.active != active, + "crtc active state doesn't match with hw state " + "(expected %i, found %i)\n", + new_crtc_state->hw.active, active); - I915_STATE_WARN(intel_crtc->active != new_crtc_state->active, - "transitional active state does not match atomic hw state " - "(expected %i, found %i)\n", new_crtc_state->active, intel_crtc->active); + I915_STATE_WARN(crtc->active != new_crtc_state->hw.active, + "transitional active state does not match atomic hw state " + "(expected %i, found %i)\n", + new_crtc_state->hw.active, crtc->active); - for_each_encoder_on_crtc(dev, crtc, encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, encoder) { enum 
pipe pipe; active = encoder->get_hw_state(encoder, &pipe); - I915_STATE_WARN(active != new_crtc_state->active, - "[ENCODER:%i] active %i with crtc active %i\n", - encoder->base.base.id, active, new_crtc_state->active); + I915_STATE_WARN(active != new_crtc_state->hw.active, + "[ENCODER:%i] active %i with crtc active %i\n", + encoder->base.base.id, active, + new_crtc_state->hw.active); - I915_STATE_WARN(active && intel_crtc->pipe != pipe, + I915_STATE_WARN(active && crtc->pipe != pipe, "Encoder connected to wrong pipe %c\n", pipe_name(pipe)); @@ -12975,16 +14010,16 @@ verify_crtc_state(struct drm_crtc *crtc, intel_crtc_compute_pixel_rate(pipe_config); - if (!new_crtc_state->active) + if (!new_crtc_state->hw.active) return; intel_pipe_config_sanity_check(dev_priv, pipe_config); - sw_config = to_intel_crtc_state(new_crtc_state); - if (!intel_pipe_config_compare(sw_config, pipe_config, false)) { + if (!intel_pipe_config_compare(new_crtc_state, + pipe_config, false)) { I915_STATE_WARN(1, "pipe state doesn't match!\n"); intel_dump_pipe_config(pipe_config, NULL, "[hw state]"); - intel_dump_pipe_config(sw_config, NULL, "[sw state]"); + intel_dump_pipe_config(new_crtc_state, NULL, "[sw state]"); } } @@ -12997,15 +14032,15 @@ intel_verify_planes(struct intel_atomic_state *state) for_each_new_intel_plane_in_state(state, plane, plane_state, i) - assert_plane(plane, plane_state->slave || - plane_state->base.visible); + assert_plane(plane, plane_state->planar_slave || + plane_state->uapi.visible); } static void verify_single_dpll_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, - struct drm_crtc *crtc, - struct drm_crtc_state *new_state) + struct intel_crtc *crtc, + struct intel_crtc_state *new_crtc_state) { struct intel_dpll_hw_state dpll_hw_state; unsigned int crtc_mask; @@ -13035,16 +14070,16 @@ verify_single_dpll_state(struct drm_i915_private *dev_priv, return; } - crtc_mask = drm_crtc_mask(crtc); + crtc_mask = drm_crtc_mask(&crtc->base); - if (new_state->active) + if (new_crtc_state->hw.active) I915_STATE_WARN(!(pll->active_mask & crtc_mask), "pll active mismatch (expected pipe %c in active mask 0x%02x)\n", - pipe_name(drm_crtc_index(crtc)), pll->active_mask); + pipe_name(drm_crtc_index(&crtc->base)), pll->active_mask); else I915_STATE_WARN(pll->active_mask & crtc_mask, "pll active mismatch (didn't expect pipe %c in active mask 0x%02x)\n", - pipe_name(drm_crtc_index(crtc)), pll->active_mask); + pipe_name(drm_crtc_index(&crtc->base)), pll->active_mask); I915_STATE_WARN(!(pll->state.crtc_mask & crtc_mask), "pll enabled crtcs mismatch (expected 0x%x in 0x%02x)\n", @@ -13057,51 +14092,47 @@ verify_single_dpll_state(struct drm_i915_private *dev_priv, } static void -verify_shared_dpll_state(struct drm_device *dev, struct drm_crtc *crtc, - struct drm_crtc_state *old_crtc_state, - struct drm_crtc_state *new_crtc_state) +verify_shared_dpll_state(struct intel_crtc *crtc, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc_state *old_state = to_intel_crtc_state(old_crtc_state); - struct intel_crtc_state *new_state = to_intel_crtc_state(new_crtc_state); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - if (new_state->shared_dpll) - verify_single_dpll_state(dev_priv, new_state->shared_dpll, crtc, new_crtc_state); + if (new_crtc_state->shared_dpll) + verify_single_dpll_state(dev_priv, new_crtc_state->shared_dpll, crtc, new_crtc_state); - if (old_state->shared_dpll && - 
old_state->shared_dpll != new_state->shared_dpll) { - unsigned int crtc_mask = drm_crtc_mask(crtc); - struct intel_shared_dpll *pll = old_state->shared_dpll; + if (old_crtc_state->shared_dpll && + old_crtc_state->shared_dpll != new_crtc_state->shared_dpll) { + unsigned int crtc_mask = drm_crtc_mask(&crtc->base); + struct intel_shared_dpll *pll = old_crtc_state->shared_dpll; I915_STATE_WARN(pll->active_mask & crtc_mask, "pll active mismatch (didn't expect pipe %c in active mask)\n", - pipe_name(drm_crtc_index(crtc))); + pipe_name(drm_crtc_index(&crtc->base))); I915_STATE_WARN(pll->state.crtc_mask & crtc_mask, "pll enabled crtcs mismatch (found %x in enabled mask)\n", - pipe_name(drm_crtc_index(crtc))); + pipe_name(drm_crtc_index(&crtc->base))); } } static void -intel_modeset_verify_crtc(struct drm_crtc *crtc, - struct drm_atomic_state *state, - struct drm_crtc_state *old_state, - struct drm_crtc_state *new_state) +intel_modeset_verify_crtc(struct intel_crtc *crtc, + struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) { - if (!needs_modeset(new_state) && - !to_intel_crtc_state(new_state)->update_pipe) + if (!needs_modeset(new_crtc_state) && !new_crtc_state->update_pipe) return; - verify_wm_state(crtc, new_state); - verify_connector_state(crtc->dev, state, crtc); - verify_crtc_state(crtc, old_state, new_state); - verify_shared_dpll_state(crtc->dev, crtc, old_state, new_state); + verify_wm_state(crtc, new_crtc_state); + verify_connector_state(state, crtc); + verify_crtc_state(crtc, old_crtc_state, new_crtc_state); + verify_shared_dpll_state(crtc, old_crtc_state, new_crtc_state); } static void -verify_disabled_dpll_state(struct drm_device *dev) +verify_disabled_dpll_state(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); int i; for (i = 0; i < dev_priv->num_shared_dpll; i++) @@ -13109,18 +14140,23 @@ verify_disabled_dpll_state(struct drm_device *dev) } static void -intel_modeset_verify_disabled(struct drm_device *dev, - struct drm_atomic_state *state) +intel_modeset_verify_disabled(struct drm_i915_private *dev_priv, + struct intel_atomic_state *state) { - verify_encoder_state(dev, state); - verify_connector_state(dev, state, NULL); - verify_disabled_dpll_state(dev); + verify_encoder_state(dev_priv, state); + verify_connector_state(state, NULL); + verify_disabled_dpll_state(dev_priv); } -static void update_scanline_offset(const struct intel_crtc_state *crtc_state) +static void +intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + + drm_calc_timestamping_constants(&crtc->base, adjusted_mode); /* * The scanline counter increments at the leading edge of hsync. @@ -13150,7 +14186,6 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state) * answer that's slightly in the future. 
*/ if (IS_GEN(dev_priv, 2)) { - const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; int vtotal; vtotal = adjusted_mode->crtc_vtotal; @@ -13161,34 +14196,26 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state) } else if (HAS_DDI(dev_priv) && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { crtc->scanline_offset = 2; - } else + } else { crtc->scanline_offset = 1; + } } static void intel_modeset_clear_plls(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc_state *old_crtc_state, *new_crtc_state; + struct intel_crtc_state *new_crtc_state; struct intel_crtc *crtc; int i; if (!dev_priv->display.crtc_compute_clock) return; - for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, - new_crtc_state, i) { - struct intel_shared_dpll *old_dpll = - old_crtc_state->shared_dpll; - - if (!needs_modeset(&new_crtc_state->base)) - continue; - - new_crtc_state->shared_dpll = NULL; - - if (!old_dpll) + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + if (!needs_modeset(new_crtc_state)) continue; - intel_release_shared_dpll(old_dpll, crtc, &state->base); + intel_release_shared_dplls(state, crtc); } } @@ -13198,7 +14225,7 @@ static void intel_modeset_clear_plls(struct intel_atomic_state *state) * multiple pipes, and planes are enabled after the pipe, we need to wait at * least 2 vblanks on the first pipe before enabling planes on the second pipe. */ -static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state) +static int hsw_mode_set_planes_workaround(struct intel_atomic_state *state) { struct intel_crtc_state *crtc_state; struct intel_crtc *crtc; @@ -13209,8 +14236,8 @@ static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state) /* look at all crtc's that are going to be enabled in during modeset */ for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { - if (!crtc_state->base.active || - !needs_modeset(&crtc_state->base)) + if (!crtc_state->hw.active || + !needs_modeset(crtc_state)) continue; if (first_crtc_state) { @@ -13234,8 +14261,8 @@ static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state) crtc_state->hsw_workaround_pipe = INVALID_PIPE; - if (!crtc_state->base.active || - needs_modeset(&crtc_state->base)) + if (!crtc_state->hw.active || + needs_modeset(crtc_state)) continue; /* 2 or more enabled crtcs means no need for w/a */ @@ -13253,158 +14280,47 @@ static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state) return 0; } -static int intel_lock_all_pipes(struct drm_atomic_state *state) -{ - struct drm_crtc *crtc; - - /* Add all pipes to the state */ - for_each_crtc(state->dev, crtc) { - struct drm_crtc_state *crtc_state; - - crtc_state = drm_atomic_get_crtc_state(state, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - } - - return 0; -} - -static int intel_modeset_all_pipes(struct drm_atomic_state *state) -{ - struct drm_crtc *crtc; - - /* - * Add all pipes to the state, and force - * a modeset on all the active ones. 
- */ - for_each_crtc(state->dev, crtc) { - struct drm_crtc_state *crtc_state; - int ret; - - crtc_state = drm_atomic_get_crtc_state(state, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - - if (!crtc_state->active || needs_modeset(crtc_state)) - continue; - - crtc_state->mode_changed = true; - - ret = drm_atomic_add_affected_connectors(state, crtc); - if (ret) - return ret; - - ret = drm_atomic_add_affected_planes(state, crtc); - if (ret) - return ret; - } - - return 0; -} - static int intel_modeset_checks(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_crtc *crtc; - int ret = 0, i; - - if (!check_digital_port_conflicts(state)) { - DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n"); - return -EINVAL; - } + int ret, i; /* keep the current setting */ if (!state->cdclk.force_min_cdclk_changed) state->cdclk.force_min_cdclk = dev_priv->cdclk.force_min_cdclk; state->modeset = true; - state->active_crtcs = dev_priv->active_crtcs; + state->active_pipes = dev_priv->active_pipes; state->cdclk.logical = dev_priv->cdclk.logical; state->cdclk.actual = dev_priv->cdclk.actual; - state->cdclk.pipe = INVALID_PIPE; for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (new_crtc_state->base.active) - state->active_crtcs |= 1 << i; + if (new_crtc_state->hw.active) + state->active_pipes |= BIT(crtc->pipe); else - state->active_crtcs &= ~(1 << i); + state->active_pipes &= ~BIT(crtc->pipe); - if (old_crtc_state->base.active != new_crtc_state->base.active) - state->active_pipe_changes |= drm_crtc_mask(&crtc->base); + if (old_crtc_state->hw.active != new_crtc_state->hw.active) + state->active_pipe_changes |= BIT(crtc->pipe); } - /* - * See if the config requires any additional preparation, e.g. - * to adjust global state with pipes off. We need to do this - * here so we can get the modeset_pipe updated config for the new - * mode set on this crtc. For other crtcs we need to use the - * adjusted_mode bits in the crtc directly. - */ - if (dev_priv->display.modeset_calc_cdclk) { - enum pipe pipe; - - ret = dev_priv->display.modeset_calc_cdclk(state); - if (ret < 0) + if (state->active_pipe_changes) { + ret = intel_atomic_lock_global_state(state); + if (ret) return ret; - - /* - * Writes to dev_priv->cdclk.logical must protected by - * holding all the crtc locks, even if we don't end up - * touching the hardware - */ - if (intel_cdclk_changed(&dev_priv->cdclk.logical, - &state->cdclk.logical)) { - ret = intel_lock_all_pipes(&state->base); - if (ret < 0) - return ret; - } - - if (is_power_of_2(state->active_crtcs)) { - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - - pipe = ilog2(state->active_crtcs); - crtc = &intel_get_crtc_for_pipe(dev_priv, pipe)->base; - crtc_state = drm_atomic_get_new_crtc_state(&state->base, crtc); - if (crtc_state && needs_modeset(crtc_state)) - pipe = INVALID_PIPE; - } else { - pipe = INVALID_PIPE; - } - - /* All pipes must be switched off while we change the cdclk. 
*/ - if (pipe != INVALID_PIPE && - intel_cdclk_needs_cd2x_update(dev_priv, - &dev_priv->cdclk.actual, - &state->cdclk.actual)) { - ret = intel_lock_all_pipes(&state->base); - if (ret < 0) - return ret; - - state->cdclk.pipe = pipe; - } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, - &state->cdclk.actual)) { - ret = intel_modeset_all_pipes(&state->base); - if (ret < 0) - return ret; - - state->cdclk.pipe = INVALID_PIPE; - } - - DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", - state->cdclk.logical.cdclk, - state->cdclk.actual.cdclk); - DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n", - state->cdclk.logical.voltage_level, - state->cdclk.actual.voltage_level); } + ret = intel_modeset_calc_cdclk(state); + if (ret) + return ret; + intel_modeset_clear_plls(state); if (IS_HASWELL(dev_priv)) - return haswell_mode_set_planes_workaround(state); + return hsw_mode_set_planes_workaround(state); return 0; } @@ -13432,9 +14348,13 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta if (!intel_pipe_config_compare(old_crtc_state, new_crtc_state, true)) return; - new_crtc_state->base.mode_changed = false; + new_crtc_state->uapi.mode_changed = false; new_crtc_state->update_pipe = true; +} +static void intel_crtc_copy_fastset(const struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ /* * If we're not doing the full modeset we want to * keep the current M/N values as they may be @@ -13449,6 +14369,201 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta new_crtc_state->has_drrs = old_crtc_state->has_drrs; } +static int intel_crtc_add_planes_to_state(struct intel_atomic_state *state, + struct intel_crtc *crtc, + u8 plane_ids_mask) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_plane *plane; + + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { + struct intel_plane_state *plane_state; + + if ((plane_ids_mask & BIT(plane->id)) == 0) + continue; + + plane_state = intel_atomic_get_plane_state(state, plane); + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); + } + + return 0; +} + +static bool active_planes_affects_min_cdclk(struct drm_i915_private *dev_priv) +{ + /* See {hsw,vlv,ivb}_plane_ratio() */ + return IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv) || + IS_CHERRYVIEW(dev_priv) || IS_VALLEYVIEW(dev_priv) || + IS_IVYBRIDGE(dev_priv); +} + +static int intel_atomic_check_planes(struct intel_atomic_state *state, + bool *need_modeset) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc_state *old_crtc_state, *new_crtc_state; + struct intel_plane_state *plane_state; + struct intel_plane *plane; + struct intel_crtc *crtc; + int i, ret; + + ret = icl_add_linked_planes(state); + if (ret) + return ret; + + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + ret = intel_plane_atomic_check(state, plane); + if (ret) { + DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic driver check failed\n", + plane->base.base.id, plane->base.name); + return ret; + } + } + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + u8 old_active_planes, new_active_planes; + + ret = icl_check_nv12_planes(new_crtc_state); + if (ret) + return ret; + + /* + * On some platforms the number of active planes affects + * the planes' minimum cdclk calculation. Add such planes + * to the state before we compute the minimum cdclk. 
+ */ + if (!active_planes_affects_min_cdclk(dev_priv)) + continue; + + old_active_planes = old_crtc_state->active_planes & ~BIT(PLANE_CURSOR); + new_active_planes = new_crtc_state->active_planes & ~BIT(PLANE_CURSOR); + + if (hweight8(old_active_planes) == hweight8(new_active_planes)) + continue; + + ret = intel_crtc_add_planes_to_state(state, crtc, new_active_planes); + if (ret) + return ret; + } + + /* + * active_planes bitmask has been updated, and potentially + * affected planes are part of the state. We can now + * compute the minimum cdclk for each plane. + */ + for_each_new_intel_plane_in_state(state, plane, plane_state, i) + *need_modeset |= intel_plane_calc_min_cdclk(state, plane); + + return 0; +} + +static int intel_atomic_check_crtcs(struct intel_atomic_state *state) +{ + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + int ret = intel_crtc_atomic_check(state, crtc); + if (ret) { + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic driver check failed\n", + crtc->base.base.id, crtc->base.name); + return ret; + } + } + + return 0; +} + +static bool intel_cpu_transcoders_need_modeset(struct intel_atomic_state *state, + u8 transcoders) +{ + const struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->hw.enable && + transcoders & BIT(new_crtc_state->cpu_transcoder) && + needs_modeset(new_crtc_state)) + return true; + } + + return false; +} + +static int +intel_modeset_all_tiles(struct intel_atomic_state *state, int tile_grp_id) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; + int ret = 0; + + drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + struct drm_connector_state *conn_state; + struct drm_crtc_state *crtc_state; + + if (!connector->has_tile || + connector->tile_group->id != tile_grp_id) + continue; + conn_state = drm_atomic_get_connector_state(&state->base, + connector); + if (IS_ERR(conn_state)) { + ret = PTR_ERR(conn_state); + break; + } + + if (!conn_state->crtc) + continue; + + crtc_state = drm_atomic_get_crtc_state(&state->base, + conn_state->crtc); + if (IS_ERR(crtc_state)) { + ret = PTR_ERR(crtc_state); + break; + } + crtc_state->mode_changed = true; + ret = drm_atomic_add_affected_connectors(&state->base, + conn_state->crtc); + if (ret) + break; + } + drm_connector_list_iter_end(&conn_iter); + + return ret; +} + +static int +intel_atomic_check_tiled_conns(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct drm_connector *connector; + struct drm_connector_state *old_conn_state, *new_conn_state; + int i, ret; + + if (INTEL_GEN(dev_priv) < 11) + return 0; + + /* Is tiled, mark all other tiled CRTCs as needing a modeset */ + for_each_oldnew_connector_in_state(&state->base, connector, + old_conn_state, new_conn_state, i) { + if (!connector->has_tile) + continue; + if (!intel_connector_needs_modeset(state, connector)) + continue; + + ret = intel_modeset_all_tiles(state, connector->tile_group->id); + if (ret) + return ret; + } + + return 0; +} + /** * intel_atomic_check - validate state object * @dev: drm device @@ -13462,44 +14577,124 @@ static int intel_atomic_check(struct drm_device *dev, struct intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_crtc 
*crtc; int ret, i; - bool any_ms = state->cdclk.force_min_cdclk_changed; + bool any_ms = false; /* Catch I915_MODE_FLAG_INHERITED */ for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (new_crtc_state->base.mode.private_flags != - old_crtc_state->base.mode.private_flags) - new_crtc_state->base.mode_changed = true; + if (new_crtc_state->hw.mode.private_flags != + old_crtc_state->hw.mode.private_flags) + new_crtc_state->uapi.mode_changed = true; } ret = drm_atomic_helper_check_modeset(dev, &state->base); if (ret) goto fail; + /** + * This check adds all the connectors in current state that belong to + * the same tile group to a full modeset. + * This function directly sets mode_changed to true and we also call + * drm_atomic_add_affected_connectors(). Hence we are not explicitly + * calling drm_atomic_helper_check_modeset() after this. + * + * Fixme: Handle some corner cases where one of the + * tiled connectors gets disconnected and tile info is lost but since it + * was previously synced to other conn, we need to add that to the modeset. + */ + ret = intel_atomic_check_tiled_conns(state); + if (ret) + goto fail; + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (!needs_modeset(&new_crtc_state->base)) + if (!needs_modeset(new_crtc_state)) { + /* Light copy */ + intel_crtc_copy_uapi_to_hw_state_nomodeset(new_crtc_state); + continue; + } - if (!new_crtc_state->base.enable) { - any_ms = true; + if (!new_crtc_state->uapi.enable) { + intel_crtc_copy_uapi_to_hw_state(new_crtc_state); continue; } + ret = intel_crtc_prepare_cleared_state(new_crtc_state); + if (ret) + goto fail; + ret = intel_modeset_pipe_config(new_crtc_state); if (ret) goto fail; intel_crtc_check_fastset(old_crtc_state, new_crtc_state); + } + + /** + * Check if fastset is allowed by external dependencies like other + * pipes and transcoders. + * + * Right now it only forces a full modeset when the MST master + * transcoder did not change but the pipe of the master transcoder + * needs a full modeset so all slaves also need to do a full modeset or + * in case of port synced crtcs, if one of the synced crtcs + * needs a full modeset, all other synced crtcs should be + * forced to do a full modeset. 
+ */ + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + if (!new_crtc_state->hw.enable || needs_modeset(new_crtc_state)) + continue; + + if (intel_dp_mst_is_slave_trans(new_crtc_state)) { + enum transcoder master = new_crtc_state->mst_master_transcoder; + + if (intel_cpu_transcoders_need_modeset(state, BIT(master))) { + new_crtc_state->uapi.mode_changed = true; + new_crtc_state->update_pipe = false; + } + } - if (needs_modeset(&new_crtc_state->base)) + if (is_trans_port_sync_mode(new_crtc_state)) { + u8 trans = new_crtc_state->sync_mode_slaves_mask | + BIT(new_crtc_state->master_transcoder); + + if (intel_cpu_transcoders_need_modeset(state, trans)) { + new_crtc_state->uapi.mode_changed = true; + new_crtc_state->update_pipe = false; + } + } + } + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + if (needs_modeset(new_crtc_state)) { any_ms = true; + continue; + } + + if (!new_crtc_state->update_pipe) + continue; + + intel_crtc_copy_fastset(old_crtc_state, new_crtc_state); + } + + if (any_ms && !check_digital_port_conflicts(state)) { + DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n"); + ret = -EINVAL; + goto fail; + } ret = drm_dp_mst_atomic_check(&state->base); if (ret) goto fail; + any_ms |= state->cdclk.force_min_cdclk_changed; + + ret = intel_atomic_check_planes(state, &any_ms); + if (ret) + goto fail; + if (any_ms) { ret = intel_modeset_checks(state); if (ret) @@ -13508,11 +14703,7 @@ static int intel_atomic_check(struct drm_device *dev, state->cdclk.logical = dev_priv->cdclk.logical; } - ret = icl_add_linked_planes(state); - if (ret) - goto fail; - - ret = drm_atomic_helper_check_planes(dev, &state->base); + ret = intel_atomic_check_crtcs(state); if (ret) goto fail; @@ -13527,12 +14718,12 @@ static int intel_atomic_check(struct drm_device *dev, for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (!needs_modeset(&new_crtc_state->base) && + if (!needs_modeset(new_crtc_state) && !new_crtc_state->update_pipe) continue; intel_dump_pipe_config(new_crtc_state, state, - needs_modeset(&new_crtc_state->base) ? + needs_modeset(new_crtc_state) ? 
"[modeset]" : "[fastset]"); } @@ -13553,10 +14744,10 @@ static int intel_atomic_check(struct drm_device *dev, return ret; } -static int intel_atomic_prepare_commit(struct drm_device *dev, - struct drm_atomic_state *state) +static int intel_atomic_prepare_commit(struct intel_atomic_state *state) { - return drm_atomic_helper_prepare_planes(dev, state); + return drm_atomic_helper_prepare_planes(state->base.dev, + &state->base); } u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) @@ -13567,60 +14758,239 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) if (!vblank->max_vblank_count) return (u32)drm_crtc_accurate_vblank_count(&crtc->base); - return dev->driver->get_vblank_counter(dev, crtc->pipe); + return crtc->base.funcs->get_vblank_counter(&crtc->base); } -static void intel_update_crtc(struct drm_crtc *crtc, - struct drm_atomic_state *state, - struct drm_crtc_state *old_crtc_state, - struct drm_crtc_state *new_crtc_state) +void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_crtc_state *pipe_config = to_intel_crtc_state(new_crtc_state); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (!IS_GEN(dev_priv, 2) || crtc_state->active_planes) + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); + + if (crtc_state->has_pch_encoder) { + enum pipe pch_transcoder = + intel_crtc_pch_transcoder(crtc); + + intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); + } +} + +static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + /* + * Update pipe size and adjust fitter if needed: the reason for this is + * that in compute_mode_changes we check the native mode (not the pfit + * mode) to see if we can flip rather than do a full mode set. In the + * fastboot case, we'll flip, but if we don't update the pipesrc and + * pfit state, we'll end up with a big fb scanned out into the wrong + * sized surface. + */ + intel_set_pipe_src_size(new_crtc_state); + + /* on skylake this is done by detaching scalers */ + if (INTEL_GEN(dev_priv) >= 9) { + skl_detach_scalers(new_crtc_state); + + if (new_crtc_state->pch_pfit.enabled) + skl_pfit_enable(new_crtc_state); + } else if (HAS_PCH_SPLIT(dev_priv)) { + if (new_crtc_state->pch_pfit.enabled) + ilk_pfit_enable(new_crtc_state); + else if (old_crtc_state->pch_pfit.enabled) + ilk_pfit_disable(old_crtc_state); + } + + if (INTEL_GEN(dev_priv) >= 11) + icl_set_pipe_chicken(crtc); +} + +static void commit_pipe_config(struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + bool modeset = needs_modeset(new_crtc_state); + + /* + * During modesets pipe configuration was programmed as the + * CRTC was enabled. 
+ */ + if (!modeset) { + if (new_crtc_state->uapi.color_mgmt_changed || + new_crtc_state->update_pipe) + intel_color_commit(new_crtc_state); + + if (INTEL_GEN(dev_priv) >= 9) + skl_detach_scalers(new_crtc_state); + + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + bdw_set_pipemisc(new_crtc_state); + + if (new_crtc_state->update_pipe) + intel_pipe_fastset(old_crtc_state, new_crtc_state); + } + + if (dev_priv->display.atomic_update_watermarks) + dev_priv->display.atomic_update_watermarks(state, crtc); +} + +static void intel_update_crtc(struct intel_crtc *crtc, + struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); bool modeset = needs_modeset(new_crtc_state); struct intel_plane_state *new_plane_state = - intel_atomic_get_new_plane_state(to_intel_atomic_state(state), - to_intel_plane(crtc->primary)); + intel_atomic_get_new_plane_state(state, + to_intel_plane(crtc->base.primary)); if (modeset) { - update_scanline_offset(pipe_config); - dev_priv->display.crtc_enable(pipe_config, state); + intel_crtc_update_active_timings(new_crtc_state); + + dev_priv->display.crtc_enable(state, crtc); /* vblanks work again, re-enable pipe CRC. */ - intel_crtc_enable_pipe_crc(intel_crtc); + intel_crtc_enable_pipe_crc(crtc); } else { - intel_pre_plane_update(to_intel_crtc_state(old_crtc_state), - pipe_config); + if (new_crtc_state->preload_luts && + (new_crtc_state->uapi.color_mgmt_changed || + new_crtc_state->update_pipe)) + intel_color_load_luts(new_crtc_state); + + intel_pre_plane_update(state, crtc); - if (pipe_config->update_pipe) - intel_encoders_update_pipe(crtc, pipe_config, state); + if (new_crtc_state->update_pipe) + intel_encoders_update_pipe(state, crtc); } - if (pipe_config->update_pipe && !pipe_config->enable_fbc) - intel_fbc_disable(intel_crtc); + if (new_crtc_state->update_pipe && !new_crtc_state->enable_fbc) + intel_fbc_disable(crtc); else if (new_plane_state) - intel_fbc_enable(intel_crtc, pipe_config, new_plane_state); + intel_fbc_enable(crtc, new_crtc_state, new_plane_state); - intel_begin_crtc_commit(to_intel_atomic_state(state), intel_crtc); + /* Perform vblank evasion around commit operation */ + intel_pipe_update_start(new_crtc_state); + + commit_pipe_config(state, old_crtc_state, new_crtc_state); if (INTEL_GEN(dev_priv) >= 9) - skl_update_planes_on_crtc(to_intel_atomic_state(state), intel_crtc); + skl_update_planes_on_crtc(state, crtc); else - i9xx_update_planes_on_crtc(to_intel_atomic_state(state), intel_crtc); + i9xx_update_planes_on_crtc(state, crtc); + + intel_pipe_update_end(new_crtc_state); + + /* + * We usually enable FIFO underrun interrupts as part of the + * CRTC enable sequence during modesets. But when we inherit a + * valid pipe configuration from the BIOS we need to take care + * of enabling them on the CRTC's first fastset. 
+ */ + if (new_crtc_state->update_pipe && !modeset && + old_crtc_state->hw.mode.private_flags & I915_MODE_FLAG_INHERITED) + intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); +} + +static struct intel_crtc *intel_get_slave_crtc(const struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(new_crtc_state->uapi.crtc->dev); + enum transcoder slave_transcoder; + + WARN_ON(!is_power_of_2(new_crtc_state->sync_mode_slaves_mask)); - intel_finish_crtc_commit(to_intel_atomic_state(state), intel_crtc); + slave_transcoder = ffs(new_crtc_state->sync_mode_slaves_mask) - 1; + return intel_get_crtc_for_pipe(dev_priv, + (enum pipe)slave_transcoder); } -static void intel_update_crtcs(struct drm_atomic_state *state) +static void intel_old_crtc_state_disables(struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state, + struct intel_crtc *crtc) { - struct drm_crtc *crtc; - struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + intel_crtc_disable_planes(state, crtc); + + /* + * We need to disable pipe CRC before disabling the pipe, + * or we race against vblank off. + */ + intel_crtc_disable_pipe_crc(crtc); + + dev_priv->display.crtc_disable(state, crtc); + crtc->active = false; + intel_fbc_disable(crtc); + intel_disable_shared_dpll(old_crtc_state); + + /* FIXME unify this for all platforms */ + if (!new_crtc_state->hw.active && + !HAS_GMCH(dev_priv) && + dev_priv->display.initial_watermarks) + dev_priv->display.initial_watermarks(state, crtc); +} + +static void intel_commit_modeset_disables(struct intel_atomic_state *state) +{ + struct intel_crtc_state *new_crtc_state, *old_crtc_state; + struct intel_crtc *crtc; + u32 handled = 0; int i; - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (!new_crtc_state->active) + /* Only disable port sync and MST slaves */ + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + if (!needs_modeset(new_crtc_state)) + continue; + + if (!old_crtc_state->hw.active) + continue; + + /* In case of Transcoder Port Sync, master and slave CRTCs can be + * assigned in any order, so we need to make sure that + * slave CRTCs are disabled first and then the master CRTC, since + * slave vblanks are masked till master vblanks.
+ */ + if (!is_trans_port_sync_slave(old_crtc_state) && + !intel_dp_mst_is_slave_trans(old_crtc_state)) + continue; + + intel_pre_plane_update(state, crtc); + intel_old_crtc_state_disables(state, old_crtc_state, + new_crtc_state, crtc); + handled |= BIT(crtc->pipe); + } + + /* Disable everything else left on */ + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + if (!needs_modeset(new_crtc_state) || + (handled & BIT(crtc->pipe))) + continue; + + intel_pre_plane_update(state, crtc); + if (old_crtc_state->hw.active) + intel_old_crtc_state_disables(state, old_crtc_state, + new_crtc_state, crtc); + } +} + +static void intel_commit_modeset_enables(struct intel_atomic_state *state) +{ + struct intel_crtc *crtc; + struct intel_crtc_state *old_crtc_state, *new_crtc_state; + int i; + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + if (!new_crtc_state->hw.active) continue; intel_update_crtc(crtc, state, old_crtc_state, @@ -13628,26 +14998,141 @@ static void intel_update_crtcs(struct drm_atomic_state *state) } } -static void skl_update_crtcs(struct drm_atomic_state *state) +static void intel_crtc_enable_trans_port_sync(struct intel_crtc *crtc, + struct intel_atomic_state *state, + struct intel_crtc_state *new_crtc_state) { - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_crtc *crtc; - struct intel_crtc *intel_crtc; - struct drm_crtc_state *old_crtc_state, *new_crtc_state; - struct intel_crtc_state *cstate; - unsigned int updated = 0; - bool progress; - enum pipe pipe; + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + intel_crtc_update_active_timings(new_crtc_state); + dev_priv->display.crtc_enable(state, crtc); + intel_crtc_enable_pipe_crc(crtc); +} + +static void intel_set_dp_tp_ctl_normal(struct intel_crtc *crtc, + struct intel_atomic_state *state) +{ + struct drm_connector *uninitialized_var(conn); + struct drm_connector_state *conn_state; + struct intel_dp *intel_dp; int i; + + for_each_new_connector_in_state(&state->base, conn, conn_state, i) { + if (conn_state->crtc == &crtc->base) + break; + } + intel_dp = enc_to_intel_dp(intel_attached_encoder(to_intel_connector(conn))); + intel_dp_stop_link_train(intel_dp); +} + +/* + * TODO: This is only called from port sync and it is identical to what will be + * executed again in intel_update_crtc() over port sync pipes + */ +static void intel_post_crtc_enable_updates(struct intel_crtc *crtc, + struct intel_atomic_state *state) +{ + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct intel_plane_state *new_plane_state = + intel_atomic_get_new_plane_state(state, + to_intel_plane(crtc->base.primary)); + bool modeset = needs_modeset(new_crtc_state); + + if (new_crtc_state->update_pipe && !new_crtc_state->enable_fbc) + intel_fbc_disable(crtc); + else if (new_plane_state) + intel_fbc_enable(crtc, new_crtc_state, new_plane_state); + + /* Perform vblank evasion around commit operation */ + intel_pipe_update_start(new_crtc_state); + commit_pipe_config(state, old_crtc_state, new_crtc_state); + skl_update_planes_on_crtc(state, crtc); + intel_pipe_update_end(new_crtc_state); + + /* + * We usually enable FIFO underrun interrupts as part of the + * CRTC enable sequence during modesets. 
But when we inherit a + * valid pipe configuration from the BIOS we need to take care + * of enabling them on the CRTC's first fastset. + */ + if (new_crtc_state->update_pipe && !modeset && + old_crtc_state->hw.mode.private_flags & I915_MODE_FLAG_INHERITED) + intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); +} + +static void intel_update_trans_port_sync_crtcs(struct intel_crtc *crtc, + struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *slave_crtc = intel_get_slave_crtc(new_crtc_state); + struct intel_crtc_state *new_slave_crtc_state = + intel_atomic_get_new_crtc_state(state, slave_crtc); + struct intel_crtc_state *old_slave_crtc_state = + intel_atomic_get_old_crtc_state(state, slave_crtc); + + WARN_ON(!slave_crtc || !new_slave_crtc_state || + !old_slave_crtc_state); + + DRM_DEBUG_KMS("Updating Transcoder Port Sync Master CRTC = %d %s and Slave CRTC %d %s\n", + crtc->base.base.id, crtc->base.name, slave_crtc->base.base.id, + slave_crtc->base.name); + + /* Enable seq for slave with DP_TP_CTL left Idle until the + * master is ready + */ + intel_crtc_enable_trans_port_sync(slave_crtc, + state, + new_slave_crtc_state); + + /* Enable seq for master with DP_TP_CTL left Idle */ + intel_crtc_enable_trans_port_sync(crtc, + state, + new_crtc_state); + + /* Set Slave's DP_TP_CTL to Normal */ + intel_set_dp_tp_ctl_normal(slave_crtc, + state); + + /* Set Master's DP_TP_CTL to Normal */ + usleep_range(200, 400); + intel_set_dp_tp_ctl_normal(crtc, + state); + + /* Now do the post crtc enable for all masters and slaves */ + intel_post_crtc_enable_updates(slave_crtc, + state); + intel_post_crtc_enable_updates(crtc, + state); +} + +static void skl_commit_modeset_enables(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + struct intel_crtc_state *old_crtc_state, *new_crtc_state; u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; - u8 required_slices = intel_state->wm_results.ddb.enabled_slices; + u8 required_slices = state->wm_results.ddb.enabled_slices; struct skl_ddb_entry entries[I915_MAX_PIPES] = {}; + const u8 num_pipes = INTEL_NUM_PIPES(dev_priv); + u8 update_pipes = 0, modeset_pipes = 0; + int i; + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + if (!new_crtc_state->hw.active) + continue; - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) /* ignore allocations for crtc's that have been turned off. */ - if (new_crtc_state->active) - entries[i] = to_intel_crtc_state(old_crtc_state)->wm.skl.ddb; + if (!needs_modeset(new_crtc_state)) { + entries[i] = old_crtc_state->wm.skl.ddb; + update_pipes |= BIT(crtc->pipe); + } else { + modeset_pipes |= BIT(crtc->pipe); + } + } /* If 2nd DBuf slice required, enable it here */ if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices) @@ -13656,30 +15141,29 @@ static void skl_update_crtcs(struct drm_atomic_state *state) /* * Whenever the number of active pipes changes, we need to make sure we * update the pipes in the right order so that their ddb allocations - * never overlap with eachother inbetween CRTC updates. Otherwise we'll + * never overlap with each other between CRTC updates. Otherwise we'll * cause pipe underruns and other bad stuff. + * + * So first let's enable all pipes that do not need a full modeset as + * those don't have any external dependency.
*/ - do { - progress = false; - - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - bool vbl_wait = false; - unsigned int cmask = drm_crtc_mask(crtc); - - intel_crtc = to_intel_crtc(crtc); - cstate = to_intel_crtc_state(new_crtc_state); - pipe = intel_crtc->pipe; + while (update_pipes) { + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + enum pipe pipe = crtc->pipe; - if (updated & cmask || !cstate->base.active) + if ((update_pipes & BIT(pipe)) == 0) continue; - if (skl_ddb_allocation_overlaps(&cstate->wm.skl.ddb, - entries, - INTEL_INFO(dev_priv)->num_pipes, i)) + if (skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb, + entries, num_pipes, i)) continue; - updated |= cmask; - entries[i] = cstate->wm.skl.ddb; + entries[i] = new_crtc_state->wm.skl.ddb; + update_pipes &= ~BIT(pipe); + + intel_update_crtc(crtc, state, old_crtc_state, + new_crtc_state); /* * If this is an already active pipe, it's DDB changed, @@ -13687,21 +15171,73 @@ static void skl_update_crtcs(struct drm_atomic_state *state) * then we need to wait for a vblank to pass for the * new ddb allocation to take effect. */ - if (!skl_ddb_entry_equal(&cstate->wm.skl.ddb, - &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb) && - !new_crtc_state->active_changed && - intel_state->wm_results.dirty_pipes != updated) - vbl_wait = true; + if (!skl_ddb_entry_equal(&new_crtc_state->wm.skl.ddb, + &old_crtc_state->wm.skl.ddb) && + (update_pipes | modeset_pipes)) + intel_wait_for_vblank(dev_priv, pipe); + } + } + + /* + * Enable all pipes that need a modeset and do not depend on other + * pipes + */ + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + enum pipe pipe = crtc->pipe; + + if ((modeset_pipes & BIT(pipe)) == 0) + continue; + + if (intel_dp_mst_is_slave_trans(new_crtc_state) || + is_trans_port_sync_slave(new_crtc_state)) + continue; + + WARN_ON(skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb, + entries, num_pipes, i)); + + entries[i] = new_crtc_state->wm.skl.ddb; + modeset_pipes &= ~BIT(pipe); + + if (is_trans_port_sync_mode(new_crtc_state)) { + struct intel_crtc *slave_crtc; + intel_update_trans_port_sync_crtcs(crtc, state, + old_crtc_state, + new_crtc_state); + + slave_crtc = intel_get_slave_crtc(new_crtc_state); + /* TODO: update entries[] of slave */ + modeset_pipes &= ~BIT(slave_crtc->pipe); + + } else { intel_update_crtc(crtc, state, old_crtc_state, new_crtc_state); + } + } - if (vbl_wait) - intel_wait_for_vblank(dev_priv, pipe); + /* + * Finally enable all pipes that need a modeset and depend on + * other pipes; right now these are only MST slaves, as both port sync slave + * and master are enabled together + */ + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + enum pipe pipe = crtc->pipe; - progress = true; - } - } while (progress); + if ((modeset_pipes & BIT(pipe)) == 0) + continue; + + WARN_ON(skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb, + entries, num_pipes, i)); + + entries[i] = new_crtc_state->wm.skl.ddb; + modeset_pipes &= ~BIT(pipe); + + intel_update_crtc(crtc, state, old_crtc_state, new_crtc_state); + } + + WARN_ON(modeset_pipes); /* If 2nd DBuf slice is no more required disable it */ if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices) @@ -13736,18 +15272,21 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat for (;;) { prepare_to_wait(&intel_state->commit_ready.wait, &wait_fence,
TASK_UNINTERRUPTIBLE); - prepare_to_wait(&dev_priv->gpu_error.wait_queue, + prepare_to_wait(bit_waitqueue(&dev_priv->gt.reset.flags, + I915_RESET_MODESET), &wait_reset, TASK_UNINTERRUPTIBLE); - if (i915_sw_fence_done(&intel_state->commit_ready) - || test_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags)) + if (i915_sw_fence_done(&intel_state->commit_ready) || + test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags)) break; schedule(); } finish_wait(&intel_state->commit_ready.wait, &wait_fence); - finish_wait(&dev_priv->gpu_error.wait_queue, &wait_reset); + finish_wait(bit_waitqueue(&dev_priv->gt.reset.flags, + I915_RESET_MODESET), + &wait_reset); } static void intel_atomic_cleanup_work(struct work_struct *work) @@ -13763,85 +15302,46 @@ static void intel_atomic_cleanup_work(struct work_struct *work) intel_atomic_helper_free_state(i915); } -static void intel_atomic_commit_tail(struct drm_atomic_state *state) +static void intel_atomic_commit_tail(struct intel_atomic_state *state) { - struct drm_device *dev = state->dev; - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_device *dev = state->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc_state *old_crtc_state, *new_crtc_state; - struct intel_crtc_state *new_intel_crtc_state, *old_intel_crtc_state; - struct drm_crtc *crtc; - struct intel_crtc *intel_crtc; + struct intel_crtc_state *new_crtc_state, *old_crtc_state; + struct intel_crtc *crtc; u64 put_domains[I915_MAX_PIPES] = {}; intel_wakeref_t wakeref = 0; int i; - intel_atomic_commit_fence_wait(intel_state); + intel_atomic_commit_fence_wait(state); - drm_atomic_helper_wait_for_dependencies(state); + drm_atomic_helper_wait_for_dependencies(&state->base); - if (intel_state->modeset) + if (state->modeset) wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - old_intel_crtc_state = to_intel_crtc_state(old_crtc_state); - new_intel_crtc_state = to_intel_crtc_state(new_crtc_state); - intel_crtc = to_intel_crtc(crtc); - + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { if (needs_modeset(new_crtc_state) || - to_intel_crtc_state(new_crtc_state)->update_pipe) { + new_crtc_state->update_pipe) { - put_domains[intel_crtc->pipe] = - modeset_get_crtc_power_domains(crtc, - new_intel_crtc_state); - } - - if (!needs_modeset(new_crtc_state)) - continue; - - intel_pre_plane_update(old_intel_crtc_state, new_intel_crtc_state); - - if (old_crtc_state->active) { - intel_crtc_disable_planes(intel_state, intel_crtc); - - /* - * We need to disable pipe CRC before disabling the pipe, - * or we race against vblank off. - */ - intel_crtc_disable_pipe_crc(intel_crtc); - - dev_priv->display.crtc_disable(old_intel_crtc_state, state); - intel_crtc->active = false; - intel_fbc_disable(intel_crtc); - intel_disable_shared_dpll(old_intel_crtc_state); - - /* - * Underruns don't always raise - * interrupts, so check manually. 
- */ - intel_check_cpu_fifo_underruns(dev_priv); - intel_check_pch_fifo_underruns(dev_priv); - - /* FIXME unify this for all platforms */ - if (!new_crtc_state->active && - !HAS_GMCH(dev_priv) && - dev_priv->display.initial_watermarks) - dev_priv->display.initial_watermarks(intel_state, - new_intel_crtc_state); + put_domains[crtc->pipe] = + modeset_get_crtc_power_domains(new_crtc_state); } } - /* FIXME: Eventually get rid of our intel_crtc->config pointer */ - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) - to_intel_crtc(crtc)->config = to_intel_crtc_state(new_crtc_state); + intel_commit_modeset_disables(state); - if (intel_state->modeset) { - drm_atomic_helper_update_legacy_modeset_state(state->dev, state); + /* FIXME: Eventually get rid of our crtc->config pointer */ + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) + crtc->config = new_crtc_state; + + if (state->modeset) { + drm_atomic_helper_update_legacy_modeset_state(dev, &state->base); intel_set_cdclk_pre_plane_update(dev_priv, - &intel_state->cdclk.actual, + &state->cdclk.actual, &dev_priv->cdclk.actual, - intel_state->cdclk.pipe); + state->cdclk.pipe); /* * SKL workaround: bspec recommends we disable the SAGV when we @@ -13850,31 +15350,38 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (!intel_can_enable_sagv(state)) intel_disable_sagv(dev_priv); - intel_modeset_verify_disabled(dev, state); + intel_modeset_verify_disabled(dev_priv, state); } /* Complete the events for pipes that have now been disabled */ - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { bool modeset = needs_modeset(new_crtc_state); /* Complete events for now disable pipes here. */ - if (modeset && !new_crtc_state->active && new_crtc_state->event) { + if (modeset && !new_crtc_state->hw.active && new_crtc_state->uapi.event) { spin_lock_irq(&dev->event_lock); - drm_crtc_send_vblank_event(crtc, new_crtc_state->event); + drm_crtc_send_vblank_event(&crtc->base, + new_crtc_state->uapi.event); spin_unlock_irq(&dev->event_lock); - new_crtc_state->event = NULL; + new_crtc_state->uapi.event = NULL; } } + if (state->modeset) + intel_encoders_update_prepare(state); + /* Now enable the clocks, plane, pipe, and connectors that we set up. */ - dev_priv->display.update_crtcs(state); + dev_priv->display.commit_modeset_enables(state); + + if (state->modeset) { + intel_encoders_update_complete(state); - if (intel_state->modeset) intel_set_cdclk_post_plane_update(dev_priv, - &intel_state->cdclk.actual, + &state->cdclk.actual, &dev_priv->cdclk.actual, - intel_state->cdclk.pipe); + state->cdclk.pipe); + } /* FIXME: We should call drm_atomic_helper_commit_hw_done() here * already, but still need the state for the delayed optimization. To @@ -13885,16 +15392,15 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * - switch over to the vblank wait helper in the core after that since * we don't need out special handling any more. 
*/ - drm_atomic_helper_wait_for_flip_done(dev, state); + drm_atomic_helper_wait_for_flip_done(dev, &state->base); - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { - new_intel_crtc_state = to_intel_crtc_state(new_crtc_state); - - if (new_crtc_state->active && + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->hw.active && !needs_modeset(new_crtc_state) && - (new_intel_crtc_state->base.color_mgmt_changed || - new_intel_crtc_state->update_pipe)) - intel_color_load_luts(new_intel_crtc_state); + !new_crtc_state->preload_luts && + (new_crtc_state->uapi.color_mgmt_changed || + new_crtc_state->update_pipe)) + intel_color_load_luts(new_crtc_state); } /* @@ -13904,16 +15410,25 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * * TODO: Move this (and other cleanup) to an async worker eventually. */ - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { - new_intel_crtc_state = to_intel_crtc_state(new_crtc_state); + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + /* + * Gen2 reports pipe underruns whenever all planes are disabled. + * So re-enable underrun reporting after some planes get enabled. + * + * We do this before .optimize_watermarks() so that we have a + * chance of catching underruns with the intermediate watermarks + * vs. the new plane configuration. + */ + if (IS_GEN(dev_priv, 2) && planes_enabling(old_crtc_state, new_crtc_state)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); if (dev_priv->display.optimize_watermarks) - dev_priv->display.optimize_watermarks(intel_state, - new_intel_crtc_state); + dev_priv->display.optimize_watermarks(state, crtc); } - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - intel_post_plane_update(to_intel_crtc_state(old_crtc_state)); + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + intel_post_plane_update(state, crtc); if (put_domains[i]) modeset_put_power_domains(dev_priv, put_domains[i]); @@ -13921,15 +15436,19 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state); } - if (intel_state->modeset) - intel_verify_planes(intel_state); + /* Underruns don't always raise interrupts, so check manually */ + intel_check_cpu_fifo_underruns(dev_priv); + intel_check_pch_fifo_underruns(dev_priv); - if (intel_state->modeset && intel_can_enable_sagv(state)) + if (state->modeset) + intel_verify_planes(state); + + if (state->modeset && intel_can_enable_sagv(state)) intel_enable_sagv(dev_priv); - drm_atomic_helper_commit_hw_done(state); + drm_atomic_helper_commit_hw_done(&state->base); - if (intel_state->modeset) { + if (state->modeset) { /* As one of the primary mmio accessors, KMS has a high * likelihood of triggering bugs in unclaimed access. 
After we * finish modesetting, see if an error has been flagged, and if @@ -13939,7 +15458,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore); intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET, wakeref); } - intel_runtime_pm_put(&dev_priv->runtime_pm, intel_state->wakeref); + intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref); /* * Defer the cleanup of the old state to a separate worker to not @@ -13949,14 +15468,14 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * schedule point (cond_resched()) here anyway to keep latencies * down. */ - INIT_WORK(&state->commit_work, intel_atomic_cleanup_work); - queue_work(system_highpri_wq, &state->commit_work); + INIT_WORK(&state->base.commit_work, intel_atomic_cleanup_work); + queue_work(system_highpri_wq, &state->base.commit_work); } static void intel_atomic_commit_work(struct work_struct *work) { - struct drm_atomic_state *state = - container_of(work, struct drm_atomic_state, commit_work); + struct intel_atomic_state *state = + container_of(work, struct intel_atomic_state, base.commit_work); intel_atomic_commit_tail(state); } @@ -13986,42 +15505,39 @@ intel_atomic_commit_ready(struct i915_sw_fence *fence, return NOTIFY_DONE; } -static void intel_atomic_track_fbs(struct drm_atomic_state *state) +static void intel_atomic_track_fbs(struct intel_atomic_state *state) { - struct drm_plane_state *old_plane_state, *new_plane_state; - struct drm_plane *plane; + struct intel_plane_state *old_plane_state, *new_plane_state; + struct intel_plane *plane; int i; - for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) - i915_gem_track_fb(intel_fb_obj(old_plane_state->fb), - intel_fb_obj(new_plane_state->fb), - to_intel_plane(plane)->frontbuffer_bit); + for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state, + new_plane_state, i) + intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->hw.fb), + to_intel_frontbuffer(new_plane_state->hw.fb), + plane->frontbuffer_bit); +} + +static void assert_global_state_locked(struct drm_i915_private *dev_priv) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&dev_priv->drm, crtc) + drm_modeset_lock_assert_held(&crtc->base.mutex); } -/** - * intel_atomic_commit - commit validated state object - * @dev: DRM device - * @state: the top-level driver state object - * @nonblock: nonblocking commit - * - * This function commits a top-level state object that has been validated - * with drm_atomic_helper_check(). - * - * RETURNS - * Zero for success or -errno. - */ static int intel_atomic_commit(struct drm_device *dev, - struct drm_atomic_state *state, + struct drm_atomic_state *_state, bool nonblock) { - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct intel_atomic_state *state = to_intel_atomic_state(_state); struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; - intel_state->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); + state->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - drm_atomic_state_get(state); - i915_sw_fence_init(&intel_state->commit_ready, + drm_atomic_state_get(&state->base); + i915_sw_fence_init(&state->commit_ready, intel_atomic_commit_ready); /* @@ -14041,63 +15557,63 @@ static int intel_atomic_commit(struct drm_device *dev, * FIXME doing watermarks and fb cleanup from a vblank worker * (assuming we had any) would solve these problems. 
*/ - if (INTEL_GEN(dev_priv) < 9 && state->legacy_cursor_update) { + if (INTEL_GEN(dev_priv) < 9 && state->base.legacy_cursor_update) { struct intel_crtc_state *new_crtc_state; struct intel_crtc *crtc; int i; - for_each_new_intel_crtc_in_state(intel_state, crtc, new_crtc_state, i) + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) if (new_crtc_state->wm.need_postvbl_update || new_crtc_state->update_wm_post) - state->legacy_cursor_update = false; + state->base.legacy_cursor_update = false; } - ret = intel_atomic_prepare_commit(dev, state); + ret = intel_atomic_prepare_commit(state); if (ret) { DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); - i915_sw_fence_commit(&intel_state->commit_ready); - intel_runtime_pm_put(&dev_priv->runtime_pm, intel_state->wakeref); + i915_sw_fence_commit(&state->commit_ready); + intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref); return ret; } - ret = drm_atomic_helper_setup_commit(state, nonblock); + ret = drm_atomic_helper_setup_commit(&state->base, nonblock); if (!ret) - ret = drm_atomic_helper_swap_state(state, true); + ret = drm_atomic_helper_swap_state(&state->base, true); if (ret) { - i915_sw_fence_commit(&intel_state->commit_ready); + i915_sw_fence_commit(&state->commit_ready); - drm_atomic_helper_cleanup_planes(dev, state); - intel_runtime_pm_put(&dev_priv->runtime_pm, intel_state->wakeref); + drm_atomic_helper_cleanup_planes(dev, &state->base); + intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref); return ret; } dev_priv->wm.distrust_bios_wm = false; intel_shared_dpll_swap_state(state); intel_atomic_track_fbs(state); - if (intel_state->modeset) { - memcpy(dev_priv->min_cdclk, intel_state->min_cdclk, - sizeof(intel_state->min_cdclk)); - memcpy(dev_priv->min_voltage_level, - intel_state->min_voltage_level, - sizeof(intel_state->min_voltage_level)); - dev_priv->active_crtcs = intel_state->active_crtcs; - dev_priv->cdclk.force_min_cdclk = - intel_state->cdclk.force_min_cdclk; + if (state->global_state_changed) { + assert_global_state_locked(dev_priv); - intel_cdclk_swap_state(intel_state); + memcpy(dev_priv->min_cdclk, state->min_cdclk, + sizeof(state->min_cdclk)); + memcpy(dev_priv->min_voltage_level, state->min_voltage_level, + sizeof(state->min_voltage_level)); + dev_priv->active_pipes = state->active_pipes; + dev_priv->cdclk.force_min_cdclk = state->cdclk.force_min_cdclk; + + intel_cdclk_swap_state(state); } - drm_atomic_state_get(state); - INIT_WORK(&state->commit_work, intel_atomic_commit_work); + drm_atomic_state_get(&state->base); + INIT_WORK(&state->base.commit_work, intel_atomic_commit_work); - i915_sw_fence_commit(&intel_state->commit_ready); - if (nonblock && intel_state->modeset) { - queue_work(dev_priv->modeset_wq, &state->commit_work); + i915_sw_fence_commit(&state->commit_ready); + if (nonblock && state->modeset) { + queue_work(dev_priv->modeset_wq, &state->base.commit_work); } else if (nonblock) { - queue_work(system_unbound_wq, &state->commit_work); + queue_work(dev_priv->flip_wq, &state->base.commit_work); } else { - if (intel_state->modeset) + if (state->modeset) flush_workqueue(dev_priv->modeset_wq); intel_atomic_commit_tail(state); } @@ -14105,18 +15621,6 @@ static int intel_atomic_commit(struct drm_device *dev, return 0; } -static const struct drm_crtc_funcs intel_crtc_funcs = { - .gamma_set = drm_atomic_helper_legacy_gamma_set, - .set_config = drm_atomic_helper_set_config, - .destroy = intel_crtc_destroy, - .page_flip = drm_atomic_helper_page_flip, - .atomic_duplicate_state = 
intel_crtc_duplicate_state, - .atomic_destroy_state = intel_crtc_destroy_state, - .set_crc_source = intel_crtc_set_crc_source, - .verify_crc_source = intel_crtc_verify_crc_source, - .get_crc_sources = intel_crtc_get_crc_sources, -}; - struct wait_rps_boost { struct wait_queue_entry wait; @@ -14136,7 +15640,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait, * vblank without our intervention, so leave RPS alone. */ if (!i915_request_started(rq)) - gen6_rps_boost(rq); + intel_rps_boost(rq); i915_request_put(rq); drm_crtc_vblank_put(wait->crtc); @@ -14177,9 +15681,9 @@ static void add_rps_boost_after_vblank(struct drm_crtc *crtc, static int intel_plane_pin_fb(struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - struct drm_framebuffer *fb = plane_state->base.fb; + struct drm_framebuffer *fb = plane_state->hw.fb; struct i915_vma *vma; if (plane->id == PLANE_CURSOR && @@ -14217,7 +15721,7 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) { struct i915_sched_attr attr = { - .priority = I915_PRIORITY_DISPLAY, + .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY), }; i915_gem_object_wait_priority(obj, 0, &attr); @@ -14226,33 +15730,33 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for - * @new_state: the plane state being prepared + * @_new_plane_state: the plane state being prepared * * Prepares a framebuffer for usage on a display plane. Generally this * involves pinning the underlying object and updating the frontbuffer tracking * bits. Some older platforms need special physical address handling for * cursor planes. * - * Must be called with struct_mutex held. - * * Returns 0 on success, negative error code on failure. 
*/ int intel_prepare_plane_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) + struct drm_plane_state *_new_plane_state) { + struct intel_plane_state *new_plane_state = + to_intel_plane_state(_new_plane_state); struct intel_atomic_state *intel_state = - to_intel_atomic_state(new_state->state); + to_intel_atomic_state(new_plane_state->uapi.state); struct drm_i915_private *dev_priv = to_i915(plane->dev); - struct drm_framebuffer *fb = new_state->fb; + struct drm_framebuffer *fb = new_plane_state->hw.fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); int ret; if (old_obj) { - struct drm_crtc_state *crtc_state = - drm_atomic_get_new_crtc_state(new_state->state, - plane->state->crtc); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(intel_state, + to_intel_crtc(plane->state->crtc)); /* Big Hammer, we also need to ensure that any pending * MI_WAIT_FOR_EVENT inside a user batch buffer on the @@ -14275,9 +15779,9 @@ intel_prepare_plane_fb(struct drm_plane *plane, } } - if (new_state->fence) { /* explicit fencing */ + if (new_plane_state->uapi.fence) { /* explicit fencing */ ret = i915_sw_fence_await_dma_fence(&intel_state->commit_ready, - new_state->fence, + new_plane_state->uapi.fence, I915_FENCE_TIMEOUT, GFP_KERNEL); if (ret < 0) @@ -14291,23 +15795,16 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) return ret; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) { - i915_gem_object_unpin_pages(obj); - return ret; - } - - ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); + ret = intel_plane_pin_fb(new_plane_state); - mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); if (ret) return ret; fb_obj_bump_render_priority(obj); - intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); + i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB); - if (!new_state->fence) { /* implicit fencing */ + if (!new_plane_state->uapi.fence) { /* implicit fencing */ struct dma_fence *fence; ret = i915_sw_fence_await_reservation(&intel_state->commit_ready, @@ -14317,13 +15814,15 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret < 0) return ret; - fence = reservation_object_get_excl_rcu(obj->base.resv); + fence = dma_resv_get_excl_rcu(obj->base.resv); if (fence) { - add_rps_boost_after_vblank(new_state->crtc, fence); + add_rps_boost_after_vblank(new_plane_state->hw.crtc, + fence); dma_fence_put(fence); } } else { - add_rps_boost_after_vblank(new_state->crtc, new_state->fence); + add_rps_boost_after_vblank(new_plane_state->hw.crtc, + new_plane_state->uapi.fence); } /* @@ -14335,7 +15834,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, * maximum clocks following a vblank miss (see do_rps_boost()). */ if (!intel_state->rps_interactive) { - intel_rps_mark_interactive(dev_priv, true); + intel_rps_mark_interactive(&dev_priv->gt.rps, true); intel_state->rps_interactive = true; } @@ -14345,130 +15844,27 @@ intel_prepare_plane_fb(struct drm_plane *plane, /** * intel_cleanup_plane_fb - Cleans up an fb after plane use * @plane: drm plane to clean up for - * @old_state: the state from the previous modeset + * @_old_plane_state: the state from the previous modeset * * Cleans up a framebuffer that has just been removed from a plane. - * - * Must be called with struct_mutex held. 
*/ void intel_cleanup_plane_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) + struct drm_plane_state *_old_plane_state) { + struct intel_plane_state *old_plane_state = + to_intel_plane_state(_old_plane_state); struct intel_atomic_state *intel_state = - to_intel_atomic_state(old_state->state); + to_intel_atomic_state(old_plane_state->uapi.state); struct drm_i915_private *dev_priv = to_i915(plane->dev); if (intel_state->rps_interactive) { - intel_rps_mark_interactive(dev_priv, false); + intel_rps_mark_interactive(&dev_priv->gt.rps, false); intel_state->rps_interactive = false; } /* Should only be called after a successful intel_prepare_plane_fb()! */ - mutex_lock(&dev_priv->drm.struct_mutex); - intel_plane_unpin_fb(to_intel_plane_state(old_state)); - mutex_unlock(&dev_priv->drm.struct_mutex); -} - -int -skl_max_scale(const struct intel_crtc_state *crtc_state, - u32 pixel_format) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int max_scale, mult; - int crtc_clock, max_dotclk, tmpclk1, tmpclk2; - - if (!crtc_state->base.enable) - return DRM_PLANE_HELPER_NO_SCALING; - - crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; - max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; - - if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) - max_dotclk *= 2; - - if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock)) - return DRM_PLANE_HELPER_NO_SCALING; - - /* - * skl max scale is lower of: - * close to 3 but not 3, -1 is for that purpose - * or - * cdclk/crtc_clock - */ - mult = is_planar_yuv_format(pixel_format) ? 2 : 3; - tmpclk1 = (1 << 16) * mult - 1; - tmpclk2 = (1 << 8) * ((max_dotclk << 8) / crtc_clock); - max_scale = min(tmpclk1, tmpclk2); - - return max_scale; -} - -static void intel_begin_crtc_commit(struct intel_atomic_state *state, - struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(state, crtc); - struct intel_crtc_state *new_crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - bool modeset = needs_modeset(&new_crtc_state->base); - - /* Perform vblank evasion around commit operation */ - intel_pipe_update_start(new_crtc_state); - - if (modeset) - goto out; - - if (new_crtc_state->base.color_mgmt_changed || - new_crtc_state->update_pipe) - intel_color_commit(new_crtc_state); - - if (new_crtc_state->update_pipe) - intel_update_pipe_config(old_crtc_state, new_crtc_state); - else if (INTEL_GEN(dev_priv) >= 9) - skl_detach_scalers(new_crtc_state); - - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - bdw_set_pipemisc(new_crtc_state); - -out: - if (dev_priv->display.atomic_update_watermarks) - dev_priv->display.atomic_update_watermarks(state, - new_crtc_state); -} - -void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - - if (!IS_GEN(dev_priv, 2)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); - - if (crtc_state->has_pch_encoder) { - enum pipe pch_transcoder = - intel_crtc_pch_transcoder(crtc); - - intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); - } -} - -static void intel_finish_crtc_commit(struct intel_atomic_state *state, - struct intel_crtc *crtc) -{ - struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(state, crtc); - struct 
intel_crtc_state *new_crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - - intel_pipe_update_end(new_crtc_state); - - if (new_crtc_state->update_pipe && - !needs_modeset(&new_crtc_state->base) && - old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED) - intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); + intel_plane_unpin_fb(old_plane_state); } /** @@ -14523,8 +15919,13 @@ static bool i965_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_RGB565: case DRM_FORMAT_XRGB8888: case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_ABGR8888: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ARGB2101010: + case DRM_FORMAT_ABGR2101010: + case DRM_FORMAT_XBGR16161616F: return modifier == DRM_FORMAT_MOD_LINEAR || modifier == I915_FORMAT_MOD_X_TILED; default: @@ -14558,8 +15959,8 @@ static const struct drm_plane_funcs i8xx_plane_funcs = { }; static int -intel_legacy_cursor_update(struct drm_plane *plane, - struct drm_crtc *crtc, +intel_legacy_cursor_update(struct drm_plane *_plane, + struct drm_crtc *_crtc, struct drm_framebuffer *fb, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, @@ -14567,31 +15968,31 @@ intel_legacy_cursor_update(struct drm_plane *plane, u32 src_w, u32 src_h, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - int ret; - struct drm_plane_state *old_plane_state, *new_plane_state; - struct intel_plane *intel_plane = to_intel_plane(plane); - struct drm_framebuffer *old_fb; + struct intel_plane *plane = to_intel_plane(_plane); + struct intel_crtc *crtc = to_intel_crtc(_crtc); + struct intel_plane_state *old_plane_state = + to_intel_plane_state(plane->base.state); + struct intel_plane_state *new_plane_state; struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->state); + to_intel_crtc_state(crtc->base.state); struct intel_crtc_state *new_crtc_state; + int ret; /* * When crtc is inactive or there is a modeset pending, * wait for it to complete in the slowpath */ - if (!crtc_state->base.active || needs_modeset(&crtc_state->base) || + if (!crtc_state->hw.active || needs_modeset(crtc_state) || crtc_state->update_pipe) goto slow; - old_plane_state = plane->state; /* * Don't do an async update if there is an outstanding commit modifying * the plane. This prevents our async update's changes from getting * overridden by a previous synchronous update's state. */ - if (old_plane_state->commit && - !try_wait_for_completion(&old_plane_state->commit->hw_done)) + if (old_plane_state->uapi.commit && + !try_wait_for_completion(&old_plane_state->uapi.commit->hw_done)) goto slow; /* @@ -14599,57 +16000,52 @@ intel_legacy_cursor_update(struct drm_plane *plane, * take the slowpath. Only changing fb or position should be * in the fastpath. 
*/ - if (old_plane_state->crtc != crtc || - old_plane_state->src_w != src_w || - old_plane_state->src_h != src_h || - old_plane_state->crtc_w != crtc_w || - old_plane_state->crtc_h != crtc_h || - !old_plane_state->fb != !fb) + if (old_plane_state->uapi.crtc != &crtc->base || + old_plane_state->uapi.src_w != src_w || + old_plane_state->uapi.src_h != src_h || + old_plane_state->uapi.crtc_w != crtc_w || + old_plane_state->uapi.crtc_h != crtc_h || + !old_plane_state->uapi.fb != !fb) goto slow; - new_plane_state = intel_plane_duplicate_state(plane); + new_plane_state = to_intel_plane_state(intel_plane_duplicate_state(&plane->base)); if (!new_plane_state) return -ENOMEM; - new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(crtc)); + new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(&crtc->base)); if (!new_crtc_state) { ret = -ENOMEM; goto out_free; } - drm_atomic_set_fb_for_plane(new_plane_state, fb); + drm_atomic_set_fb_for_plane(&new_plane_state->uapi, fb); - new_plane_state->src_x = src_x; - new_plane_state->src_y = src_y; - new_plane_state->src_w = src_w; - new_plane_state->src_h = src_h; - new_plane_state->crtc_x = crtc_x; - new_plane_state->crtc_y = crtc_y; - new_plane_state->crtc_w = crtc_w; - new_plane_state->crtc_h = crtc_h; + new_plane_state->uapi.src_x = src_x; + new_plane_state->uapi.src_y = src_y; + new_plane_state->uapi.src_w = src_w; + new_plane_state->uapi.src_h = src_h; + new_plane_state->uapi.crtc_x = crtc_x; + new_plane_state->uapi.crtc_y = crtc_y; + new_plane_state->uapi.crtc_w = crtc_w; + new_plane_state->uapi.crtc_h = crtc_h; ret = intel_plane_atomic_check_with_state(crtc_state, new_crtc_state, - to_intel_plane_state(old_plane_state), - to_intel_plane_state(new_plane_state)); + old_plane_state, new_plane_state); if (ret) goto out_free; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); + ret = intel_plane_pin_fb(new_plane_state); if (ret) goto out_free; - ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state)); - if (ret) - goto out_unlock; - - intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_FLIP); - - old_fb = old_plane_state->fb; - i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(fb), - intel_plane->frontbuffer_bit); + intel_frontbuffer_flush(to_intel_frontbuffer(new_plane_state->hw.fb), + ORIGIN_FLIP); + intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->hw.fb), + to_intel_frontbuffer(new_plane_state->hw.fb), + plane->frontbuffer_bit); /* Swap plane state */ - plane->state = new_plane_state; + plane->base.state = &new_plane_state->uapi; /* * We cannot swap crtc_state as it may be in use by an atomic commit or @@ -14663,27 +16059,24 @@ intel_legacy_cursor_update(struct drm_plane *plane, */ crtc_state->active_planes = new_crtc_state->active_planes; - if (plane->state->visible) - intel_update_plane(intel_plane, crtc_state, - to_intel_plane_state(plane->state)); + if (new_plane_state->uapi.visible) + intel_update_plane(plane, crtc_state, new_plane_state); else - intel_disable_plane(intel_plane, crtc_state); + intel_disable_plane(plane, crtc_state); - intel_plane_unpin_fb(to_intel_plane_state(old_plane_state)); + intel_plane_unpin_fb(old_plane_state); -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); out_free: if (new_crtc_state) - intel_crtc_destroy_state(crtc, &new_crtc_state->base); + intel_crtc_destroy_state(&crtc->base, &new_crtc_state->uapi); if (ret) - intel_plane_destroy_state(plane, new_plane_state); + intel_plane_destroy_state(&plane->base, &new_plane_state->uapi); else - 
intel_plane_destroy_state(plane, old_plane_state); + intel_plane_destroy_state(&plane->base, &old_plane_state->uapi); return ret; slow: - return drm_atomic_helper_update_plane(plane, crtc, fb, + return drm_atomic_helper_update_plane(&plane->base, &crtc->base, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h, ctx); } @@ -14721,10 +16114,9 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) const struct drm_plane_funcs *plane_funcs; unsigned int supported_rotations; unsigned int possible_crtcs; - const u64 *modifiers; const u32 *formats; int num_formats; - int ret; + int ret, zpos; if (INTEL_GEN(dev_priv) >= 9) return skl_universal_plane_create(dev_priv, pipe, @@ -14753,44 +16145,69 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) fbc->possible_framebuffer_bits |= plane->frontbuffer_bit; } - if (INTEL_GEN(dev_priv) >= 4) { - formats = i965_primary_formats; - num_formats = ARRAY_SIZE(i965_primary_formats); - modifiers = i9xx_format_modifiers; - - plane->max_stride = i9xx_plane_max_stride; - plane->update_plane = i9xx_update_plane; - plane->disable_plane = i9xx_disable_plane; - plane->get_hw_state = i9xx_plane_get_hw_state; - plane->check_plane = i9xx_plane_check; - - plane_funcs = &i965_plane_funcs; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + formats = vlv_primary_formats; + num_formats = ARRAY_SIZE(vlv_primary_formats); + } else if (INTEL_GEN(dev_priv) >= 4) { + /* + * WaFP16GammaEnabling:ivb + * "Workaround : When using the 64-bit format, the plane + * output on each color channel has one quarter amplitude. + * It can be brought up to full amplitude by using pipe + * gamma correction or pipe color space conversion to + * multiply the plane output by four." + * + * There is no dedicated plane gamma for the primary plane, + * and using the pipe gamma/csc could conflict with other + * planes, so we choose not to expose fp16 on IVB primary + * planes. HSW primary planes no longer have this problem. 
+ */ + if (IS_IVYBRIDGE(dev_priv)) { + formats = ivb_primary_formats; + num_formats = ARRAY_SIZE(ivb_primary_formats); + } else { + formats = i965_primary_formats; + num_formats = ARRAY_SIZE(i965_primary_formats); + } } else { formats = i8xx_primary_formats; num_formats = ARRAY_SIZE(i8xx_primary_formats); - modifiers = i9xx_format_modifiers; - - plane->max_stride = i9xx_plane_max_stride; - plane->update_plane = i9xx_update_plane; - plane->disable_plane = i9xx_disable_plane; - plane->get_hw_state = i9xx_plane_get_hw_state; - plane->check_plane = i9xx_plane_check; + } + if (INTEL_GEN(dev_priv) >= 4) + plane_funcs = &i965_plane_funcs; + else plane_funcs = &i8xx_plane_funcs; - } + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + plane->min_cdclk = vlv_plane_min_cdclk; + else if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + plane->min_cdclk = hsw_plane_min_cdclk; + else if (IS_IVYBRIDGE(dev_priv)) + plane->min_cdclk = ivb_plane_min_cdclk; + else + plane->min_cdclk = i9xx_plane_min_cdclk; + + plane->max_stride = i9xx_plane_max_stride; + plane->update_plane = i9xx_update_plane; + plane->disable_plane = i9xx_disable_plane; + plane->get_hw_state = i9xx_plane_get_hw_state; + plane->check_plane = i9xx_plane_check; possible_crtcs = BIT(pipe); if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) ret = drm_universal_plane_init(&dev_priv->drm, &plane->base, possible_crtcs, plane_funcs, - formats, num_formats, modifiers, + formats, num_formats, + i9xx_format_modifiers, DRM_PLANE_TYPE_PRIMARY, "primary %c", pipe_name(pipe)); else ret = drm_universal_plane_init(&dev_priv->drm, &plane->base, possible_crtcs, plane_funcs, - formats, num_formats, modifiers, + formats, num_formats, + i9xx_format_modifiers, DRM_PLANE_TYPE_PRIMARY, "plane %c", plane_name(plane->i9xx_plane)); @@ -14813,6 +16230,9 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) DRM_MODE_ROTATE_0, supported_rotations); + zpos = 0; + drm_plane_create_zpos_immutable_property(&plane->base, zpos); + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); return plane; @@ -14829,7 +16249,7 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, { unsigned int possible_crtcs; struct intel_plane *cursor; - int ret; + int ret, zpos; cursor = intel_plane_alloc(); if (IS_ERR(cursor)) @@ -14878,6 +16298,9 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180); + zpos = RUNTIME_INFO(dev_priv)->num_sprites[pipe] + 1; + drm_plane_create_zpos_immutable_property(&cursor->base, zpos); + drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs); return cursor; @@ -14888,54 +16311,120 @@ fail: return ERR_PTR(ret); } -static void intel_crtc_init_scalers(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) -{ - struct intel_crtc_scaler_state *scaler_state = - &crtc_state->scaler_state; - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int i; +#define INTEL_CRTC_FUNCS \ + .gamma_set = drm_atomic_helper_legacy_gamma_set, \ + .set_config = drm_atomic_helper_set_config, \ + .destroy = intel_crtc_destroy, \ + .page_flip = drm_atomic_helper_page_flip, \ + .atomic_duplicate_state = intel_crtc_duplicate_state, \ + .atomic_destroy_state = intel_crtc_destroy_state, \ + .set_crc_source = intel_crtc_set_crc_source, \ + .verify_crc_source = intel_crtc_verify_crc_source, \ + .get_crc_sources = intel_crtc_get_crc_sources - crtc->num_scalers = RUNTIME_INFO(dev_priv)->num_scalers[crtc->pipe]; - if (!crtc->num_scalers) - return; +static const struct 
drm_crtc_funcs bdw_crtc_funcs = { + INTEL_CRTC_FUNCS, - for (i = 0; i < crtc->num_scalers; i++) { - struct intel_scaler *scaler = &scaler_state->scalers[i]; + .get_vblank_counter = g4x_get_vblank_counter, + .enable_vblank = bdw_enable_vblank, + .disable_vblank = bdw_disable_vblank, +}; + +static const struct drm_crtc_funcs ilk_crtc_funcs = { + INTEL_CRTC_FUNCS, + + .get_vblank_counter = g4x_get_vblank_counter, + .enable_vblank = ilk_enable_vblank, + .disable_vblank = ilk_disable_vblank, +}; + +static const struct drm_crtc_funcs g4x_crtc_funcs = { + INTEL_CRTC_FUNCS, + + .get_vblank_counter = g4x_get_vblank_counter, + .enable_vblank = i965_enable_vblank, + .disable_vblank = i965_disable_vblank, +}; + +static const struct drm_crtc_funcs i965_crtc_funcs = { + INTEL_CRTC_FUNCS, + + .get_vblank_counter = i915_get_vblank_counter, + .enable_vblank = i965_enable_vblank, + .disable_vblank = i965_disable_vblank, +}; + +static const struct drm_crtc_funcs i915gm_crtc_funcs = { + INTEL_CRTC_FUNCS, - scaler->in_use = 0; - scaler->mode = 0; + .get_vblank_counter = i915_get_vblank_counter, + .enable_vblank = i915gm_enable_vblank, + .disable_vblank = i915gm_disable_vblank, +}; + +static const struct drm_crtc_funcs i915_crtc_funcs = { + INTEL_CRTC_FUNCS, + + .get_vblank_counter = i915_get_vblank_counter, + .enable_vblank = i8xx_enable_vblank, + .disable_vblank = i8xx_disable_vblank, +}; + +static const struct drm_crtc_funcs i8xx_crtc_funcs = { + INTEL_CRTC_FUNCS, + + /* no hw vblank counter */ + .enable_vblank = i8xx_enable_vblank, + .disable_vblank = i8xx_disable_vblank, +}; + +static struct intel_crtc *intel_crtc_alloc(void) +{ + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + + crtc = kzalloc(sizeof(*crtc), GFP_KERNEL); + if (!crtc) + return ERR_PTR(-ENOMEM); + + crtc_state = intel_crtc_state_alloc(crtc); + if (!crtc_state) { + kfree(crtc); + return ERR_PTR(-ENOMEM); } - scaler_state->scaler_id = -1; + crtc->base.state = &crtc_state->uapi; + crtc->config = crtc_state; + + return crtc; +} + +static void intel_crtc_free(struct intel_crtc *crtc) +{ + intel_crtc_destroy_state(&crtc->base, crtc->base.state); + kfree(crtc); } static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct intel_crtc *intel_crtc; - struct intel_crtc_state *crtc_state = NULL; - struct intel_plane *primary = NULL; - struct intel_plane *cursor = NULL; + struct intel_plane *primary, *cursor; + const struct drm_crtc_funcs *funcs; + struct intel_crtc *crtc; int sprite, ret; - intel_crtc = kzalloc(sizeof(*intel_crtc), GFP_KERNEL); - if (!intel_crtc) - return -ENOMEM; + crtc = intel_crtc_alloc(); + if (IS_ERR(crtc)) + return PTR_ERR(crtc); - crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL); - if (!crtc_state) { - ret = -ENOMEM; - goto fail; - } - __drm_atomic_helper_crtc_reset(&intel_crtc->base, &crtc_state->base); - intel_crtc->config = crtc_state; + crtc->pipe = pipe; + crtc->num_scalers = RUNTIME_INFO(dev_priv)->num_scalers[pipe]; primary = intel_primary_plane_create(dev_priv, pipe); if (IS_ERR(primary)) { ret = PTR_ERR(primary); goto fail; } - intel_crtc->plane_ids_mask |= BIT(primary->id); + crtc->plane_ids_mask |= BIT(primary->id); for_each_sprite(dev_priv, pipe, sprite) { struct intel_plane *plane; @@ -14945,7 +16434,7 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) ret = PTR_ERR(plane); goto fail; } - intel_crtc->plane_ids_mask |= BIT(plane->id); + crtc->plane_ids_mask |= BIT(plane->id); } cursor = intel_cursor_plane_create(dev_priv, pipe); @@ 
-14953,47 +16442,53 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) ret = PTR_ERR(cursor); goto fail; } - intel_crtc->plane_ids_mask |= BIT(cursor->id); + crtc->plane_ids_mask |= BIT(cursor->id); + + if (HAS_GMCH(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_G4X(dev_priv)) + funcs = &g4x_crtc_funcs; + else if (IS_GEN(dev_priv, 4)) + funcs = &i965_crtc_funcs; + else if (IS_I945GM(dev_priv) || IS_I915GM(dev_priv)) + funcs = &i915gm_crtc_funcs; + else if (IS_GEN(dev_priv, 3)) + funcs = &i915_crtc_funcs; + else + funcs = &i8xx_crtc_funcs; + } else { + if (INTEL_GEN(dev_priv) >= 8) + funcs = &bdw_crtc_funcs; + else + funcs = &ilk_crtc_funcs; + } - ret = drm_crtc_init_with_planes(&dev_priv->drm, &intel_crtc->base, + ret = drm_crtc_init_with_planes(&dev_priv->drm, &crtc->base, &primary->base, &cursor->base, - &intel_crtc_funcs, - "pipe %c", pipe_name(pipe)); + funcs, "pipe %c", pipe_name(pipe)); if (ret) goto fail; - intel_crtc->pipe = pipe; - - /* initialize shared scalers */ - intel_crtc_init_scalers(intel_crtc, crtc_state); - BUG_ON(pipe >= ARRAY_SIZE(dev_priv->pipe_to_crtc_mapping) || dev_priv->pipe_to_crtc_mapping[pipe] != NULL); - dev_priv->pipe_to_crtc_mapping[pipe] = intel_crtc; + dev_priv->pipe_to_crtc_mapping[pipe] = crtc; if (INTEL_GEN(dev_priv) < 9) { enum i9xx_plane_id i9xx_plane = primary->i9xx_plane; BUG_ON(i9xx_plane >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || dev_priv->plane_to_crtc_mapping[i9xx_plane] != NULL); - dev_priv->plane_to_crtc_mapping[i9xx_plane] = intel_crtc; + dev_priv->plane_to_crtc_mapping[i9xx_plane] = crtc; } - drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); - - intel_color_init(intel_crtc); + intel_color_init(crtc); - WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe); + WARN_ON(drm_crtc_index(&crtc->base) != crtc->pipe); return 0; fail: - /* - * drm_mode_config_cleanup() will free up any - * crtcs/planes already initialized. 
- */ - kfree(crtc_state); - kfree(intel_crtc); + intel_crtc_free(crtc); return ret; } @@ -15015,21 +16510,32 @@ int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, return 0; } -static int intel_encoder_clones(struct intel_encoder *encoder) +static u32 intel_encoder_possible_clones(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; struct intel_encoder *source_encoder; - int index_mask = 0; - int entry = 0; + u32 possible_clones = 0; for_each_intel_encoder(dev, source_encoder) { if (encoders_cloneable(encoder, source_encoder)) - index_mask |= (1 << entry); + possible_clones |= drm_encoder_mask(&source_encoder->base); + } + + return possible_clones; +} + +static u32 intel_encoder_possible_crtcs(struct intel_encoder *encoder) +{ + struct drm_device *dev = encoder->base.dev; + struct intel_crtc *crtc; + u32 possible_crtcs = 0; - entry++; + for_each_intel_crtc(dev, crtc) { + if (encoder->pipe_mask & BIT(crtc->pipe)) + possible_crtcs |= drm_crtc_mask(&crtc->base); } - return index_mask; + return possible_crtcs; } static bool ilk_has_edp_a(struct drm_i915_private *dev_priv) @@ -15111,15 +16617,26 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_pps_init(dev_priv); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return; - if (IS_ELKHARTLAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 12) { + intel_ddi_init(dev_priv, PORT_A); + intel_ddi_init(dev_priv, PORT_B); + intel_ddi_init(dev_priv, PORT_D); + intel_ddi_init(dev_priv, PORT_E); + intel_ddi_init(dev_priv, PORT_F); + intel_ddi_init(dev_priv, PORT_G); + intel_ddi_init(dev_priv, PORT_H); + intel_ddi_init(dev_priv, PORT_I); + icl_dsi_init(dev_priv); + } else if (IS_ELKHARTLAKE(dev_priv)) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); + intel_ddi_init(dev_priv, PORT_D); icl_dsi_init(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 11) { + } else if (IS_GEN(dev_priv, 11)) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); @@ -15321,9 +16838,10 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_psr_init(dev_priv); for_each_intel_encoder(&dev_priv->drm, encoder) { - encoder->base.possible_crtcs = encoder->crtc_mask; + encoder->base.possible_crtcs = + intel_encoder_possible_crtcs(encoder); encoder->base.possible_clones = - intel_encoder_clones(encoder); + intel_encoder_possible_clones(encoder); } intel_init_pch_refclk(dev_priv); @@ -15334,15 +16852,9 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - struct drm_i915_gem_object *obj = intel_fb_obj(fb); drm_framebuffer_cleanup(fb); - - i915_gem_object_lock(obj); - WARN_ON(!obj->framebuffer_references--); - i915_gem_object_unlock(obj); - - i915_gem_object_put(obj); + intel_frontbuffer_put(intel_fb->frontbuffer); kfree(intel_fb); } @@ -15370,7 +16882,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, struct drm_i915_gem_object *obj = intel_fb_obj(fb); i915_gem_object_flush_if_display(obj); - intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); + intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB); return 0; } @@ -15392,8 +16904,11 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, int ret = -EINVAL; int i; + intel_fb->frontbuffer = intel_frontbuffer_get(obj); 
+ if (!intel_fb->frontbuffer) + return -ENOMEM; + i915_gem_object_lock(obj); - obj->framebuffer_references++; tiling = i915_gem_object_get_tiling(obj); stride = i915_gem_object_get_stride(obj); i915_gem_object_unlock(obj); @@ -15460,8 +16975,11 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, } /* FIXME need to adjust LINOFF/TILEOFF accordingly. */ - if (mode_cmd->offsets[0] != 0) + if (mode_cmd->offsets[0] != 0) { + DRM_DEBUG_KMS("plane 0 offset (0x%08x) must be 0\n", + mode_cmd->offsets[0]); goto err; + } drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); @@ -15474,26 +16992,23 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, } stride_alignment = intel_fb_stride_alignment(fb, i); - - /* - * Display WA #0531: skl,bxt,kbl,glk - * - * Render decompression and plane width > 3840 - * combined with horizontal panning requires the - * plane stride to be a multiple of 4. We'll just - * require the entire fb to accommodate that to avoid - * potential runtime errors at plane configuration time. - */ - if (IS_GEN(dev_priv, 9) && i == 0 && fb->width > 3840 && - is_ccs_modifier(fb->modifier)) - stride_alignment *= 4; - if (fb->pitches[i] & (stride_alignment - 1)) { DRM_DEBUG_KMS("plane %d pitch (%d) must be at least %u byte aligned\n", i, fb->pitches[i], stride_alignment); goto err; } + if (is_gen12_ccs_plane(fb, i)) { + int ccs_aux_stride = gen12_ccs_aux_stride(fb, i); + + if (fb->pitches[i] != ccs_aux_stride) { + DRM_DEBUG_KMS("ccs aux plane %d pitch (%d) must be %d\n", + i, + fb->pitches[i], ccs_aux_stride); + goto err; + } + } + fb->obj[i] = &obj->base; } @@ -15510,9 +17025,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, return 0; err: - i915_gem_object_lock(obj); - obj->framebuffer_references--; - i915_gem_object_unlock(obj); + intel_frontbuffer_put(intel_fb->frontbuffer); return ret; } @@ -15530,8 +17043,7 @@ intel_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-ENOENT); fb = intel_framebuffer_create(obj, &mode_cmd); - if (IS_ERR(fb)) - i915_gem_object_put(obj); + i915_gem_object_put(obj); return fb; } @@ -15584,8 +17096,14 @@ intel_mode_valid(struct drm_device *dev, DRM_MODE_FLAG_CLKDIV2)) return MODE_BAD; - if (INTEL_GEN(dev_priv) >= 9 || - IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) { + /* Transcoder timing limits */ + if (INTEL_GEN(dev_priv) >= 11) { + hdisplay_max = 16384; + vdisplay_max = 8192; + htotal_max = 16384; + vtotal_max = 8192; + } else if (INTEL_GEN(dev_priv) >= 9 || + IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) { hdisplay_max = 8192; /* FDI max 4096 handled elsewhere */ vdisplay_max = 4096; htotal_max = 8192; @@ -15614,6 +17132,56 @@ intel_mode_valid(struct drm_device *dev, mode->vtotal > vtotal_max) return MODE_V_ILLEGAL; + if (INTEL_GEN(dev_priv) >= 5) { + if (mode->hdisplay < 64 || + mode->htotal - mode->hdisplay < 32) + return MODE_H_ILLEGAL; + + if (mode->vtotal - mode->vdisplay < 5) + return MODE_V_ILLEGAL; + } else { + if (mode->htotal - mode->hdisplay < 32) + return MODE_H_ILLEGAL; + + if (mode->vtotal - mode->vdisplay < 3) + return MODE_V_ILLEGAL; + } + + return MODE_OK; +} + +enum drm_mode_status +intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, + const struct drm_display_mode *mode) +{ + int plane_width_max, plane_height_max; + + /* + * intel_mode_valid() should be + * sufficient on older platforms. 
+ */ + if (INTEL_GEN(dev_priv) < 9) + return MODE_OK; + + /* + * Most people will probably want a fullscreen + * plane so let's not advertize modes that are + * too big for that. + */ + if (INTEL_GEN(dev_priv) >= 11) { + plane_width_max = 5120; + plane_height_max = 4320; + } else { + plane_width_max = 5120; + plane_height_max = 4096; + } + + if (mode->hdisplay > plane_width_max) + return MODE_H_ILLEGAL; + + if (mode->vdisplay > plane_height_max) + return MODE_V_ILLEGAL; + return MODE_OK; } @@ -15638,29 +17206,28 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) intel_init_cdclk_hooks(dev_priv); if (INTEL_GEN(dev_priv) >= 9) { - dev_priv->display.get_pipe_config = haswell_get_pipe_config; + dev_priv->display.get_pipe_config = hsw_get_pipe_config; dev_priv->display.get_initial_plane_config = - skylake_get_initial_plane_config; - dev_priv->display.crtc_compute_clock = - haswell_crtc_compute_clock; - dev_priv->display.crtc_enable = haswell_crtc_enable; - dev_priv->display.crtc_disable = haswell_crtc_disable; + skl_get_initial_plane_config; + dev_priv->display.crtc_compute_clock = hsw_crtc_compute_clock; + dev_priv->display.crtc_enable = hsw_crtc_enable; + dev_priv->display.crtc_disable = hsw_crtc_disable; } else if (HAS_DDI(dev_priv)) { - dev_priv->display.get_pipe_config = haswell_get_pipe_config; + dev_priv->display.get_pipe_config = hsw_get_pipe_config; dev_priv->display.get_initial_plane_config = i9xx_get_initial_plane_config; dev_priv->display.crtc_compute_clock = - haswell_crtc_compute_clock; - dev_priv->display.crtc_enable = haswell_crtc_enable; - dev_priv->display.crtc_disable = haswell_crtc_disable; + hsw_crtc_compute_clock; + dev_priv->display.crtc_enable = hsw_crtc_enable; + dev_priv->display.crtc_disable = hsw_crtc_disable; } else if (HAS_PCH_SPLIT(dev_priv)) { - dev_priv->display.get_pipe_config = ironlake_get_pipe_config; + dev_priv->display.get_pipe_config = ilk_get_pipe_config; dev_priv->display.get_initial_plane_config = i9xx_get_initial_plane_config; dev_priv->display.crtc_compute_clock = - ironlake_crtc_compute_clock; - dev_priv->display.crtc_enable = ironlake_crtc_enable; - dev_priv->display.crtc_disable = ironlake_crtc_disable; + ilk_crtc_compute_clock; + dev_priv->display.crtc_enable = ilk_crtc_enable; + dev_priv->display.crtc_disable = ilk_crtc_disable; } else if (IS_CHERRYVIEW(dev_priv)) { dev_priv->display.get_pipe_config = i9xx_get_pipe_config; dev_priv->display.get_initial_plane_config = @@ -15706,58 +17273,26 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } if (IS_GEN(dev_priv, 5)) { - dev_priv->display.fdi_link_train = ironlake_fdi_link_train; + dev_priv->display.fdi_link_train = ilk_fdi_link_train; } else if (IS_GEN(dev_priv, 6)) { dev_priv->display.fdi_link_train = gen6_fdi_link_train; } else if (IS_IVYBRIDGE(dev_priv)) { /* FIXME: detect B0+ stepping and use auto training */ dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train; - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - dev_priv->display.fdi_link_train = hsw_fdi_link_train; } if (INTEL_GEN(dev_priv) >= 9) - dev_priv->display.update_crtcs = skl_update_crtcs; + dev_priv->display.commit_modeset_enables = skl_commit_modeset_enables; else - dev_priv->display.update_crtcs = intel_update_crtcs; -} + dev_priv->display.commit_modeset_enables = intel_commit_modeset_enables; -static i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv) -{ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - return VLV_VGACNTRL; - else if 
(INTEL_GEN(dev_priv) >= 5) - return CPU_VGACNTRL; - else - return VGACNTRL; } -/* Disable the VGA plane that we never use */ -static void i915_disable_vga(struct drm_i915_private *dev_priv) +void intel_modeset_init_hw(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; - u8 sr1; - i915_reg_t vga_reg = i915_vgacntrl_reg(dev_priv); - - /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ - vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); - outb(SR01, VGA_SR_INDEX); - sr1 = inb(VGA_SR_DATA); - outb(sr1 | 1<<5, VGA_SR_DATA); - vga_put(pdev, VGA_RSRC_LEGACY_IO); - udelay(300); - - I915_WRITE(vga_reg, VGA_DISP_DISABLE); - POSTING_READ(vga_reg); -} - -void intel_modeset_init_hw(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - intel_update_cdclk(dev_priv); - intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); - dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; + intel_update_cdclk(i915); + intel_dump_cdclk_state(&i915->cdclk.hw, "Current CDCLK"); + i915->cdclk.logical = i915->cdclk.actual = i915->cdclk.hw; } /* @@ -15775,8 +17310,8 @@ static void sanitize_watermarks(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); struct drm_atomic_state *state; struct intel_atomic_state *intel_state; - struct drm_crtc *crtc; - struct drm_crtc_state *cstate; + struct intel_crtc *crtc; + struct intel_crtc_state *crtc_state; struct drm_modeset_acquire_ctx ctx; int ret; int i; @@ -15831,13 +17366,11 @@ retry: } /* Write calculated watermark values back */ - for_each_new_crtc_in_state(state, crtc, cstate, i) { - struct intel_crtc_state *cs = to_intel_crtc_state(cstate); - - cs->wm.need_postvbl_update = true; - dev_priv->display.optimize_watermarks(intel_state, cs); + for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) { + crtc_state->wm.need_postvbl_update = true; + dev_priv->display.optimize_watermarks(intel_state, crtc); - to_intel_crtc_state(crtc->state)->wm = cs->wm; + to_intel_crtc_state(crtc->base.state)->wm = crtc_state->wm; } put_state: @@ -15867,8 +17400,7 @@ static int intel_initial_commit(struct drm_device *dev) { struct drm_atomic_state *state = NULL; struct drm_modeset_acquire_ctx ctx; - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; + struct intel_crtc *crtc; int ret = 0; state = drm_atomic_state_alloc(dev); @@ -15880,15 +17412,17 @@ static int intel_initial_commit(struct drm_device *dev) retry: state->acquire_ctx = &ctx; - drm_for_each_crtc(crtc, dev) { - crtc_state = drm_atomic_get_crtc_state(state, crtc); + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + intel_atomic_get_crtc_state(state, crtc); + if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); goto out; } - if (crtc_state->active) { - ret = drm_atomic_add_affected_planes(state, crtc); + if (crtc_state->hw.active) { + ret = drm_atomic_add_affected_planes(state, &crtc->base); if (ret) goto out; @@ -15898,7 +17432,7 @@ retry: * having a proper LUT loaded. Remove once we * have readout for pipe gamma enable. 
*/ - crtc_state->color_mgmt_changed = true; + crtc_state->uapi.color_mgmt_changed = true; } } @@ -15919,117 +17453,111 @@ out: return ret; } -int intel_modeset_init(struct drm_device *dev) +static void intel_mode_config_init(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - enum pipe pipe; - struct intel_crtc *crtc; - int ret; - - dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + struct drm_mode_config *mode_config = &i915->drm.mode_config; - drm_mode_config_init(dev); + drm_mode_config_init(&i915->drm); - ret = intel_bw_init(dev_priv); - if (ret) - return ret; - - dev->mode_config.min_width = 0; - dev->mode_config.min_height = 0; - - dev->mode_config.preferred_depth = 24; - dev->mode_config.prefer_shadow = 1; - - dev->mode_config.allow_fb_modifiers = true; - - dev->mode_config.funcs = &intel_mode_funcs; - - init_llist_head(&dev_priv->atomic_helper.free_list); - INIT_WORK(&dev_priv->atomic_helper.free_work, - intel_atomic_helper_free_state_worker); + mode_config->min_width = 0; + mode_config->min_height = 0; - intel_init_quirks(dev_priv); + mode_config->preferred_depth = 24; + mode_config->prefer_shadow = 1; - intel_fbc_init(dev_priv); - - intel_init_pm(dev_priv); - - /* - * There may be no VBT; and if the BIOS enabled SSC we can - * just keep using it to avoid unnecessary flicker. Whereas if the - * BIOS isn't using it, don't assume it will work even if the VBT - * indicates as much. - */ - if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) { - bool bios_lvds_use_ssc = !!(I915_READ(PCH_DREF_CONTROL) & - DREF_SSC1_ENABLE); + mode_config->allow_fb_modifiers = true; - if (dev_priv->vbt.lvds_use_ssc != bios_lvds_use_ssc) { - DRM_DEBUG_KMS("SSC %sabled by BIOS, overriding VBT which says %sabled\n", - bios_lvds_use_ssc ? "en" : "dis", - dev_priv->vbt.lvds_use_ssc ? "en" : "dis"); - dev_priv->vbt.lvds_use_ssc = bios_lvds_use_ssc; - } - } + mode_config->funcs = &intel_mode_funcs; /* * Maximum framebuffer dimensions, chosen to match * the maximum render engine surface size on gen4+. */ - if (INTEL_GEN(dev_priv) >= 7) { - dev->mode_config.max_width = 16384; - dev->mode_config.max_height = 16384; - } else if (INTEL_GEN(dev_priv) >= 4) { - dev->mode_config.max_width = 8192; - dev->mode_config.max_height = 8192; - } else if (IS_GEN(dev_priv, 3)) { - dev->mode_config.max_width = 4096; - dev->mode_config.max_height = 4096; + if (INTEL_GEN(i915) >= 7) { + mode_config->max_width = 16384; + mode_config->max_height = 16384; + } else if (INTEL_GEN(i915) >= 4) { + mode_config->max_width = 8192; + mode_config->max_height = 8192; + } else if (IS_GEN(i915, 3)) { + mode_config->max_width = 4096; + mode_config->max_height = 4096; } else { - dev->mode_config.max_width = 2048; - dev->mode_config.max_height = 2048; + mode_config->max_width = 2048; + mode_config->max_height = 2048; } - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { - dev->mode_config.cursor_width = IS_I845G(dev_priv) ? 64 : 512; - dev->mode_config.cursor_height = 1023; - } else if (IS_GEN(dev_priv, 2)) { - dev->mode_config.cursor_width = 64; - dev->mode_config.cursor_height = 64; + if (IS_I845G(i915) || IS_I865G(i915)) { + mode_config->cursor_width = IS_I845G(i915) ? 
64 : 512; + mode_config->cursor_height = 1023; + } else if (IS_GEN(i915, 2)) { + mode_config->cursor_width = 64; + mode_config->cursor_height = 64; } else { - dev->mode_config.cursor_width = 256; - dev->mode_config.cursor_height = 256; + mode_config->cursor_width = 256; + mode_config->cursor_height = 256; } +} - dev->mode_config.fb_base = ggtt->gmadr.start; +int intel_modeset_init(struct drm_i915_private *i915) +{ + struct drm_device *dev = &i915->drm; + enum pipe pipe; + struct intel_crtc *crtc; + int ret; - DRM_DEBUG_KMS("%d display pipe%s available.\n", - INTEL_INFO(dev_priv)->num_pipes, - INTEL_INFO(dev_priv)->num_pipes > 1 ? "s" : ""); + i915->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + i915->flip_wq = alloc_workqueue("i915_flip", WQ_HIGHPRI | + WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); - for_each_pipe(dev_priv, pipe) { - ret = intel_crtc_init(dev_priv, pipe); - if (ret) { - drm_mode_config_cleanup(dev); - return ret; + intel_mode_config_init(i915); + + ret = intel_bw_init(i915); + if (ret) + return ret; + + init_llist_head(&i915->atomic_helper.free_list); + INIT_WORK(&i915->atomic_helper.free_work, + intel_atomic_helper_free_state_worker); + + intel_init_quirks(i915); + + intel_fbc_init(i915); + + intel_init_pm(i915); + + intel_panel_sanitize_ssc(i915); + + intel_gmbus_setup(i915); + + DRM_DEBUG_KMS("%d display pipe%s available.\n", + INTEL_NUM_PIPES(i915), + INTEL_NUM_PIPES(i915) > 1 ? "s" : ""); + + if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) { + for_each_pipe(i915, pipe) { + ret = intel_crtc_init(i915, pipe); + if (ret) { + drm_mode_config_cleanup(dev); + return ret; + } } } intel_shared_dpll_init(dev); - intel_update_fdi_pll_freq(dev_priv); + intel_update_fdi_pll_freq(i915); - intel_update_czclk(dev_priv); - intel_modeset_init_hw(dev); + intel_update_czclk(i915); + intel_modeset_init_hw(i915); - intel_hdcp_component_init(dev_priv); + intel_hdcp_component_init(i915); - if (dev_priv->max_cdclk_freq == 0) - intel_update_max_cdclk(dev_priv); + if (i915->max_cdclk_freq == 0) + intel_update_max_cdclk(i915); /* Just disable it once at startup */ - i915_disable_vga(dev_priv); - intel_setup_outputs(dev_priv); + intel_vga_disable(i915); + intel_setup_outputs(i915); drm_modeset_lock_all(dev); intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx); @@ -16048,8 +17576,7 @@ int intel_modeset_init(struct drm_device *dev) * can even allow for smooth boot transitions if the BIOS * fb is large enough for the active pipe configuration. */ - dev_priv->display.get_initial_plane_config(crtc, - &plane_config); + i915->display.get_initial_plane_config(crtc, &plane_config); /* * If the fb is shared between multiple heads, we'll @@ -16063,7 +17590,7 @@ int intel_modeset_init(struct drm_device *dev) * Note that we need to do this after reconstructing the BIOS fb's * since the watermark calculation done here will use pstate->fb. 
*/ - if (!HAS_GMCH(dev_priv)) + if (!HAS_GMCH(i915)) sanitize_watermarks(dev); /* @@ -16228,31 +17755,76 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv, (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == PIPE_A); } -static void intel_sanitize_crtc(struct intel_crtc *crtc, - struct drm_modeset_acquire_ctx *ctx) +static void intel_sanitize_frame_start_delay(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - /* Clear any frame start delays used for debugging left by the BIOS */ - if (crtc->active && !transcoder_is_dsi(cpu_transcoder)) { + if (INTEL_GEN(dev_priv) >= 9 || + IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) { + i915_reg_t reg = CHICKEN_TRANS(cpu_transcoder); + u32 val; + + if (transcoder_is_dsi(cpu_transcoder)) + return; + + val = I915_READ(reg); + val &= ~HSW_FRAME_START_DELAY_MASK; + val |= HSW_FRAME_START_DELAY(0); + I915_WRITE(reg, val); + } else { i915_reg_t reg = PIPECONF(cpu_transcoder); + u32 val; - I915_WRITE(reg, - I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK); + val = I915_READ(reg); + val &= ~PIPECONF_FRAME_START_DELAY_MASK; + val |= PIPECONF_FRAME_START_DELAY(0); + I915_WRITE(reg, val); } - if (crtc_state->base.active) { + if (!crtc_state->has_pch_encoder) + return; + + if (HAS_PCH_IBX(dev_priv)) { + i915_reg_t reg = PCH_TRANSCONF(crtc->pipe); + u32 val; + + val = I915_READ(reg); + val &= ~TRANS_FRAME_START_DELAY_MASK; + val |= TRANS_FRAME_START_DELAY(0); + I915_WRITE(reg, val); + } else { + enum pipe pch_transcoder = intel_crtc_pch_transcoder(crtc); + i915_reg_t reg = TRANS_CHICKEN2(pch_transcoder); + u32 val; + + val = I915_READ(reg); + val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; + val |= TRANS_CHICKEN2_FRAME_START_DELAY(0); + I915_WRITE(reg, val); + } +} + +static void intel_sanitize_crtc(struct intel_crtc *crtc, + struct drm_modeset_acquire_ctx *ctx) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); + + if (crtc_state->hw.active) { struct intel_plane *plane; + /* Clear any frame start delays used for debugging left by the BIOS */ + intel_sanitize_frame_start_delay(crtc_state); + /* Disable everything but the primary plane */ for_each_intel_plane_on_crtc(dev, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); - if (plane_state->base.visible && + if (plane_state->uapi.visible && plane->base.type != DRM_PLANE_TYPE_PRIMARY) intel_plane_disable_noatomic(crtc, plane); } @@ -16269,10 +17841,10 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. */ - if (crtc_state->base.active && !intel_crtc_has_encoders(crtc)) - intel_crtc_disable_noatomic(&crtc->base, ctx); + if (crtc_state->hw.active && !intel_crtc_has_encoders(crtc)) + intel_crtc_disable_noatomic(crtc, ctx); - if (crtc_state->base.active || HAS_GMCH(dev_priv)) { + if (crtc_state->hw.active || HAS_GMCH(dev_priv)) { /* * We start out with underrun reporting disabled to avoid races. * For correct bookkeeping mark this on active crtcs. 
@@ -16303,7 +17875,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); /* * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram @@ -16316,7 +17888,7 @@ static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) * road. */ return IS_GEN(dev_priv, 6) && - crtc_state->base.active && + crtc_state->hw.active && crtc_state->shared_dpll && crtc_state->port_clock == 0; } @@ -16333,7 +17905,7 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) * encoder is active and trying to read from a pipe) and the * pipe itself being active. */ bool has_active_crtc = crtc_state && - crtc_state->base.active; + crtc_state->hw.active; if (crtc_state && has_bogus_dpll_config(crtc_state)) { DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n", @@ -16388,39 +17960,6 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) icl_sanitize_encoder_pll_mapping(encoder); } -void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv) -{ - i915_reg_t vga_reg = i915_vgacntrl_reg(dev_priv); - - if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) { - DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); - i915_disable_vga(dev_priv); - } -} - -void i915_redisable_vga(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - - /* - * This function can be called both from intel_modeset_setup_hw_state or - * at a very early point in our resume sequence, where the power well - * structures are not yet restored. Since this function is at a very - * paranoid "someone might have enabled VGA while we were not looking" - * level, just check if the power well is enabled instead of trying to - * follow the "don't touch the power well if we don't need it" policy - * the rest of the driver uses. 
- */ - wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_VGA); - if (!wakeref) - return; - - i915_redisable_vga_power_on(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_VGA, wakeref); -} - /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct drm_i915_private *dev_priv) { @@ -16464,28 +18003,28 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) struct drm_connector_list_iter conn_iter; int i; - dev_priv->active_crtcs = 0; + dev_priv->active_pipes = 0; for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); - __drm_atomic_helper_crtc_destroy_state(&crtc_state->base); - memset(crtc_state, 0, sizeof(*crtc_state)); - __drm_atomic_helper_crtc_reset(&crtc->base, &crtc_state->base); + __drm_atomic_helper_crtc_destroy_state(&crtc_state->uapi); + intel_crtc_free_hw_state(crtc_state); + intel_crtc_state_reset(crtc_state, crtc); - crtc_state->base.active = crtc_state->base.enable = + crtc_state->hw.active = crtc_state->hw.enable = dev_priv->display.get_pipe_config(crtc, crtc_state); - crtc->base.enabled = crtc_state->base.enable; - crtc->active = crtc_state->base.active; + crtc->base.enabled = crtc_state->hw.enable; + crtc->active = crtc_state->hw.active; - if (crtc_state->base.active) - dev_priv->active_crtcs |= 1 << crtc->pipe; + if (crtc_state->hw.active) + dev_priv->active_pipes |= BIT(crtc->pipe); DRM_DEBUG_KMS("[CRTC:%d:%s] hw state readout: %s\n", crtc->base.base.id, crtc->base.name, - enableddisabled(crtc_state->base.active)); + enableddisabled(crtc_state->hw.active)); } readout_plane_state(dev_priv); @@ -16495,12 +18034,19 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pll->on = pll->info->funcs->get_hw_state(dev_priv, pll, &pll->state.hw_state); + + if (IS_ELKHARTLAKE(dev_priv) && pll->on && + pll->info->id == DPLL_ID_EHL_DPLL4) { + pll->wakeref = intel_display_power_get(dev_priv, + POWER_DOMAIN_DPLL_DC_OFF); + } + pll->state.crtc_mask = 0; for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); - if (crtc_state->base.active && + if (crtc_state->hw.active && crtc_state->shared_dpll == pll) pll->state.crtc_mask |= 1 << crtc->pipe; } @@ -16534,24 +18080,28 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) drm_connector_list_iter_begin(dev, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->get_hw_state(connector)) { + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + connector->base.dpms = DRM_MODE_DPMS_ON; encoder = connector->encoder; connector->base.encoder = &encoder->base; - if (encoder->base.crtc && - encoder->base.crtc->state->active) { + crtc = to_intel_crtc(encoder->base.crtc); + crtc_state = crtc ? to_intel_crtc_state(crtc->base.state) : NULL; + + if (crtc_state && crtc_state->hw.active) { /* * This has to be done during hardware readout * because anything calling .crtc_disable may * rely on the connector_mask being accurate. 
*/ - encoder->base.crtc->state->connector_mask |= + crtc_state->uapi.connector_mask |= drm_connector_mask(&connector->base); - encoder->base.crtc->state->encoder_mask |= + crtc_state->uapi.encoder_mask |= drm_encoder_mask(&encoder->base); } - } else { connector->base.dpms = DRM_MODE_DPMS_OFF; connector->base.encoder = NULL; @@ -16570,13 +18120,15 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) struct intel_plane *plane; int min_cdclk = 0; - memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); - if (crtc_state->base.active) { - intel_mode_from_pipe_config(&crtc->base.mode, crtc_state); - crtc->base.mode.hdisplay = crtc_state->pipe_src_w; - crtc->base.mode.vdisplay = crtc_state->pipe_src_h; - intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); - WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode)); + if (crtc_state->hw.active) { + struct drm_display_mode *mode = &crtc_state->hw.mode; + + intel_mode_from_pipe_config(&crtc_state->hw.adjusted_mode, + crtc_state); + + *mode = crtc_state->hw.adjusted_mode; + mode->hdisplay = crtc_state->pipe_src_w; + mode->vdisplay = crtc_state->pipe_src_h; /* * The initial mode needs to be set in order to keep @@ -16587,25 +18139,15 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) * set a flag to indicate that a full recalculation is * needed on the next commit. */ - crtc_state->base.mode.private_flags = I915_MODE_FLAG_INHERITED; + mode->private_flags = I915_MODE_FLAG_INHERITED; intel_crtc_compute_pixel_rate(crtc_state); - if (dev_priv->display.modeset_calc_cdclk) { - min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); - if (WARN_ON(min_cdclk < 0)) - min_cdclk = 0; - } + intel_crtc_update_active_timings(crtc_state); - drm_calc_timestamping_constants(&crtc->base, - &crtc_state->base.adjusted_mode); - update_scanline_offset(crtc_state); + intel_crtc_copy_hw_to_uapi_state(crtc_state); } - dev_priv->min_cdclk[crtc->pipe] = min_cdclk; - dev_priv->min_voltage_level[crtc->pipe] = - crtc_state->min_voltage_level; - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -16614,11 +18156,37 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) * FIXME don't have the fb yet, so can't * use intel_plane_data_rate() :( */ - if (plane_state->base.visible) + if (plane_state->uapi.visible) crtc_state->data_rate[plane->id] = 4 * crtc_state->pixel_rate; + /* + * FIXME don't have the fb yet, so can't + * use plane->min_cdclk() :( + */ + if (plane_state->uapi.visible && plane->min_cdclk) { + if (crtc_state->double_wide || + INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + crtc_state->min_cdclk[plane->id] = + DIV_ROUND_UP(crtc_state->pixel_rate, 2); + else + crtc_state->min_cdclk[plane->id] = + crtc_state->pixel_rate; + } + DRM_DEBUG_KMS("[PLANE:%d:%s] min_cdclk %d kHz\n", + plane->base.base.id, plane->base.name, + crtc_state->min_cdclk[plane->id]); + } + + if (crtc_state->hw.active) { + min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); + if (WARN_ON(min_cdclk < 0)) + min_cdclk = 0; } + dev_priv->min_cdclk[crtc->pipe] = min_cdclk; + dev_priv->min_voltage_level[crtc->pipe] = + crtc_state->min_voltage_level; + intel_bw_crtc_update(bw_state, crtc_state); intel_pipe_config_sanity_check(dev_priv, crtc_state); @@ -16650,8 +18218,11 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) static void intel_early_display_was(struct drm_i915_private *dev_priv) { - /* Display WA 
#1185 WaDisableDARBFClkGating:cnl,glk */ - if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + /* + * Display WA #1185 WaDisableDARBFClkGating:cnl,glk,icl,ehl,tgl + * Also known as Wa_14010480278. + */ + if (IS_GEN_RANGE(dev_priv, 10, 12) || IS_GEMINILAKE(dev_priv)) I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | DARBF_GATING_DIS); @@ -16732,7 +18303,6 @@ intel_modeset_setup_hw_state(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc_state *crtc_state; struct intel_encoder *encoder; struct intel_crtc *crtc; intel_wakeref_t wakeref; @@ -16744,6 +18314,17 @@ intel_modeset_setup_hw_state(struct drm_device *dev, intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ + + /* Sanitize the TypeC port mode upfront, encoders depend on this */ + for_each_intel_encoder(dev, encoder) { + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + + /* We need to sanitize only the MST primary port. */ + if (encoder->type != INTEL_OUTPUT_DP_MST && + intel_phy_is_tc(dev_priv, phy)) + intel_tc_port_sanitize(enc_to_dig_port(encoder)); + } + get_encoder_power_domains(dev_priv); if (HAS_PCH_IBX(dev_priv)) @@ -16754,11 +18335,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev, * waits, so we need vblank interrupts restored beforehand. */ for_each_intel_crtc(&dev_priv->drm, crtc) { - crtc_state = to_intel_crtc_state(crtc->base.state); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); drm_crtc_vblank_reset(&crtc->base); - if (crtc_state->base.active) + if (crtc_state->hw.active) intel_crtc_vblank_on(crtc_state); } @@ -16768,7 +18350,9 @@ intel_modeset_setup_hw_state(struct drm_device *dev, intel_sanitize_encoder(encoder); for_each_intel_crtc(&dev_priv->drm, crtc) { - crtc_state = to_intel_crtc_state(crtc->base.state); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + intel_sanitize_crtc(crtc, ctx); intel_dump_pipe_config(crtc_state, NULL, "[setup_hw_state]"); } @@ -16801,17 +18385,16 @@ intel_modeset_setup_hw_state(struct drm_device *dev, } for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); u64 put_domains; - crtc_state = to_intel_crtc_state(crtc->base.state); - put_domains = modeset_get_crtc_power_domains(&crtc->base, crtc_state); + put_domains = modeset_get_crtc_power_domains(crtc_state); if (WARN_ON(put_domains)) modeset_put_power_domains(dev_priv, put_domains); } intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref); - - intel_fbc_init_pipe_state(dev_priv); } void intel_display_resume(struct drm_device *dev) @@ -16848,13 +18431,13 @@ void intel_display_resume(struct drm_device *dev) drm_atomic_state_put(state); } -static void intel_hpd_poll_fini(struct drm_device *dev) +static void intel_hpd_poll_fini(struct drm_i915_private *i915) { struct intel_connector *connector; struct drm_connector_list_iter conn_iter; /* Kill all the work that may have been queued by hpd. 
*/ - drm_connector_list_iter_begin(dev, &conn_iter); + drm_connector_list_iter_begin(&i915->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->modeset_retry_work.func) cancel_work_sync(&connector->modeset_retry_work); @@ -16866,78 +18449,58 @@ static void intel_hpd_poll_fini(struct drm_device *dev) drm_connector_list_iter_end(&conn_iter); } -void intel_modeset_cleanup(struct drm_device *dev) +void intel_modeset_driver_remove(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); - - flush_workqueue(dev_priv->modeset_wq); + flush_workqueue(i915->flip_wq); + flush_workqueue(i915->modeset_wq); - flush_work(&dev_priv->atomic_helper.free_work); - WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list)); + flush_work(&i915->atomic_helper.free_work); + WARN_ON(!llist_empty(&i915->atomic_helper.free_list)); /* * Interrupts and polling as the first thing to avoid creating havoc. * Too much stuff here (turning of connectors, ...) would * experience fancy races otherwise. */ - intel_irq_uninstall(dev_priv); + intel_irq_uninstall(i915); /* * Due to the hpd irq storm handling the hotplug work can re-arm the * poll handlers. Hence disable polling after hpd handling is shut down. */ - intel_hpd_poll_fini(dev); + intel_hpd_poll_fini(i915); + + /* + * MST topology needs to be suspended so we don't have any calls to + * fbdev after it's finalized. MST will be destroyed later as part of + * drm_mode_config_cleanup() + */ + intel_dp_mst_suspend(i915); /* poll work can call into fbdev, hence clean that up afterwards */ - intel_fbdev_fini(dev_priv); + intel_fbdev_fini(i915); intel_unregister_dsm_handler(); - intel_fbc_global_disable(dev_priv); + intel_fbc_global_disable(i915); /* flush any delayed tasks or pending work */ flush_scheduled_work(); - intel_hdcp_component_fini(dev_priv); + intel_hdcp_component_fini(i915); - drm_mode_config_cleanup(dev); + drm_mode_config_cleanup(&i915->drm); - intel_overlay_cleanup(dev_priv); + intel_overlay_cleanup(i915); - intel_gmbus_teardown(dev_priv); + intel_gmbus_teardown(i915); - destroy_workqueue(dev_priv->modeset_wq); + intel_bw_cleanup(i915); - intel_fbc_cleanup_cfb(dev_priv); -} - -/* - * set vga decode state - true == enable VGA decode - */ -int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, bool state) -{ - unsigned reg = INTEL_GEN(dev_priv) >= 6 ? 
SNB_GMCH_CTRL : INTEL_GMCH_CTRL; - u16 gmch_ctrl; - - if (pci_read_config_word(dev_priv->bridge_dev, reg, &gmch_ctrl)) { - DRM_ERROR("failed to read control word\n"); - return -EIO; - } + destroy_workqueue(i915->flip_wq); + destroy_workqueue(i915->modeset_wq); - if (!!(gmch_ctrl & INTEL_GMCH_VGA_DISABLE) == !state) - return 0; - - if (state) - gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE; - else - gmch_ctrl |= INTEL_GMCH_VGA_DISABLE; - - if (pci_write_config_word(dev_priv->bridge_dev, reg, gmch_ctrl)) { - DRM_ERROR("failed to write control word\n"); - return -EIO; - } - - return 0; + intel_fbc_cleanup_cfb(i915); } #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) @@ -16982,7 +18545,7 @@ struct intel_display_error_state { u32 vtotal; u32 vblank; u32 vsync; - } transcoder[4]; + } transcoder[5]; }; struct intel_display_error_state * @@ -16993,13 +18556,14 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) TRANSCODER_A, TRANSCODER_B, TRANSCODER_C, + TRANSCODER_D, TRANSCODER_EDP, }; int i; BUILD_BUG_ON(ARRAY_SIZE(transcoders) != ARRAY_SIZE(error->transcoder)); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return NULL; error = kzalloc(sizeof(*error), GFP_ATOMIC); @@ -17078,7 +18642,7 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m, if (!error) return; - err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev_priv)->num_pipes); + err_printf(m, "Num Pipes: %d\n", INTEL_NUM_PIPES(dev_priv)); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) err_printf(m, "PWR_WELL_CTL2: %08x\n", error->power_well_driver); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index ee6b8194a459..028aab728514 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -1,5 +1,5 @@ /* - * Copyright © 2006-2017 Intel Corporation + * Copyright © 2006-2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,8 +28,33 @@ #include <drm/drm_util.h> #include <drm/i915_drm.h> +enum link_m_n_set; +struct dpll; +struct drm_connector; +struct drm_device; +struct drm_display_mode; +struct drm_encoder; +struct drm_file; +struct drm_format_info; +struct drm_framebuffer; +struct drm_i915_error_state_buf; +struct drm_i915_gem_object; struct drm_i915_private; +struct drm_modeset_acquire_ctx; +struct drm_plane; +struct drm_plane_state; +struct i915_ggtt_view; +struct intel_crtc; +struct intel_crtc_state; +struct intel_digital_port; +struct intel_dp; +struct intel_encoder; +struct intel_load_detect_pipe; +struct intel_plane; struct intel_plane_state; +struct intel_remapped_info; +struct intel_rotation_info; +struct intel_crtc_state; enum i915_gpio { GPIOA, @@ -45,6 +70,8 @@ enum i915_gpio { GPIOK, GPIOL, GPIOM, + GPION, + GPIOO, }; /* @@ -58,6 +85,7 @@ enum pipe { PIPE_A = 0, PIPE_B, PIPE_C, + PIPE_D, _PIPE_EDP, I915_MAX_PIPES = _PIPE_EDP @@ -66,6 +94,7 @@ enum pipe { #define pipe_name(p) ((p) + 'A') enum transcoder { + INVALID_TRANSCODER = -1, /* * The following transcoders have a 1:1 transcoder -> pipe mapping, * keep their values fixed: the code assumes that TRANSCODER_A=0, the @@ -75,6 +104,7 @@ enum transcoder { TRANSCODER_A = PIPE_A, TRANSCODER_B = PIPE_B, TRANSCODER_C = PIPE_C, + TRANSCODER_D = PIPE_D, /* * The following transcoders can map to any pipe, their enum value @@ -98,6 +128,8 @@ static inline const char 
*transcoder_name(enum transcoder transcoder) return "B"; case TRANSCODER_C: return "C"; + case TRANSCODER_D: + return "D"; case TRANSCODER_EDP: return "EDP"; case TRANSCODER_DSI_A: @@ -154,6 +186,24 @@ enum plane_id { for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ for_each_if((__crtc)->plane_ids_mask & BIT(__p)) +enum port { + PORT_NONE = -1, + + PORT_A = 0, + PORT_B, + PORT_C, + PORT_D, + PORT_E, + PORT_F, + PORT_G, + PORT_H, + PORT_I, + + I915_MAX_PORTS +}; + +#define port_name(p) ((p) + 'A') + /* * Ports identifier referenced from other drivers. * Expected to remain stable over time @@ -173,6 +223,12 @@ static inline const char *port_identifier(enum port port) return "Port E"; case PORT_F: return "Port F"; + case PORT_G: + return "Port G"; + case PORT_H: + return "Port H"; + case PORT_I: + return "Port I"; default: return "<invalid>"; } @@ -185,14 +241,15 @@ enum tc_port { PORT_TC2, PORT_TC3, PORT_TC4, + PORT_TC5, + PORT_TC6, I915_MAX_TC_PORTS }; -enum tc_port_type { - TC_PORT_UNKNOWN = 0, - TC_PORT_TYPEC, - TC_PORT_TBT, +enum tc_port_mode { + TC_PORT_TBT_ALT, + TC_PORT_DP_ALT, TC_PORT_LEGACY, }; @@ -216,6 +273,7 @@ enum aux_ch { AUX_CH_D, AUX_CH_E, /* ICL+ */ AUX_CH_F, + AUX_CH_G, }; #define aux_ch_name(a) ((a) + 'A') @@ -229,11 +287,35 @@ struct intel_link_m_n { u32 link_n; }; +enum phy { + PHY_NONE = -1, + + PHY_A = 0, + PHY_B, + PHY_C, + PHY_D, + PHY_E, + PHY_F, + PHY_G, + PHY_H, + PHY_I, + + I915_MAX_PHYS +}; + +#define phy_name(a) ((a) + 'A') + +enum phy_fia { + FIA1, + FIA2, + FIA3, +}; + #define for_each_pipe(__dev_priv, __p) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) + for ((__p) = 0; (__p) < INTEL_NUM_PIPES(__dev_priv); (__p)++) #define for_each_pipe_masked(__dev_priv, __p, __mask) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ + for ((__p) = 0; (__p) < INTEL_NUM_PIPES(__dev_priv); (__p)++) \ for_each_if((__mask) & BIT(__p)) #define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \ @@ -250,10 +332,17 @@ struct intel_link_m_n { (__s) < RUNTIME_INFO(__dev_priv)->num_sprites[(__p)]; \ (__s)++) -#define for_each_port_masked(__port, __ports_mask) \ - for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) \ +#define for_each_port(__port) \ + for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) + +#define for_each_port_masked(__port, __ports_mask) \ + for_each_port(__port) \ for_each_if((__ports_mask) & BIT(__port)) +#define for_each_phy_masked(__phy, __phys_mask) \ + for ((__phy) = PHY_A; (__phy) < I915_MAX_PHYS; (__phy)++) \ + for_each_if((__phys_mask) & BIT(__phy)) + #define for_each_crtc(dev, crtc) \ list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head) @@ -267,7 +356,7 @@ struct intel_link_m_n { &(dev)->mode_config.plane_list, \ base.head) \ for_each_if((plane_mask) & \ - drm_plane_mask(&intel_plane->base))) + drm_plane_mask(&intel_plane->base)) #define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \ list_for_each_entry(intel_plane, \ @@ -291,6 +380,13 @@ struct intel_link_m_n { &(dev)->mode_config.encoder_list, \ base.head) +#define for_each_intel_encoder_mask(dev, intel_encoder, encoder_mask) \ + list_for_each_entry(intel_encoder, \ + &(dev)->mode_config.encoder_list, \ + base.head) \ + for_each_if((encoder_mask) & \ + drm_encoder_mask(&intel_encoder->base)) + #define for_each_intel_dp(dev, intel_encoder) \ for_each_intel_encoder(dev, intel_encoder) \ for_each_if(intel_encoder_is_dp(intel_encoder)) @@ -348,14 +444,210 @@ struct intel_link_m_n { (__i)++) \ 
for_each_if(crtc) +#define for_each_oldnew_intel_crtc_in_state_reverse(__state, crtc, old_crtc_state, new_crtc_state, __i) \ + for ((__i) = (__state)->base.dev->mode_config.num_crtc - 1; \ + (__i) >= 0 && \ + ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ + (old_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].old_state), \ + (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ + (__i)--) \ + for_each_if(crtc) + +#define intel_atomic_crtc_state_for_each_plane_state( \ + plane, plane_state, \ + crtc_state) \ + for_each_intel_plane_mask(((crtc_state)->uapi.state->dev), (plane), \ + ((crtc_state)->uapi.plane_mask)) \ + for_each_if ((plane_state = \ + to_intel_plane_state(__drm_atomic_get_current_plane_state((crtc_state)->uapi.state, &plane->base)))) + +#define for_each_new_intel_connector_in_state(__state, connector, new_connector_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.num_connector; \ + (__i)++) \ + for_each_if ((__state)->base.connectors[__i].ptr && \ + ((connector) = to_intel_connector((__state)->base.connectors[__i].ptr), \ + (new_connector_state) = to_intel_digital_connector_state((__state)->base.connectors[__i].new_state), 1)) + void intel_link_compute_m_n(u16 bpp, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, - bool constant_n); + bool constant_n, bool fec_enable); bool is_ccs_modifier(u64 modifier); +int intel_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane); void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv); u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, u32 pixel_format, u64 modifier); bool intel_plane_can_remap(const struct intel_plane_state *plane_state); +enum drm_mode_status +intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, + const struct drm_display_mode *mode); +enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port); +bool is_trans_port_sync_mode(const struct intel_crtc_state *state); + +void intel_plane_destroy(struct drm_plane *plane); +void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state); +void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); +void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); +enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc); +int vlv_get_hpll_vco(struct drm_i915_private *dev_priv); +int vlv_get_cck_clock(struct drm_i915_private *dev_priv, + const char *name, u32 reg, int ref_freq); +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg); +void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv); +void lpt_disable_iclkip(struct drm_i915_private *dev_priv); +void intel_init_display_hooks(struct drm_i915_private *dev_priv); +unsigned int intel_fb_xy_to_linear(int x, int y, + const struct intel_plane_state *state, + int plane); +unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, + int color_plane, unsigned int height); +void intel_add_fb_offsets(int *x, int *y, + const struct intel_plane_state *state, int plane); +unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info); +unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info); +bool intel_has_pending_fb_unpin(struct drm_i915_private *dev_priv); +int intel_display_suspend(struct drm_device *dev); +void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv); +void intel_encoder_destroy(struct drm_encoder *encoder); +struct 
drm_display_mode * +intel_encoder_current_mode(struct intel_encoder *encoder); +bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy); +bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy); +enum tc_port intel_port_to_tc(struct drm_i915_private *dev_priv, + enum port port); +int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc); +void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state); + +int ilk_get_lanes_required(int target_clock, int link_bw, int bpp); +void vlv_wait_port_ready(struct drm_i915_private *dev_priv, + struct intel_digital_port *dport, + unsigned int expected_mask); +int intel_get_load_detect_pipe(struct drm_connector *connector, + struct intel_load_detect_pipe *old, + struct drm_modeset_acquire_ctx *ctx); +void intel_release_load_detect_pipe(struct drm_connector *connector, + struct intel_load_detect_pipe *old, + struct drm_modeset_acquire_ctx *ctx); +struct i915_vma * +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, + const struct i915_ggtt_view *view, + bool uses_fence, + unsigned long *out_flags); +void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags); +struct drm_framebuffer * +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); +int intel_prepare_plane_fb(struct drm_plane *plane, + struct drm_plane_state *new_state); +void intel_cleanup_plane_fb(struct drm_plane *plane, + struct drm_plane_state *old_state); + +void assert_pch_transcoder_disabled(struct drm_i915_private *dev_priv, + enum pipe pipe); + +int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe, + const struct dpll *dpll); +void vlv_force_pll_off(struct drm_i915_private *dev_priv, enum pipe pipe); +int lpt_get_iclkip(struct drm_i915_private *dev_priv); +bool intel_fuzzy_clock_check(int clock1, int clock2); + +void intel_prepare_reset(struct drm_i915_private *dev_priv); +void intel_finish_reset(struct drm_i915_private *dev_priv); +void intel_dp_get_m_n(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config); +void intel_dp_set_m_n(const struct intel_crtc_state *crtc_state, + enum link_m_n_set m_n); +int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n); +bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, + struct dpll *best_clock); +int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); + +bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state); +void hsw_enable_ips(const struct intel_crtc_state *crtc_state); +void hsw_disable_ips(const struct intel_crtc_state *crtc_state); +enum intel_display_power_domain intel_port_to_power_domain(enum port port); +enum intel_display_power_domain +intel_aux_power_domain(struct intel_digital_port *dig_port); +void intel_mode_from_pipe_config(struct drm_display_mode *mode, + struct intel_crtc_state *pipe_config); +void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state); + +u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center); +int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); +void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state); +void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state); +u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); +u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state); 
+u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); +u32 skl_plane_ctl_crtc(const struct intel_crtc_state *crtc_state); +u32 skl_plane_stride(const struct intel_plane_state *plane_state, + int plane); +int skl_check_plane_surface(struct intel_plane_state *plane_state); +int i9xx_check_plane_surface(struct intel_plane_state *plane_state); +int skl_format_to_fourcc(int format, bool rgb_order, bool alpha); +unsigned int i9xx_plane_max_stride(struct intel_plane *plane, + u32 pixel_format, u64 modifier, + unsigned int rotation); +int bdw_get_pipemisc_bpp(struct intel_crtc *crtc); + +struct intel_display_error_state * +intel_display_capture_error_state(struct drm_i915_private *dev_priv); +void intel_display_print_error_state(struct drm_i915_error_state_buf *e, + struct intel_display_error_state *error); + +bool +intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info, + uint64_t modifier); + +/* modesetting */ +void intel_modeset_init_hw(struct drm_i915_private *i915); +int intel_modeset_init(struct drm_i915_private *i915); +void intel_modeset_driver_remove(struct drm_i915_private *i915); +void intel_display_resume(struct drm_device *dev); +void intel_init_pch_refclk(struct drm_i915_private *dev_priv); + +/* modesetting asserts */ +void assert_panel_unlocked(struct drm_i915_private *dev_priv, + enum pipe pipe); +void assert_pll(struct drm_i915_private *dev_priv, + enum pipe pipe, bool state); +#define assert_pll_enabled(d, p) assert_pll(d, p, true) +#define assert_pll_disabled(d, p) assert_pll(d, p, false) +void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state); +#define assert_dsi_pll_enabled(d) assert_dsi_pll(d, true) +#define assert_dsi_pll_disabled(d) assert_dsi_pll(d, false) +void assert_fdi_rx_pll(struct drm_i915_private *dev_priv, + enum pipe pipe, bool state); +#define assert_fdi_rx_pll_enabled(d, p) assert_fdi_rx_pll(d, p, true) +#define assert_fdi_rx_pll_disabled(d, p) assert_fdi_rx_pll(d, p, false) +void assert_pipe(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, bool state); +#define assert_pipe_enabled(d, t) assert_pipe(d, t, true) +#define assert_pipe_disabled(d, t) assert_pipe(d, t, false) + +/* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and + * WARN_ON()) for hw state sanity checks to check for unexpected conditions + * which may not necessarily be a user visible problem. This will either + * WARN() or DRM_ERROR() depending on the verbose_checks moduleparam, to + * enable distros and users to tailor their preferred amount of i915 abrt + * spam. + */ +#define I915_STATE_WARN(condition, format...) 
({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + if (!WARN(i915_modparams.verbose_state_checks, format)) \ + DRM_ERROR(format); \ + unlikely(__ret_warn_on); \ +}) + +#define I915_STATE_WARN_ON(x) \ + I915_STATE_WARN((x), "%s", "WARN_ON(" __stringify(x) ")") #endif diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 2d1939db108f..21561acfa3ac 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -3,8 +3,6 @@ * Copyright © 2019 Intel Corporation */ -#include <linux/vgaarb.h> - #include "display/intel_crt.h" #include "display/intel_dp.h" @@ -13,10 +11,13 @@ #include "intel_cdclk.h" #include "intel_combo_phy.h" #include "intel_csr.h" +#include "intel_display_power.h" +#include "intel_display_types.h" #include "intel_dpio_phy.h" -#include "intel_drv.h" #include "intel_hotplug.h" #include "intel_sideband.h" +#include "intel_tc.h" +#include "intel_vga.h" bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, enum i915_power_well_id power_well_id); @@ -33,22 +34,28 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PIPE_B"; case POWER_DOMAIN_PIPE_C: return "PIPE_C"; + case POWER_DOMAIN_PIPE_D: + return "PIPE_D"; case POWER_DOMAIN_PIPE_A_PANEL_FITTER: return "PIPE_A_PANEL_FITTER"; case POWER_DOMAIN_PIPE_B_PANEL_FITTER: return "PIPE_B_PANEL_FITTER"; case POWER_DOMAIN_PIPE_C_PANEL_FITTER: return "PIPE_C_PANEL_FITTER"; + case POWER_DOMAIN_PIPE_D_PANEL_FITTER: + return "PIPE_D_PANEL_FITTER"; case POWER_DOMAIN_TRANSCODER_A: return "TRANSCODER_A"; case POWER_DOMAIN_TRANSCODER_B: return "TRANSCODER_B"; case POWER_DOMAIN_TRANSCODER_C: return "TRANSCODER_C"; + case POWER_DOMAIN_TRANSCODER_D: + return "TRANSCODER_D"; case POWER_DOMAIN_TRANSCODER_EDP: return "TRANSCODER_EDP"; - case POWER_DOMAIN_TRANSCODER_EDP_VDSC: - return "TRANSCODER_EDP_VDSC"; + case POWER_DOMAIN_TRANSCODER_VDSC_PW2: + return "TRANSCODER_VDSC_PW2"; case POWER_DOMAIN_TRANSCODER_DSI_A: return "TRANSCODER_DSI_A"; case POWER_DOMAIN_TRANSCODER_DSI_C: @@ -65,6 +72,12 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PORT_DDI_E_LANES"; case POWER_DOMAIN_PORT_DDI_F_LANES: return "PORT_DDI_F_LANES"; + case POWER_DOMAIN_PORT_DDI_G_LANES: + return "PORT_DDI_G_LANES"; + case POWER_DOMAIN_PORT_DDI_H_LANES: + return "PORT_DDI_H_LANES"; + case POWER_DOMAIN_PORT_DDI_I_LANES: + return "PORT_DDI_I_LANES"; case POWER_DOMAIN_PORT_DDI_A_IO: return "PORT_DDI_A_IO"; case POWER_DOMAIN_PORT_DDI_B_IO: @@ -77,6 +90,12 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PORT_DDI_E_IO"; case POWER_DOMAIN_PORT_DDI_F_IO: return "PORT_DDI_F_IO"; + case POWER_DOMAIN_PORT_DDI_G_IO: + return "PORT_DDI_G_IO"; + case POWER_DOMAIN_PORT_DDI_H_IO: + return "PORT_DDI_H_IO"; + case POWER_DOMAIN_PORT_DDI_I_IO: + return "PORT_DDI_I_IO"; case POWER_DOMAIN_PORT_DSI: return "PORT_DSI"; case POWER_DOMAIN_PORT_CRT: @@ -99,16 +118,28 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "AUX_E"; case POWER_DOMAIN_AUX_F: return "AUX_F"; + case POWER_DOMAIN_AUX_G: + return "AUX_G"; + case POWER_DOMAIN_AUX_H: + return "AUX_H"; + case POWER_DOMAIN_AUX_I: + return "AUX_I"; case POWER_DOMAIN_AUX_IO_A: return "AUX_IO_A"; - case POWER_DOMAIN_AUX_TBT1: - return "AUX_TBT1"; - case POWER_DOMAIN_AUX_TBT2: - return "AUX_TBT2"; - case POWER_DOMAIN_AUX_TBT3: - return "AUX_TBT3"; - case 
POWER_DOMAIN_AUX_TBT4: - return "AUX_TBT4"; + case POWER_DOMAIN_AUX_C_TBT: + return "AUX_C_TBT"; + case POWER_DOMAIN_AUX_D_TBT: + return "AUX_D_TBT"; + case POWER_DOMAIN_AUX_E_TBT: + return "AUX_E_TBT"; + case POWER_DOMAIN_AUX_F_TBT: + return "AUX_F_TBT"; + case POWER_DOMAIN_AUX_G_TBT: + return "AUX_G_TBT"; + case POWER_DOMAIN_AUX_H_TBT: + return "AUX_H_TBT"; + case POWER_DOMAIN_AUX_I_TBT: + return "AUX_I_TBT"; case POWER_DOMAIN_GMBUS: return "GMBUS"; case POWER_DOMAIN_INIT: @@ -117,6 +148,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "MODESET"; case POWER_DOMAIN_GT_IRQ: return "GT_IRQ"; + case POWER_DOMAIN_DPLL_DC_OFF: + return "DPLL_DC_OFF"; default: MISSING_CASE(domain); return "?"; @@ -233,23 +266,8 @@ bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv, u8 irq_pipe_mask, bool has_vga) { - struct pci_dev *pdev = dev_priv->drm.pdev; - - /* - * After we re-enable the power well, if we touch VGA register 0x3d5 - * we'll get unclaimed register interrupts. This stops after we write - * anything to the VGA MSR register. The vgacon module uses this - * register all the time, so if we unbind our driver and, as a - * consequence, bind vgacon, we'll get stuck in an infinite loop at - * console_unlock(). So make here we touch the VGA MSR register, making - * sure vgacon can keep working normally without triggering interrupts - * and error messages. - */ - if (has_vga) { - vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); - outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); - vga_put(pdev, VGA_RSRC_LEGACY_IO); - } + if (has_vga) + intel_vga_reset_io_mem(dev_priv); if (irq_pipe_mask) gen8_irq_power_well_post_enable(dev_priv, irq_pipe_mask); @@ -269,11 +287,14 @@ static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv, int pw_idx = power_well->desc->hsw.idx; /* Timeout for PW1:10 us, AUX:not specified, other PWs:20 us. */ - WARN_ON(intel_wait_for_register(&dev_priv->uncore, - regs->driver, - HSW_PWR_WELL_CTL_STATE(pw_idx), - HSW_PWR_WELL_CTL_STATE(pw_idx), - 1)); + if (intel_de_wait_for_set(dev_priv, regs->driver, + HSW_PWR_WELL_CTL_STATE(pw_idx), 1)) { + DRM_DEBUG_KMS("%s power well enable timeout\n", + power_well->desc->name); + + /* An AUX timeout is expected if the TBT DP tunnel is down. 
*/ + WARN_ON(!power_well->desc->hsw.is_tc_tbt); + } } static u32 hsw_power_well_requesters(struct drm_i915_private *dev_priv, @@ -324,9 +345,8 @@ static void gen9_wait_for_power_well_fuses(struct drm_i915_private *dev_priv, enum skl_power_gate pg) { /* Timeout 5us for PG#0, for other PGs 1us */ - WARN_ON(intel_wait_for_register(&dev_priv->uncore, SKL_FUSE_STATUS, - SKL_FUSE_PG_DIST_STATUS(pg), - SKL_FUSE_PG_DIST_STATUS(pg), 1)); + WARN_ON(intel_de_wait_for_set(dev_priv, SKL_FUSE_STATUS, + SKL_FUSE_PG_DIST_STATUS(pg), 1)); } static void hsw_power_well_enable(struct drm_i915_private *dev_priv, @@ -388,7 +408,7 @@ static void hsw_power_well_disable(struct drm_i915_private *dev_priv, hsw_wait_for_power_well_disable(dev_priv, power_well); } -#define ICL_AUX_PW_TO_PORT(pw_idx) ((pw_idx) - ICL_PW_CTL_IDX_AUX_A) +#define ICL_AUX_PW_TO_PHY(pw_idx) ((pw_idx) - ICL_PW_CTL_IDX_AUX_A) static void icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, @@ -396,21 +416,24 @@ icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, { const struct i915_power_well_regs *regs = power_well->desc->hsw.regs; int pw_idx = power_well->desc->hsw.idx; - enum port port = ICL_AUX_PW_TO_PORT(pw_idx); + enum phy phy = ICL_AUX_PW_TO_PHY(pw_idx); u32 val; + WARN_ON(!IS_ICELAKE(dev_priv)); + val = I915_READ(regs->driver); I915_WRITE(regs->driver, val | HSW_PWR_WELL_CTL_REQ(pw_idx)); - val = I915_READ(ICL_PORT_CL_DW12(port)); - I915_WRITE(ICL_PORT_CL_DW12(port), val | ICL_LANE_ENABLE_AUX); + if (INTEL_GEN(dev_priv) < 12) { + val = I915_READ(ICL_PORT_CL_DW12(phy)); + I915_WRITE(ICL_PORT_CL_DW12(phy), val | ICL_LANE_ENABLE_AUX); + } hsw_wait_for_power_well_enable(dev_priv, power_well); /* Display WA #1178: icl */ - if (IS_ICELAKE(dev_priv) && - pw_idx >= ICL_PW_CTL_IDX_AUX_A && pw_idx <= ICL_PW_CTL_IDX_AUX_B && - !intel_bios_is_port_edp(dev_priv, port)) { + if (pw_idx >= ICL_PW_CTL_IDX_AUX_A && pw_idx <= ICL_PW_CTL_IDX_AUX_B && + !intel_bios_is_port_edp(dev_priv, (enum port)phy)) { val = I915_READ(ICL_AUX_ANAOVRD1(pw_idx)); val |= ICL_AUX_ANAOVRD1_ENABLE | ICL_AUX_ANAOVRD1_LDO_BYPASS; I915_WRITE(ICL_AUX_ANAOVRD1(pw_idx), val); @@ -423,11 +446,13 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, { const struct i915_power_well_regs *regs = power_well->desc->hsw.regs; int pw_idx = power_well->desc->hsw.idx; - enum port port = ICL_AUX_PW_TO_PORT(pw_idx); + enum phy phy = ICL_AUX_PW_TO_PHY(pw_idx); u32 val; - val = I915_READ(ICL_PORT_CL_DW12(port)); - I915_WRITE(ICL_PORT_CL_DW12(port), val & ~ICL_LANE_ENABLE_AUX); + WARN_ON(!IS_ICELAKE(dev_priv)); + + val = I915_READ(ICL_PORT_CL_DW12(phy)); + I915_WRITE(ICL_PORT_CL_DW12(phy), val & ~ICL_LANE_ENABLE_AUX); val = I915_READ(regs->driver); I915_WRITE(regs->driver, val & ~HSW_PWR_WELL_CTL_REQ(pw_idx)); @@ -441,24 +466,119 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, #define ICL_TBT_AUX_PW_TO_CH(pw_idx) \ ((pw_idx) - ICL_PW_CTL_IDX_AUX_TBT1 + AUX_CH_C) +static enum aux_ch icl_tc_phy_aux_ch(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + int pw_idx = power_well->desc->hsw.idx; + + return power_well->desc->hsw.is_tc_tbt ? 
ICL_TBT_AUX_PW_TO_CH(pw_idx) : + ICL_AUX_PW_TO_CH(pw_idx); +} + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) + +static u64 async_put_domains_mask(struct i915_power_domains *power_domains); + +static int power_well_async_ref_count(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + int refs = hweight64(power_well->desc->domains & + async_put_domains_mask(&dev_priv->power_domains)); + + WARN_ON(refs > power_well->count); + + return refs; +} + +static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + enum aux_ch aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well); + struct intel_digital_port *dig_port = NULL; + struct intel_encoder *encoder; + + /* Bypass the check if all references are released asynchronously */ + if (power_well_async_ref_count(dev_priv, power_well) == + power_well->count) + return; + + aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well); + + for_each_intel_encoder(&dev_priv->drm, encoder) { + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + + if (!intel_phy_is_tc(dev_priv, phy)) + continue; + + /* We'll check the MST primary port */ + if (encoder->type == INTEL_OUTPUT_DP_MST) + continue; + + dig_port = enc_to_dig_port(encoder); + if (WARN_ON(!dig_port)) + continue; + + if (dig_port->aux_ch != aux_ch) { + dig_port = NULL; + continue; + } + + break; + } + + if (WARN_ON(!dig_port)) + return; + + WARN_ON(!intel_tc_port_ref_held(dig_port)); +} + +#else + +static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ +} + +#endif + +#define TGL_AUX_PW_TO_TC_PORT(pw_idx) ((pw_idx) - TGL_PW_CTL_IDX_AUX_TC1) + static void icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - int pw_idx = power_well->desc->hsw.idx; - bool is_tbt = power_well->desc->hsw.is_tc_tbt; - enum aux_ch aux_ch; + enum aux_ch aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well); u32 val; - aux_ch = is_tbt ? 
ICL_TBT_AUX_PW_TO_CH(pw_idx) : - ICL_AUX_PW_TO_CH(pw_idx); + icl_tc_port_assert_ref_held(dev_priv, power_well); + val = I915_READ(DP_AUX_CH_CTL(aux_ch)); val &= ~DP_AUX_CH_CTL_TBT_IO; - if (is_tbt) + if (power_well->desc->hsw.is_tc_tbt) val |= DP_AUX_CH_CTL_TBT_IO; I915_WRITE(DP_AUX_CH_CTL(aux_ch), val); hsw_power_well_enable(dev_priv, power_well); + + if (INTEL_GEN(dev_priv) >= 12 && !power_well->desc->hsw.is_tc_tbt) { + enum tc_port tc_port; + + tc_port = TGL_AUX_PW_TO_TC_PORT(power_well->desc->hsw.idx); + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2)); + + if (intel_de_wait_for_set(dev_priv, DKL_CMN_UC_DW_27(tc_port), + DKL_CMN_UC_DW27_UC_HEALTH, 1)) + DRM_WARN("Timeout waiting TC uC health\n"); + } +} + +static void +icl_tc_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + icl_tc_port_assert_ref_held(dev_priv, power_well); + + hsw_power_well_disable(dev_priv, power_well); } /* @@ -570,7 +690,11 @@ static u32 gen9_dc_mask(struct drm_i915_private *dev_priv) u32 mask; mask = DC_STATE_EN_UPTO_DC5; - if (INTEL_GEN(dev_priv) >= 11) + + if (INTEL_GEN(dev_priv) >= 12) + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6 + | DC_STATE_EN_DC9; + else if (IS_GEN(dev_priv, 11)) mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9; else if (IS_GEN9_LP(dev_priv)) mask |= DC_STATE_EN_DC9; @@ -580,7 +704,7 @@ static u32 gen9_dc_mask(struct drm_i915_private *dev_priv) return mask; } -void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv) +static void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv) { u32 val; @@ -640,7 +764,53 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state) dev_priv->csr.dc_state = val & mask; } -void bxt_enable_dc9(struct drm_i915_private *dev_priv) +static u32 +sanitize_target_dc_state(struct drm_i915_private *dev_priv, + u32 target_dc_state) +{ + u32 states[] = { + DC_STATE_EN_UPTO_DC6, + DC_STATE_EN_UPTO_DC5, + DC_STATE_EN_DC3CO, + DC_STATE_DISABLE, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { + if (target_dc_state != states[i]) + continue; + + if (dev_priv->csr.allowed_dc_mask & target_dc_state) + break; + + target_dc_state = states[i + 1]; + } + + return target_dc_state; +} + +static void tgl_enable_dc3co(struct drm_i915_private *dev_priv) +{ + DRM_DEBUG_KMS("Enabling DC3CO\n"); + gen9_set_dc_state(dev_priv, DC_STATE_EN_DC3CO); +} + +static void tgl_disable_dc3co(struct drm_i915_private *dev_priv) +{ + u32 val; + + DRM_DEBUG_KMS("Disabling DC3CO\n"); + val = I915_READ(DC_STATE_EN); + val &= ~DC_STATE_DC3CO_STATUS; + I915_WRITE(DC_STATE_EN, val); + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + /* + * Delay of 200us DC3CO Exit time B.Spec 49196 + */ + usleep_range(200, 210); +} + +static void bxt_enable_dc9(struct drm_i915_private *dev_priv) { assert_can_enable_dc9(dev_priv); @@ -655,7 +825,7 @@ void bxt_enable_dc9(struct drm_i915_private *dev_priv) gen9_set_dc_state(dev_priv, DC_STATE_EN_DC9); } -void bxt_disable_dc9(struct drm_i915_private *dev_priv) +static void bxt_disable_dc9(struct drm_i915_private *dev_priv) { assert_can_disable_dc9(dev_priv); @@ -695,6 +865,51 @@ lookup_power_well(struct drm_i915_private *dev_priv, return &dev_priv->power_domains.power_wells[0]; } +/** + * intel_display_power_set_target_dc_state - Set target dc state. + * @dev_priv: i915 device + * @state: state which needs to be set as target_dc_state. 
+ * + * This function set the "DC off" power well target_dc_state, + * based upon this target_dc_stste, "DC off" power well will + * enable desired DC state. + */ +void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, + u32 state) +{ + struct i915_power_well *power_well; + bool dc_off_enabled; + struct i915_power_domains *power_domains = &dev_priv->power_domains; + + mutex_lock(&power_domains->lock); + power_well = lookup_power_well(dev_priv, SKL_DISP_DC_OFF); + + if (WARN_ON(!power_well)) + goto unlock; + + state = sanitize_target_dc_state(dev_priv, state); + + if (state == dev_priv->csr.target_dc_state) + goto unlock; + + dc_off_enabled = power_well->desc->ops->is_enabled(dev_priv, + power_well); + /* + * If DC off power well is disabled, need to enable and disable the + * DC off power well to effect target DC state. + */ + if (!dc_off_enabled) + power_well->desc->ops->enable(dev_priv, power_well); + + dev_priv->csr.target_dc_state = state; + + if (!dc_off_enabled) + power_well->desc->ops->disable(dev_priv, power_well); + +unlock: + mutex_unlock(&power_domains->lock); +} + static void assert_can_enable_dc5(struct drm_i915_private *dev_priv) { bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv, @@ -709,7 +924,7 @@ static void assert_can_enable_dc5(struct drm_i915_private *dev_priv) assert_csr_loaded(dev_priv); } -void gen9_enable_dc5(struct drm_i915_private *dev_priv) +static void gen9_enable_dc5(struct drm_i915_private *dev_priv) { assert_can_enable_dc5(dev_priv); @@ -733,7 +948,7 @@ static void assert_can_enable_dc6(struct drm_i915_private *dev_priv) assert_csr_loaded(dev_priv); } -void skl_enable_dc6(struct drm_i915_private *dev_priv) +static void skl_enable_dc6(struct drm_i915_private *dev_priv) { assert_can_enable_dc6(dev_priv); @@ -807,7 +1022,8 @@ static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv) static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - return (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0; + return ((I915_READ(DC_STATE_EN) & DC_STATE_EN_DC3CO) == 0 && + (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0); } static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) @@ -819,11 +1035,15 @@ static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) "Unexpected DBuf power power state (0x%08x)\n", tmp); } -static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void gen9_disable_dc_states(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state = {}; + if (dev_priv->csr.target_dc_state == DC_STATE_EN_DC3CO) { + tgl_disable_dc3co(dev_priv); + return; + } + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); dev_priv->display.get_cdclk(dev_priv, &cdclk_state); @@ -844,16 +1064,29 @@ static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, intel_combo_phy_init(dev_priv); } +static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + gen9_disable_dc_states(dev_priv); +} + static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { if (!dev_priv->csr.dmc_payload) return; - if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC6) + switch (dev_priv->csr.target_dc_state) { + case DC_STATE_EN_DC3CO: + tgl_enable_dc3co(dev_priv); + break; + case DC_STATE_EN_UPTO_DC6: 
skl_enable_dc6(dev_priv); - else if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC5) + break; + case DC_STATE_EN_UPTO_DC5: gen9_enable_dc5(dev_priv); + break; + } } static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv, @@ -1059,7 +1292,7 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) intel_crt_reset(&encoder->base); } - i915_redisable_vga_power_on(dev_priv); + intel_vga_redisable_power_on(dev_priv); intel_pps_unlock_regs_wa(dev_priv); } @@ -1071,7 +1304,7 @@ static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv) spin_unlock_irq(&dev_priv->irq_lock); /* make sure we're done processing display irqs */ - synchronize_irq(dev_priv->drm.irq); + intel_synchronize_irq(dev_priv); intel_power_sequencer_reset(dev_priv); @@ -1232,11 +1465,8 @@ static void assert_chv_phy_status(struct drm_i915_private *dev_priv) * The PHY may be busy with some initial calibration and whatnot, * so the power state can take a while to actually change. */ - if (intel_wait_for_register(&dev_priv->uncore, - DISPLAY_PHY_STATUS, - phy_status_mask, - phy_status, - 10)) + if (intel_de_wait_for_register(dev_priv, DISPLAY_PHY_STATUS, + phy_status_mask, phy_status, 10)) DRM_ERROR("Unexpected PHY_STATUS 0x%08x, expected 0x%08x (PHY_CONTROL=0x%08x)\n", I915_READ(DISPLAY_PHY_STATUS) & phy_status_mask, phy_status, dev_priv->chv_phy_control); @@ -1267,11 +1497,8 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, true); /* Poll for phypwrgood signal */ - if (intel_wait_for_register(&dev_priv->uncore, - DISPLAY_PHY_STATUS, - PHY_POWERGOOD(phy), - PHY_POWERGOOD(phy), - 1)) + if (intel_de_wait_for_set(dev_priv, DISPLAY_PHY_STATUS, + PHY_POWERGOOD(phy), 1)) DRM_ERROR("Display PHY %d is not power up\n", phy); vlv_dpio_get(dev_priv); @@ -1437,8 +1664,8 @@ void chv_phy_powergate_lanes(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_power_domains *power_domains = &dev_priv->power_domains; - enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(&encoder->base)); - enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); + enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(encoder)); + enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(encoder)); mutex_lock(&power_domains->lock); @@ -2332,24 +2559,19 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_AUX_D) | \ BIT_ULL(POWER_DOMAIN_AUX_E) | \ BIT_ULL(POWER_DOMAIN_AUX_F) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT1) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT2) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT3) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT4) | \ + BIT_ULL(POWER_DOMAIN_AUX_C_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_INIT)) 
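The "DC off" power well handling in the hunks above re-enters a DC state chosen by csr.target_dc_state, and sanitize_target_dc_state() walks an ordered list (DC6, DC5, DC3CO, disable) until it reaches a state permitted by allowed_dc_mask. A minimal stand-alone sketch of that fallback walk is shown below; it is plain user-space C with made-up EX_DC_* bit values, not the driver's DC_STATE_EN_* register bits.

/*
 * Stand-alone sketch (not driver code) of the target-DC-state fallback:
 * walk an ordered list of DC states, deepest first, and settle on the
 * first one the platform mask allows. EX_DC_* values are invented for
 * the example.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_DC_DISABLE	0u
#define EX_DC3CO	(1u << 0)
#define EX_DC5		(1u << 1)
#define EX_DC6		(1u << 2)

#define EX_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static uint32_t ex_sanitize_target_dc_state(uint32_t allowed_mask,
					    uint32_t target)
{
	/* Same ordering idea as the driver: DC6 -> DC5 -> DC3CO -> off. */
	static const uint32_t states[] = {
		EX_DC6, EX_DC5, EX_DC3CO, EX_DC_DISABLE,
	};
	unsigned int i;

	for (i = 0; i < EX_ARRAY_SIZE(states) - 1; i++) {
		if (target != states[i])
			continue;
		if (allowed_mask & target)
			break;
		/* Not allowed: degrade to the next shallower state. */
		target = states[i + 1];
	}

	return target;
}

int main(void)
{
	/* Pretend the platform allows DC5 and DC3CO but not DC6. */
	uint32_t allowed = EX_DC5 | EX_DC3CO;

	/* A DC6 request degrades to DC5 ... */
	printf("DC6 request   -> %#x\n",
	       (unsigned int)ex_sanitize_target_dc_state(allowed, EX_DC6));
	/* ... while an allowed DC3CO request is kept as-is. */
	printf("DC3CO request -> %#x\n",
	       (unsigned int)ex_sanitize_target_dc_state(allowed, EX_DC3CO));

	return 0;
}

Keeping the list ordered deepest-first means a request for a state the platform cannot enter quietly degrades to the next shallower allowed state instead of failing outright, which is also why the disable entry terminates the list.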
@@ -2359,7 +2581,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, */ #define ICL_PW_2_POWER_DOMAINS ( \ ICL_PW_3_POWER_DOMAINS | \ - BIT_ULL(POWER_DOMAIN_TRANSCODER_EDP_VDSC) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_VDSC_PW2) | \ BIT_ULL(POWER_DOMAIN_INIT)) /* * - KVMR (HW control) @@ -2368,6 +2590,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, ICL_PW_2_POWER_DOMAINS | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_DPLL_DC_OFF) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define ICL_DDI_IO_A_POWER_DOMAINS ( \ @@ -2388,22 +2611,120 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_A)) #define ICL_AUX_B_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_B)) -#define ICL_AUX_C_IO_POWER_DOMAINS ( \ +#define ICL_AUX_C_TC1_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_C)) +#define ICL_AUX_D_TC2_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D)) +#define ICL_AUX_E_TC3_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_E)) +#define ICL_AUX_F_TC4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F)) +#define ICL_AUX_C_TBT1_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_C_TBT)) +#define ICL_AUX_D_TBT2_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT)) +#define ICL_AUX_E_TBT3_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT)) +#define ICL_AUX_F_TBT4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT)) + +#define TGL_PW_5_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PIPE_D) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_D) | \ + BIT_ULL(POWER_DOMAIN_PIPE_D_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define TGL_PW_4_POWER_DOMAINS ( \ + TGL_PW_5_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define TGL_PW_3_POWER_DOMAINS ( \ + TGL_PW_4_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_G_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_H_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_I_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUX_E) | \ + BIT_ULL(POWER_DOMAIN_AUX_F) | \ + BIT_ULL(POWER_DOMAIN_AUX_G) | \ + BIT_ULL(POWER_DOMAIN_AUX_H) | \ + BIT_ULL(POWER_DOMAIN_AUX_I) | \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_G_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_H_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_I_TBT) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define TGL_PW_2_POWER_DOMAINS ( \ + TGL_PW_3_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_VDSC_PW2) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define TGL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ + TGL_PW_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define TGL_DDI_IO_D_TC1_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO)) +#define TGL_DDI_IO_E_TC2_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO)) +#define TGL_DDI_IO_F_TC3_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO)) +#define TGL_DDI_IO_G_TC4_POWER_DOMAINS ( \ + 
BIT_ULL(POWER_DOMAIN_PORT_DDI_G_IO)) +#define TGL_DDI_IO_H_TC5_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_H_IO)) +#define TGL_DDI_IO_I_TC6_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_I_IO)) + +#define TGL_AUX_A_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_IO_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_A)) +#define TGL_AUX_B_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_B)) +#define TGL_AUX_C_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_C)) -#define ICL_AUX_D_IO_POWER_DOMAINS ( \ +#define TGL_AUX_D_TC1_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_D)) -#define ICL_AUX_E_IO_POWER_DOMAINS ( \ +#define TGL_AUX_E_TC2_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_E)) -#define ICL_AUX_F_IO_POWER_DOMAINS ( \ +#define TGL_AUX_F_TC3_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_F)) -#define ICL_AUX_TBT1_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT1)) -#define ICL_AUX_TBT2_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT2)) -#define ICL_AUX_TBT3_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT3)) -#define ICL_AUX_TBT4_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT4)) +#define TGL_AUX_G_TC4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_G)) +#define TGL_AUX_H_TC5_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_H)) +#define TGL_AUX_I_TC6_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_I)) +#define TGL_AUX_D_TBT1_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT)) +#define TGL_AUX_E_TBT2_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT)) +#define TGL_AUX_F_TBT3_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT)) +#define TGL_AUX_G_TBT4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_G_TBT)) +#define TGL_AUX_H_TBT5_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_H_TBT)) +#define TGL_AUX_I_TBT6_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_I_TBT)) static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_power_well_sync_hw_noop, @@ -2715,7 +3036,7 @@ static const struct i915_power_well_desc skl_power_wells[] = { .name = "DC off", .domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -2797,7 +3118,7 @@ static const struct i915_power_well_desc bxt_power_wells[] = { .name = "DC off", .domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -2857,7 +3178,7 @@ static const struct i915_power_well_desc glk_power_wells[] = { .name = "DC off", .domains = GLK_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3026,7 +3347,7 @@ static const struct i915_power_well_desc cnl_power_wells[] = { .name = "DC off", .domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3113,7 +3434,7 @@ static const struct i915_power_well_ops icl_combo_phy_aux_power_well_ops = { static const struct i915_power_well_ops icl_tc_phy_aux_power_well_ops = { .sync_hw = hsw_power_well_sync_hw, .enable = icl_tc_phy_aux_power_well_enable, - .disable = hsw_power_well_disable, + .disable = icl_tc_phy_aux_power_well_disable, .is_enabled = hsw_power_well_enabled, }; @@ -3154,7 +3475,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { .name = "DC off", .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = 
DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3261,8 +3582,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX C", - .domains = ICL_AUX_C_IO_POWER_DOMAINS, + .name = "AUX C TC1", + .domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3272,8 +3593,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX D", - .domains = ICL_AUX_D_IO_POWER_DOMAINS, + .name = "AUX D TC2", + .domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3283,8 +3604,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX E", - .domains = ICL_AUX_E_IO_POWER_DOMAINS, + .name = "AUX E TC3", + .domains = ICL_AUX_E_TC3_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3294,8 +3615,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX F", - .domains = ICL_AUX_F_IO_POWER_DOMAINS, + .name = "AUX F TC4", + .domains = ICL_AUX_F_TC4_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3305,8 +3626,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT1", - .domains = ICL_AUX_TBT1_IO_POWER_DOMAINS, + .name = "AUX C TBT1", + .domains = ICL_AUX_C_TBT1_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3316,8 +3637,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT2", - .domains = ICL_AUX_TBT2_IO_POWER_DOMAINS, + .name = "AUX D TBT2", + .domains = ICL_AUX_D_TBT2_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3327,8 +3648,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT3", - .domains = ICL_AUX_TBT3_IO_POWER_DOMAINS, + .name = "AUX E TBT3", + .domains = ICL_AUX_E_TBT3_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3338,8 +3659,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT4", - .domains = ICL_AUX_TBT4_IO_POWER_DOMAINS, + .name = "AUX F TBT4", + .domains = ICL_AUX_F_TBT4_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3362,6 +3683,480 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }; +static const struct i915_power_well_desc ehl_power_wells[] = { + { + .name = "always-on", + .always_on = true, + .domains = POWER_DOMAIN_MASK, + .ops = &i9xx_always_on_power_well_ops, + .id = DISP_PW_ID_NONE, + }, + { + .name = "power well 1", + /* Handled by the DMC firmware */ + .always_on = true, + .domains = 0, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_1, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_1, + .hsw.has_fuses = true, + }, + }, + { + .name = "DC off", + .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = &gen9_dc_off_power_well_ops, + .id = SKL_DISP_DC_OFF, + }, + { + .name = "power well 2", + .domains = ICL_PW_2_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_2, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_2, + .hsw.has_fuses = true, + }, + }, + { + .name = "power well 3", + .domains = ICL_PW_3_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_3, + .hsw.irq_pipe_mask = 
BIT(PIPE_B), + .hsw.has_vga = true, + .hsw.has_fuses = true, + }, + }, + { + .name = "DDI A IO", + .domains = ICL_DDI_IO_A_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_A, + }, + }, + { + .name = "DDI B IO", + .domains = ICL_DDI_IO_B_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_B, + }, + }, + { + .name = "DDI C IO", + .domains = ICL_DDI_IO_C_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_C, + }, + }, + { + .name = "DDI D IO", + .domains = ICL_DDI_IO_D_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_D, + }, + }, + { + .name = "AUX A", + .domains = ICL_AUX_A_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_A, + }, + }, + { + .name = "AUX B", + .domains = ICL_AUX_B_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_B, + }, + }, + { + .name = "AUX C", + .domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_C, + }, + }, + { + .name = "AUX D", + .domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_D, + }, + }, + { + .name = "power well 4", + .domains = ICL_PW_4_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_4, + .hsw.has_fuses = true, + .hsw.irq_pipe_mask = BIT(PIPE_C), + }, + }, +}; + +static const struct i915_power_well_desc tgl_power_wells[] = { + { + .name = "always-on", + .always_on = true, + .domains = POWER_DOMAIN_MASK, + .ops = &i9xx_always_on_power_well_ops, + .id = DISP_PW_ID_NONE, + }, + { + .name = "power well 1", + /* Handled by the DMC firmware */ + .always_on = true, + .domains = 0, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_1, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_1, + .hsw.has_fuses = true, + }, + }, + { + .name = "DC off", + .domains = TGL_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = &gen9_dc_off_power_well_ops, + .id = SKL_DISP_DC_OFF, + }, + { + .name = "power well 2", + .domains = TGL_PW_2_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_2, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_2, + .hsw.has_fuses = true, + }, + }, + { + .name = "power well 3", + .domains = TGL_PW_3_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_3, + .hsw.irq_pipe_mask = BIT(PIPE_B), + .hsw.has_vga = true, + .hsw.has_fuses = true, + }, + }, + { + .name = "DDI A IO", + .domains = ICL_DDI_IO_A_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_A, + } + }, + { + .name = "DDI B IO", + .domains = ICL_DDI_IO_B_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = 
ICL_PW_CTL_IDX_DDI_B, + } + }, + { + .name = "DDI C IO", + .domains = ICL_DDI_IO_C_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_C, + } + }, + { + .name = "DDI D TC1 IO", + .domains = TGL_DDI_IO_D_TC1_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC1, + }, + }, + { + .name = "DDI E TC2 IO", + .domains = TGL_DDI_IO_E_TC2_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC2, + }, + }, + { + .name = "DDI F TC3 IO", + .domains = TGL_DDI_IO_F_TC3_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC3, + }, + }, + { + .name = "DDI G TC4 IO", + .domains = TGL_DDI_IO_G_TC4_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC4, + }, + }, + { + .name = "DDI H TC5 IO", + .domains = TGL_DDI_IO_H_TC5_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC5, + }, + }, + { + .name = "DDI I TC6 IO", + .domains = TGL_DDI_IO_I_TC6_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC6, + }, + }, + { + .name = "AUX A", + .domains = TGL_AUX_A_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_A, + }, + }, + { + .name = "AUX B", + .domains = TGL_AUX_B_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_B, + }, + }, + { + .name = "AUX C", + .domains = TGL_AUX_C_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_C, + }, + }, + { + .name = "AUX D TC1", + .domains = TGL_AUX_D_TC1_IO_POWER_DOMAINS, + .ops = &icl_tc_phy_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC1, + .hsw.is_tc_tbt = false, + }, + }, + { + .name = "AUX E TC2", + .domains = TGL_AUX_E_TC2_IO_POWER_DOMAINS, + .ops = &icl_tc_phy_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC2, + .hsw.is_tc_tbt = false, + }, + }, + { + .name = "AUX F TC3", + .domains = TGL_AUX_F_TC3_IO_POWER_DOMAINS, + .ops = &icl_tc_phy_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC3, + .hsw.is_tc_tbt = false, + }, + }, + { + .name = "AUX G TC4", + .domains = TGL_AUX_G_TC4_IO_POWER_DOMAINS, + .ops = &icl_tc_phy_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC4, + .hsw.is_tc_tbt = false, + }, + }, + { + .name = "AUX H TC5", + .domains = TGL_AUX_H_TC5_IO_POWER_DOMAINS, + .ops = &icl_tc_phy_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC5, + .hsw.is_tc_tbt = false, + }, + }, + { + .name = "AUX I TC6", + .domains = TGL_AUX_I_TC6_IO_POWER_DOMAINS, + .ops = 
&icl_tc_phy_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC6, + .hsw.is_tc_tbt = false, + }, + }, + { + .name = "AUX D TBT1", + .domains = TGL_AUX_D_TBT1_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TBT1, + .hsw.is_tc_tbt = true, + }, + }, + { + .name = "AUX E TBT2", + .domains = TGL_AUX_E_TBT2_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TBT2, + .hsw.is_tc_tbt = true, + }, + }, + { + .name = "AUX F TBT3", + .domains = TGL_AUX_F_TBT3_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TBT3, + .hsw.is_tc_tbt = true, + }, + }, + { + .name = "AUX G TBT4", + .domains = TGL_AUX_G_TBT4_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TBT4, + .hsw.is_tc_tbt = true, + }, + }, + { + .name = "AUX H TBT5", + .domains = TGL_AUX_H_TBT5_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TBT5, + .hsw.is_tc_tbt = true, + }, + }, + { + .name = "AUX I TBT6", + .domains = TGL_AUX_I_TBT6_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TBT6, + .hsw.is_tc_tbt = true, + }, + }, + { + .name = "power well 4", + .domains = TGL_PW_4_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_4, + .hsw.has_fuses = true, + .hsw.irq_pipe_mask = BIT(PIPE_C), + } + }, + { + .name = "power well 5", + .domains = TGL_PW_5_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_PW_5, + .hsw.has_fuses = true, + .hsw.irq_pipe_mask = BIT(PIPE_D), + }, + }, +}; + static int sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv, int disable_power_well) @@ -3379,14 +4174,17 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; - if (INTEL_GEN(dev_priv) >= 11) { - max_dc = 2; + if (INTEL_GEN(dev_priv) >= 12) { + max_dc = 4; /* * DC9 has a separate HW flow from the rest of the DC states, * not depending on the DMC firmware. It's needed by system * suspend/resume, so allow it unconditionally. 
*/ mask = DC_STATE_EN_DC9; + } else if (IS_GEN(dev_priv, 11)) { + max_dc = 2; + mask = DC_STATE_EN_DC9; } else if (IS_GEN(dev_priv, 10) || IS_GEN9_BC(dev_priv)) { max_dc = 2; mask = 0; @@ -3405,7 +4203,7 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, requested_dc = enable_dc; } else if (enable_dc == -1) { requested_dc = max_dc; - } else if (enable_dc > max_dc && enable_dc <= 2) { + } else if (enable_dc > max_dc && enable_dc <= 4) { DRM_DEBUG_KMS("Adjusting requested max DC state (%d->%d)\n", enable_dc, max_dc); requested_dc = max_dc; @@ -3414,10 +4212,20 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, requested_dc = max_dc; } - if (requested_dc > 1) + switch (requested_dc) { + case 4: + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6; + break; + case 3: + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC5; + break; + case 2: mask |= DC_STATE_EN_UPTO_DC6; - if (requested_dc > 0) + break; + case 1: mask |= DC_STATE_EN_UPTO_DC5; + break; + } DRM_DEBUG_KMS("Allowed DC state mask %02x\n", mask); @@ -3478,6 +4286,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) dev_priv->csr.allowed_dc_mask = get_allowed_dc_mask(dev_priv, i915_modparams.enable_dc); + dev_priv->csr.target_dc_state = + sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + BUILD_BUG_ON(POWER_DOMAIN_NUM > 64); mutex_init(&power_domains->lock); @@ -3489,7 +4300,11 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) * The enabling order will be from lower to higher indexed wells, * the disabling order is reversed. */ - if (IS_GEN(dev_priv, 11)) { + if (IS_GEN(dev_priv, 12)) { + err = set_power_wells(power_domains, tgl_power_wells); + } else if (IS_ELKHARTLAKE(dev_priv)) { + err = set_power_wells(power_domains, ehl_power_wells); + } else if (IS_GEN(dev_priv, 11)) { err = set_power_wells(power_domains, icl_power_wells); } else if (IS_CANNONLAKE(dev_priv)) { err = set_power_wells(power_domains, cnl_power_wells); @@ -3773,8 +4588,7 @@ static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, I915_WRITE(LCPLL_CTL, val); POSTING_READ(LCPLL_CTL); - if (intel_wait_for_register(&dev_priv->uncore, LCPLL_CTL, - LCPLL_PLL_LOCK, 0, 1)) + if (intel_de_wait_for_clear(dev_priv, LCPLL_CTL, LCPLL_PLL_LOCK, 1)) DRM_ERROR("LCPLL still locked\n"); val = hsw_read_dcomp(dev_priv); @@ -3829,8 +4643,7 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) val &= ~LCPLL_PLL_DISABLE; I915_WRITE(LCPLL_CTL, val); - if (intel_wait_for_register(&dev_priv->uncore, LCPLL_CTL, - LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, 5)) + if (intel_de_wait_for_set(dev_priv, LCPLL_CTL, LCPLL_PLL_LOCK, 5)) DRM_ERROR("LCPLL not locked yet\n"); if (val & LCPLL_CD_SOURCE_FCLK) { @@ -3872,7 +4685,7 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) * For more, read "Display Sequences for Package C8" on the hardware * documentation. 
*/ -void hsw_enable_pc8(struct drm_i915_private *dev_priv) +static void hsw_enable_pc8(struct drm_i915_private *dev_priv) { u32 val; @@ -3888,7 +4701,7 @@ void hsw_enable_pc8(struct drm_i915_private *dev_priv) hsw_disable_lcpll(dev_priv, true, true); } -void hsw_disable_pc8(struct drm_i915_private *dev_priv) +static void hsw_disable_pc8(struct drm_i915_private *dev_priv) { u32 val; @@ -3963,7 +4776,7 @@ static void skl_display_core_uninit(struct drm_i915_private *dev_priv) struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *well; - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + gen9_disable_dc_states(dev_priv); gen9_dbuf_disable(dev_priv); @@ -3988,8 +4801,7 @@ static void skl_display_core_uninit(struct drm_i915_private *dev_priv) usleep_range(10, 30); /* 10 us delay per Bspec */ } -void bxt_display_core_init(struct drm_i915_private *dev_priv, - bool resume) +static void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume) { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *well; @@ -4020,12 +4832,12 @@ void bxt_display_core_init(struct drm_i915_private *dev_priv, intel_csr_load_program(dev_priv); } -void bxt_display_core_uninit(struct drm_i915_private *dev_priv) +static void bxt_display_core_uninit(struct drm_i915_private *dev_priv) { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *well; - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + gen9_disable_dc_states(dev_priv); gen9_dbuf_disable(dev_priv); @@ -4085,7 +4897,7 @@ static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *well; - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + gen9_disable_dc_states(dev_priv); /* 1. 
Disable all display engine functions -> aready done */ @@ -4111,8 +4923,58 @@ static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) intel_combo_phy_uninit(dev_priv); } -void icl_display_core_init(struct drm_i915_private *dev_priv, - bool resume) +struct buddy_page_mask { + u32 page_mask; + u8 type; + u8 num_channels; +}; + +static const struct buddy_page_mask tgl_buddy_page_masks[] = { + { .num_channels = 1, .type = INTEL_DRAM_LPDDR4, .page_mask = 0xE }, + { .num_channels = 1, .type = INTEL_DRAM_DDR4, .page_mask = 0xF }, + { .num_channels = 2, .type = INTEL_DRAM_LPDDR4, .page_mask = 0x1C }, + { .num_channels = 2, .type = INTEL_DRAM_DDR4, .page_mask = 0x1F }, + {} +}; + +static const struct buddy_page_mask wa_1409767108_buddy_page_masks[] = { + { .num_channels = 1, .type = INTEL_DRAM_LPDDR4, .page_mask = 0x1 }, + { .num_channels = 1, .type = INTEL_DRAM_DDR4, .page_mask = 0x1 }, + { .num_channels = 2, .type = INTEL_DRAM_LPDDR4, .page_mask = 0x3 }, + { .num_channels = 2, .type = INTEL_DRAM_DDR4, .page_mask = 0x3 }, + {} +}; + +static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) +{ + enum intel_dram_type type = dev_priv->dram_info.type; + u8 num_channels = dev_priv->dram_info.num_channels; + const struct buddy_page_mask *table; + int i; + + if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0)) + /* Wa_1409767108: tgl */ + table = wa_1409767108_buddy_page_masks; + else + table = tgl_buddy_page_masks; + + for (i = 0; table[i].page_mask != 0; i++) + if (table[i].num_channels == num_channels && + table[i].type == type) + break; + + if (table[i].page_mask == 0) { + DRM_DEBUG_DRIVER("Unknown memory configuration; disabling address buddy logic.\n"); + I915_WRITE(BW_BUDDY1_CTL, BW_BUDDY_DISABLE); + I915_WRITE(BW_BUDDY2_CTL, BW_BUDDY_DISABLE); + } else { + I915_WRITE(BW_BUDDY1_PAGE_MASK, table[i].page_mask); + I915_WRITE(BW_BUDDY2_PAGE_MASK, table[i].page_mask); + } +} + +static void icl_display_core_init(struct drm_i915_private *dev_priv, + bool resume) { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *well; @@ -4143,16 +5005,20 @@ void icl_display_core_init(struct drm_i915_private *dev_priv, /* 6. Setup MBUS. */ icl_mbus_init(dev_priv); + /* 7. Program arbiter BW_BUDDY registers */ + if (INTEL_GEN(dev_priv) >= 12) + tgl_bw_buddy_init(dev_priv); + if (resume && dev_priv->csr.dmc_payload) intel_csr_load_program(dev_priv); } -void icl_display_core_uninit(struct drm_i915_private *dev_priv) +static void icl_display_core_uninit(struct drm_i915_private *dev_priv) { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *well; - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + gen9_disable_dc_states(dev_priv); /* 1. Disable all display engine functions -> aready done */ @@ -4337,7 +5203,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); * * It will return with power domains disabled (to be enabled later by * intel_power_domains_enable()) and must be paired with - * intel_power_domains_fini_hw(). + * intel_power_domains_driver_remove(). 
*/ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) { @@ -4345,6 +5211,9 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) power_domains->initializing = true; + /* Must happen before power domain init on VLV/CHV */ + intel_update_rawclk(i915); + if (INTEL_GEN(i915) >= 11) { icl_display_core_init(i915, resume); } else if (IS_CANNONLAKE(i915)) { @@ -4389,7 +5258,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) } /** - * intel_power_domains_fini_hw - deinitialize hw power domain state + * intel_power_domains_driver_remove - deinitialize hw power domain state * @i915: i915 device instance * * De-initializes the display power domain HW state. It also ensures that the @@ -4399,7 +5268,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) * intel_power_domains_disable()) and must be paired with * intel_power_domains_init_hw(). */ -void intel_power_domains_fini_hw(struct drm_i915_private *i915) +void intel_power_domains_driver_remove(struct drm_i915_private *i915) { intel_wakeref_t wakeref __maybe_unused = fetch_and_zero(&i915->power_domains.wakeref); @@ -4623,3 +5492,58 @@ static void intel_power_domains_verify_state(struct drm_i915_private *i915) } #endif + +void intel_display_power_suspend_late(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 11 || IS_GEN9_LP(i915)) + bxt_enable_dc9(i915); + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + hsw_enable_pc8(i915); +} + +void intel_display_power_resume_early(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 11 || IS_GEN9_LP(i915)) { + gen9_sanitize_dc_state(i915); + bxt_disable_dc9(i915); + } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { + hsw_disable_pc8(i915); + } +} + +void intel_display_power_suspend(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 11) { + icl_display_core_uninit(i915); + bxt_enable_dc9(i915); + } else if (IS_GEN9_LP(i915)) { + bxt_display_core_uninit(i915); + bxt_enable_dc9(i915); + } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { + hsw_enable_pc8(i915); + } +} + +void intel_display_power_resume(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 11) { + bxt_disable_dc9(i915); + icl_display_core_init(i915, true); + if (i915->csr.dmc_payload) { + if (i915->csr.allowed_dc_mask & + DC_STATE_EN_UPTO_DC6) + skl_enable_dc6(i915); + else if (i915->csr.allowed_dc_mask & + DC_STATE_EN_UPTO_DC5) + gen9_enable_dc5(i915); + } + } else if (IS_GEN9_LP(i915)) { + bxt_disable_dc9(i915); + bxt_display_core_init(i915, true); + if (i915->csr.dmc_payload && + (i915->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC5)) + gen9_enable_dc5(i915); + } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { + hsw_disable_pc8(i915); + } +} diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index ff57b0a7fe59..2608a65af7fa 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -18,14 +18,18 @@ enum intel_display_power_domain { POWER_DOMAIN_PIPE_A, POWER_DOMAIN_PIPE_B, POWER_DOMAIN_PIPE_C, + POWER_DOMAIN_PIPE_D, POWER_DOMAIN_PIPE_A_PANEL_FITTER, POWER_DOMAIN_PIPE_B_PANEL_FITTER, POWER_DOMAIN_PIPE_C_PANEL_FITTER, + POWER_DOMAIN_PIPE_D_PANEL_FITTER, POWER_DOMAIN_TRANSCODER_A, POWER_DOMAIN_TRANSCODER_B, POWER_DOMAIN_TRANSCODER_C, + POWER_DOMAIN_TRANSCODER_D, POWER_DOMAIN_TRANSCODER_EDP, - POWER_DOMAIN_TRANSCODER_EDP_VDSC, + /* VDSC/joining for eDP/DSI transcoder (ICL) or pipe 
A (TGL) */ + POWER_DOMAIN_TRANSCODER_VDSC_PW2, POWER_DOMAIN_TRANSCODER_DSI_A, POWER_DOMAIN_TRANSCODER_DSI_C, POWER_DOMAIN_PORT_DDI_A_LANES, @@ -34,12 +38,18 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_DDI_D_LANES, POWER_DOMAIN_PORT_DDI_E_LANES, POWER_DOMAIN_PORT_DDI_F_LANES, + POWER_DOMAIN_PORT_DDI_G_LANES, + POWER_DOMAIN_PORT_DDI_H_LANES, + POWER_DOMAIN_PORT_DDI_I_LANES, POWER_DOMAIN_PORT_DDI_A_IO, POWER_DOMAIN_PORT_DDI_B_IO, POWER_DOMAIN_PORT_DDI_C_IO, POWER_DOMAIN_PORT_DDI_D_IO, POWER_DOMAIN_PORT_DDI_E_IO, POWER_DOMAIN_PORT_DDI_F_IO, + POWER_DOMAIN_PORT_DDI_G_IO, + POWER_DOMAIN_PORT_DDI_H_IO, + POWER_DOMAIN_PORT_DDI_I_IO, POWER_DOMAIN_PORT_DSI, POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, @@ -51,19 +61,48 @@ enum intel_display_power_domain { POWER_DOMAIN_AUX_D, POWER_DOMAIN_AUX_E, POWER_DOMAIN_AUX_F, + POWER_DOMAIN_AUX_G, + POWER_DOMAIN_AUX_H, + POWER_DOMAIN_AUX_I, POWER_DOMAIN_AUX_IO_A, - POWER_DOMAIN_AUX_TBT1, - POWER_DOMAIN_AUX_TBT2, - POWER_DOMAIN_AUX_TBT3, - POWER_DOMAIN_AUX_TBT4, + POWER_DOMAIN_AUX_C_TBT, + POWER_DOMAIN_AUX_D_TBT, + POWER_DOMAIN_AUX_E_TBT, + POWER_DOMAIN_AUX_F_TBT, + POWER_DOMAIN_AUX_G_TBT, + POWER_DOMAIN_AUX_H_TBT, + POWER_DOMAIN_AUX_I_TBT, POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, + POWER_DOMAIN_DPLL_DC_OFF, POWER_DOMAIN_INIT, POWER_DOMAIN_NUM, }; +/* + * i915_power_well_id: + * + * IDs used to look up power wells. Power wells accessed directly bypassing + * the power domains framework must be assigned a unique ID. The rest of power + * wells must be assigned DISP_PW_ID_NONE. + */ +enum i915_power_well_id { + DISP_PW_ID_NONE, + + VLV_DISP_PW_DISP2D, + BXT_DISP_PW_DPIO_CMN_A, + VLV_DISP_PW_DPIO_CMN_BC, + GLK_DISP_PW_DPIO_CMN_C, + CHV_DISP_PW_DPIO_CMN_D, + HSW_DISP_PW_GLOBAL, + SKL_DISP_PW_MISC_IO, + SKL_DISP_PW_1, + SKL_DISP_PW_2, + SKL_DISP_DC_OFF, +}; + #define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) #define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \ ((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER) @@ -204,27 +243,22 @@ struct i915_power_domains { for_each_power_well_reverse(__dev_priv, __power_well) \ for_each_if((__power_well)->desc->domains & (__domain_mask)) -void skl_enable_dc6(struct drm_i915_private *dev_priv); -void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv); -void bxt_enable_dc9(struct drm_i915_private *dev_priv); -void bxt_disable_dc9(struct drm_i915_private *dev_priv); -void gen9_enable_dc5(struct drm_i915_private *dev_priv); - int intel_power_domains_init(struct drm_i915_private *dev_priv); void intel_power_domains_cleanup(struct drm_i915_private *dev_priv); void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume); -void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv); -void icl_display_core_init(struct drm_i915_private *dev_priv, bool resume); -void icl_display_core_uninit(struct drm_i915_private *dev_priv); +void intel_power_domains_driver_remove(struct drm_i915_private *dev_priv); void intel_power_domains_enable(struct drm_i915_private *dev_priv); void intel_power_domains_disable(struct drm_i915_private *dev_priv); void intel_power_domains_suspend(struct drm_i915_private *dev_priv, enum i915_drm_suspend_mode); void intel_power_domains_resume(struct drm_i915_private *dev_priv); -void hsw_enable_pc8(struct drm_i915_private *dev_priv); -void hsw_disable_pc8(struct drm_i915_private *dev_priv); -void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume); -void bxt_display_core_uninit(struct drm_i915_private *dev_priv); + +void 
intel_display_power_suspend_late(struct drm_i915_private *i915); +void intel_display_power_resume_early(struct drm_i915_private *i915); +void intel_display_power_suspend(struct drm_i915_private *i915); +void intel_display_power_resume(struct drm_i915_private *i915); +void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, + u32 state); const char * intel_display_power_domain_str(enum intel_display_power_domain domain); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 1d58f7ec5d84..888ea8a170d1 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -22,8 +22,9 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifndef __INTEL_DRV_H__ -#define __INTEL_DRV_H__ + +#ifndef __INTEL_DISPLAY_TYPES_H__ +#define __INTEL_DISPLAY_TYPES_H__ #include <linux/async.h> #include <linux/i2c.h> @@ -67,15 +68,30 @@ enum intel_output_type { INTEL_OUTPUT_DP_MST = 11, }; +enum hdmi_force_audio { + HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */ + HDMI_AUDIO_OFF, /* force turn off HDMI audio */ + HDMI_AUDIO_AUTO, /* trust EDID */ + HDMI_AUDIO_ON, /* force turn on HDMI audio */ +}; + +/* "Broadcast RGB" property */ +enum intel_broadcast_rgb { + INTEL_BROADCAST_RGB_AUTO, + INTEL_BROADCAST_RGB_FULL, + INTEL_BROADCAST_RGB_LIMITED, +}; + struct intel_framebuffer { struct drm_framebuffer base; + struct intel_frontbuffer *frontbuffer; struct intel_rotation_info rot_info; /* for each plane in the normal GTT view */ struct { unsigned int x, y; - } normal[2]; - /* for each plane in the rotated GTT view */ + } normal[4]; + /* for each plane in the rotated GTT view for no-CCS formats */ struct { unsigned int x, y; unsigned int pitch; /* pixels */ @@ -101,20 +117,31 @@ struct intel_fbdev { struct mutex hpd_lock; }; +enum intel_hotplug_state { + INTEL_HOTPLUG_UNCHANGED, + INTEL_HOTPLUG_CHANGED, + INTEL_HOTPLUG_RETRY, +}; + struct intel_encoder { struct drm_encoder base; enum intel_output_type type; enum port port; - unsigned int cloneable; - bool (*hotplug)(struct intel_encoder *encoder, - struct intel_connector *connector); + u16 cloneable; + u8 pipe_mask; + enum intel_hotplug_state (*hotplug)(struct intel_encoder *encoder, + struct intel_connector *connector, + bool irq_received); enum intel_output_type (*compute_output_type)(struct intel_encoder *, struct intel_crtc_state *, struct drm_connector_state *); int (*compute_config)(struct intel_encoder *, struct intel_crtc_state *, struct drm_connector_state *); + void (*update_prepare)(struct intel_atomic_state *, + struct intel_encoder *, + struct intel_crtc *); void (*pre_pll_enable)(struct intel_encoder *, const struct intel_crtc_state *, const struct drm_connector_state *); @@ -124,6 +151,9 @@ struct intel_encoder { void (*enable)(struct intel_encoder *, const struct intel_crtc_state *, const struct drm_connector_state *); + void (*update_complete)(struct intel_atomic_state *, + struct intel_encoder *, + struct intel_crtc *); void (*disable)(struct intel_encoder *, const struct intel_crtc_state *, const struct drm_connector_state *); @@ -158,7 +188,6 @@ struct intel_encoder { * device interrupts are disabled. 
*/ void (*suspend)(struct intel_encoder *); - int crtc_mask; enum hpd_pin hpd_pin; enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ @@ -359,6 +388,13 @@ struct intel_hdcp { wait_queue_head_t cp_irq_queue; atomic_t cp_irq_count; int cp_irq_count_cached; + + /* + * HDCP register access for gen12+ need the transcoder associated. + * Transcoder attached to the connector could be changed at modeset. + * Hence caching the transcoder here. + */ + enum transcoder cpu_transcoder; }; struct intel_connector { @@ -452,9 +488,9 @@ struct intel_atomic_state { * but the converse is not necessarily true; simply changing a mode may * not flip the final active status of any CRTC's */ - unsigned int active_pipe_changes; + u8 active_pipe_changes; - unsigned int active_crtcs; + u8 active_pipes; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; /* minimum acceptable voltage level for each pipe */ @@ -470,6 +506,14 @@ struct intel_atomic_state { bool rps_interactive; + /* + * active_pipes + * min_cdclk[] + * min_voltage_level[] + * cdclk.* + */ + bool global_state_changed; + /* Gen9+ only */ struct skl_ddb_values wm_results; @@ -479,7 +523,24 @@ struct intel_atomic_state { }; struct intel_plane_state { - struct drm_plane_state base; + struct drm_plane_state uapi; + + /* + * actual hardware state, the state we program to the hardware. + * The following members are used to verify the hardware state: + * During initial hw readout, they need to be copied from uapi. + */ + struct { + struct drm_crtc *crtc; + struct drm_framebuffer *fb; + + u16 alpha; + uint16_t pixel_blend_mode; + unsigned int rotation; + enum drm_color_encoding color_encoding; + enum drm_color_range color_range; + } hw; + struct i915_ggtt_view view; struct i915_vma *vma; unsigned long flags; @@ -494,7 +555,7 @@ struct intel_plane_state { */ u32 stride; int x, y; - } color_plane[2]; + } color_plane[4]; /* plane control register */ u32 ctl; @@ -502,6 +563,9 @@ struct intel_plane_state { /* plane color control register */ u32 color_ctl; + /* chroma upsampler control register */ + u32 cus_ctl; + /* * scaler_id * = -1 : not using a scaler @@ -523,24 +587,24 @@ struct intel_plane_state { int scaler_id; /* - * linked_plane: + * planar_linked_plane: * * ICL planar formats require 2 planes that are updated as pairs. * This member is used to make sure the other plane is also updated * when required, and for update_slave() to find the correct * plane_state to pass as argument. */ - struct intel_plane *linked_plane; + struct intel_plane *planar_linked_plane; /* - * slave: + * planar_slave: * If set don't update use the linked plane's state for updating * this plane during atomic commit with the update_slave() callback. * * It's also used by the watermark code to ignore wm calculations on * this plane. They're calculated by the linked plane's wm code. */ - u32 slave; + u32 planar_slave; struct drm_intel_sprite_colorkey ckey; }; @@ -713,7 +777,33 @@ enum intel_output_format { }; struct intel_crtc_state { - struct drm_crtc_state base; + /* + * uapi (drm) state. This is the software state shown to userspace. + * In particular, the following members are used for bookkeeping: + * - crtc + * - state + * - *_changed + * - event + * - commit + * - mode_blob + */ + struct drm_crtc_state uapi; + + /* + * actual hardware state, the state we program to the hardware. 
+ * The following members are used to verify the hardware state: + * - enable + * - active + * - mode / adjusted_mode + * - color property blobs. + * + * During initial hw readout, they need to be copied to uapi. + */ + struct { + bool active, enable; + struct drm_property_blob *degamma_lut, *gamma_lut, *ctm; + struct drm_display_mode mode, adjusted_mode; + } hw; /** * quirks - bitfield with hw state readout quirks @@ -730,8 +820,8 @@ struct intel_crtc_state { bool update_pipe; /* can a fast modeset be performed? */ bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ - bool fb_changed; /* fb on any of the planes is changed */ bool fifo_changed; /* FIFO split is changed */ + bool preload_luts; /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, @@ -812,6 +902,15 @@ struct intel_crtc_state { /* Actual register state of the dpll, for shared dpll cross-checking. */ struct intel_dpll_hw_state dpll_hw_state; + /* + * ICL reserved DPLLs for the CRTC/port. The active PLL is selected by + * setting shared_dpll and dpll_hw_state to one of these reserved ones. + */ + struct icl_port_dpll { + struct intel_shared_dpll *pll; + struct intel_dpll_hw_state hw_state; + } icl_port_dplls[ICL_PORT_DPLL_COUNT]; + /* DSI PLL registers */ struct { u32 ctrl, div; @@ -826,10 +925,11 @@ struct intel_crtc_state { bool has_psr; bool has_psr2; + u32 dc3co_exitline; /* * Frequence the dpll for the port should run at. Differs from the - * adjusted dotclock e.g. for DP or 12bpc hdmi mode. This is also + * adjusted dotclock e.g. for DP or 10/12bpc hdmi mode. This is also * already multiplied by pixel_multiplier. */ int port_clock; @@ -887,6 +987,8 @@ struct intel_crtc_state { struct intel_crtc_wm_state wm; + int min_cdclk[I915_MAX_PLANES]; + u32 data_rate[I915_MAX_PLANES]; /* Gamma mode programmed on the pipe */ @@ -941,11 +1043,20 @@ struct intel_crtc_state { bool dsc_split; u16 compressed_bpp; u8 slice_count; - } dsc_params; - struct drm_dsc_config dp_dsc_cfg; + struct drm_dsc_config config; + } dsc; /* Forward Error correction State */ bool fec_enable; + + /* Pointer to master transcoder in case of tiled displays */ + enum transcoder master_transcoder; + + /* Bitmask to indicate slaves attached */ + u8 sync_mode_slaves_mask; + + /* Only valid on TGL+ */ + enum transcoder mst_master_transcoder; }; struct intel_crtc { @@ -988,6 +1099,9 @@ struct intel_crtc { /* scalers available on this crtc */ int num_scalers; + + /* per pipe DSB related info */ + struct intel_dsb dsb; }; struct intel_plane { @@ -1015,14 +1129,13 @@ struct intel_plane { void (*update_plane)(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); - void (*update_slave)(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state); void (*disable_plane)(struct intel_plane *plane, const struct intel_crtc_state *crtc_state); bool (*get_hw_state)(struct intel_plane *plane, enum pipe *pipe); int (*check_plane)(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); + int (*min_cdclk)(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); }; struct intel_watermark_params { @@ -1046,12 +1159,12 @@ struct cxsr_latency { #define to_intel_atomic_state(x) container_of(x, struct intel_atomic_state, base) #define to_intel_crtc(x) container_of(x, struct intel_crtc, base) -#define to_intel_crtc_state(x) container_of(x, 
struct intel_crtc_state, base) +#define to_intel_crtc_state(x) container_of(x, struct intel_crtc_state, uapi) #define to_intel_connector(x) container_of(x, struct intel_connector, base) #define to_intel_encoder(x) container_of(x, struct intel_encoder, base) #define to_intel_framebuffer(x) container_of(x, struct intel_framebuffer, base) #define to_intel_plane(x) container_of(x, struct intel_plane, base) -#define to_intel_plane_state(x) container_of(x, struct intel_plane_state, base) +#define to_intel_plane_state(x) container_of(x, struct intel_plane_state, uapi) #define intel_fb_obj(x) ((x) ? to_intel_bo((x)->obj[0]) : NULL) struct intel_hdmi { @@ -1138,6 +1251,7 @@ struct intel_dp { /* sink or branch descriptor */ struct drm_dp_desc desc; struct drm_dp_aux aux; + u32 aux_busy_last_status; u8 train_set[4]; int panel_power_up_delay; int panel_power_down_delay; @@ -1173,6 +1287,15 @@ struct intel_dp { bool can_mst; /* this port supports mst */ bool is_mst; int active_mst_links; + + /* + * DP_TP_* registers may be either on port or transcoder register space. + */ + struct { + i915_reg_t dp_tp_ctl; + i915_reg_t dp_tp_status; + } regs; + /* connector directly attached - won't be use for modeset in mst world */ struct intel_connector *attached_connector; @@ -1224,8 +1347,14 @@ struct intel_digital_port { /* Used for DP and ICL+ TypeC/DP and TypeC/HDMI ports. */ enum aux_ch aux_ch; enum intel_display_power_domain ddi_io_power_domain; + struct mutex tc_lock; /* protects the TypeC port mode */ + intel_wakeref_t tc_lock_wakeref; + int tc_link_refcount; bool tc_legacy_port:1; - enum tc_port_type tc_type; + char tc_port_name[8]; + enum tc_port_mode tc_mode; + enum phy_fia tc_phy_fia; + u8 tc_phy_fia_idx; void (*write_infoframe)(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, @@ -1309,9 +1438,9 @@ struct intel_load_detect_pipe { }; static inline struct intel_encoder * -intel_attached_encoder(struct drm_connector *connector) +intel_attached_encoder(struct intel_connector *connector) { - return to_intel_connector(connector)->encoder; + return connector->encoder; } static inline bool intel_encoder_is_dig_port(struct intel_encoder *encoder) @@ -1328,12 +1457,12 @@ static inline bool intel_encoder_is_dig_port(struct intel_encoder *encoder) } static inline struct intel_digital_port * -enc_to_dig_port(struct drm_encoder *encoder) +enc_to_dig_port(struct intel_encoder *encoder) { - struct intel_encoder *intel_encoder = to_intel_encoder(encoder); + struct intel_encoder *intel_encoder = encoder; if (intel_encoder_is_dig_port(intel_encoder)) - return container_of(encoder, struct intel_digital_port, + return container_of(&encoder->base, struct intel_digital_port, base.base); else return NULL; @@ -1342,16 +1471,17 @@ enc_to_dig_port(struct drm_encoder *encoder) static inline struct intel_digital_port * conn_to_dig_port(struct intel_connector *connector) { - return enc_to_dig_port(&intel_attached_encoder(&connector->base)->base); + return enc_to_dig_port(intel_attached_encoder(connector)); } static inline struct intel_dp_mst_encoder * -enc_to_mst(struct drm_encoder *encoder) +enc_to_mst(struct intel_encoder *encoder) { - return container_of(encoder, struct intel_dp_mst_encoder, base.base); + return container_of(&encoder->base, struct intel_dp_mst_encoder, + base.base); } -static inline struct intel_dp *enc_to_intel_dp(struct drm_encoder *encoder) +static inline struct intel_dp *enc_to_intel_dp(struct intel_encoder *encoder) { return &enc_to_dig_port(encoder)->dp; } @@ -1364,14 +1494,14 @@ 
static inline bool intel_encoder_is_dp(struct intel_encoder *encoder) return true; case INTEL_OUTPUT_DDI: /* Skip pure HDMI/DVI DDI encoders */ - return i915_mmio_reg_valid(enc_to_intel_dp(&encoder->base)->output_reg); + return i915_mmio_reg_valid(enc_to_intel_dp(encoder)->output_reg); default: return false; } } static inline struct intel_lspcon * -enc_to_intel_lspcon(struct drm_encoder *encoder) +enc_to_intel_lspcon(struct intel_encoder *encoder) { return &enc_to_dig_port(encoder)->lspcon; } @@ -1445,42 +1575,25 @@ intel_atomic_get_new_crtc_state(struct intel_atomic_state *state, &crtc->base)); } +static inline struct intel_digital_connector_state * +intel_atomic_get_new_connector_state(struct intel_atomic_state *state, + struct intel_connector *connector) +{ + return to_intel_digital_connector_state( + drm_atomic_get_new_connector_state(&state->base, + &connector->base)); +} + +static inline struct intel_digital_connector_state * +intel_atomic_get_old_connector_state(struct intel_atomic_state *state, + struct intel_connector *connector) +{ + return to_intel_digital_connector_state( + drm_atomic_get_old_connector_state(&state->base, + &connector->base)); +} + /* intel_display.c */ -void intel_plane_destroy(struct drm_plane *plane); -void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); -void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); -enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc); -int vlv_get_hpll_vco(struct drm_i915_private *dev_priv); -int vlv_get_cck_clock(struct drm_i915_private *dev_priv, - const char *name, u32 reg, int ref_freq); -int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, - const char *name, u32 reg); -void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv); -void lpt_disable_iclkip(struct drm_i915_private *dev_priv); -void intel_init_display_hooks(struct drm_i915_private *dev_priv); -unsigned int intel_fb_xy_to_linear(int x, int y, - const struct intel_plane_state *state, - int plane); -unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, - int color_plane, unsigned int height); -void intel_add_fb_offsets(int *x, int *y, - const struct intel_plane_state *state, int plane); -unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info); -unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info); -bool intel_has_pending_fb_unpin(struct drm_i915_private *dev_priv); -int intel_display_suspend(struct drm_device *dev); -void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv); -void intel_encoder_destroy(struct drm_encoder *encoder); -struct drm_display_mode * -intel_encoder_current_mode(struct intel_encoder *encoder); -bool intel_port_is_combophy(struct drm_i915_private *dev_priv, enum port port); -bool intel_port_is_tc(struct drm_i915_private *dev_priv, enum port port); -enum tc_port intel_port_to_tc(struct drm_i915_private *dev_priv, - enum port port); -int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv, - enum pipe pipe); static inline bool intel_crtc_has_type(const struct intel_crtc_state *crtc_state, enum intel_output_type type) @@ -1501,7 +1614,7 @@ intel_wait_for_vblank(struct drm_i915_private *dev_priv, enum pipe pipe) drm_wait_one_vblank(&dev_priv->drm, pipe); } static inline void -intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, int pipe) 
+intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, enum pipe pipe) { const struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -1509,108 +1622,9 @@ intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, int pipe) intel_wait_for_vblank(dev_priv, pipe); } -u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc); - -int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp); -void vlv_wait_port_ready(struct drm_i915_private *dev_priv, - struct intel_digital_port *dport, - unsigned int expected_mask); -int intel_get_load_detect_pipe(struct drm_connector *connector, - const struct drm_display_mode *mode, - struct intel_load_detect_pipe *old, - struct drm_modeset_acquire_ctx *ctx); -void intel_release_load_detect_pipe(struct drm_connector *connector, - struct intel_load_detect_pipe *old, - struct drm_modeset_acquire_ctx *ctx); -struct i915_vma * -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, - const struct i915_ggtt_view *view, - bool uses_fence, - unsigned long *out_flags); -void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags); -struct drm_framebuffer * -intel_framebuffer_create(struct drm_i915_gem_object *obj, - struct drm_mode_fb_cmd2 *mode_cmd); -int intel_prepare_plane_fb(struct drm_plane *plane, - struct drm_plane_state *new_state); -void intel_cleanup_plane_fb(struct drm_plane *plane, - struct drm_plane_state *old_state); - -void assert_pch_transcoder_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe); - -int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe, - const struct dpll *dpll); -void vlv_force_pll_off(struct drm_i915_private *dev_priv, enum pipe pipe); -int lpt_get_iclkip(struct drm_i915_private *dev_priv); -bool intel_fuzzy_clock_check(int clock1, int clock2); - -/* modesetting asserts */ -void assert_panel_unlocked(struct drm_i915_private *dev_priv, - enum pipe pipe); -void assert_pll(struct drm_i915_private *dev_priv, - enum pipe pipe, bool state); -#define assert_pll_enabled(d, p) assert_pll(d, p, true) -#define assert_pll_disabled(d, p) assert_pll(d, p, false) -void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state); -#define assert_dsi_pll_enabled(d) assert_dsi_pll(d, true) -#define assert_dsi_pll_disabled(d) assert_dsi_pll(d, false) -void assert_fdi_rx_pll(struct drm_i915_private *dev_priv, - enum pipe pipe, bool state); -#define assert_fdi_rx_pll_enabled(d, p) assert_fdi_rx_pll(d, p, true) -#define assert_fdi_rx_pll_disabled(d, p) assert_fdi_rx_pll(d, p, false) -void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state); -#define assert_pipe_enabled(d, p) assert_pipe(d, p, true) -#define assert_pipe_disabled(d, p) assert_pipe(d, p, false) -void intel_prepare_reset(struct drm_i915_private *dev_priv); -void intel_finish_reset(struct drm_i915_private *dev_priv); -void intel_dp_get_m_n(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config); -void intel_dp_set_m_n(const struct intel_crtc_state *crtc_state, - enum link_m_n_set m_n); -void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state); -int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n); -bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, - struct dpll *best_clock); -int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); - -bool intel_crtc_active(struct intel_crtc *crtc); -bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state); -void hsw_enable_ips(const struct 
intel_crtc_state *crtc_state); -void hsw_disable_ips(const struct intel_crtc_state *crtc_state); -enum intel_display_power_domain intel_port_to_power_domain(enum port port); -enum intel_display_power_domain -intel_aux_power_domain(struct intel_digital_port *dig_port); -void intel_mode_from_pipe_config(struct drm_display_mode *mode, - struct intel_crtc_state *pipe_config); -void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state); - -u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center); -int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); -int skl_max_scale(const struct intel_crtc_state *crtc_state, - u32 pixel_format); - static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) { return i915_ggtt_offset(state->vma); } -u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state); -u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state); -u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state); -u32 skl_plane_ctl_crtc(const struct intel_crtc_state *crtc_state); -u32 skl_plane_stride(const struct intel_plane_state *plane_state, - int plane); -int skl_check_plane_surface(struct intel_plane_state *plane_state); -int i9xx_check_plane_surface(struct intel_plane_state *plane_state); -int skl_format_to_fourcc(int format, bool rgb_order, bool alpha); -unsigned int i9xx_plane_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation); -int bdw_get_pipemisc_bpp(struct intel_crtc *crtc); - -#endif /* __INTEL_DRV_H__ */ +#endif /* __INTEL_DISPLAY_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index d0fc34826771..c7424e2a04a3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -44,15 +44,16 @@ #include "i915_debugfs.h" #include "i915_drv.h" +#include "i915_trace.h" #include "intel_atomic.h" #include "intel_audio.h" #include "intel_connector.h" #include "intel_ddi.h" +#include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_link_training.h" #include "intel_dp_mst.h" #include "intel_dpio_phy.h" -#include "intel_drv.h" #include "intel_fifo_underrun.h" #include "intel_hdcp.h" #include "intel_hdmi.h" @@ -62,22 +63,18 @@ #include "intel_panel.h" #include "intel_psr.h" #include "intel_sideband.h" +#include "intel_tc.h" #include "intel_vdsc.h" #define DP_DPRX_ESI_LEN 14 -/* DP DSC small joiner has 2 FIFOs each of 640 x 6 bytes */ -#define DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER 61440 -#define DP_DSC_MIN_SUPPORTED_BPC 8 -#define DP_DSC_MAX_SUPPORTED_BPC 10 - /* DP DSC throughput values used for slice count calculations KPixels/s */ #define DP_DSC_PEAK_PIXEL_RATE 2720000 #define DP_DSC_MAX_ENC_THROUGHPUT_0 340000 #define DP_DSC_MAX_ENC_THROUGHPUT_1 400000 -/* DP DSC FEC Overhead factor = (100 - 2.4)/100 */ -#define DP_DSC_FEC_OVERHEAD_FACTOR 976 +/* DP DSC FEC Overhead factor = 1/(0.972261) */ +#define DP_DSC_FEC_OVERHEAD_FACTOR 972261 /* Compliance test status bits */ #define INTEL_DP_RESOLUTION_SHIFT_MASK 0 @@ -149,9 +146,9 @@ bool intel_dp_is_edp(struct intel_dp *intel_dp) return intel_dig_port->base.type == INTEL_OUTPUT_EDP; } -static struct intel_dp *intel_attached_dp(struct drm_connector *connector) +static struct intel_dp *intel_attached_dp(struct intel_connector *connector) { - return 
enc_to_intel_dp(&intel_attached_encoder(connector)->base); + return enc_to_intel_dp(intel_attached_encoder(connector)); } static void intel_dp_link_down(struct intel_encoder *encoder, @@ -211,47 +208,13 @@ static int intel_dp_max_common_rate(struct intel_dp *intel_dp) return intel_dp->common_rates[intel_dp->num_common_rates - 1]; } -static int intel_dp_get_fia_supported_lane_count(struct intel_dp *intel_dp) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(dev_priv, dig_port->base.port); - intel_wakeref_t wakeref; - u32 lane_info; - - if (tc_port == PORT_TC_NONE || dig_port->tc_type != TC_PORT_TYPEC) - return 4; - - lane_info = 0; - with_intel_display_power(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref) - lane_info = (I915_READ(PORT_TX_DFLEXDPSP) & - DP_LANE_ASSIGNMENT_MASK(tc_port)) >> - DP_LANE_ASSIGNMENT_SHIFT(tc_port); - - switch (lane_info) { - default: - MISSING_CASE(lane_info); - /* fall through */ - case 1: - case 2: - case 4: - case 8: - return 1; - case 3: - case 12: - return 2; - case 15: - return 4; - } -} - /* Theoretical max between source and sink */ static int intel_dp_max_common_lane_count(struct intel_dp *intel_dp) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); int source_max = intel_dig_port->max_lanes; int sink_max = drm_dp_max_lane_count(intel_dp->dpcd); - int fia_max = intel_dp_get_fia_supported_lane_count(intel_dp); + int fia_max = intel_tc_port_fia_max_lane_count(intel_dig_port); return min3(source_max, sink_max, fia_max); } @@ -330,9 +293,9 @@ static int icl_max_source_rate(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->base.port; + enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port); - if (intel_port_is_combophy(dev_priv, port) && + if (intel_phy_is_combo(dev_priv, phy) && !IS_ELKHARTLAKE(dev_priv) && !intel_dp_is_edp(intel_dp)) return 540000; @@ -526,11 +489,132 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, return 0; } +u32 intel_dp_mode_to_fec_clock(u32 mode_clock) +{ + return div_u64(mul_u32_u32(mode_clock, 1000000U), + DP_DSC_FEC_OVERHEAD_FACTOR); +} + +static int +small_joiner_ram_size_bits(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 11) + return 7680 * 8; + else + return 6144 * 8; +} + +static u16 intel_dp_dsc_get_output_bpp(struct drm_i915_private *i915, + u32 link_clock, u32 lane_count, + u32 mode_clock, u32 mode_hdisplay) +{ + u32 bits_per_pixel, max_bpp_small_joiner_ram; + int i; + + /* + * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)* + * (LinkSymbolClock)* 8 * (TimeSlotsPerMTP) + * for SST -> TimeSlotsPerMTP is 1, + * for MST -> TimeSlotsPerMTP has to be calculated + */ + bits_per_pixel = (link_clock * lane_count * 8) / + intel_dp_mode_to_fec_clock(mode_clock); + DRM_DEBUG_KMS("Max link bpp: %u\n", bits_per_pixel); + + /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. 
width */ + max_bpp_small_joiner_ram = small_joiner_ram_size_bits(i915) / + mode_hdisplay; + DRM_DEBUG_KMS("Max small joiner bpp: %u\n", max_bpp_small_joiner_ram); + + /* + * Greatest allowed DSC BPP = MIN (output BPP from available Link BW + * check, output bpp from small joiner RAM check) + */ + bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram); + + /* Error out if the max bpp is less than smallest allowed valid bpp */ + if (bits_per_pixel < valid_dsc_bpp[0]) { + DRM_DEBUG_KMS("Unsupported BPP %u, min %u\n", + bits_per_pixel, valid_dsc_bpp[0]); + return 0; + } + + /* Find the nearest match in the array of known BPPs from VESA */ + for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) { + if (bits_per_pixel < valid_dsc_bpp[i + 1]) + break; + } + bits_per_pixel = valid_dsc_bpp[i]; + + /* + * Compressed BPP in U6.4 format so multiply by 16, for Gen 11, + * fractional part is 0 + */ + return bits_per_pixel << 4; +} + +static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, + int mode_clock, int mode_hdisplay) +{ + u8 min_slice_count, i; + int max_slice_width; + + if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) + min_slice_count = DIV_ROUND_UP(mode_clock, + DP_DSC_MAX_ENC_THROUGHPUT_0); + else + min_slice_count = DIV_ROUND_UP(mode_clock, + DP_DSC_MAX_ENC_THROUGHPUT_1); + + max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd); + if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { + DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n", + max_slice_width); + return 0; + } + /* Also take into account max slice width */ + min_slice_count = min_t(u8, min_slice_count, + DIV_ROUND_UP(mode_hdisplay, + max_slice_width)); + + /* Find the closest match to the valid slice count values */ + for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { + if (valid_dsc_slicecount[i] > + drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, + false)) + break; + if (min_slice_count <= valid_dsc_slicecount[i]) + return valid_dsc_slicecount[i]; + } + + DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count); + return 0; +} + +static bool intel_dp_hdisplay_bad(struct drm_i915_private *dev_priv, + int hdisplay) +{ + /* + * Older platforms don't like hdisplay==4096 with DP. + * + * On ILK/SNB/IVB the pipe seems to be somewhat running (scanline + * and frame counter increment), but we don't get vblank interrupts, + * and the pipe underruns immediately. The link also doesn't seem + * to get trained properly. + * + * On CHV the vblank interrupts don't seem to disappear but + * otherwise the symptoms are similar. + * + * TODO: confirm the behaviour on HSW+ + */ + return hdisplay == 4096 && !HAS_DDI(dev_priv); +} + static enum drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; struct drm_i915_private *dev_priv = to_i915(connector->dev); @@ -561,6 +645,9 @@ intel_dp_mode_valid(struct drm_connector *connector, max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes); mode_rate = intel_dp_link_required(target_clock, 18); + if (intel_dp_hdisplay_bad(dev_priv, mode->hdisplay)) + return MODE_H_ILLEGAL; + /* * Output bpp is stored in 6.4 format so right shift by 4 to get the * integer value since we support only integer values of bpp. 
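[Editor's aside, not part of the patch] The hunk above replaces the old percentage-style FEC constant with DP_DSC_FEC_OVERHEAD_FACTOR = 972261 (i.e. 1/0.972261 scaled by 1e6) and computes the greatest DSC output bpp as the minimum of the available link bandwidth and the small joiner RAM (7680 * 8 bits on gen11+, 6144 * 8 bits before), then snaps the result down to a valid VESA bpp and returns it in U6.4 format. The stand-alone C sketch below only mirrors that arithmetic for illustration; the valid-bpp table {6, 8, 10, 12, 15} and the example link/mode numbers are assumptions, not taken from this hunk.

#include <stdint.h>
#include <stdio.h>

/* 1/0.972261 scaled by 1e6, as in the patch's DP_DSC_FEC_OVERHEAD_FACTOR. */
#define FEC_OVERHEAD_FACTOR     972261u

/* mode_clock (kHz) adjusted for FEC overhead, mirroring intel_dp_mode_to_fec_clock(). */
static uint32_t mode_to_fec_clock(uint32_t mode_clock)
{
        return (uint32_t)((uint64_t)mode_clock * 1000000u / FEC_OVERHEAD_FACTOR);
}

/*
 * Greatest DSC output bpp in U6.4 format: limited by link bandwidth and by
 * the small joiner RAM, then snapped to a valid VESA bpp.
 * joiner_ram_bits is 7680 * 8 on gen11+, 6144 * 8 on earlier platforms.
 */
static uint16_t dsc_max_output_bpp_x16(uint32_t link_clock, uint32_t lane_count,
                                       uint32_t mode_clock, uint32_t hdisplay,
                                       uint32_t joiner_ram_bits)
{
        /* Assumed contents of the driver's valid_dsc_bpp[] table. */
        static const uint8_t valid_bpp[] = { 6, 8, 10, 12, 15 };
        uint32_t bpp = link_clock * lane_count * 8 / mode_to_fec_clock(mode_clock);
        uint32_t ram_bpp = joiner_ram_bits / hdisplay;
        size_t i;

        /* Small joiner check: output bpp <= joiner RAM (bits) / horizontal width. */
        if (ram_bpp < bpp)
                bpp = ram_bpp;

        if (bpp < valid_bpp[0])
                return 0;       /* below the smallest valid DSC bpp */

        /* Snap down to the nearest known VESA bpp. */
        for (i = 0; i < sizeof(valid_bpp) - 1; i++) {
                if (bpp < valid_bpp[i + 1])
                        break;
        }

        return (uint16_t)(valid_bpp[i] << 4);   /* U6.4: integer bpp * 16 */
}

int main(void)
{
        /* Hypothetical HBR3 x4 link driving a 3840-wide mode on a gen11+ part. */
        printf("max DSC bpp (U6.4): %u\n",
               (unsigned int)dsc_max_output_bpp_x16(810000, 4, 533250, 3840, 7680 * 8));
        return 0;
}

With the numbers above the link-bandwidth limit (~47 bpp) is far looser than the small joiner RAM limit (61440 / 3840 = 16 bpp), so the helper snaps down to 15 bpp and returns 240 in U6.4 form, matching how the driver clamps before dividing by 16 again in intel_dp_mode_valid().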
@@ -575,7 +662,8 @@ intel_dp_mode_valid(struct drm_connector *connector, true); } else if (drm_dp_sink_supports_fec(intel_dp->fec_capable)) { dsc_max_output_bpp = - intel_dp_dsc_get_output_bpp(max_link_clock, + intel_dp_dsc_get_output_bpp(dev_priv, + max_link_clock, max_lanes, target_clock, mode->hdisplay) >> 4; @@ -596,7 +684,7 @@ intel_dp_mode_valid(struct drm_connector *connector, if (mode->flags & DRM_MODE_FLAG_DBLCLK) return MODE_H_ILLEGAL; - return MODE_OK; + return intel_mode_valid_max_plane_size(dev_priv, mode); } u32 intel_dp_pack_aux(const u8 *src, int src_bytes) @@ -673,12 +761,14 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) u32 DP; if (WARN(I915_READ(intel_dp->output_reg) & DP_PORT_EN, - "skipping pipe %c power sequencer kick due to port %c being active\n", - pipe_name(pipe), port_name(intel_dig_port->base.port))) + "skipping pipe %c power sequencer kick due to [ENCODER:%d:%s] being active\n", + pipe_name(pipe), intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name)) return; - DRM_DEBUG_KMS("kicking pipe %c power sequencer for port %c\n", - pipe_name(pipe), port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("kicking pipe %c power sequencer for [ENCODER:%d:%s]\n", + pipe_name(pipe), intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); /* Preserve the BIOS-computed detected bit. This is * supposed to be read-only. @@ -744,7 +834,7 @@ static enum pipe vlv_find_free_pps(struct drm_i915_private *dev_priv) * Pick one that's not used by other ports. */ for_each_intel_dp(&dev_priv->drm, encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); if (encoder->type == INTEL_OUTPUT_EDP) { WARN_ON(intel_dp->active_pipe != INVALID_PIPE && @@ -796,9 +886,10 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) vlv_steal_power_sequencer(dev_priv, pipe); intel_dp->pps_pipe = pipe; - DRM_DEBUG_KMS("picked pipe %c power sequencer for port %c\n", + DRM_DEBUG_KMS("picked pipe %c power sequencer for [ENCODER:%d:%s]\n", pipe_name(intel_dp->pps_pipe), - port_name(intel_dig_port->base.port)); + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(intel_dp); @@ -906,13 +997,16 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) /* didn't find one? 
just let vlv_power_sequencer_pipe() pick one when needed */ if (intel_dp->pps_pipe == INVALID_PIPE) { - DRM_DEBUG_KMS("no initial power sequencer for port %c\n", - port_name(port)); + DRM_DEBUG_KMS("no initial power sequencer for [ENCODER:%d:%s]\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); return; } - DRM_DEBUG_KMS("initial power sequencer for port %c: pipe %c\n", - port_name(port), pipe_name(intel_dp->pps_pipe)); + DRM_DEBUG_KMS("initial power sequencer for [ENCODER:%d:%s]: pipe %c\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name, + pipe_name(intel_dp->pps_pipe)); intel_dp_init_panel_power_sequencer(intel_dp); intel_dp_init_panel_power_sequencer_registers(intel_dp, false); @@ -937,7 +1031,7 @@ void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) */ for_each_intel_dp(&dev_priv->drm, encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); WARN_ON(intel_dp->active_pipe != INVALID_PIPE); @@ -1085,18 +1179,20 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); + const unsigned int timeout_ms = 10; u32 status; bool done; #define C (((status = intel_uncore_read_notrace(&i915->uncore, ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) done = wait_event_timeout(i915->gmbus_wait_queue, C, - msecs_to_jiffies_timeout(10)); + msecs_to_jiffies_timeout(timeout_ms)); /* just trace the final value */ trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true); if (!done) - DRM_ERROR("dp aux hw did not signal timeout!\n"); + DRM_ERROR("%s did not complete or timeout within %ums (status 0x%08x)\n", + intel_dp->aux.name, timeout_ms, status); #undef C return status; @@ -1197,6 +1293,9 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, u32 unused) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *i915 = + to_i915(intel_dig_port->base.base.dev); + enum phy phy = intel_port_to_phy(i915, intel_dig_port->base.port); u32 ret; ret = DP_AUX_CH_CTL_SEND_BUSY | @@ -1209,7 +1308,8 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(32) | DP_AUX_CH_CTL_SYNC_PULSE_SKL(32); - if (intel_dig_port->tc_type == TC_PORT_TBT) + if (intel_phy_is_tc(i915, phy) && + intel_dig_port->tc_mode == TC_PORT_TBT_ALT) ret |= DP_AUX_CH_CTL_TBT_IO; return ret; @@ -1225,6 +1325,8 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev); struct intel_uncore *uncore = &i915->uncore; + enum phy phy = intel_port_to_phy(i915, intel_dig_port->base.port); + bool is_tc_port = intel_phy_is_tc(i915, phy); i915_reg_t ch_ctl, ch_data[5]; u32 aux_clock_divider; enum intel_display_power_domain aux_domain = @@ -1240,6 +1342,9 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, for (i = 0; i < ARRAY_SIZE(ch_data); i++) ch_data[i] = intel_dp->aux_ch_data_reg(intel_dp, i); + if (is_tc_port) + intel_tc_port_lock(intel_dig_port); + aux_wakeref = intel_display_power_get(i915, aux_domain); pps_wakeref = pps_lock(intel_dp); @@ -1270,13 +1375,12 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true); if (try == 3) { - static u32 last_status = -1; const u32 status = intel_uncore_read(uncore, ch_ctl); - if (status != last_status) { + if (status != intel_dp->aux_busy_last_status) { WARN(1, "dp_aux_ch not started 
status 0x%08x\n", status); - last_status = status; + intel_dp->aux_busy_last_status = status; } ret = -EBUSY; @@ -1392,6 +1496,9 @@ out: pps_unlock(intel_dp, pps_wakeref); intel_display_power_put_async(i915, aux_domain, aux_wakeref); + if (is_tc_port) + intel_tc_port_unlock(intel_dig_port); + return ret; } @@ -1565,6 +1672,7 @@ static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp) case AUX_CH_D: case AUX_CH_E: case AUX_CH_F: + case AUX_CH_G: return DP_AUX_CH_CTL(aux_ch); default: MISSING_CASE(aux_ch); @@ -1585,6 +1693,7 @@ static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index) case AUX_CH_D: case AUX_CH_E: case AUX_CH_F: + case AUX_CH_G: return DP_AUX_CH_DATA(aux_ch, index); default: MISSING_CASE(aux_ch); @@ -1705,7 +1814,7 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp) { char str[128]; /* FIXME: too big for stack? */ - if ((drm_debug & DRM_UT_KMS) == 0) + if (!drm_debug_enabled(DRM_UT_KMS)) return; snprintf_int_array(str, sizeof(str), @@ -1763,8 +1872,14 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - return INTEL_GEN(dev_priv) >= 11 && - pipe_config->cpu_transcoder != TRANSCODER_A; + /* On TGL, FEC is supported on all Pipes */ + if (INTEL_GEN(dev_priv) >= 12) + return true; + + if (IS_GEN(dev_priv, 11) && pipe_config->cpu_transcoder != TRANSCODER_A) + return true; + + return false; } static bool intel_dp_supports_fec(struct intel_dp *intel_dp, @@ -1774,22 +1889,15 @@ static bool intel_dp_supports_fec(struct intel_dp *intel_dp, drm_dp_sink_supports_fec(intel_dp->fec_capable); } -static bool intel_dp_source_supports_dsc(struct intel_dp *intel_dp, - const struct intel_crtc_state *pipe_config) -{ - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - - return INTEL_GEN(dev_priv) >= 10 && - pipe_config->cpu_transcoder != TRANSCODER_A; -} - static bool intel_dp_supports_dsc(struct intel_dp *intel_dp, - const struct intel_crtc_state *pipe_config) + const struct intel_crtc_state *crtc_state) { - if (!intel_dp_is_edp(intel_dp) && !pipe_config->fec_enable) + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + + if (!intel_dp_is_edp(intel_dp) && !crtc_state->fec_enable) return false; - return intel_dp_source_supports_dsc(intel_dp, pipe_config) && + return intel_dsc_source_support(encoder, crtc_state) && drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd); } @@ -1874,13 +1982,15 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config, const struct link_config_limits *limits) { - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; int bpp, clock, lane_count; int mode_rate, link_clock, link_avail; for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { + int output_bpp = intel_dp_output_bpp(pipe_config, bpp); + mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, - bpp); + output_bpp); for (clock = limits->min_clock; clock <= limits->max_clock; clock++) { for (lane_count = limits->min_lane_count; @@ -1919,6 +2029,63 @@ static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc) return 0; } +#define DSC_SUPPORTED_VERSION_MIN 1 + +static int intel_dp_dsc_compute_params(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; + u8 line_buf_depth; + int 
ret; + + ret = intel_dsc_compute_params(encoder, crtc_state); + if (ret) + return ret; + + /* + * Slice Height of 8 works for all currently available panels. So start + * with that if pic_height is an integral multiple of 8. Eventually add + * logic to try multiple slice heights. + */ + if (vdsc_cfg->pic_height % 8 == 0) + vdsc_cfg->slice_height = 8; + else if (vdsc_cfg->pic_height % 4 == 0) + vdsc_cfg->slice_height = 4; + else + vdsc_cfg->slice_height = 2; + + vdsc_cfg->dsc_version_major = + (intel_dp->dsc_dpcd[DP_DSC_REV - DP_DSC_SUPPORT] & + DP_DSC_MAJOR_MASK) >> DP_DSC_MAJOR_SHIFT; + vdsc_cfg->dsc_version_minor = + min(DSC_SUPPORTED_VERSION_MIN, + (intel_dp->dsc_dpcd[DP_DSC_REV - DP_DSC_SUPPORT] & + DP_DSC_MINOR_MASK) >> DP_DSC_MINOR_SHIFT); + + vdsc_cfg->convert_rgb = intel_dp->dsc_dpcd[DP_DSC_DEC_COLOR_FORMAT_CAP - DP_DSC_SUPPORT] & + DP_DSC_RGB; + + line_buf_depth = drm_dp_dsc_sink_line_buf_depth(intel_dp->dsc_dpcd); + if (!line_buf_depth) { + DRM_DEBUG_KMS("DSC Sink Line Buffer Depth invalid\n"); + return -EINVAL; + } + + if (vdsc_cfg->dsc_version_minor == 2) + vdsc_cfg->line_buf_depth = (line_buf_depth == DSC_1_2_MAX_LINEBUF_DEPTH_BITS) ? + DSC_1_2_MAX_LINEBUF_DEPTH_VAL : line_buf_depth; + else + vdsc_cfg->line_buf_depth = (line_buf_depth > DSC_1_1_MAX_LINEBUF_DEPTH_BITS) ? + DSC_1_1_MAX_LINEBUF_DEPTH_BITS : line_buf_depth; + + vdsc_cfg->block_pred_enable = + intel_dp->dsc_dpcd[DP_DSC_BLK_PREDICTION_SUPPORT - DP_DSC_SUPPORT] & + DP_DSC_BLK_PREDICTION_IS_SUPPORTED; + + return drm_dsc_compute_rc_parameters(vdsc_cfg); +} + static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state, @@ -1926,7 +2093,7 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; u8 dsc_max_bpc; int pipe_bpp; int ret; @@ -1937,11 +2104,17 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, if (!intel_dp_supports_dsc(intel_dp, pipe_config)) return -EINVAL; - dsc_max_bpc = min_t(u8, DP_DSC_MAX_SUPPORTED_BPC, - conn_state->max_requested_bpc); + /* Max DSC Input BPC for ICL is 10 and for TGL+ is 12 */ + if (INTEL_GEN(dev_priv) >= 12) + dsc_max_bpc = min_t(u8, 12, conn_state->max_requested_bpc); + else + dsc_max_bpc = min_t(u8, 10, + conn_state->max_requested_bpc); pipe_bpp = intel_dp_dsc_compute_bpp(intel_dp, dsc_max_bpc); - if (pipe_bpp < DP_DSC_MIN_SUPPORTED_BPC * 3) { + + /* Min Input BPC for ICL+ is 8 */ + if (pipe_bpp < 8 * 3) { DRM_DEBUG_KMS("No DSC support for less than 8bpc\n"); return -EINVAL; } @@ -1956,10 +2129,10 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, pipe_config->lane_count = limits->max_lane_count; if (intel_dp_is_edp(intel_dp)) { - pipe_config->dsc_params.compressed_bpp = + pipe_config->dsc.compressed_bpp = min_t(u16, drm_edp_dsc_sink_output_bpp(intel_dp->dsc_dpcd) >> 4, pipe_config->pipe_bpp); - pipe_config->dsc_params.slice_count = + pipe_config->dsc.slice_count = drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, true); } else { @@ -1967,7 +2140,8 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, u8 dsc_dp_slice_count; dsc_max_output_bpp = - intel_dp_dsc_get_output_bpp(pipe_config->port_clock, + intel_dp_dsc_get_output_bpp(dev_priv, + 
pipe_config->port_clock, pipe_config->lane_count, adjusted_mode->crtc_clock, adjusted_mode->crtc_hdisplay); @@ -1979,10 +2153,10 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, DRM_DEBUG_KMS("Compressed BPP/Slice Count not supported\n"); return -EINVAL; } - pipe_config->dsc_params.compressed_bpp = min_t(u16, + pipe_config->dsc.compressed_bpp = min_t(u16, dsc_max_output_bpp >> 4, pipe_config->pipe_bpp); - pipe_config->dsc_params.slice_count = dsc_dp_slice_count; + pipe_config->dsc.slice_count = dsc_dp_slice_count; } /* * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate @@ -1990,29 +2164,29 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, * then we need to use 2 VDSC instances. */ if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq) { - if (pipe_config->dsc_params.slice_count > 1) { - pipe_config->dsc_params.dsc_split = true; + if (pipe_config->dsc.slice_count > 1) { + pipe_config->dsc.dsc_split = true; } else { DRM_DEBUG_KMS("Cannot split stream to use 2 VDSC instances\n"); return -EINVAL; } } - ret = intel_dp_compute_dsc_params(intel_dp, pipe_config); + ret = intel_dp_dsc_compute_params(&dig_port->base, pipe_config); if (ret < 0) { DRM_DEBUG_KMS("Cannot compute valid DSC parameters for Input Bpp = %d " "Compressed BPP = %d\n", pipe_config->pipe_bpp, - pipe_config->dsc_params.compressed_bpp); + pipe_config->dsc.compressed_bpp); return ret; } - pipe_config->dsc_params.compression_enable = true; + pipe_config->dsc.compression_enable = true; DRM_DEBUG_KMS("DP DSC computed with Input Bpp = %d " "Compressed Bpp = %d Slice Count = %d\n", pipe_config->pipe_bpp, - pipe_config->dsc_params.compressed_bpp, - pipe_config->dsc_params.slice_count); + pipe_config->dsc.compressed_bpp, + pipe_config->dsc.slice_count); return 0; } @@ -2030,8 +2204,8 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct link_config_limits limits; int common_len; int ret; @@ -2086,15 +2260,15 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, return ret; } - if (pipe_config->dsc_params.compression_enable) { + if (pipe_config->dsc.compression_enable) { DRM_DEBUG_KMS("DP lane count %d clock %d Input bpp %d Compressed bpp %d\n", pipe_config->lane_count, pipe_config->port_clock, pipe_config->pipe_bpp, - pipe_config->dsc_params.compressed_bpp); + pipe_config->dsc.compressed_bpp); DRM_DEBUG_KMS("DP link rate required %i available %i\n", intel_dp_link_required(adjusted_mode->crtc_clock, - pipe_config->dsc_params.compressed_bpp), + pipe_config->dsc.compressed_bpp), intel_dp_max_data_rate(pipe_config->port_clock, pipe_config->lane_count)); } else { @@ -2118,8 +2292,8 @@ intel_dp_ycbcr420_config(struct intel_dp *intel_dp, { const struct drm_display_info *info = &connector->display_info; const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + &crtc_state->hw.adjusted_mode; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); int ret; if (!drm_mode_is_420_only(info, adjusted_mode) || @@ -2147,7 +2321,17 @@ bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state, const struct 
intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; + + /* + * Our YCbCr output is always limited range. + * crtc_state->limited_color_range only applies to RGB, + * and it must never be set for YCbCr or we risk setting + * some conflicting bits in PIPECONF which will mess up + * the colors on the monitor. + */ + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + return false; if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* @@ -2164,17 +2348,28 @@ bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state, } } +static bool intel_dp_port_has_audio(struct drm_i915_private *dev_priv, + enum port port) +{ + if (IS_G4X(dev_priv)) + return false; + if (INTEL_GEN(dev_priv) < 12 && port == PORT_A) + return false; + + return true; +} + int intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct intel_lspcon *lspcon = enc_to_intel_lspcon(&encoder->base); + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder); enum port port = encoder->port; - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_connector *intel_connector = intel_dp->attached_connector; struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); @@ -2186,6 +2381,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = true; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + if (lspcon->active) lspcon_ycbcr420_config(&intel_connector->base, pipe_config); else @@ -2196,7 +2392,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, return ret; pipe_config->has_drrs = false; - if (IS_G4X(dev_priv) || port == PORT_A) + if (!intel_dp_port_has_audio(dev_priv, port)) pipe_config->has_audio = false; else if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) pipe_config->has_audio = intel_dp->has_audio; @@ -2231,6 +2427,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return -EINVAL; + if (intel_dp_hdisplay_bad(dev_priv, adjusted_mode->crtc_hdisplay)) + return -EINVAL; + ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state); if (ret < 0) return ret; @@ -2238,8 +2437,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->limited_color_range = intel_dp_limited_color_range(pipe_config, conn_state); - if (pipe_config->dsc_params.compression_enable) - output_bpp = pipe_config->dsc_params.compressed_bpp; + if (pipe_config->dsc.compression_enable) + output_bpp = pipe_config->dsc.compressed_bpp; else output_bpp = intel_dp_output_bpp(pipe_config, pipe_config->pipe_bpp); @@ -2248,7 +2447,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, adjusted_mode->crtc_clock, pipe_config->port_clock, &pipe_config->dp_m_n, - constant_n); + constant_n, pipe_config->fec_enable); if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == 
SEAMLESS_DRRS_SUPPORT) { @@ -2258,7 +2457,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_connector->panel.downclock_mode->clock, pipe_config->port_clock, &pipe_config->dp_m2_n2, - constant_n); + constant_n, pipe_config->fec_enable); } if (!HAS_DDI(dev_priv)) @@ -2283,16 +2482,19 @@ static void intel_dp_prepare(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); - const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; intel_dp_set_link_params(intel_dp, pipe_config->port_clock, pipe_config->lane_count, intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)); + intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port); + intel_dp->regs.dp_tp_status = DP_TP_STATUS(port); + /* * There are four kinds of DP registers: * @@ -2307,7 +2509,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, * * CPT PCH is quite different, having many bits moved * to the TRANS_DP_CTL register instead. That - * configuration happens (oddly) in ironlake_pch_enable + * configuration happens (oddly) in ilk_pch_enable */ /* Preserve the BIOS-computed detected bit. This is @@ -2393,9 +2595,8 @@ static void wait_panel_status(struct intel_dp *intel_dp, I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg)); - if (intel_wait_for_register(&dev_priv->uncore, - pp_stat_reg, mask, value, - 5000)) + if (intel_de_wait_for_register(dev_priv, pp_stat_reg, + mask, value, 5000)) DRM_ERROR("Panel status timeout: status %08x control %08x\n", I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg)); @@ -2452,7 +2653,7 @@ static void edp_wait_backlight_off(struct intel_dp *intel_dp) * is locked */ -static u32 ironlake_get_pp_control(struct intel_dp *intel_dp) +static u32 ilk_get_pp_control(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u32 control; @@ -2495,13 +2696,14 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) intel_display_power_get(dev_priv, intel_aux_power_domain(intel_dig_port)); - DRM_DEBUG_KMS("Turning eDP port %c VDD on\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("Turning [ENCODER:%d:%s] VDD on\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); if (!edp_have_panel_power(intel_dp)) wait_panel_power_cycle(intel_dp); - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); pp |= EDP_FORCE_VDD; pp_stat_reg = _pp_stat_reg(intel_dp); @@ -2515,8 +2717,9 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) * If the panel wasn't on, delay before accessing aux channel */ if (!edp_have_panel_power(intel_dp)) { - DRM_DEBUG_KMS("eDP port %c panel power wasn't enabled\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("[ENCODER:%d:%s] panel power wasn't enabled\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); msleep(intel_dp->panel_power_up_delay); } @@ -2541,8 +2744,9 @@ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp) vdd = false; with_pps_lock(intel_dp, wakeref) vdd = edp_panel_vdd_on(intel_dp); - I915_STATE_WARN(!vdd, "eDP port %c VDD already requested on\n", - 
port_name(dp_to_dig_port(intel_dp)->base.port)); + I915_STATE_WARN(!vdd, "[ENCODER:%d:%s] VDD already requested on\n", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name); } static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) @@ -2560,10 +2764,11 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) if (!edp_have_panel_vdd(intel_dp)) return; - DRM_DEBUG_KMS("Turning eDP port %c VDD off\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("Turning [ENCODER:%d:%s] VDD off\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); pp &= ~EDP_FORCE_VDD; pp_ctrl_reg = _pp_ctrl_reg(intel_dp); @@ -2623,8 +2828,9 @@ static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync) if (!intel_dp_is_edp(intel_dp)) return; - I915_STATE_WARN(!intel_dp->want_panel_vdd, "eDP port %c VDD not forced on", - port_name(dp_to_dig_port(intel_dp)->base.port)); + I915_STATE_WARN(!intel_dp->want_panel_vdd, "[ENCODER:%d:%s] VDD not forced on", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name); intel_dp->want_panel_vdd = false; @@ -2645,18 +2851,20 @@ static void edp_panel_on(struct intel_dp *intel_dp) if (!intel_dp_is_edp(intel_dp)) return; - DRM_DEBUG_KMS("Turn eDP port %c panel power on\n", - port_name(dp_to_dig_port(intel_dp)->base.port)); + DRM_DEBUG_KMS("Turn [ENCODER:%d:%s] panel power on\n", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name); if (WARN(edp_have_panel_power(intel_dp), - "eDP port %c panel power already on\n", - port_name(dp_to_dig_port(intel_dp)->base.port))) + "[ENCODER:%d:%s] panel power already on\n", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name)) return; wait_panel_power_cycle(intel_dp); pp_ctrl_reg = _pp_ctrl_reg(intel_dp); - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); if (IS_GEN(dev_priv, 5)) { /* ILK workaround: disable reset around power sequence */ pp &= ~PANEL_POWER_RESET; @@ -2705,13 +2913,13 @@ static void edp_panel_off(struct intel_dp *intel_dp) if (!intel_dp_is_edp(intel_dp)) return; - DRM_DEBUG_KMS("Turn eDP port %c panel power off\n", - port_name(dig_port->base.port)); + DRM_DEBUG_KMS("Turn [ENCODER:%d:%s] panel power off\n", + dig_port->base.base.base.id, dig_port->base.base.name); - WARN(!intel_dp->want_panel_vdd, "Need eDP port %c VDD to turn off panel\n", - port_name(dig_port->base.port)); + WARN(!intel_dp->want_panel_vdd, "Need [ENCODER:%d:%s] VDD to turn off panel\n", + dig_port->base.base.base.id, dig_port->base.base.name); - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); /* We need to switch off panel power _and_ force vdd, for otherwise some * panels get very unhappy and cease to work. 
*/ pp &= ~(PANEL_POWER_ON | PANEL_POWER_RESET | EDP_FORCE_VDD | @@ -2760,7 +2968,7 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp); u32 pp; - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); pp |= EDP_BLC_ENABLE; I915_WRITE(pp_ctrl_reg, pp); @@ -2772,7 +2980,7 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(conn_state->best_encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(conn_state->best_encoder)); if (!intel_dp_is_edp(intel_dp)) return; @@ -2796,7 +3004,7 @@ static void _intel_edp_backlight_off(struct intel_dp *intel_dp) i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp); u32 pp; - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); pp &= ~EDP_BLC_ENABLE; I915_WRITE(pp_ctrl_reg, pp); @@ -2810,7 +3018,7 @@ static void _intel_edp_backlight_off(struct intel_dp *intel_dp) /* Disable backlight PP control and backlight PWM. */ void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(old_conn_state->best_encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder)); if (!intel_dp_is_edp(intel_dp)) return; @@ -2828,13 +3036,13 @@ void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state) static void intel_edp_backlight_power(struct intel_connector *connector, bool enable) { - struct intel_dp *intel_dp = intel_attached_dp(&connector->base); + struct intel_dp *intel_dp = intel_attached_dp(connector); intel_wakeref_t wakeref; bool is_enabled; is_enabled = false; with_pps_lock(intel_dp, wakeref) - is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE; + is_enabled = ilk_get_pp_control(intel_dp) & EDP_BLC_ENABLE; if (is_enabled == enable) return; @@ -2854,8 +3062,8 @@ static void assert_dp_port(struct intel_dp *intel_dp, bool state) bool cur_state = I915_READ(intel_dp->output_reg) & DP_PORT_EN; I915_STATE_WARN(cur_state != state, - "DP port %c state assertion failure (expected %s, current %s)\n", - port_name(dig_port->base.port), + "[ENCODER:%d:%s] state assertion failure (expected %s, current %s)\n", + dig_port->base.base.base.id, dig_port->base.base.name, onoff(state), onoff(cur_state)); } #define assert_dp_port_disabled(d) assert_dp_port((d), false) @@ -2871,13 +3079,13 @@ static void assert_edp_pll(struct drm_i915_private *dev_priv, bool state) #define assert_edp_pll_enabled(d) assert_edp_pll((d), true) #define assert_edp_pll_disabled(d) assert_edp_pll((d), false) -static void ironlake_edp_pll_on(struct intel_dp *intel_dp, - const struct intel_crtc_state *pipe_config) +static void ilk_edp_pll_on(struct intel_dp *intel_dp, + const struct intel_crtc_state *pipe_config) { - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - assert_pipe_disabled(dev_priv, crtc->pipe); + assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder); assert_dp_port_disabled(intel_dp); assert_edp_pll_disabled(dev_priv); @@ -2911,13 +3119,13 @@ static void ironlake_edp_pll_on(struct intel_dp *intel_dp, udelay(200); } -static void ironlake_edp_pll_off(struct intel_dp *intel_dp, - const struct intel_crtc_state *old_crtc_state) 
+static void ilk_edp_pll_off(struct intel_dp *intel_dp, + const struct intel_crtc_state *old_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - assert_pipe_disabled(dev_priv, crtc->pipe); + assert_pipe_disabled(dev_priv, old_crtc_state->cpu_transcoder); assert_dp_port_disabled(intel_dp); assert_edp_pll_enabled(dev_priv); @@ -2951,7 +3159,7 @@ void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, { int ret; - if (!crtc_state->dsc_params.compression_enable) + if (!crtc_state->dsc.compression_enable) return; ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_DSC_ENABLE, @@ -3050,7 +3258,7 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_wakeref_t wakeref; bool ret; @@ -3071,10 +3279,10 @@ static void intel_dp_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); u32 tmp, flags = 0; enum port port = encoder->port; - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); if (encoder->type == INTEL_OUTPUT_EDP) pipe_config->output_types |= BIT(INTEL_OUTPUT_EDP); @@ -3109,7 +3317,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder, flags |= DRM_MODE_FLAG_NVSYNC; } - pipe_config->base.adjusted_mode.flags |= flags; + pipe_config->hw.adjusted_mode.flags |= flags; if (IS_G4X(dev_priv) && tmp & DP_COLOR_RANGE_16_235) pipe_config->limited_color_range = true; @@ -3126,7 +3334,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder, pipe_config->port_clock = 270000; } - pipe_config->base.adjusted_mode.crtc_clock = + pipe_config->hw.adjusted_mode.crtc_clock = intel_dotclock_calculate(pipe_config->port_clock, &pipe_config->dp_m_n); @@ -3155,7 +3363,7 @@ static void intel_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_dp->link_trained = false; @@ -3189,7 +3397,7 @@ static void g4x_post_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; /* @@ -3202,7 +3410,7 @@ static void g4x_post_disable_dp(struct intel_encoder *encoder, /* Only ilk+ has port A */ if (port == PORT_A) - ironlake_edp_pll_off(intel_dp, old_crtc_state); + ilk_edp_pll_off(intel_dp, old_crtc_state); } static void vlv_post_disable_dp(struct intel_encoder *encoder, @@ -3243,7 +3451,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, dp_train_pat & train_pat_mask); if (HAS_DDI(dev_priv)) { - u32 temp = I915_READ(DP_TP_CTL(port)); + u32 temp = I915_READ(intel_dp->regs.dp_tp_ctl); if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE) temp |= DP_TP_CTL_SCRAMBLE_DISABLE; @@ -3269,7 +3477,7 @@ 
_intel_dp_set_link_train(struct intel_dp *intel_dp, temp |= DP_TP_CTL_LINK_TRAIN_PAT4; break; } - I915_WRITE(DP_TP_CTL(port), temp); + I915_WRITE(intel_dp->regs.dp_tp_ctl, temp); } else if ((IS_IVYBRIDGE(dev_priv) && port == PORT_A) || (HAS_PCH_CPT(dev_priv) && port != PORT_A)) { @@ -3340,8 +3548,8 @@ static void intel_enable_dp(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); u32 dp_reg = I915_READ(intel_dp->output_reg); enum pipe pipe = crtc->pipe; intel_wakeref_t wakeref; @@ -3400,14 +3608,14 @@ static void g4x_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; intel_dp_prepare(encoder, pipe_config); /* Only ilk+ has port A */ if (port == PORT_A) - ironlake_edp_pll_on(intel_dp, pipe_config); + ilk_edp_pll_on(intel_dp, pipe_config); } static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) @@ -3433,8 +3641,9 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) * port select always when logically disconnecting a power sequencer * from a port. */ - DRM_DEBUG_KMS("detaching pipe %c power sequencer from port %c\n", - pipe_name(pipe), port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("detaching pipe %c power sequencer from [ENCODER:%d:%s]\n", + pipe_name(pipe), intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); I915_WRITE(pp_on_reg, 0); POSTING_READ(pp_on_reg); @@ -3449,18 +3658,19 @@ static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv, lockdep_assert_held(&dev_priv->pps_mutex); for_each_intel_dp(&dev_priv->drm, encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = encoder->port; + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); WARN(intel_dp->active_pipe == pipe, - "stealing pipe %c power sequencer from active (e)DP port %c\n", - pipe_name(pipe), port_name(port)); + "stealing pipe %c power sequencer from active [ENCODER:%d:%s]\n", + pipe_name(pipe), encoder->base.base.id, + encoder->base.name); if (intel_dp->pps_pipe != pipe) continue; - DRM_DEBUG_KMS("stealing pipe %c power sequencer from port %c\n", - pipe_name(pipe), port_name(port)); + DRM_DEBUG_KMS("stealing pipe %c power sequencer from [ENCODER:%d:%s]\n", + pipe_name(pipe), encoder->base.base.id, + encoder->base.name); /* make sure vdd is off before we steal it */ vlv_detach_power_sequencer(intel_dp); @@ -3471,8 +3681,8 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); lockdep_assert_held(&dev_priv->pps_mutex); @@ -3502,8 +3712,9 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, /* now it's all ours */ intel_dp->pps_pipe = crtc->pipe; - 
DRM_DEBUG_KMS("initializing pipe %c power sequencer for port %c\n", - pipe_name(intel_dp->pps_pipe), port_name(encoder->port)); + DRM_DEBUG_KMS("initializing pipe %c power sequencer for [ENCODER:%d:%s]\n", + pipe_name(intel_dp->pps_pipe), encoder->base.base.id, + encoder->base.name); /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(intel_dp); @@ -3967,25 +4178,23 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) if (!HAS_DDI(dev_priv)) return; - val = I915_READ(DP_TP_CTL(port)); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~DP_TP_CTL_LINK_TRAIN_MASK; val |= DP_TP_CTL_LINK_TRAIN_IDLE; - I915_WRITE(DP_TP_CTL(port), val); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); /* - * On PORT_A we can have only eDP in SST mode. There the only reason - * we need to set idle transmission mode is to work around a HW issue - * where we enable the pipe while not in idle link-training mode. + * Until TGL on PORT_A we can have only eDP in SST mode. There the only + * reason we need to set idle transmission mode is to work around a HW + * issue where we enable the pipe while not in idle link-training mode. * In this case there is requirement to wait for a minimum number of * idle patterns to be sent. */ - if (port == PORT_A) + if (port == PORT_A && INTEL_GEN(dev_priv) < 12) return; - if (intel_wait_for_register(&dev_priv->uncore, DP_TP_STATUS(port), - DP_TP_STATUS_IDLE_DONE, - DP_TP_STATUS_IDLE_DONE, - 1)) + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, + DP_TP_STATUS_IDLE_DONE, 1)) DRM_ERROR("Timed out waiting for DP idle patterns\n"); } @@ -3994,8 +4203,8 @@ intel_dp_link_down(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); enum port port = encoder->port; u32 DP = intel_dp->DP; @@ -4169,10 +4378,6 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, drm_dp_is_branch(intel_dp->dpcd)); - if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) - dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] & - DP_NO_AUX_HANDSHAKE_LINK_TRAINING; - /* * Read the eDP display control registers. * @@ -4244,8 +4449,14 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) if (!intel_dp_read_dpcd(intel_dp)) return false; - /* Don't clobber cached eDP rates. */ + /* + * Don't clobber cached eDP rates. Also skip re-reading + * the OUI/ID since we know it won't change. + */ if (!intel_dp_is_edp(intel_dp)) { + drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, + drm_dp_is_branch(intel_dp->dpcd)); + intel_dp_set_sink_rates(intel_dp); intel_dp_set_common_rates(intel_dp); } @@ -4254,7 +4465,8 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) * Some eDP panels do not set a valid value for sink count, that is why * it don't care about read it here and in intel_edp_init_dpcd(). */ - if (!intel_dp_is_edp(intel_dp)) { + if (!intel_dp_is_edp(intel_dp) && + !drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_NO_SINK_COUNT)) { u8 count; ssize_t r; @@ -4323,9 +4535,10 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) &dp_to_dig_port(intel_dp)->base; bool sink_can_mst = intel_dp_sink_can_mst(intel_dp); - DRM_DEBUG_KMS("MST support? 
port %c: %s, sink: %s, modparam: %s\n", - port_name(encoder->port), yesno(intel_dp->can_mst), - yesno(sink_can_mst), yesno(i915_modparams.enable_dp_mst)); + DRM_DEBUG_KMS("[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s\n", + encoder->base.base.id, encoder->base.name, + yesno(intel_dp->can_mst), yesno(sink_can_mst), + yesno(i915_modparams.enable_dp_mst)); if (!intel_dp->can_mst) return; @@ -4345,94 +4558,36 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) DP_DPRX_ESI_LEN; } -u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, - int mode_clock, int mode_hdisplay) +bool +intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - u16 bits_per_pixel, max_bpp_small_joiner_ram; - int i; - - /* - * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)* - * (LinkSymbolClock)* 8 * ((100-FECOverhead)/100)*(TimeSlotsPerMTP) - * FECOverhead = 2.4%, for SST -> TimeSlotsPerMTP is 1, - * for MST -> TimeSlotsPerMTP has to be calculated - */ - bits_per_pixel = (link_clock * lane_count * 8 * - DP_DSC_FEC_OVERHEAD_FACTOR) / - mode_clock; - - /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ - max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / - mode_hdisplay; - - /* - * Greatest allowed DSC BPP = MIN (output BPP from avaialble Link BW - * check, output bpp from small joiner RAM check) - */ - bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram); - - /* Error out if the max bpp is less than smallest allowed valid bpp */ - if (bits_per_pixel < valid_dsc_bpp[0]) { - DRM_DEBUG_KMS("Unsupported BPP %d\n", bits_per_pixel); - return 0; - } - - /* Find the nearest match in the array of known BPPs from VESA */ - for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) { - if (bits_per_pixel < valid_dsc_bpp[i + 1]) - break; - } - bits_per_pixel = valid_dsc_bpp[i]; - /* - * Compressed BPP in U6.4 format so multiply by 16, for Gen 11, - * fractional part is 0 + * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication + * of Color Encoding Format and Content Color Gamut], in order to + * sending YCBCR 420 or HDR BT.2020 signals we should use DP VSC SDP. 
*/ - return bits_per_pixel << 4; -} - -u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, - int mode_clock, - int mode_hdisplay) -{ - u8 min_slice_count, i; - int max_slice_width; - - if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_0); - else - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_1); - - max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd); - if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { - DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n", - max_slice_width); - return 0; - } - /* Also take into account max slice width */ - min_slice_count = min_t(u8, min_slice_count, - DIV_ROUND_UP(mode_hdisplay, - max_slice_width)); + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + return true; - /* Find the closest match to the valid slice count values */ - for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { - if (valid_dsc_slicecount[i] > - drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, - false)) - break; - if (min_slice_count <= valid_dsc_slicecount[i]) - return valid_dsc_slicecount[i]; + switch (conn_state->colorspace) { + case DRM_MODE_COLORIMETRY_SYCC_601: + case DRM_MODE_COLORIMETRY_OPYCC_601: + case DRM_MODE_COLORIMETRY_BT2020_YCC: + case DRM_MODE_COLORIMETRY_BT2020_RGB: + case DRM_MODE_COLORIMETRY_BT2020_CYCC: + return true; + default: + break; } - DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count); - return 0; + return false; } static void -intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) +intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct dp_sdp vsc_sdp = {}; @@ -4453,13 +4608,55 @@ intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, */ vsc_sdp.sdp_header.HB3 = 0x13; - /* - * YCbCr 420 = 3h DB16[7:4] ITU-R BT.601 = 0h, ITU-R BT.709 = 1h - * DB16[3:0] DP 1.4a spec, Table 2-120 - */ - vsc_sdp.db[16] = 0x3 << 4; /* 0x3 << 4 , YCbCr 420*/ - /* RGB->YCBCR color conversion uses the BT.709 color space. */ - vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */ + /* DP 1.4a spec, Table 2-120 */ + switch (crtc_state->output_format) { + case INTEL_OUTPUT_FORMAT_YCBCR444: + vsc_sdp.db[16] = 0x1 << 4; /* YCbCr 444 : DB16[7:4] = 1h */ + break; + case INTEL_OUTPUT_FORMAT_YCBCR420: + vsc_sdp.db[16] = 0x3 << 4; /* YCbCr 420 : DB16[7:4] = 3h */ + break; + case INTEL_OUTPUT_FORMAT_RGB: + default: + /* RGB: DB16[7:4] = 0h */ + break; + } + + switch (conn_state->colorspace) { + case DRM_MODE_COLORIMETRY_BT709_YCC: + vsc_sdp.db[16] |= 0x1; + break; + case DRM_MODE_COLORIMETRY_XVYCC_601: + vsc_sdp.db[16] |= 0x2; + break; + case DRM_MODE_COLORIMETRY_XVYCC_709: + vsc_sdp.db[16] |= 0x3; + break; + case DRM_MODE_COLORIMETRY_SYCC_601: + vsc_sdp.db[16] |= 0x4; + break; + case DRM_MODE_COLORIMETRY_OPYCC_601: + vsc_sdp.db[16] |= 0x5; + break; + case DRM_MODE_COLORIMETRY_BT2020_CYCC: + case DRM_MODE_COLORIMETRY_BT2020_RGB: + vsc_sdp.db[16] |= 0x6; + break; + case DRM_MODE_COLORIMETRY_BT2020_YCC: + vsc_sdp.db[16] |= 0x7; + break; + case DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65: + case DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER: + vsc_sdp.db[16] |= 0x4; /* DCI-P3 (SMPTE RP 431-2) */ + break; + default: + /* sRGB (IEC 61966-2-1) / ITU-R BT.601: DB16[0:3] = 0h */ + + /* RGB->YCBCR color conversion uses the BT.709 color space. 
*/ + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */ + break; + } /* * For pixel encoding formats YCbCr444, YCbCr422, YCbCr420, and Y Only, @@ -4511,13 +4708,106 @@ intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp)); } -void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) +static void +intel_dp_setup_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct dp_sdp infoframe_sdp = {}; + struct hdmi_drm_infoframe drm_infoframe = {}; + const int infoframe_size = HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE; + unsigned char buf[HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE]; + ssize_t len; + int ret; + + ret = drm_hdmi_infoframe_set_hdr_metadata(&drm_infoframe, conn_state); + if (ret) { + DRM_DEBUG_KMS("couldn't set HDR metadata in infoframe\n"); + return; + } + + len = hdmi_drm_infoframe_pack_only(&drm_infoframe, buf, sizeof(buf)); + if (len < 0) { + DRM_DEBUG_KMS("buffer size is smaller than hdr metadata infoframe\n"); + return; + } + + if (len != infoframe_size) { + DRM_DEBUG_KMS("wrong static hdr metadata size\n"); + return; + } + + /* + * Set up the infoframe sdp packet for HDR static metadata. + * Prepare VSC Header for SU as per DP 1.4a spec, + * Table 2-100 and Table 2-101 + */ + + /* Packet ID, 00h for non-Audio INFOFRAME */ + infoframe_sdp.sdp_header.HB0 = 0; + /* + * Packet Type 80h + Non-audio INFOFRAME Type value + * HDMI_INFOFRAME_TYPE_DRM: 0x87, + */ + infoframe_sdp.sdp_header.HB1 = drm_infoframe.type; + /* + * Least Significant Eight Bits of (Data Byte Count – 1) + * infoframe_size - 1, + */ + infoframe_sdp.sdp_header.HB2 = 0x1D; + /* INFOFRAME SDP Version Number */ + infoframe_sdp.sdp_header.HB3 = (0x13 << 2); + /* CTA Header Byte 2 (INFOFRAME Version Number) */ + infoframe_sdp.db[0] = drm_infoframe.version; + /* CTA Header Byte 3 (Length of INFOFRAME): HDMI_DRM_INFOFRAME_SIZE */ + infoframe_sdp.db[1] = drm_infoframe.length; + /* + * Copy HDMI_DRM_INFOFRAME_SIZE size from a buffer after + * HDMI_INFOFRAME_HEADER_SIZE + */ + BUILD_BUG_ON(sizeof(infoframe_sdp.db) < HDMI_DRM_INFOFRAME_SIZE + 2); + memcpy(&infoframe_sdp.db[2], &buf[HDMI_INFOFRAME_HEADER_SIZE], + HDMI_DRM_INFOFRAME_SIZE); + + /* + * Size of DP infoframe sdp packet for HDR static metadata is consist of + * - DP SDP Header(struct dp_sdp_header): 4 bytes + * - Two Data Blocks: 2 bytes + * CTA Header Byte2 (INFOFRAME Version Number) + * CTA Header Byte3 (Length of INFOFRAME) + * - HDMI_DRM_INFOFRAME_SIZE: 26 bytes + * + * Prior to GEN11's GMP register size is identical to DP HDR static metadata + * infoframe size. But GEN11+ has larger than that size, write_infoframe + * will pad rest of the size. 
+ */ + intel_dig_port->write_infoframe(&intel_dig_port->base, crtc_state, + HDMI_PACKET_TYPE_GAMUT_METADATA, + &infoframe_sdp, + sizeof(struct dp_sdp_header) + 2 + HDMI_DRM_INFOFRAME_SIZE); +} + +void intel_dp_vsc_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state)) + return; + + intel_dp_setup_vsc_sdp(intel_dp, crtc_state, conn_state); +} + +void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_YCBCR420) + if (!conn_state->hdr_output_metadata) return; - intel_pixel_encoding_setup_vsc(intel_dp, crtc_state); + intel_dp_setup_hdr_metadata_infoframe_sdp(intel_dp, + crtc_state, + conn_state); } static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp) @@ -4613,7 +4903,7 @@ static u8 intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) intel_dp->compliance.test_data.hdisplay = be16_to_cpu(h_width); intel_dp->compliance.test_data.vdisplay = be16_to_cpu(v_height); /* Set test active flag here so userspace doesn't interrupt things */ - intel_dp->compliance.test_active = 1; + intel_dp->compliance.test_active = true; return DP_TEST_ACK; } @@ -4657,7 +4947,7 @@ static u8 intel_dp_autotest_edid(struct intel_dp *intel_dp) } /* Set test active flag here so userspace doesn't interrupt things */ - intel_dp->compliance.test_active = 1; + intel_dp->compliance.test_active = true; return test_result; } @@ -4806,7 +5096,7 @@ int intel_dp_retrain_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_connector *connector = intel_dp->attached_connector; struct drm_connector_state *conn_state; struct intel_crtc_state *crtc_state; @@ -4837,7 +5127,7 @@ int intel_dp_retrain_link(struct intel_encoder *encoder, WARN_ON(!intel_crtc_has_dp_encoder(crtc_state)); - if (!crtc_state->base.active) + if (!crtc_state->hw.active) return 0; if (conn_state->commit && @@ -4879,14 +5169,16 @@ int intel_dp_retrain_link(struct intel_encoder *encoder, * retrain the link to get a picture. That's in case no * userspace component reacted to intermittent HPD dip. */ -static bool intel_dp_hotplug(struct intel_encoder *encoder, - struct intel_connector *connector) +static enum intel_hotplug_state +intel_dp_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector, + bool irq_received) { struct drm_modeset_acquire_ctx ctx; - bool changed; + enum intel_hotplug_state state; int ret; - changed = intel_encoder_hotplug(encoder, connector); + state = intel_encoder_hotplug(encoder, connector, irq_received); drm_modeset_acquire_init(&ctx, 0); @@ -4905,7 +5197,14 @@ static bool intel_dp_hotplug(struct intel_encoder *encoder, drm_modeset_acquire_fini(&ctx); WARN(ret, "Acquiring modeset locks failed with %i\n", ret); - return changed; + /* + * Keeping it consistent with intel_ddi_hotplug() and + * intel_hdmi_hotplug(). 
+ */ + if (state == INTEL_HOTPLUG_UNCHANGED && irq_received) + state = INTEL_HOTPLUG_RETRY; + + return state; } static void intel_dp_check_service_irq(struct intel_dp *intel_dp) @@ -5225,212 +5524,25 @@ static bool bxt_digital_port_connected(struct intel_encoder *encoder) return I915_READ(GEN8_DE_PORT_ISR) & bit; } -static bool icl_combo_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *intel_dig_port) -{ - enum port port = intel_dig_port->base.port; - - return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(port); -} - -static const char *tc_type_name(enum tc_port_type type) -{ - static const char * const names[] = { - [TC_PORT_UNKNOWN] = "unknown", - [TC_PORT_LEGACY] = "legacy", - [TC_PORT_TYPEC] = "typec", - [TC_PORT_TBT] = "tbt", - }; - - if (WARN_ON(type >= ARRAY_SIZE(names))) - type = TC_PORT_UNKNOWN; - - return names[type]; -} - -static void icl_update_tc_port_type(struct drm_i915_private *dev_priv, - struct intel_digital_port *intel_dig_port, - bool is_legacy, bool is_typec, bool is_tbt) +static bool intel_combo_phy_connected(struct drm_i915_private *dev_priv, + enum phy phy) { - enum port port = intel_dig_port->base.port; - enum tc_port_type old_type = intel_dig_port->tc_type; - - WARN_ON(is_legacy + is_typec + is_tbt != 1); - - if (is_legacy) - intel_dig_port->tc_type = TC_PORT_LEGACY; - else if (is_typec) - intel_dig_port->tc_type = TC_PORT_TYPEC; - else if (is_tbt) - intel_dig_port->tc_type = TC_PORT_TBT; - else - return; + if (HAS_PCH_MCC(dev_priv) && phy == PHY_C) + return I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1); - /* Types are not supposed to be changed at runtime. */ - WARN_ON(old_type != TC_PORT_UNKNOWN && - old_type != intel_dig_port->tc_type); - - if (old_type != intel_dig_port->tc_type) - DRM_DEBUG_KMS("Port %c has TC type %s\n", port_name(port), - tc_type_name(intel_dig_port->tc_type)); + return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(phy); } -/* - * This function implements the first part of the Connect Flow described by our - * specification, Gen11 TypeC Programming chapter. The rest of the flow (reading - * lanes, EDID, etc) is done as needed in the typical places. - * - * Unlike the other ports, type-C ports are not available to use as soon as we - * get a hotplug. The type-C PHYs can be shared between multiple controllers: - * display, USB, etc. As a result, handshaking through FIA is required around - * connect and disconnect to cleanly transfer ownership with the controller and - * set the type-C power state. - * - * We could opt to only do the connect flow when we actually try to use the AUX - * channels or do a modeset, then immediately run the disconnect flow after - * usage, but there are some implications on this for a dynamic environment: - * things may go away or change behind our backs. So for now our driver is - * always trying to acquire ownership of the controller as soon as it gets an - * interrupt (or polls state and sees a port is connected) and only gives it - * back when it sees a disconnect. Implementation of a more fine-grained model - * will require a lot of coordination with user space and thorough testing for - * the extra possible cases. 
- */ -static bool icl_tc_phy_connect(struct drm_i915_private *dev_priv, - struct intel_digital_port *dig_port) -{ - enum tc_port tc_port = intel_port_to_tc(dev_priv, dig_port->base.port); - u32 val; - - if (dig_port->tc_type != TC_PORT_LEGACY && - dig_port->tc_type != TC_PORT_TYPEC) - return true; - - val = I915_READ(PORT_TX_DFLEXDPPMS); - if (!(val & DP_PHY_MODE_STATUS_COMPLETED(tc_port))) { - DRM_DEBUG_KMS("DP PHY for TC port %d not ready\n", tc_port); - WARN_ON(dig_port->tc_legacy_port); - return false; - } - - /* - * This function may be called many times in a row without an HPD event - * in between, so try to avoid the write when we can. - */ - val = I915_READ(PORT_TX_DFLEXDPCSSS); - if (!(val & DP_PHY_MODE_STATUS_NOT_SAFE(tc_port))) { - val |= DP_PHY_MODE_STATUS_NOT_SAFE(tc_port); - I915_WRITE(PORT_TX_DFLEXDPCSSS, val); - } - - /* - * Now we have to re-check the live state, in case the port recently - * became disconnected. Not necessary for legacy mode. - */ - if (dig_port->tc_type == TC_PORT_TYPEC && - !(I915_READ(PORT_TX_DFLEXDPSP) & TC_LIVE_STATE_TC(tc_port))) { - DRM_DEBUG_KMS("TC PHY %d sudden disconnect.\n", tc_port); - icl_tc_phy_disconnect(dev_priv, dig_port); - return false; - } - - return true; -} - -/* - * See the comment at the connect function. This implements the Disconnect - * Flow. - */ -void icl_tc_phy_disconnect(struct drm_i915_private *dev_priv, - struct intel_digital_port *dig_port) -{ - enum tc_port tc_port = intel_port_to_tc(dev_priv, dig_port->base.port); - - if (dig_port->tc_type == TC_PORT_UNKNOWN) - return; - - /* - * TBT disconnection flow is read the live status, what was done in - * caller. - */ - if (dig_port->tc_type == TC_PORT_TYPEC || - dig_port->tc_type == TC_PORT_LEGACY) { - u32 val; - - val = I915_READ(PORT_TX_DFLEXDPCSSS); - val &= ~DP_PHY_MODE_STATUS_NOT_SAFE(tc_port); - I915_WRITE(PORT_TX_DFLEXDPCSSS, val); - } - - DRM_DEBUG_KMS("Port %c TC type %s disconnected\n", - port_name(dig_port->base.port), - tc_type_name(dig_port->tc_type)); - - dig_port->tc_type = TC_PORT_UNKNOWN; -} - -/* - * The type-C ports are different because even when they are connected, they may - * not be available/usable by the graphics driver: see the comment on - * icl_tc_phy_connect(). So in our driver instead of adding the additional - * concept of "usable" and make everything check for "connected and usable" we - * define a port as "connected" when it is not only connected, but also when it - * is usable by the rest of the driver. That maintains the old assumption that - * connected ports are usable, and avoids exposing to the users objects they - * can't really use. - */ -static bool icl_tc_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *intel_dig_port) -{ - enum port port = intel_dig_port->base.port; - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - bool is_legacy, is_typec, is_tbt; - u32 dpsp; - - /* - * Complain if we got a legacy port HPD, but VBT didn't mark the port as - * legacy. Treat the port as legacy from now on. - */ - if (!intel_dig_port->tc_legacy_port && - I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port)) { - DRM_ERROR("VBT incorrectly claims port %c is not TypeC legacy\n", - port_name(port)); - intel_dig_port->tc_legacy_port = true; - } - is_legacy = intel_dig_port->tc_legacy_port; - - /* - * The spec says we shouldn't be using the ISR bits for detecting - * between TC and TBT. We should use DFLEXDPSP. 
- */ - dpsp = I915_READ(PORT_TX_DFLEXDPSP); - is_typec = dpsp & TC_LIVE_STATE_TC(tc_port); - is_tbt = dpsp & TC_LIVE_STATE_TBT(tc_port); - - if (!is_legacy && !is_typec && !is_tbt) { - icl_tc_phy_disconnect(dev_priv, intel_dig_port); - - return false; - } - - icl_update_tc_port_type(dev_priv, intel_dig_port, is_legacy, is_typec, - is_tbt); - - if (!icl_tc_phy_connect(dev_priv, intel_dig_port)) - return false; - - return true; -} - -static bool icl_digital_port_connected(struct intel_encoder *encoder) +static bool icp_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - if (intel_port_is_combophy(dev_priv, encoder->port)) - return icl_combo_port_connected(dev_priv, dig_port); - else if (intel_port_is_tc(dev_priv, encoder->port)) - return icl_tc_port_connected(dev_priv, dig_port); + if (intel_phy_is_combo(dev_priv, phy)) + return intel_combo_phy_connected(dev_priv, phy); + else if (intel_phy_is_tc(dev_priv, phy)) + return intel_tc_port_connected(dig_port); else MISSING_CASE(encoder->hpd_pin); @@ -5459,9 +5571,9 @@ static bool __intel_digital_port_connected(struct intel_encoder *encoder) return g4x_digital_port_connected(encoder); } - if (INTEL_GEN(dev_priv) >= 11) - return icl_digital_port_connected(encoder); - else if (IS_GEN(dev_priv, 10) || IS_GEN9_BC(dev_priv)) + if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + return icp_digital_port_connected(encoder); + else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) return spt_digital_port_connected(encoder); else if (IS_GEN9_LP(dev_priv)) return bxt_digital_port_connected(encoder); @@ -5539,7 +5651,7 @@ intel_dp_detect(struct drm_connector *connector, bool force) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; enum drm_connector_status status; @@ -5588,9 +5700,6 @@ intel_dp_detect(struct drm_connector *connector, if (INTEL_GEN(dev_priv) >= 11) intel_dp_get_dsc_sink_cap(intel_dp); - drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, - drm_dp_is_branch(intel_dp->dpcd)); - intel_dp_configure_mst(intel_dp); if (intel_dp->is_mst) { @@ -5634,13 +5743,19 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); + /* + * Make sure the refs for power wells enabled during detect are + * dropped to avoid a new detect cycle triggered by HPD polling. 
+ */ + intel_display_power_flush_work(dev_priv); + return status; } static void intel_dp_force(struct drm_connector *connector) { - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *intel_encoder = &dig_port->base; struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); @@ -5675,7 +5790,7 @@ static int intel_dp_get_modes(struct drm_connector *connector) } /* if eDP has no EDID, fall back to fixed mode */ - if (intel_dp_is_edp(intel_attached_dp(connector)) && + if (intel_dp_is_edp(intel_attached_dp(to_intel_connector(connector))) && intel_connector->panel.fixed_mode) { struct drm_display_mode *mode; @@ -5693,8 +5808,7 @@ static int intel_dp_get_modes(struct drm_connector *connector) static int intel_dp_connector_register(struct drm_connector *connector) { - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_device *dev = connector->dev; + struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); int ret; ret = intel_connector_register(connector); @@ -5709,15 +5823,14 @@ intel_dp_connector_register(struct drm_connector *connector) intel_dp->aux.dev = connector->kdev; ret = drm_dp_aux_register(&intel_dp->aux); if (!ret) - drm_dp_cec_register_connector(&intel_dp->aux, - connector->name, dev->dev); + drm_dp_cec_register_connector(&intel_dp->aux, connector); return ret; } static void intel_dp_connector_unregister(struct drm_connector *connector) { - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); drm_dp_cec_unregister_connector(&intel_dp->aux); drm_dp_aux_unregister(&intel_dp->aux); @@ -5726,7 +5839,7 @@ intel_dp_connector_unregister(struct drm_connector *connector) void intel_dp_encoder_flush_work(struct drm_encoder *encoder) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(to_intel_encoder(encoder)); struct intel_dp *intel_dp = &intel_dig_port->dp; intel_dp_mst_encoder_cleanup(intel_dig_port); @@ -5755,12 +5868,12 @@ static void intel_dp_encoder_destroy(struct drm_encoder *encoder) intel_dp_encoder_flush_work(encoder); drm_encoder_cleanup(encoder); - kfree(enc_to_dig_port(encoder)); + kfree(enc_to_dig_port(to_intel_encoder(encoder))); } void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder); intel_wakeref_t wakeref; if (!intel_dp_is_edp(intel_dp)) @@ -5791,7 +5904,7 @@ static int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, u8 *an) { - struct intel_dp *intel_dp = enc_to_intel_dp(&intel_dig_port->base.base); + struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(&intel_dig_port->base.base)); static const struct drm_dp_aux_msg msg = { .request = DP_AUX_NATIVE_WRITE, .address = DP_AUX_HDCP_AKSV, @@ -6016,47 +6129,49 @@ struct hdcp2_dp_errata_stream_type { u8 stream_type; } __packed; -static struct hdcp2_dp_msg_data { +struct hdcp2_dp_msg_data { u8 msg_id; u32 offset; bool msg_detectable; u32 timeout; u32 timeout2; /* Added for non_paired situation */ - } hdcp2_msg_data[] = { - {HDCP_2_2_AKE_INIT, DP_HDCP_2_2_AKE_INIT_OFFSET, false, 0, 0}, - {HDCP_2_2_AKE_SEND_CERT, DP_HDCP_2_2_AKE_SEND_CERT_OFFSET, - false, 
HDCP_2_2_CERT_TIMEOUT_MS, 0}, - {HDCP_2_2_AKE_NO_STORED_KM, DP_HDCP_2_2_AKE_NO_STORED_KM_OFFSET, - false, 0, 0}, - {HDCP_2_2_AKE_STORED_KM, DP_HDCP_2_2_AKE_STORED_KM_OFFSET, - false, 0, 0}, - {HDCP_2_2_AKE_SEND_HPRIME, DP_HDCP_2_2_AKE_SEND_HPRIME_OFFSET, - true, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS, - HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS}, - {HDCP_2_2_AKE_SEND_PAIRING_INFO, - DP_HDCP_2_2_AKE_SEND_PAIRING_INFO_OFFSET, true, - HDCP_2_2_PAIRING_TIMEOUT_MS, 0}, - {HDCP_2_2_LC_INIT, DP_HDCP_2_2_LC_INIT_OFFSET, false, 0, 0}, - {HDCP_2_2_LC_SEND_LPRIME, DP_HDCP_2_2_LC_SEND_LPRIME_OFFSET, - false, HDCP_2_2_DP_LPRIME_TIMEOUT_MS, 0}, - {HDCP_2_2_SKE_SEND_EKS, DP_HDCP_2_2_SKE_SEND_EKS_OFFSET, false, - 0, 0}, - {HDCP_2_2_REP_SEND_RECVID_LIST, - DP_HDCP_2_2_REP_SEND_RECVID_LIST_OFFSET, true, - HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0}, - {HDCP_2_2_REP_SEND_ACK, DP_HDCP_2_2_REP_SEND_ACK_OFFSET, false, - 0, 0}, - {HDCP_2_2_REP_STREAM_MANAGE, - DP_HDCP_2_2_REP_STREAM_MANAGE_OFFSET, false, - 0, 0}, - {HDCP_2_2_REP_STREAM_READY, DP_HDCP_2_2_REP_STREAM_READY_OFFSET, - false, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0}, +}; + +static const struct hdcp2_dp_msg_data hdcp2_dp_msg_data[] = { + { HDCP_2_2_AKE_INIT, DP_HDCP_2_2_AKE_INIT_OFFSET, false, 0, 0 }, + { HDCP_2_2_AKE_SEND_CERT, DP_HDCP_2_2_AKE_SEND_CERT_OFFSET, + false, HDCP_2_2_CERT_TIMEOUT_MS, 0 }, + { HDCP_2_2_AKE_NO_STORED_KM, DP_HDCP_2_2_AKE_NO_STORED_KM_OFFSET, + false, 0, 0 }, + { HDCP_2_2_AKE_STORED_KM, DP_HDCP_2_2_AKE_STORED_KM_OFFSET, + false, 0, 0 }, + { HDCP_2_2_AKE_SEND_HPRIME, DP_HDCP_2_2_AKE_SEND_HPRIME_OFFSET, + true, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS, + HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS }, + { HDCP_2_2_AKE_SEND_PAIRING_INFO, + DP_HDCP_2_2_AKE_SEND_PAIRING_INFO_OFFSET, true, + HDCP_2_2_PAIRING_TIMEOUT_MS, 0 }, + { HDCP_2_2_LC_INIT, DP_HDCP_2_2_LC_INIT_OFFSET, false, 0, 0 }, + { HDCP_2_2_LC_SEND_LPRIME, DP_HDCP_2_2_LC_SEND_LPRIME_OFFSET, + false, HDCP_2_2_DP_LPRIME_TIMEOUT_MS, 0 }, + { HDCP_2_2_SKE_SEND_EKS, DP_HDCP_2_2_SKE_SEND_EKS_OFFSET, false, + 0, 0 }, + { HDCP_2_2_REP_SEND_RECVID_LIST, + DP_HDCP_2_2_REP_SEND_RECVID_LIST_OFFSET, true, + HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0 }, + { HDCP_2_2_REP_SEND_ACK, DP_HDCP_2_2_REP_SEND_ACK_OFFSET, false, + 0, 0 }, + { HDCP_2_2_REP_STREAM_MANAGE, + DP_HDCP_2_2_REP_STREAM_MANAGE_OFFSET, false, + 0, 0 }, + { HDCP_2_2_REP_STREAM_READY, DP_HDCP_2_2_REP_STREAM_READY_OFFSET, + false, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0 }, /* local define to shovel this through the write_2_2 interface */ #define HDCP_2_2_ERRATA_DP_STREAM_TYPE 50 - {HDCP_2_2_ERRATA_DP_STREAM_TYPE, - DP_HDCP_2_2_REG_STREAM_TYPE_OFFSET, false, - 0, 0}, - }; + { HDCP_2_2_ERRATA_DP_STREAM_TYPE, + DP_HDCP_2_2_REG_STREAM_TYPE_OFFSET, false, + 0, 0 }, +}; static inline int intel_dp_hdcp2_read_rx_status(struct intel_digital_port *intel_dig_port, @@ -6110,7 +6225,7 @@ int hdcp2_detect_msg_availability(struct intel_digital_port *intel_dig_port, static ssize_t intel_dp_hdcp2_wait_for_msg(struct intel_digital_port *intel_dig_port, - struct hdcp2_dp_msg_data *hdcp2_msg_data) + const struct hdcp2_dp_msg_data *hdcp2_msg_data) { struct intel_dp *dp = &intel_dig_port->dp; struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; @@ -6149,13 +6264,13 @@ intel_dp_hdcp2_wait_for_msg(struct intel_digital_port *intel_dig_port, return ret; } -static struct hdcp2_dp_msg_data *get_hdcp2_dp_msg_data(u8 msg_id) +static const struct hdcp2_dp_msg_data *get_hdcp2_dp_msg_data(u8 msg_id) { int i; - for (i = 0; i < ARRAY_SIZE(hdcp2_msg_data); i++) - if (hdcp2_msg_data[i].msg_id == msg_id) 
- return &hdcp2_msg_data[i]; + for (i = 0; i < ARRAY_SIZE(hdcp2_dp_msg_data); i++) + if (hdcp2_dp_msg_data[i].msg_id == msg_id) + return &hdcp2_dp_msg_data[i]; return NULL; } @@ -6169,7 +6284,7 @@ int intel_dp_hdcp2_write_msg(struct intel_digital_port *intel_dig_port, unsigned int offset; u8 *byte = buf; ssize_t ret, bytes_to_write, len; - struct hdcp2_dp_msg_data *hdcp2_msg_data; + const struct hdcp2_dp_msg_data *hdcp2_msg_data; hdcp2_msg_data = get_hdcp2_dp_msg_data(*byte); if (!hdcp2_msg_data) @@ -6233,7 +6348,7 @@ int intel_dp_hdcp2_read_msg(struct intel_digital_port *intel_dig_port, unsigned int offset; u8 *byte = buf; ssize_t ret, bytes_to_recv, len; - struct hdcp2_dp_msg_data *hdcp2_msg_data; + const struct hdcp2_dp_msg_data *hdcp2_msg_data; hdcp2_msg_data = get_hdcp2_dp_msg_data(msg_id); if (!hdcp2_msg_data) @@ -6399,7 +6514,7 @@ static enum pipe vlv_active_pipe(struct intel_dp *intel_dp) void intel_dp_encoder_reset(struct drm_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->dev); - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(encoder)); struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); intel_wakeref_t wakeref; @@ -6466,13 +6581,15 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) * would end up in an endless cycle of * "vdd off -> long hpd -> vdd on -> detect -> vdd off -> ..." */ - DRM_DEBUG_KMS("ignoring long hpd on eDP port %c\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("ignoring long hpd on eDP [ENCODER:%d:%s]\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); return IRQ_HANDLED; } - DRM_DEBUG_KMS("got hpd irq on port %c - %s\n", - port_name(intel_dig_port->base.port), + DRM_DEBUG_KMS("got hpd irq on [ENCODER:%d:%s] - %s\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name, long_hpd ? "long" : "short"); if (long_hpd) { @@ -6539,6 +6656,13 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect else if (INTEL_GEN(dev_priv) >= 5) drm_connector_attach_max_bpc_property(connector, 6, 12); + intel_attach_colorspace_property(connector); + + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 11) + drm_object_attach_property(&connector->base, + connector->dev->mode_config.hdr_output_metadata_property, + 0); + if (intel_dp_is_edp(intel_dp)) { u32 allowed_scalers; @@ -6569,7 +6693,7 @@ intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct edp_power_seq *seq) intel_pps_get_registers(intel_dp, ®s); - pp_ctl = ironlake_get_pp_control(intel_dp); + pp_ctl = ilk_get_pp_control(intel_dp); /* Ensure PPS is unlocked */ if (!HAS_DDI(dev_priv)) @@ -6739,7 +6863,7 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, * soon as the new power sequencer gets initialized. 
*/ if (force_disable_vdd) { - u32 pp = ironlake_get_pp_control(intel_dp); + u32 pp = ilk_get_pp_control(intel_dp); WARN(pp & PANEL_POWER_ON, "Panel power already on\n"); @@ -6835,10 +6959,8 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, const struct intel_crtc_state *crtc_state, int refresh_rate) { - struct intel_encoder *encoder; - struct intel_digital_port *dig_port = NULL; struct intel_dp *intel_dp = dev_priv->drrs.dp; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); enum drrs_refresh_rate_type index = DRRS_HIGH_RR; if (refresh_rate <= 0) { @@ -6851,9 +6973,6 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, return; } - dig_port = dp_to_dig_port(intel_dp); - encoder = &dig_port->base; - if (!intel_crtc) { DRM_DEBUG_KMS("DRRS: intel_crtc not initialized\n"); return; @@ -6874,7 +6993,7 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, return; } - if (!crtc_state->base.active) { + if (!crtc_state->hw.active) { DRM_DEBUG_KMS("eDP encoder disabled. CRTC not Active\n"); return; } @@ -7333,6 +7452,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_encoder->port; + enum phy phy = intel_port_to_phy(dev_priv, port); int type; /* Initialize the work for modeset in case of link train failure */ @@ -7340,8 +7460,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dp_modeset_retry_work_fn); if (WARN(intel_dig_port->max_lanes < 1, - "Not enough lanes (%d) for DP on port %c\n", - intel_dig_port->max_lanes, port_name(port))) + "Not enough lanes (%d) for DP on [ENCODER:%d:%s]\n", + intel_dig_port->max_lanes, intel_encoder->base.base.id, + intel_encoder->base.name)) return false; intel_dp_set_source_rates(intel_dp); @@ -7359,7 +7480,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, * Currently we don't support eDP on TypeC ports, although in * theory it could work on TypeC legacy ports. */ - WARN_ON(intel_port_is_tc(dev_priv, port)); + WARN_ON(intel_phy_is_tc(dev_priv, phy)); type = DRM_MODE_CONNECTOR_eDP; } else { type = DRM_MODE_CONNECTOR_DisplayPort; @@ -7382,9 +7503,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, port != PORT_B && port != PORT_C)) return false; - DRM_DEBUG_KMS("Adding %s connector on port %c\n", - type == DRM_MODE_CONNECTOR_eDP ? "eDP" : "DP", - port_name(port)); + DRM_DEBUG_KMS("Adding %s connector on [ENCODER:%d:%s]\n", + type == DRM_MODE_CONNECTOR_eDP ? 
"eDP" : "DP", + intel_encoder->base.base.id, intel_encoder->base.name); drm_connector_init(dev, connector, &intel_dp_connector_funcs, type); drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs); @@ -7408,11 +7529,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_connector->get_hw_state = intel_connector_get_hw_state; /* init MST on ports that can support it */ - if (HAS_DP_MST(dev_priv) && !intel_dp_is_edp(intel_dp) && - (port == PORT_B || port == PORT_C || - port == PORT_D || port == PORT_F)) - intel_dp_mst_encoder_init(intel_dig_port, - intel_connector->base.base.id); + intel_dp_mst_encoder_init(intel_dig_port, + intel_connector->base.base.id); if (!intel_edp_init_connector(intel_dp, intel_connector)) { intel_dp_aux_fini(intel_dp); @@ -7503,11 +7621,11 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->power_domain = intel_port_to_power_domain(port); if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) - intel_encoder->crtc_mask = 1 << 2; + intel_encoder->pipe_mask = BIT(PIPE_C); else - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->pipe_mask = ~0; } intel_encoder->cloneable = 0; intel_encoder->port = port; @@ -7542,7 +7660,7 @@ void intel_dp_mst_suspend(struct drm_i915_private *dev_priv) if (encoder->type != INTEL_OUTPUT_DDI) continue; - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); if (!intel_dp->can_mst) continue; @@ -7563,12 +7681,13 @@ void intel_dp_mst_resume(struct drm_i915_private *dev_priv) if (encoder->type != INTEL_OUTPUT_DDI) continue; - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); if (!intel_dp->can_mst) continue; - ret = drm_dp_mst_topology_mgr_resume(&intel_dp->mst_mgr); + ret = drm_dp_mst_topology_mgr_resume(&intel_dp->mst_mgr, + true); if (ret) { intel_dp->is_mst = false; drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index da70b1a41c83..3da166054788 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -13,6 +13,7 @@ #include "i915_reg.h" enum pipe; +enum port; struct drm_connector_state; struct drm_encoder; struct drm_i915_private; @@ -102,22 +103,26 @@ bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp); bool intel_dp_source_supports_hbr3(struct intel_dp *intel_dp); bool intel_dp_get_link_status(struct intel_dp *intel_dp, u8 *link_status); -u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, - int mode_clock, int mode_hdisplay); -u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock, - int mode_hdisplay); bool intel_dp_read_dpcd(struct intel_dp *intel_dp); bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); +bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); +void intel_dp_vsc_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); +void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); bool intel_digital_port_connected(struct intel_encoder 
*encoder); -void icl_tc_phy_disconnect(struct drm_i915_private *dev_priv, - struct intel_digital_port *dig_port); static inline unsigned int intel_dp_unused_lane_mask(int lane_count) { return ~((1 << lane_count) - 1) & 0xf; } +u32 intel_dp_mode_to_fec_clock(u32 mode_clock); + #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 7ded95a334db..7c653f8c307f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -22,8 +22,8 @@ * */ +#include "intel_display_types.h" #include "intel_dp_aux_backlight.h" -#include "intel_drv.h" static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable) { @@ -57,7 +57,7 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable) */ static u32 intel_dp_aux_get_backlight(struct intel_connector *connector) { - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); u8 read_val[2] = { 0x0 }; u16 level = 0; @@ -82,7 +82,7 @@ static void intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 level) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); u8 vals[2] = { 0x0 }; vals[0] = level; @@ -110,7 +110,7 @@ intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 lev static bool intel_dp_aux_set_pwm_freq(struct intel_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); int freq, fxp, fxp_min, fxp_max, fxp_actual, f = 1; u8 pn, pn_min, pn_max; @@ -178,7 +178,7 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st const struct drm_connector_state *conn_state) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); u8 dpcd_buf, new_dpcd_buf, edp_backlight_mode; if (drm_dp_dpcd_readb(&intel_dp->aux, @@ -222,13 +222,14 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st static void intel_dp_aux_disable_backlight(const struct drm_connector_state *old_conn_state) { - set_aux_backlight_enable(enc_to_intel_dp(old_conn_state->best_encoder), false); + set_aux_backlight_enable(enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder)), + false); } static int intel_dp_aux_setup_backlight(struct intel_connector *connector, enum pipe pipe) { - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); struct intel_panel *panel = &connector->panel; if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) @@ -247,7 +248,7 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector, static bool intel_dp_aux_display_control_capable(struct intel_connector *connector) { - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); /* Check the eDP Display control capabilities registers to determine if * 
the panel can support backlight control over the aux channel @@ -264,8 +265,11 @@ intel_dp_aux_display_control_capable(struct intel_connector *connector) int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector) { struct intel_panel *panel = &intel_connector->panel; + struct drm_i915_private *dev_priv = to_i915(intel_connector->base.dev); - if (!i915_modparams.enable_dpcd_backlight) + if (i915_modparams.enable_dpcd_backlight == 0 || + (i915_modparams.enable_dpcd_backlight == -1 && + dev_priv->vbt.backlight.type != INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE)) return -ENODEV; if (!intel_dp_aux_display_control_capable(intel_connector)) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 9b1fccea966b..2a1130dd1ad0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -21,9 +21,9 @@ * IN THE SOFTWARE. */ +#include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_link_training.h" -#include "intel_drv.h" static void intel_dp_dump_link_status(const u8 link_status[DP_LINK_STATUS_SIZE]) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 60652ebbdf61..cba68c5a80fa 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -32,23 +32,23 @@ #include "intel_audio.h" #include "intel_connector.h" #include "intel_ddi.h" +#include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_mst.h" #include "intel_dpio_phy.h" -#include "intel_drv.h" static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state, struct link_config_limits *limits) { - struct drm_atomic_state *state = crtc_state->base.state; - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct drm_atomic_state *state = crtc_state->uapi.state; + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_dp *intel_dp = &intel_mst->primary->dp; struct intel_connector *connector = to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; void *port = connector->port; bool constant_n = drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_CONSTANT_N); @@ -61,10 +61,11 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, crtc_state->pipe_bpp = bpp; crtc_state->pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock, - crtc_state->pipe_bpp); + crtc_state->pipe_bpp, + false); slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr, - port, crtc_state->pbn); + port, crtc_state->pbn, 0); if (slots == -EDEADLK) return slots; if (slots >= 0) @@ -81,25 +82,71 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, adjusted_mode->crtc_clock, crtc_state->port_clock, &crtc_state->dp_m_n, - constant_n); + constant_n, crtc_state->fec_enable); crtc_state->dp_m_n.tu = slots; return 0; } +/* + * Iterate over all connectors and return the smallest transcoder in the MST + * stream + */ +static enum transcoder +intel_dp_mst_master_trans_compute(struct intel_atomic_state *state, + struct intel_dp *mst_port) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_digital_connector_state *conn_state; + struct intel_connector *connector; + enum pipe ret = I915_MAX_PIPES; + int i; 
+ + if (INTEL_GEN(dev_priv) < 12) + return INVALID_TRANSCODER; + + for_each_new_intel_connector_in_state(state, connector, conn_state, i) { + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + + if (connector->mst_port != mst_port || !conn_state->base.crtc) + continue; + + crtc = to_intel_crtc(conn_state->base.crtc); + crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + if (!crtc_state->uapi.active) + continue; + + /* + * Using crtc->pipe because crtc_state->cpu_transcoder is + * computed, so others CRTCs could have non-computed + * cpu_transcoder + */ + if (crtc->pipe < ret) + ret = crtc->pipe; + } + + if (ret == I915_MAX_PIPES) + return INVALID_TRANSCODER; + + /* Simple cast works because TGL don't have a eDP transcoder */ + return (enum transcoder)ret; +} + static int intel_dp_mst_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct intel_atomic_state *state = to_intel_atomic_state(conn_state->state); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_dp *intel_dp = &intel_mst->primary->dp; struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); const struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &pipe_config->hw.adjusted_mode; void *port = connector->port; struct link_config_limits limits; int ret; @@ -128,7 +175,15 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, limits.max_lane_count = intel_dp_max_lane_count(intel_dp); limits.min_bpp = intel_dp_min_bpp(pipe_config); - limits.max_bpp = pipe_config->pipe_bpp; + /* + * FIXME: If all the streams can't fit into the link with + * their current pipe_bpp we should reduce pipe_bpp across + * the board until things start to fit. Until then we + * limit to <= 8bpc since that's what was hardcoded for all + * MST streams previously. This hack should be removed once + * we have the proper retry logic in place. + */ + limits.max_bpp = min(pipe_config->pipe_bpp, 24); intel_dp_adjust_compliance_config(intel_dp, pipe_config, &limits); @@ -146,25 +201,91 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + pipe_config->mst_master_transcoder = intel_dp_mst_master_trans_compute(state, intel_dp); + + return 0; +} + +/* + * If one of the connectors in a MST stream needs a modeset, mark all CRTCs + * that shares the same MST stream as mode changed, + * intel_modeset_pipe_config()+intel_crtc_check_fastset() will take care to do + * a fastset when possible. 
+ */ +static int +intel_dp_mst_atomic_master_trans_check(struct intel_connector *connector, + struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct drm_connector_list_iter connector_list_iter; + struct intel_connector *connector_iter; + + if (INTEL_GEN(dev_priv) < 12) + return 0; + + if (!intel_connector_needs_modeset(state, &connector->base)) + return 0; + + drm_connector_list_iter_begin(&dev_priv->drm, &connector_list_iter); + for_each_intel_connector_iter(connector_iter, &connector_list_iter) { + struct intel_digital_connector_state *conn_iter_state; + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int ret; + + if (connector_iter->mst_port != connector->mst_port || + connector_iter == connector) + continue; + + conn_iter_state = intel_atomic_get_digital_connector_state(state, + connector_iter); + if (IS_ERR(conn_iter_state)) { + drm_connector_list_iter_end(&connector_list_iter); + return PTR_ERR(conn_iter_state); + } + + if (!conn_iter_state->base.crtc) + continue; + + crtc = to_intel_crtc(conn_iter_state->base.crtc); + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) { + drm_connector_list_iter_end(&connector_list_iter); + return PTR_ERR(crtc_state); + } + + ret = drm_atomic_add_affected_planes(&state->base, &crtc->base); + if (ret) { + drm_connector_list_iter_end(&connector_list_iter); + return ret; + } + crtc_state->uapi.mode_changed = true; + } + drm_connector_list_iter_end(&connector_list_iter); + return 0; } static int intel_dp_mst_atomic_check(struct drm_connector *connector, - struct drm_atomic_state *state) + struct drm_atomic_state *_state) { + struct intel_atomic_state *state = to_intel_atomic_state(_state); struct drm_connector_state *new_conn_state = - drm_atomic_get_new_connector_state(state, connector); + drm_atomic_get_new_connector_state(&state->base, connector); struct drm_connector_state *old_conn_state = - drm_atomic_get_old_connector_state(state, connector); + drm_atomic_get_old_connector_state(&state->base, connector); struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_crtc *new_crtc = new_conn_state->crtc; - struct drm_crtc_state *crtc_state; struct drm_dp_mst_topology_mgr *mgr; int ret; - ret = intel_digital_connector_atomic_check(connector, state); + ret = intel_digital_connector_atomic_check(connector, &state->base); + if (ret) + return ret; + + ret = intel_dp_mst_atomic_master_trans_check(intel_connector, state); if (ret) return ret; @@ -175,16 +296,18 @@ intel_dp_mst_atomic_check(struct drm_connector *connector, * connector */ if (new_crtc) { - crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(new_crtc); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, intel_crtc); if (!crtc_state || - !drm_atomic_crtc_needs_modeset(crtc_state) || - crtc_state->enable) + !drm_atomic_crtc_needs_modeset(&crtc_state->uapi) || + crtc_state->uapi.enable) return 0; } - mgr = &enc_to_mst(old_conn_state->best_encoder)->primary->dp.mst_mgr; - ret = drm_dp_atomic_release_vcpi_slots(state, mgr, + mgr = &enc_to_mst(to_intel_encoder(old_conn_state->best_encoder))->primary->dp.mst_mgr; + ret = drm_dp_atomic_release_vcpi_slots(&state->base, mgr, intel_connector->port); return ret; @@ -194,7 +317,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state 
*old_conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct intel_connector *connector = @@ -207,7 +330,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); if (ret) { - DRM_ERROR("failed to update payload %d\n", ret); + DRM_DEBUG_KMS("failed to update payload %d\n", ret); } if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, @@ -218,36 +341,65 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct intel_connector *connector = to_intel_connector(old_conn_state->connector); + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + bool last_mst_stream; + u32 val; - intel_ddi_disable_pipe_clock(old_crtc_state); + intel_dp->active_mst_links--; + last_mst_stream = intel_dp->active_mst_links == 0; + WARN_ON(INTEL_GEN(dev_priv) >= 12 && last_mst_stream && + !intel_dp_mst_is_master_trans(old_crtc_state)); + + intel_crtc_vblank_off(old_crtc_state); + + intel_disable_pipe(old_crtc_state); - /* this can fail */ - drm_dp_check_act_status(&intel_dp->mst_mgr); - /* and this can also fail */ drm_dp_update_payload_part2(&intel_dp->mst_mgr); + val = I915_READ(TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder)); + val &= ~TRANS_DDI_DP_VC_PAYLOAD_ALLOC; + I915_WRITE(TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder), val); + + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, + DP_TP_STATUS_ACT_SENT, 1)) + DRM_ERROR("Timed out waiting for ACT sent when disabling\n"); + drm_dp_check_act_status(&intel_dp->mst_mgr); + drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port); + intel_ddi_disable_transcoder_func(old_crtc_state); + + if (INTEL_GEN(dev_priv) >= 9) + skl_scaler_disable(old_crtc_state); + else + ilk_pfit_disable(old_crtc_state); + /* * Power down mst path before disabling the port, otherwise we end * up getting interrupts from the sink upon detecting link loss. 
*/ drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, false); + /* + * From TGL spec: "If multi-stream slave transcoder: Configure + * Transcoder Clock Select to direct no clock to the transcoder" + * + * From older GENs spec: "Configure Transcoder Clock Select to direct + * no clock to the transcoder" + */ + if (INTEL_GEN(dev_priv) < 12 || !last_mst_stream) + intel_ddi_disable_pipe_clock(old_crtc_state); - intel_dp->active_mst_links--; intel_mst->connector = NULL; - if (intel_dp->active_mst_links == 0) { - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + if (last_mst_stream) intel_dig_port->base.post_disable(&intel_dig_port->base, old_crtc_state, NULL); - } DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); } @@ -256,7 +408,7 @@ static void intel_mst_pre_pll_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; @@ -265,48 +417,37 @@ static void intel_mst_pre_pll_enable_dp(struct intel_encoder *encoder, pipe_config, NULL); } -static void intel_mst_post_pll_disable_dp(struct intel_encoder *encoder, - const struct intel_crtc_state *old_crtc_state, - const struct drm_connector_state *old_conn_state) -{ - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); - struct intel_digital_port *intel_dig_port = intel_mst->primary; - struct intel_dp *intel_dp = &intel_dig_port->dp; - - if (intel_dp->active_mst_links == 0) - intel_dig_port->base.post_pll_disable(&intel_dig_port->base, - old_crtc_state, - old_conn_state); -} - static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_dig_port->base.port; struct intel_connector *connector = to_intel_connector(conn_state->connector); int ret; u32 temp; + bool first_mst_stream; /* MST encoders are bound to a crtc, not to a connector, * force the mapping here for get_hw_state. 
*/ connector->encoder = encoder; intel_mst->connector = connector; + first_mst_stream = intel_dp->active_mst_links == 0; + WARN_ON(INTEL_GEN(dev_priv) >= 12 && first_mst_stream && + !intel_dp_mst_is_master_trans(pipe_config)); DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); - if (intel_dp->active_mst_links == 0) + if (first_mst_stream) intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true); - if (intel_dp->active_mst_links == 0) + if (first_mst_stream) intel_dig_port->base.pre_enable(&intel_dig_port->base, pipe_config, NULL); @@ -318,31 +459,37 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_ERROR("failed to allocate vcpi\n"); intel_dp->active_mst_links++; - temp = I915_READ(DP_TP_STATUS(port)); - I915_WRITE(DP_TP_STATUS(port), temp); + temp = I915_READ(intel_dp->regs.dp_tp_status); + I915_WRITE(intel_dp->regs.dp_tp_status, temp); ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); - intel_ddi_enable_pipe_clock(pipe_config); + /* + * Before Gen 12 this is not done as part of + * intel_dig_port->base.pre_enable() and should be done here. For + * Gen 12+ the step in which this should be done is different for the + * first MST stream, so it's done on the DDI for the first stream and + * here for the following ones. + */ + if (INTEL_GEN(dev_priv) < 12 || !first_mst_stream) + intel_ddi_enable_pipe_clock(pipe_config); + + intel_ddi_set_dp_msa(pipe_config, conn_state); } static void intel_mst_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_dig_port->base.port; DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); - if (intel_wait_for_register(&dev_priv->uncore, - DP_TP_STATUS(port), - DP_TP_STATUS_ACT_SENT, - DP_TP_STATUS_ACT_SENT, - 1)) + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, + DP_TP_STATUS_ACT_SENT, 1)) DRM_ERROR("Timed out waiting for ACT sent\n"); drm_dp_check_act_status(&intel_dp->mst_mgr); @@ -355,7 +502,7 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); *pipe = intel_mst->pipe; if (intel_mst->connector) return true; @@ -365,7 +512,7 @@ static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; intel_ddi_get_config(&intel_dig_port->base, pipe_config); @@ -388,20 +535,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) return ret; } -static enum drm_connector_status -intel_dp_mst_detect(struct drm_connector *connector, bool force) -{ - struct intel_connector *intel_connector = to_intel_connector(connector); - struct intel_dp *intel_dp = 
intel_connector->mst_port; - - if (drm_connector_is_unregistered(connector)) - return connector_status_disconnected; - return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, - intel_connector->port); -} - static const struct drm_connector_funcs intel_dp_mst_connector_funcs = { - .detect = intel_dp_mst_detect, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_get_property = intel_digital_connector_atomic_get_property, .atomic_set_property = intel_digital_connector_atomic_set_property, @@ -421,6 +555,7 @@ static enum drm_mode_status intel_dp_mst_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; @@ -448,7 +583,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, if (mode_rate > max_rate || mode->clock > max_dotclk) return MODE_CLOCK_HIGH; - return MODE_OK; + return intel_mode_valid_max_plane_size(dev_priv, mode); } static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *connector, @@ -461,16 +596,31 @@ static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *c return &intel_dp->mst_encoders[crtc->pipe]->base.base; } +static int +intel_dp_mst_detect(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, bool force) +{ + struct intel_connector *intel_connector = to_intel_connector(connector); + struct intel_dp *intel_dp = intel_connector->mst_port; + + if (drm_connector_is_unregistered(connector)) + return connector_status_disconnected; + + return drm_dp_mst_detect_port(connector, ctx, &intel_dp->mst_mgr, + intel_connector->port); +} + static const struct drm_connector_helper_funcs intel_dp_mst_connector_helper_funcs = { .get_modes = intel_dp_mst_get_modes, .mode_valid = intel_dp_mst_mode_valid, .atomic_best_encoder = intel_mst_atomic_best_encoder, .atomic_check = intel_dp_mst_atomic_check, + .detect_ctx = intel_dp_mst_detect, }; static void intel_dp_mst_encoder_destroy(struct drm_encoder *encoder) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(to_intel_encoder(encoder)); drm_encoder_cleanup(encoder); kfree(intel_mst); @@ -539,7 +689,15 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); - drm_connector_attach_max_bpc_property(connector, 6, 12); + + /* + * Reuse the prop from the SST connector because we're + * not allowed to create new props after device registration. + */ + connector->max_bpc_property = + intel_dp->attached_connector->base.max_bpc_property; + if (connector->max_bpc_property) + drm_connector_attach_max_bpc_property(connector, 6, 12); return connector; @@ -602,14 +760,21 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->type = INTEL_OUTPUT_DP_MST; intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->base.port; - intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; + /* + * This is wrong, but broken userspace uses the intersection + * of possible_crtcs of all the encoders of a given connector + * to figure out which crtcs can drive said connector. What + * should be used instead is the union of possible_crtcs. 
+ * To keep such userspace functioning we must misconfigure + * this to make sure the intersection is not empty :( + */ + intel_encoder->pipe_mask = ~0; intel_encoder->compute_config = intel_dp_mst_compute_config; intel_encoder->disable = intel_mst_disable_dp; intel_encoder->post_disable = intel_mst_post_disable_dp; intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp; - intel_encoder->post_pll_disable = intel_mst_post_pll_disable_dp; intel_encoder->pre_enable = intel_mst_pre_enable_dp; intel_encoder->enable = intel_mst_enable_dp; intel_encoder->get_hw_state = intel_dp_mst_enc_get_hw_state; @@ -632,23 +797,39 @@ intel_dp_create_fake_mst_encoders(struct intel_digital_port *intel_dig_port) } int +intel_dp_mst_encoder_active_links(struct intel_digital_port *intel_dig_port) +{ + return intel_dig_port->dp.active_mst_links; +} + +int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_base_id) { + struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev); struct intel_dp *intel_dp = &intel_dig_port->dp; - struct drm_device *dev = intel_dig_port->base.base.dev; + enum port port = intel_dig_port->base.port; int ret; - intel_dp->can_mst = true; + if (!HAS_DP_MST(i915) || intel_dp_is_edp(intel_dp)) + return 0; + + if (INTEL_GEN(i915) < 12 && port == PORT_A) + return 0; + + if (INTEL_GEN(i915) < 11 && port == PORT_E) + return 0; + intel_dp->mst_mgr.cbs = &mst_cbs; /* create encoders */ intel_dp_create_fake_mst_encoders(intel_dig_port); - ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, dev, + ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, &i915->drm, &intel_dp->aux, 16, 3, conn_base_id); - if (ret) { - intel_dp->can_mst = false; + if (ret) return ret; - } + + intel_dp->can_mst = true; + return 0; } @@ -663,3 +844,14 @@ intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port) drm_dp_mst_topology_mgr_destroy(&intel_dp->mst_mgr); /* encoders will get killed by normal cleanup */ } + +bool intel_dp_mst_is_master_trans(const struct intel_crtc_state *crtc_state) +{ + return crtc_state->mst_master_transcoder == crtc_state->cpu_transcoder; +} + +bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state) +{ + return crtc_state->mst_master_transcoder != INVALID_TRANSCODER && + crtc_state->mst_master_transcoder != crtc_state->cpu_transcoder; +} diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h index 1470c6e0514b..854724f68f09 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.h +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h @@ -6,9 +6,15 @@ #ifndef __INTEL_DP_MST_H__ #define __INTEL_DP_MST_H__ +#include <linux/types.h> + struct intel_digital_port; +struct intel_crtc_state; int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id); void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port); +int intel_dp_mst_encoder_active_links(struct intel_digital_port *intel_dig_port); +bool intel_dp_mst_is_master_trans(const struct intel_crtc_state *crtc_state); +bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_DP_MST_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c index 7ccf7f3974db..6fb1f7a7364e 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c @@ -23,8 +23,8 @@ #include "display/intel_dp.h" +#include "intel_display_types.h" #include "intel_dpio_phy.h" 
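/*
 * Illustrative sketch, not part of the patch: how the
 * intel_dp_mst_is_master_trans()/intel_dp_mst_is_slave_trans() helpers
 * added above in intel_dp_mst.c are meant to be used. On gen12+ one
 * transcoder acts as the MST master and the others as slaves, so
 * enable/disable ordering is gated on these predicates, as done in
 * intel_mst_post_disable_dp() in this patch. The helper below and its
 * name are hypothetical, for illustration only.
 */
static bool example_keep_pipe_clock_on_disable(struct drm_i915_private *i915,
						const struct intel_crtc_state *crtc_state,
						bool last_mst_stream)
{
	/* Pre-gen12 there is no master/slave split: always disable the clock. */
	if (INTEL_GEN(i915) < 12)
		return false;

	/*
	 * On gen12+ the pipe clock of the master transcoder is only torn
	 * down together with the last MST stream; slave transcoders are
	 * handled on their own.
	 */
	return last_mst_stream && intel_dp_mst_is_master_trans(crtc_state);
}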
-#include "intel_drv.h" #include "intel_sideband.h" /** @@ -345,10 +345,8 @@ static u32 bxt_get_grc(struct drm_i915_private *dev_priv, enum dpio_phy phy) static void bxt_phy_wait_grc_done(struct drm_i915_private *dev_priv, enum dpio_phy phy) { - if (intel_wait_for_register(&dev_priv->uncore, - BXT_PORT_REF_DW3(phy), - GRC_DONE, GRC_DONE, - 10)) + if (intel_de_wait_for_set(dev_priv, BXT_PORT_REF_DW3(phy), + GRC_DONE, 10)) DRM_ERROR("timeout waiting for PHY%d GRC\n", phy); } @@ -644,7 +642,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, bool uniq_trans_scale) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); enum pipe pipe = intel_crtc->pipe; @@ -740,8 +738,8 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, bool reset) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(encoder)); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum pipe pipe = crtc->pipe; u32 val; @@ -783,9 +781,9 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, void chv_phy_pre_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); enum pipe pipe = crtc->pipe; unsigned int lane_mask = @@ -863,10 +861,10 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_digital_port *dport = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); enum pipe pipe = crtc->pipe; int data, i, stagger; @@ -942,7 +940,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, void chv_phy_release_cl2_override(struct intel_encoder *encoder) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (dport->release_cl2_override) { @@ -955,7 +953,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum pipe pipe = to_intel_crtc(old_crtc_state->base.crtc)->pipe; + enum pipe pipe = to_intel_crtc(old_crtc_state->uapi.crtc)->pipe; u32 val; vlv_dpio_get(dev_priv); @@ -991,7 +989,7 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, { struct drm_i915_private 
*dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = intel_crtc->pipe; @@ -1016,9 +1014,9 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = crtc->pipe; @@ -1045,10 +1043,10 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_digital_port *dport = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = crtc->pipe; u32 val; @@ -1075,9 +1073,9 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, void vlv_phy_reset_lanes(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = crtc->pipe; diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 2d4e7b9a7b9d..c75e34d87111 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -21,9 +21,9 @@ * DEALINGS IN THE SOFTWARE. */ +#include "intel_display_types.h" #include "intel_dpio_phy.h" #include "intel_dpll_mgr.h" -#include "intel_drv.h" /** * DOC: Display PLLs @@ -36,9 +36,10 @@ * This file provides an abstraction over display PLLs. The function * intel_shared_dpll_init() initializes the PLLs for the given platform. The * users of a PLL are tracked and that tracking is integrated with the atomic - * modest interface. During an atomic operation, a PLL can be requested for a - * given CRTC and encoder configuration by calling intel_get_shared_dpll() and - * a previously used PLL can be released with intel_release_shared_dpll(). + * modset interface. During an atomic operation, required PLLs can be reserved + * for a given CRTC and encoder configuration by calling + * intel_reserve_shared_dplls() and previously reserved PLLs can be released + * with intel_release_shared_dplls(). * Changes to the users are first staged in the atomic state, and then made * effective by calling intel_shared_dpll_swap_state() during the atomic * commit phase. 
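/*
 * Illustrative sketch, not part of the patch: the shape of a platform
 * ->get_dplls() hook under the reworked reservation API described in the
 * DOC comment above. The hypothetical foo_get_dpll() name is for
 * illustration only; the find/reference/assign pattern and the PLL-mask
 * argument follow ibx_get_dpll()/hsw_get_dpll() as converted in this patch.
 */
static bool foo_get_dpll(struct intel_atomic_state *state,
			 struct intel_crtc *crtc,
			 struct intel_encoder *encoder)
{
	struct intel_crtc_state *crtc_state =
		intel_atomic_get_new_crtc_state(state, crtc);
	struct intel_shared_dpll *pll;

	/* Pick a free PLL, or one already programmed with identical state. */
	pll = intel_find_shared_dpll(state, crtc,
				     &crtc_state->dpll_hw_state,
				     BIT(DPLL_ID_PCH_PLL_B) |
				     BIT(DPLL_ID_PCH_PLL_A));
	if (!pll)
		return false;

	/*
	 * Stage the reference in the atomic state; it only becomes
	 * effective in intel_shared_dpll_swap_state() at commit time.
	 */
	intel_reference_shared_dpll(state, crtc,
				    pll, &crtc_state->dpll_hw_state);
	crtc_state->shared_dpll = pll;

	return true;
}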
@@ -135,7 +136,7 @@ void assert_shared_dpll(struct drm_i915_private *dev_priv, */ void intel_prepare_shared_dpll(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll = crtc_state->shared_dpll; @@ -162,7 +163,7 @@ void intel_prepare_shared_dpll(const struct intel_crtc_state *crtc_state) */ void intel_enable_shared_dpll(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll = crtc_state->shared_dpll; unsigned int crtc_mask = drm_crtc_mask(&crtc->base); @@ -207,7 +208,7 @@ out: */ void intel_disable_shared_dpll(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll = crtc_state->shared_dpll; unsigned int crtc_mask = drm_crtc_mask(&crtc->base); @@ -243,19 +244,21 @@ out: } static struct intel_shared_dpll * -intel_find_shared_dpll(struct intel_crtc_state *crtc_state, - enum intel_dpll_id range_min, - enum intel_dpll_id range_max) +intel_find_shared_dpll(struct intel_atomic_state *state, + const struct intel_crtc *crtc, + const struct intel_dpll_hw_state *pll_state, + unsigned long dpll_mask) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll, *unused_pll = NULL; struct intel_shared_dpll_state *shared_dpll; enum intel_dpll_id i; - shared_dpll = intel_atomic_get_shared_dpll_state(crtc_state->base.state); + shared_dpll = intel_atomic_get_shared_dpll_state(&state->base); - for (i = range_min; i <= range_max; i++) { + WARN_ON(dpll_mask & ~(BIT(I915_NUM_PLLS) - 1)); + + for_each_set_bit(i, &dpll_mask, I915_NUM_PLLS) { pll = &dev_priv->shared_dplls[i]; /* Only want to check enabled timings first */ @@ -265,9 +268,9 @@ intel_find_shared_dpll(struct intel_crtc_state *crtc_state, continue; } - if (memcmp(&crtc_state->dpll_hw_state, + if (memcmp(pll_state, &shared_dpll[i].hw_state, - sizeof(crtc_state->dpll_hw_state)) == 0) { + sizeof(*pll_state)) == 0) { DRM_DEBUG_KMS("[CRTC:%d:%s] sharing existing %s (crtc mask 0x%08x, active %x)\n", crtc->base.base.id, crtc->base.name, pll->info->name, @@ -289,26 +292,51 @@ intel_find_shared_dpll(struct intel_crtc_state *crtc_state, } static void -intel_reference_shared_dpll(struct intel_shared_dpll *pll, - struct intel_crtc_state *crtc_state) +intel_reference_shared_dpll(struct intel_atomic_state *state, + const struct intel_crtc *crtc, + const struct intel_shared_dpll *pll, + const struct intel_dpll_hw_state *pll_state) { struct intel_shared_dpll_state *shared_dpll; - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); const enum intel_dpll_id id = pll->info->id; - shared_dpll = intel_atomic_get_shared_dpll_state(crtc_state->base.state); + shared_dpll = intel_atomic_get_shared_dpll_state(&state->base); if (shared_dpll[id].crtc_mask == 0) - shared_dpll[id].hw_state = - crtc_state->dpll_hw_state; + shared_dpll[id].hw_state = *pll_state; - crtc_state->shared_dpll = pll; DRM_DEBUG_DRIVER("using %s for pipe %c\n", 
pll->info->name, pipe_name(crtc->pipe)); shared_dpll[id].crtc_mask |= 1 << crtc->pipe; } +static void intel_unreference_shared_dpll(struct intel_atomic_state *state, + const struct intel_crtc *crtc, + const struct intel_shared_dpll *pll) +{ + struct intel_shared_dpll_state *shared_dpll; + + shared_dpll = intel_atomic_get_shared_dpll_state(&state->base); + shared_dpll[pll->info->id].crtc_mask &= ~(1 << crtc->pipe); +} + +static void intel_put_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + + new_crtc_state->shared_dpll = NULL; + + if (!old_crtc_state->shared_dpll) + return; + + intel_unreference_shared_dpll(state, crtc, old_crtc_state->shared_dpll); +} + /** * intel_shared_dpll_swap_state - make atomic DPLL configuration effective * @state: atomic state @@ -320,25 +348,20 @@ intel_reference_shared_dpll(struct intel_shared_dpll *pll, * i.e. it also puts the current state into @state, even though there is no * need for that at this moment. */ -void intel_shared_dpll_swap_state(struct drm_atomic_state *state) +void intel_shared_dpll_swap_state(struct intel_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct intel_shared_dpll_state *shared_dpll; - struct intel_shared_dpll *pll; + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_shared_dpll_state *shared_dpll = state->shared_dpll; enum intel_dpll_id i; - if (!to_intel_atomic_state(state)->dpll_set) + if (!state->dpll_set) return; - shared_dpll = to_intel_atomic_state(state)->shared_dpll; for (i = 0; i < dev_priv->num_shared_dpll; i++) { - struct intel_shared_dpll_state tmp; - - pll = &dev_priv->shared_dplls[i]; + struct intel_shared_dpll *pll = + &dev_priv->shared_dplls[i]; - tmp = pll->state; - pll->state = shared_dpll[i]; - shared_dpll[i] = tmp; + swap(pll->state, shared_dpll[i]); } } @@ -421,11 +444,12 @@ static void ibx_pch_dpll_disable(struct drm_i915_private *dev_priv, udelay(200); } -static struct intel_shared_dpll * -ibx_get_dpll(struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) +static bool ibx_get_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll; enum intel_dpll_id i; @@ -439,18 +463,22 @@ ibx_get_dpll(struct intel_crtc_state *crtc_state, crtc->base.base.id, crtc->base.name, pll->info->name); } else { - pll = intel_find_shared_dpll(crtc_state, - DPLL_ID_PCH_PLL_A, - DPLL_ID_PCH_PLL_B); + pll = intel_find_shared_dpll(state, crtc, + &crtc_state->dpll_hw_state, + BIT(DPLL_ID_PCH_PLL_B) | + BIT(DPLL_ID_PCH_PLL_A)); } if (!pll) - return NULL; + return false; /* reference the pll */ - intel_reference_shared_dpll(pll, crtc_state); + intel_reference_shared_dpll(state, crtc, + pll, &crtc_state->dpll_hw_state); - return pll; + crtc_state->shared_dpll = pll; + + return true; } static void ibx_dump_hw_state(struct drm_i915_private *dev_priv, @@ -498,16 +526,31 @@ static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, val = I915_READ(WRPLL_CTL(id)); I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE); POSTING_READ(WRPLL_CTL(id)); + + /* + * Try to 
set up the PCH reference clock once all DPLLs + * that depend on it have been shut down. + */ + if (dev_priv->pch_ssc_use & BIT(id)) + intel_init_pch_refclk(dev_priv); } static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + enum intel_dpll_id id = pll->info->id; u32 val; val = I915_READ(SPLL_CTL); I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE); POSTING_READ(SPLL_CTL); + + /* + * Try to set up the PCH reference clock once all DPLLs + * that depend on it have been shut down. + */ + if (dev_priv->pch_ssc_use & BIT(id)) + intel_init_pch_refclk(dev_priv); } static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, @@ -767,8 +810,12 @@ hsw_ddi_calculate_wrpll(int clock /* in Hz */, *r2_out = best.r2; } -static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(struct intel_crtc_state *crtc_state) +static struct intel_shared_dpll * +hsw_ddi_hdmi_get_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc) { + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct intel_shared_dpll *pll; u32 val; unsigned int p, n2, r2; @@ -781,8 +828,10 @@ static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(struct intel_crtc_state * crtc_state->dpll_hw_state.wrpll = val; - pll = intel_find_shared_dpll(crtc_state, - DPLL_ID_WRPLL1, DPLL_ID_WRPLL2); + pll = intel_find_shared_dpll(state, crtc, + &crtc_state->dpll_hw_state, + BIT(DPLL_ID_WRPLL2) | + BIT(DPLL_ID_WRPLL1)); if (!pll) return NULL; @@ -793,7 +842,7 @@ static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(struct intel_crtc_state * static struct intel_shared_dpll * hsw_ddi_dp_get_dpll(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); struct intel_shared_dpll *pll; enum intel_dpll_id pll_id; int clock = crtc_state->port_clock; @@ -821,38 +870,44 @@ hsw_ddi_dp_get_dpll(struct intel_crtc_state *crtc_state) return pll; } -static struct intel_shared_dpll * -hsw_get_dpll(struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) +static bool hsw_get_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) { + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct intel_shared_dpll *pll; memset(&crtc_state->dpll_hw_state, 0, sizeof(crtc_state->dpll_hw_state)); if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { - pll = hsw_ddi_hdmi_get_dpll(crtc_state); + pll = hsw_ddi_hdmi_get_dpll(state, crtc); } else if (intel_crtc_has_dp_encoder(crtc_state)) { pll = hsw_ddi_dp_get_dpll(crtc_state); } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) { if (WARN_ON(crtc_state->port_clock / 2 != 135000)) - return NULL; + return false; crtc_state->dpll_hw_state.spll = SPLL_PLL_ENABLE | SPLL_FREQ_1350MHz | SPLL_REF_MUXED_SSC; - pll = intel_find_shared_dpll(crtc_state, - DPLL_ID_SPLL, DPLL_ID_SPLL); + pll = intel_find_shared_dpll(state, crtc, + &crtc_state->dpll_hw_state, + BIT(DPLL_ID_SPLL)); } else { - return NULL; + return false; } if (!pll) - return NULL; + return false; - intel_reference_shared_dpll(pll, crtc_state); + intel_reference_shared_dpll(state, crtc, + pll, &crtc_state->dpll_hw_state); - return pll; + crtc_state->shared_dpll = pll; + + return true; } static void hsw_dump_hw_state(struct drm_i915_private *dev_priv, @@ -962,11 +1017,7 @@ static void skl_ddi_pll_enable(struct drm_i915_private *dev_priv, 
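/*
 * Illustrative before/after, not part of the patch: the repeated
 * intel_wait_for_register() polls in hunks such as the skl_ddi_pll_enable()
 * change that follows are converted to the intel_de_wait_for_set()/
 * intel_de_wait_for_clear() display helpers, which wait (timeout in
 * milliseconds) until all bits in the mask read back as set or clear.
 * The wrapper function below is hypothetical, shown only to compare the
 * two call styles side by side.
 */
static void example_wait_for_dpll_lock(struct drm_i915_private *dev_priv,
				       enum intel_dpll_id id)
{
	/* Old style: mask and expected value passed separately. */
	if (intel_wait_for_register(&dev_priv->uncore, DPLL_STATUS,
				    DPLL_LOCK(id), DPLL_LOCK(id), 5))
		DRM_ERROR("DPLL %d not locked\n", id);

	/* New style: same 5 ms wait for DPLL_LOCK(id) to read back as set. */
	if (intel_de_wait_for_set(dev_priv, DPLL_STATUS, DPLL_LOCK(id), 5))
		DRM_ERROR("DPLL %d not locked\n", id);
}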
I915_WRITE(regs[id].ctl, I915_READ(regs[id].ctl) | LCPLL_PLL_ENABLE); - if (intel_wait_for_register(&dev_priv->uncore, - DPLL_STATUS, - DPLL_LOCK(id), - DPLL_LOCK(id), - 5)) + if (intel_de_wait_for_set(dev_priv, DPLL_STATUS, DPLL_LOCK(id), 5)) DRM_ERROR("DPLL %d not locked\n", id); } @@ -1385,10 +1436,12 @@ skl_ddi_dp_set_dpll_hw_state(struct intel_crtc_state *crtc_state) return true; } -static struct intel_shared_dpll * -skl_get_dpll(struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) +static bool skl_get_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) { + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct intel_shared_dpll *pll; bool bret; @@ -1396,32 +1449,37 @@ skl_get_dpll(struct intel_crtc_state *crtc_state, bret = skl_ddi_hdmi_pll_dividers(crtc_state); if (!bret) { DRM_DEBUG_KMS("Could not get HDMI pll dividers.\n"); - return NULL; + return false; } } else if (intel_crtc_has_dp_encoder(crtc_state)) { bret = skl_ddi_dp_set_dpll_hw_state(crtc_state); if (!bret) { DRM_DEBUG_KMS("Could not set DP dpll HW state.\n"); - return NULL; + return false; } } else { - return NULL; + return false; } if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - pll = intel_find_shared_dpll(crtc_state, - DPLL_ID_SKL_DPLL0, - DPLL_ID_SKL_DPLL0); + pll = intel_find_shared_dpll(state, crtc, + &crtc_state->dpll_hw_state, + BIT(DPLL_ID_SKL_DPLL0)); else - pll = intel_find_shared_dpll(crtc_state, - DPLL_ID_SKL_DPLL1, - DPLL_ID_SKL_DPLL3); + pll = intel_find_shared_dpll(state, crtc, + &crtc_state->dpll_hw_state, + BIT(DPLL_ID_SKL_DPLL3) | + BIT(DPLL_ID_SKL_DPLL2) | + BIT(DPLL_ID_SKL_DPLL1)); if (!pll) - return NULL; + return false; - intel_reference_shared_dpll(pll, crtc_state); + intel_reference_shared_dpll(state, crtc, + pll, &crtc_state->dpll_hw_state); - return pll; + crtc_state->shared_dpll = pll; + + return true; } static void skl_dump_hw_state(struct drm_i915_private *dev_priv, @@ -1693,7 +1751,7 @@ static bool bxt_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state, struct bxt_clk_div *clk_div) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct dpll best_clock; /* Calculate HDMI div */ @@ -1827,22 +1885,23 @@ bxt_ddi_hdmi_set_dpll_hw_state(struct intel_crtc_state *crtc_state) return bxt_ddi_set_dpll_hw_state(crtc_state, &clk_div); } -static struct intel_shared_dpll * -bxt_get_dpll(struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) +static bool bxt_get_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll; enum intel_dpll_id id; if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) && !bxt_ddi_hdmi_set_dpll_hw_state(crtc_state)) - return NULL; + return false; if (intel_crtc_has_dp_encoder(crtc_state) && !bxt_ddi_dp_set_dpll_hw_state(crtc_state)) - return NULL; + return false; /* 1:1 mapping between ports and PLLs */ id = (enum intel_dpll_id) encoder->port; @@ -1851,9 +1910,12 @@ bxt_get_dpll(struct intel_crtc_state *crtc_state, DRM_DEBUG_KMS("[CRTC:%d:%s] using pre-allocated %s\n", crtc->base.base.id, crtc->base.name, pll->info->name); - intel_reference_shared_dpll(pll, crtc_state); + 
intel_reference_shared_dpll(state, crtc, + pll, &crtc_state->dpll_hw_state); - return pll; + crtc_state->shared_dpll = pll; + + return true; } static void bxt_dump_hw_state(struct drm_i915_private *dev_priv, @@ -1884,9 +1946,14 @@ static const struct intel_shared_dpll_funcs bxt_ddi_pll_funcs = { struct intel_dpll_mgr { const struct dpll_info *dpll_info; - struct intel_shared_dpll *(*get_dpll)(struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder); - + bool (*get_dplls)(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder); + void (*put_dplls)(struct intel_atomic_state *state, + struct intel_crtc *crtc); + void (*update_active_dpll)(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder); void (*dump_hw_state)(struct drm_i915_private *dev_priv, const struct intel_dpll_hw_state *hw_state); }; @@ -1899,7 +1966,8 @@ static const struct dpll_info pch_plls[] = { static const struct intel_dpll_mgr pch_pll_mgr = { .dpll_info = pch_plls, - .get_dpll = ibx_get_dpll, + .get_dplls = ibx_get_dpll, + .put_dplls = intel_put_dpll, .dump_hw_state = ibx_dump_hw_state, }; @@ -1915,7 +1983,8 @@ static const struct dpll_info hsw_plls[] = { static const struct intel_dpll_mgr hsw_pll_mgr = { .dpll_info = hsw_plls, - .get_dpll = hsw_get_dpll, + .get_dplls = hsw_get_dpll, + .put_dplls = intel_put_dpll, .dump_hw_state = hsw_dump_hw_state, }; @@ -1929,7 +1998,8 @@ static const struct dpll_info skl_plls[] = { static const struct intel_dpll_mgr skl_pll_mgr = { .dpll_info = skl_plls, - .get_dpll = skl_get_dpll, + .get_dplls = skl_get_dpll, + .put_dplls = intel_put_dpll, .dump_hw_state = skl_dump_hw_state, }; @@ -1942,7 +2012,8 @@ static const struct dpll_info bxt_plls[] = { static const struct intel_dpll_mgr bxt_pll_mgr = { .dpll_info = bxt_plls, - .get_dpll = bxt_get_dpll, + .get_dplls = bxt_get_dpll, + .put_dplls = intel_put_dpll, .dump_hw_state = bxt_dump_hw_state, }; @@ -1958,11 +2029,8 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 2. Wait for DPLL power state enabled in DPLL_ENABLE. */ - if (intel_wait_for_register(&dev_priv->uncore, - CNL_DPLL_ENABLE(id), - PLL_POWER_STATE, - PLL_POWER_STATE, - 5)) + if (intel_de_wait_for_set(dev_priv, CNL_DPLL_ENABLE(id), + PLL_POWER_STATE, 5)) DRM_ERROR("PLL %d Power not enabled\n", id); /* @@ -1999,11 +2067,7 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 7. Wait for PLL lock status in DPLL_ENABLE. */ - if (intel_wait_for_register(&dev_priv->uncore, - CNL_DPLL_ENABLE(id), - PLL_LOCK, - PLL_LOCK, - 5)) + if (intel_de_wait_for_set(dev_priv, CNL_DPLL_ENABLE(id), PLL_LOCK, 5)) DRM_ERROR("PLL %d not locked\n", id); /* @@ -2047,11 +2111,7 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 4. Wait for PLL not locked status in DPLL_ENABLE. */ - if (intel_wait_for_register(&dev_priv->uncore, - CNL_DPLL_ENABLE(id), - PLL_LOCK, - 0, - 5)) + if (intel_de_wait_for_clear(dev_priv, CNL_DPLL_ENABLE(id), PLL_LOCK, 5)) DRM_ERROR("PLL %d locked\n", id); /* @@ -2069,11 +2129,8 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 7. Wait for DPLL power state disabled in DPLL_ENABLE. 
*/ - if (intel_wait_for_register(&dev_priv->uncore, - CNL_DPLL_ENABLE(id), - PLL_POWER_STATE, - 0, - 5)) + if (intel_de_wait_for_clear(dev_priv, CNL_DPLL_ENABLE(id), + PLL_POWER_STATE, 5)) DRM_ERROR("PLL %d Power not disabled\n", id); } @@ -2217,7 +2274,7 @@ static bool cnl_ddi_calculate_wrpll(struct intel_crtc_state *crtc_state, struct skl_wrpll_params *wrpll_params) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); u32 afe_clock = crtc_state->port_clock * 5; u32 ref_clock; u32 dco_min = 7998000; @@ -2332,10 +2389,12 @@ cnl_ddi_dp_set_dpll_hw_state(struct intel_crtc_state *crtc_state) return true; } -static struct intel_shared_dpll * -cnl_get_dpll(struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) +static bool cnl_get_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) { + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct intel_shared_dpll *pll; bool bret; @@ -2343,31 +2402,36 @@ cnl_get_dpll(struct intel_crtc_state *crtc_state, bret = cnl_ddi_hdmi_pll_dividers(crtc_state); if (!bret) { DRM_DEBUG_KMS("Could not get HDMI pll dividers.\n"); - return NULL; + return false; } } else if (intel_crtc_has_dp_encoder(crtc_state)) { bret = cnl_ddi_dp_set_dpll_hw_state(crtc_state); if (!bret) { DRM_DEBUG_KMS("Could not set DP dpll HW state.\n"); - return NULL; + return false; } } else { DRM_DEBUG_KMS("Skip DPLL setup for output_types 0x%x\n", crtc_state->output_types); - return NULL; + return false; } - pll = intel_find_shared_dpll(crtc_state, - DPLL_ID_SKL_DPLL0, - DPLL_ID_SKL_DPLL2); + pll = intel_find_shared_dpll(state, crtc, + &crtc_state->dpll_hw_state, + BIT(DPLL_ID_SKL_DPLL2) | + BIT(DPLL_ID_SKL_DPLL1) | + BIT(DPLL_ID_SKL_DPLL0)); if (!pll) { DRM_DEBUG_KMS("No PLL selected\n"); - return NULL; + return false; } - intel_reference_shared_dpll(pll, crtc_state); + intel_reference_shared_dpll(state, crtc, + pll, &crtc_state->dpll_hw_state); - return pll; + crtc_state->shared_dpll = pll; + + return true; } static void cnl_dump_hw_state(struct drm_i915_private *dev_priv, @@ -2394,7 +2458,8 @@ static const struct dpll_info cnl_plls[] = { static const struct intel_dpll_mgr cnl_pll_mgr = { .dpll_info = cnl_plls, - .get_dpll = cnl_get_dpll, + .get_dplls = cnl_get_dpll, + .put_dplls = intel_put_dpll, .dump_hw_state = cnl_dump_hw_state, }; @@ -2473,10 +2538,22 @@ static const struct skl_wrpll_params icl_tbt_pll_19_2MHz_values = { .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0, }; +static const struct skl_wrpll_params tgl_tbt_pll_19_2MHz_values = { + .dco_integer = 0x54, .dco_fraction = 0x3000, + /* the following params are unused */ + .pdiv = 0, .kdiv = 0, .qdiv_mode = 0, .qdiv_ratio = 0, +}; + +static const struct skl_wrpll_params tgl_tbt_pll_24MHz_values = { + .dco_integer = 0x43, .dco_fraction = 0x4000, + /* the following params are unused */ + .pdiv = 0, .kdiv = 0, .qdiv_mode = 0, .qdiv_ratio = 0, +}; + static bool icl_calc_dp_combo_pll(struct intel_crtc_state *crtc_state, struct skl_wrpll_params *pll_params) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); const struct icl_combo_pll_params *params = dev_priv->cdclk.hw.ref == 24000 ? 
icl_dp_combo_pll_24MHz_values : @@ -2498,22 +2575,50 @@ static bool icl_calc_dp_combo_pll(struct intel_crtc_state *crtc_state, static bool icl_calc_tbt_pll(struct intel_crtc_state *crtc_state, struct skl_wrpll_params *pll_params) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + + if (INTEL_GEN(dev_priv) >= 12) { + switch (dev_priv->cdclk.hw.ref) { + default: + MISSING_CASE(dev_priv->cdclk.hw.ref); + /* fall-through */ + case 19200: + case 38400: + *pll_params = tgl_tbt_pll_19_2MHz_values; + break; + case 24000: + *pll_params = tgl_tbt_pll_24MHz_values; + break; + } + } else { + switch (dev_priv->cdclk.hw.ref) { + default: + MISSING_CASE(dev_priv->cdclk.hw.ref); + /* fall-through */ + case 19200: + case 38400: + *pll_params = icl_tbt_pll_19_2MHz_values; + break; + case 24000: + *pll_params = icl_tbt_pll_24MHz_values; + break; + } + } - *pll_params = dev_priv->cdclk.hw.ref == 24000 ? - icl_tbt_pll_24MHz_values : icl_tbt_pll_19_2MHz_values; return true; } static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) + struct intel_encoder *encoder, + struct intel_dpll_hw_state *pll_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); u32 cfgcr0, cfgcr1; struct skl_wrpll_params pll_params = { 0 }; bool ret; - if (intel_port_is_tc(dev_priv, encoder->port)) + if (intel_phy_is_tc(dev_priv, intel_port_to_phy(dev_priv, + encoder->port))) ret = icl_calc_tbt_pll(crtc_state, &pll_params); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) || intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) @@ -2530,14 +2635,17 @@ static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(pll_params.qdiv_ratio) | DPLL_CFGCR1_QDIV_MODE(pll_params.qdiv_mode) | DPLL_CFGCR1_KDIV(pll_params.kdiv) | - DPLL_CFGCR1_PDIV(pll_params.pdiv) | - DPLL_CFGCR1_CENTRAL_FREQ_8400; + DPLL_CFGCR1_PDIV(pll_params.pdiv); - memset(&crtc_state->dpll_hw_state, 0, - sizeof(crtc_state->dpll_hw_state)); + if (INTEL_GEN(dev_priv) >= 12) + cfgcr1 |= TGL_DPLL_CFGCR1_CFSELOVRD_NORMAL_XTAL; + else + cfgcr1 |= DPLL_CFGCR1_CENTRAL_FREQ_8400; - crtc_state->dpll_hw_state.cfgcr0 = cfgcr0; - crtc_state->dpll_hw_state.cfgcr1 = cfgcr1; + memset(pll_state, 0, sizeof(*pll_state)); + + pll_state->cfgcr0 = cfgcr0; + pll_state->cfgcr1 = cfgcr1; return true; } @@ -2555,7 +2663,8 @@ enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port) static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, u32 *target_dco_khz, - struct intel_dpll_hw_state *state) + struct intel_dpll_hw_state *state, + bool is_dkl) { u32 dco_min_freq, dco_max_freq; int div1_vals[] = {7, 5, 3, 2}; @@ -2577,8 +2686,13 @@ static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, continue; if (div2 >= 2) { + /* + * Note: a_divratio not matching TGL BSpec + * algorithm but matching hardcoded values and + * working on HW for DP alt-mode at least + */ a_divratio = is_dp ? 10 : 5; - tlinedrv = 2; + tlinedrv = is_dkl ? 1 : 2; } else { a_divratio = 5; tlinedrv = 0; @@ -2627,10 +2741,10 @@ static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, * The specification for this function uses real numbers, so the math had to be * adapted to integer-only calculation, that's why it looks so different. 
*/ -static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state) +static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, + struct intel_dpll_hw_state *pll_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - struct intel_dpll_hw_state *pll_state = &crtc_state->dpll_hw_state; + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); int refclk_khz = dev_priv->cdclk.hw.ref; int clock = crtc_state->port_clock; u32 dco_khz, m1div, m2div_int, m2div_rem, m2div_frac; @@ -2641,11 +2755,12 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state) u64 tmp; bool use_ssc = false; bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); + bool is_dkl = INTEL_GEN(dev_priv) >= 12; memset(pll_state, 0, sizeof(*pll_state)); if (!icl_mg_pll_find_divisors(clock, is_dp, use_ssc, &dco_khz, - pll_state)) { + pll_state, is_dkl)) { DRM_DEBUG_KMS("Failed to find divisors for clock %d\n", clock); return false; } @@ -2653,8 +2768,11 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state) m1div = 2; m2div_int = dco_khz / (refclk_khz * m1div); if (m2div_int > 255) { - m1div = 4; - m2div_int = dco_khz / (refclk_khz * m1div); + if (!is_dkl) { + m1div = 4; + m2div_int = dco_khz / (refclk_khz * m1div); + } + if (m2div_int > 255) { DRM_DEBUG_KMS("Failed to find mdiv for clock %d\n", clock); @@ -2734,121 +2852,277 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state) } ssc_steplog = 4; - pll_state->mg_pll_div0 = (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) | - MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | - MG_PLL_DIV0_FBDIV_INT(m2div_int); - - pll_state->mg_pll_div1 = MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | - MG_PLL_DIV1_DITHER_DIV_2 | - MG_PLL_DIV1_NDIVRATIO(1) | - MG_PLL_DIV1_FBPREDIV(m1div); - - pll_state->mg_pll_lf = MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | - MG_PLL_LF_AFCCNTSEL_512 | - MG_PLL_LF_GAINCTRL(1) | - MG_PLL_LF_INT_COEFF(int_coeff) | - MG_PLL_LF_PROP_COEFF(prop_coeff); - - pll_state->mg_pll_frac_lock = MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | - MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | - MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | - MG_PLL_FRAC_LOCK_DCODITHEREN | - MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); - if (use_ssc || m2div_rem > 0) - pll_state->mg_pll_frac_lock |= MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; - - pll_state->mg_pll_ssc = (use_ssc ? 
MG_PLL_SSC_EN : 0) | - MG_PLL_SSC_TYPE(2) | - MG_PLL_SSC_STEPLENGTH(ssc_steplen) | - MG_PLL_SSC_STEPNUM(ssc_steplog) | - MG_PLL_SSC_FLLEN | - MG_PLL_SSC_STEPSIZE(ssc_stepsize); - - pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART | - MG_PLL_TDC_COLDST_IREFINT_EN | - MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | - MG_PLL_TDC_TDCOVCCORR_EN | - MG_PLL_TDC_TDCSEL(3); - - pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) | - MG_PLL_BIAS_INIT_DCOAMP(0x3F) | - MG_PLL_BIAS_BIAS_BONUS(10) | - MG_PLL_BIAS_BIASCAL_EN | - MG_PLL_BIAS_CTRIM(12) | - MG_PLL_BIAS_VREF_RDAC(4) | - MG_PLL_BIAS_IREFTRIM(iref_trim); - - if (refclk_khz == 38400) { - pll_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; - pll_state->mg_pll_bias_mask = 0; + /* write pll_state calculations */ + if (is_dkl) { + pll_state->mg_pll_div0 = DKL_PLL_DIV0_INTEG_COEFF(int_coeff) | + DKL_PLL_DIV0_PROP_COEFF(prop_coeff) | + DKL_PLL_DIV0_FBPREDIV(m1div) | + DKL_PLL_DIV0_FBDIV_INT(m2div_int); + + pll_state->mg_pll_div1 = DKL_PLL_DIV1_IREF_TRIM(iref_trim) | + DKL_PLL_DIV1_TDC_TARGET_CNT(tdc_targetcnt); + + pll_state->mg_pll_ssc = DKL_PLL_SSC_IREF_NDIV_RATIO(iref_ndiv) | + DKL_PLL_SSC_STEP_LEN(ssc_steplen) | + DKL_PLL_SSC_STEP_NUM(ssc_steplog) | + (use_ssc ? DKL_PLL_SSC_EN : 0); + + pll_state->mg_pll_bias = (m2div_frac ? DKL_PLL_BIAS_FRAC_EN_H : 0) | + DKL_PLL_BIAS_FBDIV_FRAC(m2div_frac); + + pll_state->mg_pll_tdc_coldst_bias = + DKL_PLL_TDC_SSC_STEP_SIZE(ssc_stepsize) | + DKL_PLL_TDC_FEED_FWD_GAIN(feedfwgain); + } else { - pll_state->mg_pll_tdc_coldst_bias_mask = -1U; - pll_state->mg_pll_bias_mask = -1U; - } + pll_state->mg_pll_div0 = + (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) | + MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | + MG_PLL_DIV0_FBDIV_INT(m2div_int); + + pll_state->mg_pll_div1 = + MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | + MG_PLL_DIV1_DITHER_DIV_2 | + MG_PLL_DIV1_NDIVRATIO(1) | + MG_PLL_DIV1_FBPREDIV(m1div); + + pll_state->mg_pll_lf = + MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | + MG_PLL_LF_AFCCNTSEL_512 | + MG_PLL_LF_GAINCTRL(1) | + MG_PLL_LF_INT_COEFF(int_coeff) | + MG_PLL_LF_PROP_COEFF(prop_coeff); + + pll_state->mg_pll_frac_lock = + MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | + MG_PLL_FRAC_LOCK_DCODITHEREN | + MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); + if (use_ssc || m2div_rem > 0) + pll_state->mg_pll_frac_lock |= + MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; + + pll_state->mg_pll_ssc = + (use_ssc ? 
MG_PLL_SSC_EN : 0) | + MG_PLL_SSC_TYPE(2) | + MG_PLL_SSC_STEPLENGTH(ssc_steplen) | + MG_PLL_SSC_STEPNUM(ssc_steplog) | + MG_PLL_SSC_FLLEN | + MG_PLL_SSC_STEPSIZE(ssc_stepsize); + + pll_state->mg_pll_tdc_coldst_bias = + MG_PLL_TDC_COLDST_COLDSTART | + MG_PLL_TDC_COLDST_IREFINT_EN | + MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | + MG_PLL_TDC_TDCOVCCORR_EN | + MG_PLL_TDC_TDCSEL(3); + + pll_state->mg_pll_bias = + MG_PLL_BIAS_BIAS_GB_SEL(3) | + MG_PLL_BIAS_INIT_DCOAMP(0x3F) | + MG_PLL_BIAS_BIAS_BONUS(10) | + MG_PLL_BIAS_BIASCAL_EN | + MG_PLL_BIAS_CTRIM(12) | + MG_PLL_BIAS_VREF_RDAC(4) | + MG_PLL_BIAS_IREFTRIM(iref_trim); + + if (refclk_khz == 38400) { + pll_state->mg_pll_tdc_coldst_bias_mask = + MG_PLL_TDC_COLDST_COLDSTART; + pll_state->mg_pll_bias_mask = 0; + } else { + pll_state->mg_pll_tdc_coldst_bias_mask = -1U; + pll_state->mg_pll_bias_mask = -1U; + } - pll_state->mg_pll_tdc_coldst_bias &= pll_state->mg_pll_tdc_coldst_bias_mask; - pll_state->mg_pll_bias &= pll_state->mg_pll_bias_mask; + pll_state->mg_pll_tdc_coldst_bias &= + pll_state->mg_pll_tdc_coldst_bias_mask; + pll_state->mg_pll_bias &= pll_state->mg_pll_bias_mask; + } return true; } -static struct intel_shared_dpll * -icl_get_dpll(struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) +/** + * icl_set_active_port_dpll - select the active port DPLL for a given CRTC + * @crtc_state: state for the CRTC to select the DPLL for + * @port_dpll_id: the active @port_dpll_id to select + * + * Select the given @port_dpll_id instance from the DPLLs reserved for the + * CRTC. + */ +void icl_set_active_port_dpll(struct intel_crtc_state *crtc_state, + enum icl_port_dpll_id port_dpll_id) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - struct intel_digital_port *intel_dig_port; - struct intel_shared_dpll *pll; + struct icl_port_dpll *port_dpll = + &crtc_state->icl_port_dplls[port_dpll_id]; + + crtc_state->shared_dpll = port_dpll->pll; + crtc_state->dpll_hw_state = port_dpll->hw_state; +} + +static void icl_update_active_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) +{ + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct intel_digital_port *primary_port; + enum icl_port_dpll_id port_dpll_id = ICL_PORT_DPLL_DEFAULT; + + primary_port = encoder->type == INTEL_OUTPUT_DP_MST ? 
+ enc_to_mst(encoder)->primary : + enc_to_dig_port(encoder); + + if (primary_port && + (primary_port->tc_mode == TC_PORT_DP_ALT || + primary_port->tc_mode == TC_PORT_LEGACY)) + port_dpll_id = ICL_PORT_DPLL_MG_PHY; + + icl_set_active_port_dpll(crtc_state, port_dpll_id); +} + +static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) +{ + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct icl_port_dpll *port_dpll = + &crtc_state->icl_port_dplls[ICL_PORT_DPLL_DEFAULT]; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum port port = encoder->port; - enum intel_dpll_id min, max; - bool ret; + unsigned long dpll_mask; - if (intel_port_is_combophy(dev_priv, port)) { - min = DPLL_ID_ICL_DPLL0; - max = DPLL_ID_ICL_DPLL1; - ret = icl_calc_dpll_state(crtc_state, encoder); - } else if (intel_port_is_tc(dev_priv, port)) { - if (encoder->type == INTEL_OUTPUT_DP_MST) { - struct intel_dp_mst_encoder *mst_encoder; + if (!icl_calc_dpll_state(crtc_state, encoder, &port_dpll->hw_state)) { + DRM_DEBUG_KMS("Could not calculate combo PHY PLL state.\n"); - mst_encoder = enc_to_mst(&encoder->base); - intel_dig_port = mst_encoder->primary; - } else { - intel_dig_port = enc_to_dig_port(&encoder->base); - } + return false; + } - if (intel_dig_port->tc_type == TC_PORT_TBT) { - min = DPLL_ID_ICL_TBTPLL; - max = min; - ret = icl_calc_dpll_state(crtc_state, encoder); - } else { - enum tc_port tc_port; + if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A) + dpll_mask = + BIT(DPLL_ID_EHL_DPLL4) | + BIT(DPLL_ID_ICL_DPLL1) | + BIT(DPLL_ID_ICL_DPLL0); + else + dpll_mask = BIT(DPLL_ID_ICL_DPLL1) | BIT(DPLL_ID_ICL_DPLL0); + + port_dpll->pll = intel_find_shared_dpll(state, crtc, + &port_dpll->hw_state, + dpll_mask); + if (!port_dpll->pll) { + DRM_DEBUG_KMS("No combo PHY PLL found for [ENCODER:%d:%s]\n", + encoder->base.base.id, encoder->base.name); + return false; + } - tc_port = intel_port_to_tc(dev_priv, port); - min = icl_tc_port_to_pll_id(tc_port); - max = min; - ret = icl_calc_mg_pll_state(crtc_state); - } - } else { - MISSING_CASE(port); - return NULL; + intel_reference_shared_dpll(state, crtc, + port_dpll->pll, &port_dpll->hw_state); + + icl_update_active_dpll(state, crtc, encoder); + + return true; +} + +static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct icl_port_dpll *port_dpll; + enum intel_dpll_id dpll_id; + + port_dpll = &crtc_state->icl_port_dplls[ICL_PORT_DPLL_DEFAULT]; + if (!icl_calc_dpll_state(crtc_state, encoder, &port_dpll->hw_state)) { + DRM_DEBUG_KMS("Could not calculate TBT PLL state.\n"); + return false; } - if (!ret) { - DRM_DEBUG_KMS("Could not calculate PLL state.\n"); - return NULL; + port_dpll->pll = intel_find_shared_dpll(state, crtc, + &port_dpll->hw_state, + BIT(DPLL_ID_ICL_TBTPLL)); + if (!port_dpll->pll) { + DRM_DEBUG_KMS("No TBT-ALT PLL found\n"); + return false; } + intel_reference_shared_dpll(state, crtc, + port_dpll->pll, &port_dpll->hw_state); - pll = intel_find_shared_dpll(crtc_state, min, max); - if (!pll) { - DRM_DEBUG_KMS("No PLL selected\n"); - return NULL; + port_dpll = &crtc_state->icl_port_dplls[ICL_PORT_DPLL_MG_PHY]; + if (!icl_calc_mg_pll_state(crtc_state, &port_dpll->hw_state)) { + DRM_DEBUG_KMS("Could not calculate MG 
PHY PLL state.\n"); + goto err_unreference_tbt_pll; } - intel_reference_shared_dpll(pll, crtc_state); + dpll_id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, + encoder->port)); + port_dpll->pll = intel_find_shared_dpll(state, crtc, + &port_dpll->hw_state, + BIT(dpll_id)); + if (!port_dpll->pll) { + DRM_DEBUG_KMS("No MG PHY PLL found\n"); + goto err_unreference_tbt_pll; + } + intel_reference_shared_dpll(state, crtc, + port_dpll->pll, &port_dpll->hw_state); - return pll; + icl_update_active_dpll(state, crtc, encoder); + + return true; + +err_unreference_tbt_pll: + port_dpll = &crtc_state->icl_port_dplls[ICL_PORT_DPLL_DEFAULT]; + intel_unreference_shared_dpll(state, crtc, port_dpll->pll); + + return false; +} + +static bool icl_get_dplls(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + + if (intel_phy_is_combo(dev_priv, phy)) + return icl_get_combo_phy_dpll(state, crtc, encoder); + else if (intel_phy_is_tc(dev_priv, phy)) + return icl_get_tc_phy_dplls(state, crtc, encoder); + + MISSING_CASE(phy); + + return false; +} + +static void icl_put_dplls(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + enum icl_port_dpll_id id; + + new_crtc_state->shared_dpll = NULL; + + for (id = ICL_PORT_DPLL_DEFAULT; id < ICL_PORT_DPLL_COUNT; id++) { + const struct icl_port_dpll *old_port_dpll = + &old_crtc_state->icl_port_dplls[id]; + struct icl_port_dpll *new_port_dpll = + &new_crtc_state->icl_port_dplls[id]; + + new_port_dpll->pll = NULL; + + if (!old_port_dpll->pll) + continue; + + intel_unreference_shared_dpll(state, crtc, old_port_dpll->pll); + } } static bool mg_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -2913,6 +3187,78 @@ out: return ret; } +static bool dkl_pll_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll, + struct intel_dpll_hw_state *hw_state) +{ + const enum intel_dpll_id id = pll->info->id; + enum tc_port tc_port = icl_pll_id_to_tc_port(id); + intel_wakeref_t wakeref; + bool ret = false; + u32 val; + + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_DISPLAY_CORE); + if (!wakeref) + return false; + + val = I915_READ(MG_PLL_ENABLE(tc_port)); + if (!(val & PLL_ENABLE)) + goto out; + + /* + * All registers read here have the same HIP_INDEX_REG even though + * they are on different building blocks + */ + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2)); + + hw_state->mg_refclkin_ctl = I915_READ(DKL_REFCLKIN_CTL(tc_port)); + hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; + + hw_state->mg_clktop2_hsclkctl = + I915_READ(DKL_CLKTOP2_HSCLKCTL(tc_port)); + hw_state->mg_clktop2_hsclkctl &= + MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; + + hw_state->mg_clktop2_coreclkctl1 = + I915_READ(DKL_CLKTOP2_CORECLKCTL1(tc_port)); + hw_state->mg_clktop2_coreclkctl1 &= + MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; + + hw_state->mg_pll_div0 = I915_READ(DKL_PLL_DIV0(tc_port)); + hw_state->mg_pll_div0 &= (DKL_PLL_DIV0_INTEG_COEFF_MASK | + DKL_PLL_DIV0_PROP_COEFF_MASK | + DKL_PLL_DIV0_FBPREDIV_MASK | + DKL_PLL_DIV0_FBDIV_INT_MASK); + + 
hw_state->mg_pll_div1 = I915_READ(DKL_PLL_DIV1(tc_port)); + hw_state->mg_pll_div1 &= (DKL_PLL_DIV1_IREF_TRIM_MASK | + DKL_PLL_DIV1_TDC_TARGET_CNT_MASK); + + hw_state->mg_pll_ssc = I915_READ(DKL_PLL_SSC(tc_port)); + hw_state->mg_pll_ssc &= (DKL_PLL_SSC_IREF_NDIV_RATIO_MASK | + DKL_PLL_SSC_STEP_LEN_MASK | + DKL_PLL_SSC_STEP_NUM_MASK | + DKL_PLL_SSC_EN); + + hw_state->mg_pll_bias = I915_READ(DKL_PLL_BIAS(tc_port)); + hw_state->mg_pll_bias &= (DKL_PLL_BIAS_FRAC_EN_H | + DKL_PLL_BIAS_FBDIV_FRAC_MASK); + + hw_state->mg_pll_tdc_coldst_bias = + I915_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port)); + hw_state->mg_pll_tdc_coldst_bias &= (DKL_PLL_TDC_SSC_STEP_SIZE_MASK | + DKL_PLL_TDC_FEED_FWD_GAIN_MASK); + + ret = true; +out: + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); + return ret; +} + static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state, @@ -2932,8 +3278,18 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, if (!(val & PLL_ENABLE)) goto out; - hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); - hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); + if (INTEL_GEN(dev_priv) >= 12) { + hw_state->cfgcr0 = I915_READ(TGL_DPLL_CFGCR0(id)); + hw_state->cfgcr1 = I915_READ(TGL_DPLL_CFGCR1(id)); + } else { + if (IS_ELKHARTLAKE(dev_priv) && id == DPLL_ID_EHL_DPLL4) { + hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(4)); + hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(4)); + } else { + hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); + hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); + } + } ret = true; out: @@ -2945,8 +3301,14 @@ static bool combo_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - return icl_pll_get_hw_state(dev_priv, pll, hw_state, - CNL_DPLL_ENABLE(pll->info->id)); + i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + } + + return icl_pll_get_hw_state(dev_priv, pll, hw_state, enable_reg); } static bool tbt_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -2961,10 +3323,24 @@ static void icl_dpll_write(struct drm_i915_private *dev_priv, { struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; const enum intel_dpll_id id = pll->info->id; + i915_reg_t cfgcr0_reg, cfgcr1_reg; - I915_WRITE(ICL_DPLL_CFGCR0(id), hw_state->cfgcr0); - I915_WRITE(ICL_DPLL_CFGCR1(id), hw_state->cfgcr1); - POSTING_READ(ICL_DPLL_CFGCR1(id)); + if (INTEL_GEN(dev_priv) >= 12) { + cfgcr0_reg = TGL_DPLL_CFGCR0(id); + cfgcr1_reg = TGL_DPLL_CFGCR1(id); + } else { + if (IS_ELKHARTLAKE(dev_priv) && id == DPLL_ID_EHL_DPLL4) { + cfgcr0_reg = ICL_DPLL_CFGCR0(4); + cfgcr1_reg = ICL_DPLL_CFGCR1(4); + } else { + cfgcr0_reg = ICL_DPLL_CFGCR0(id); + cfgcr1_reg = ICL_DPLL_CFGCR1(id); + } + } + + I915_WRITE(cfgcr0_reg, hw_state->cfgcr0); + I915_WRITE(cfgcr1_reg, hw_state->cfgcr1); + POSTING_READ(cfgcr1_reg); } static void icl_mg_pll_write(struct drm_i915_private *dev_priv, @@ -3017,6 +3393,75 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); } +static void dkl_pll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id); + u32 val; + + /* + * All registers programmed here have the same HIP_INDEX_REG even + * 
though on different building block + */ + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2)); + + /* All the registers are RMW */ + val = I915_READ(DKL_REFCLKIN_CTL(tc_port)); + val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; + val |= hw_state->mg_refclkin_ctl; + I915_WRITE(DKL_REFCLKIN_CTL(tc_port), val); + + val = I915_READ(DKL_CLKTOP2_CORECLKCTL1(tc_port)); + val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; + val |= hw_state->mg_clktop2_coreclkctl1; + I915_WRITE(DKL_CLKTOP2_CORECLKCTL1(tc_port), val); + + val = I915_READ(DKL_CLKTOP2_HSCLKCTL(tc_port)); + val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); + val |= hw_state->mg_clktop2_hsclkctl; + I915_WRITE(DKL_CLKTOP2_HSCLKCTL(tc_port), val); + + val = I915_READ(DKL_PLL_DIV0(tc_port)); + val &= ~(DKL_PLL_DIV0_INTEG_COEFF_MASK | + DKL_PLL_DIV0_PROP_COEFF_MASK | + DKL_PLL_DIV0_FBPREDIV_MASK | + DKL_PLL_DIV0_FBDIV_INT_MASK); + val |= hw_state->mg_pll_div0; + I915_WRITE(DKL_PLL_DIV0(tc_port), val); + + val = I915_READ(DKL_PLL_DIV1(tc_port)); + val &= ~(DKL_PLL_DIV1_IREF_TRIM_MASK | + DKL_PLL_DIV1_TDC_TARGET_CNT_MASK); + val |= hw_state->mg_pll_div1; + I915_WRITE(DKL_PLL_DIV1(tc_port), val); + + val = I915_READ(DKL_PLL_SSC(tc_port)); + val &= ~(DKL_PLL_SSC_IREF_NDIV_RATIO_MASK | + DKL_PLL_SSC_STEP_LEN_MASK | + DKL_PLL_SSC_STEP_NUM_MASK | + DKL_PLL_SSC_EN); + val |= hw_state->mg_pll_ssc; + I915_WRITE(DKL_PLL_SSC(tc_port), val); + + val = I915_READ(DKL_PLL_BIAS(tc_port)); + val &= ~(DKL_PLL_BIAS_FRAC_EN_H | + DKL_PLL_BIAS_FBDIV_FRAC_MASK); + val |= hw_state->mg_pll_bias; + I915_WRITE(DKL_PLL_BIAS(tc_port), val); + + val = I915_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port)); + val &= ~(DKL_PLL_TDC_SSC_STEP_SIZE_MASK | + DKL_PLL_TDC_FEED_FWD_GAIN_MASK); + val |= hw_state->mg_pll_tdc_coldst_bias; + I915_WRITE(DKL_PLL_TDC_COLDST_BIAS(tc_port), val); + + POSTING_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port)); +} + static void icl_pll_power_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, i915_reg_t enable_reg) @@ -3031,8 +3476,7 @@ static void icl_pll_power_enable(struct drm_i915_private *dev_priv, * The spec says we need to "wait" but it also says it should be * immediate. */ - if (intel_wait_for_register(&dev_priv->uncore, enable_reg, - PLL_POWER_STATE, PLL_POWER_STATE, 1)) + if (intel_de_wait_for_set(dev_priv, enable_reg, PLL_POWER_STATE, 1)) DRM_ERROR("PLL %d Power not enabled\n", pll->info->id); } @@ -3047,8 +3491,7 @@ static void icl_pll_enable(struct drm_i915_private *dev_priv, I915_WRITE(enable_reg, val); /* Timeout is actually 600us. */ - if (intel_wait_for_register(&dev_priv->uncore, enable_reg, - PLL_LOCK, PLL_LOCK, 1)) + if (intel_de_wait_for_set(dev_priv, enable_reg, PLL_LOCK, 1)) DRM_ERROR("PLL %d not locked\n", pll->info->id); } @@ -3057,6 +3500,19 @@ static void combo_pll_enable(struct drm_i915_private *dev_priv, { i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + + /* + * We need to disable DC states when this DPLL is enabled. + * This can be done by taking a reference on DPLL4 power + * domain. 
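
The Dekel (DKL) PHY accessors above first point HIP_INDEX_REG at bank 0x2 for the Type-C port and then read-modify-write each register, touching only the field masks the driver owns. A small self-contained model of that masked RMW step, with a toy register array standing in for MMIO and an illustrative mask value:

#include <stdint.h>
#include <stdio.h>

/* Toy register file standing in for the DKL PHY MMIO window. */
static uint32_t regs[8];

static uint32_t reg_read(unsigned int idx)              { return regs[idx]; }
static void     reg_write(unsigned int idx, uint32_t v) { regs[idx] = v; }

/*
 * Read-modify-write: clear only the bits covered by @mask and OR in the
 * pre-masked value saved in the software PLL state, leaving fields owned
 * by other agents untouched.
 */
static void rmw_field(unsigned int idx, uint32_t mask, uint32_t hw_state_val)
{
	uint32_t val = reg_read(idx);

	val &= ~mask;
	val |= hw_state_val & mask;
	reg_write(idx, val);
}

int main(void)
{
	const uint32_t REFCLKIN_OD_2_MUX_MASK = 0x7 << 8; /* illustrative mask */

	regs[0] = 0xdead0300;                    /* other fields already programmed */
	rmw_field(0, REFCLKIN_OD_2_MUX_MASK, 0x2 << 8);
	printf("reg0 = 0x%08x\n", regs[0]);      /* 0xdead0200: only the field moved */
	return 0;
}
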
+ */ + pll->wakeref = intel_display_power_get(dev_priv, + POWER_DOMAIN_DPLL_DC_OFF); + } + icl_pll_power_enable(dev_priv, pll, enable_reg); icl_dpll_write(dev_priv, pll); @@ -3098,7 +3554,10 @@ static void mg_pll_enable(struct drm_i915_private *dev_priv, icl_pll_power_enable(dev_priv, pll, enable_reg); - icl_mg_pll_write(dev_priv, pll); + if (INTEL_GEN(dev_priv) >= 12) + dkl_pll_write(dev_priv, pll); + else + icl_mg_pll_write(dev_priv, pll); /* * DVFS pre sequence would be here, but in our driver the cdclk code @@ -3130,8 +3589,7 @@ static void icl_pll_disable(struct drm_i915_private *dev_priv, I915_WRITE(enable_reg, val); /* Timeout is actually 1us. */ - if (intel_wait_for_register(&dev_priv->uncore, - enable_reg, PLL_LOCK, 0, 1)) + if (intel_de_wait_for_clear(dev_priv, enable_reg, PLL_LOCK, 1)) DRM_ERROR("PLL %d locked\n", pll->info->id); /* DVFS post sequence would be here. See the comment above. */ @@ -3144,15 +3602,26 @@ static void icl_pll_disable(struct drm_i915_private *dev_priv, * The spec says we need to "wait" but it also says it should be * immediate. */ - if (intel_wait_for_register(&dev_priv->uncore, - enable_reg, PLL_POWER_STATE, 0, 1)) + if (intel_de_wait_for_clear(dev_priv, enable_reg, PLL_POWER_STATE, 1)) DRM_ERROR("PLL %d Power not disabled\n", pll->info->id); } static void combo_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - icl_pll_disable(dev_priv, pll, CNL_DPLL_ENABLE(pll->info->id)); + i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + icl_pll_disable(dev_priv, pll, enable_reg); + + intel_display_power_put(dev_priv, POWER_DOMAIN_DPLL_DC_OFF, + pll->wakeref); + return; + } + + icl_pll_disable(dev_priv, pll, enable_reg); } static void tbt_pll_disable(struct drm_i915_private *dev_priv, @@ -3223,19 +3692,50 @@ static const struct dpll_info icl_plls[] = { static const struct intel_dpll_mgr icl_pll_mgr = { .dpll_info = icl_plls, - .get_dpll = icl_get_dpll, + .get_dplls = icl_get_dplls, + .put_dplls = icl_put_dplls, + .update_active_dpll = icl_update_active_dpll, .dump_hw_state = icl_dump_hw_state, }; static const struct dpll_info ehl_plls[] = { { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "DPLL 4", &combo_pll_funcs, DPLL_ID_EHL_DPLL4, 0 }, { }, }; static const struct intel_dpll_mgr ehl_pll_mgr = { .dpll_info = ehl_plls, - .get_dpll = icl_get_dpll, + .get_dplls = icl_get_dplls, + .put_dplls = icl_put_dplls, + .dump_hw_state = icl_dump_hw_state, +}; + +static const struct intel_shared_dpll_funcs dkl_pll_funcs = { + .enable = mg_pll_enable, + .disable = mg_pll_disable, + .get_hw_state = dkl_pll_get_hw_state, +}; + +static const struct dpll_info tgl_plls[] = { + { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, + { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "TBT PLL", &tbt_pll_funcs, DPLL_ID_ICL_TBTPLL, 0 }, + { "TC PLL 1", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, + { "TC PLL 2", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, + { "TC PLL 3", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, + { "TC PLL 4", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, + { "TC PLL 5", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL5, 0 }, + { "TC PLL 6", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL6, 0 }, + { }, +}; + +static const struct intel_dpll_mgr tgl_pll_mgr = { + .dpll_info = tgl_plls, + .get_dplls = icl_get_dplls, + .put_dplls = icl_put_dplls, + .update_active_dpll = icl_update_active_dpll, 
.dump_hw_state = icl_dump_hw_state, }; @@ -3252,7 +3752,9 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_ELKHARTLAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 12) + dpll_mgr = &tgl_pll_mgr; + else if (IS_ELKHARTLAKE(dev_priv)) dpll_mgr = &ehl_pll_mgr; else if (INTEL_GEN(dev_priv) >= 11) dpll_mgr = &icl_pll_mgr; @@ -3287,50 +3789,87 @@ void intel_shared_dpll_init(struct drm_device *dev) } /** - * intel_get_shared_dpll - get a shared DPLL for CRTC and encoder combination - * @crtc_state: atomic state for the crtc + * intel_reserve_shared_dplls - reserve DPLLs for CRTC and encoder combination + * @state: atomic state + * @crtc: CRTC to reserve DPLLs for * @encoder: encoder * - * Find an appropriate DPLL for the given CRTC and encoder combination. A - * reference from the @crtc_state to the returned pll is registered in the - * atomic state. That configuration is made effective by calling - * intel_shared_dpll_swap_state(). The reference should be released by calling - * intel_release_shared_dpll(). + * This function reserves all required DPLLs for the given CRTC and encoder + * combination in the current atomic commit @state and the new @crtc atomic + * state. + * + * The new configuration in the atomic commit @state is made effective by + * calling intel_shared_dpll_swap_state(). + * + * The reserved DPLLs should be released by calling + * intel_release_shared_dplls(). * * Returns: - * A shared DPLL to be used by @crtc_state and @encoder. + * True if all required DPLLs were successfully reserved. */ -struct intel_shared_dpll * -intel_get_shared_dpll(struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) +bool intel_reserve_shared_dplls(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); const struct intel_dpll_mgr *dpll_mgr = dev_priv->dpll_mgr; if (WARN_ON(!dpll_mgr)) - return NULL; + return false; - return dpll_mgr->get_dpll(crtc_state, encoder); + return dpll_mgr->get_dplls(state, crtc, encoder); } /** - * intel_release_shared_dpll - end use of DPLL by CRTC in atomic state - * @dpll: dpll in use by @crtc - * @crtc: crtc + * intel_release_shared_dplls - end use of DPLLs by CRTC in atomic state * @state: atomic state + * @crtc: crtc from which the DPLLs are to be released + * + * This function releases all DPLLs reserved by intel_reserve_shared_dplls() + * from the current atomic commit @state and the old @crtc atomic state. * - * This function releases the reference from @crtc to @dpll from the - * atomic @state. The new configuration is made effective by calling - * intel_shared_dpll_swap_state(). + * The new configuration in the atomic commit @state is made effective by + * calling intel_shared_dpll_swap_state(). */ -void intel_release_shared_dpll(struct intel_shared_dpll *dpll, - struct intel_crtc *crtc, - struct drm_atomic_state *state) +void intel_release_shared_dplls(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_shared_dpll_state *shared_dpll_state; + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_dpll_mgr *dpll_mgr = dev_priv->dpll_mgr; + + /* + * FIXME: this function is called for every platform having a + * compute_clock hook, even though the platform doesn't yet support + * the shared DPLL framework and intel_reserve_shared_dplls() is not + * called on those. 
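
On EHL, DPLL4 in the paths above reuses MG_PLL_ENABLE(0) and must keep DC states off while it runs, which combo_pll_enable()/combo_pll_disable() handle by holding a display power reference for the PLL's lifetime. A toy refcount model of that pairing, with hypothetical names in place of the real power-domain code:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the DC-off power domain: DC states allowed only at refcount 0. */
static int dc_off_refcount;

static int  power_get(void)        { return ++dc_off_refcount; /* "wakeref" */ }
static void power_put(int wakeref) { (void)wakeref; --dc_off_refcount; }

struct toy_pll { bool is_ehl_dpll4; bool enabled; int wakeref; };

static void pll_enable(struct toy_pll *pll)
{
	if (pll->is_ehl_dpll4)
		pll->wakeref = power_get(); /* block DC states while enabled */
	pll->enabled = true;
}

static void pll_disable(struct toy_pll *pll)
{
	pll->enabled = false;
	if (pll->is_ehl_dpll4)
		power_put(pll->wakeref);    /* allow DC states again */
}

int main(void)
{
	struct toy_pll dpll4 = { .is_ehl_dpll4 = true };

	pll_enable(&dpll4);
	assert(dc_off_refcount == 1);
	pll_disable(&dpll4);
	assert(dc_off_refcount == 0);
	printf("DC-off refcount balanced\n");
	return 0;
}
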
+ */ + if (!dpll_mgr) + return; + + dpll_mgr->put_dplls(state, crtc); +} + +/** + * intel_update_active_dpll - update the active DPLL for a CRTC/encoder + * @state: atomic state + * @crtc: the CRTC for which to update the active DPLL + * @encoder: encoder determining the type of port DPLL + * + * Update the active DPLL for the given @crtc/@encoder in @crtc's atomic state, + * from the port DPLLs reserved previously by intel_reserve_shared_dplls(). The + * DPLL selected will be based on the current mode of the encoder's port. + */ +void intel_update_active_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + const struct intel_dpll_mgr *dpll_mgr = dev_priv->dpll_mgr; + + if (WARN_ON(!dpll_mgr)) + return; - shared_dpll_state = intel_atomic_get_shared_dpll_state(state); - shared_dpll_state[dpll->info->id].crtc_mask &= ~(1 << crtc->pipe); + dpll_mgr->update_active_dpll(state, crtc, encoder); } /** diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h index d0570414f3d1..2a104c64291d 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h @@ -28,6 +28,7 @@ #include <linux/types.h> #include "intel_display.h" +#include "intel_wakeref.h" /*FIXME: Move this to a more appropriate place. */ #define abs_diff(a, b) ({ \ @@ -36,9 +37,9 @@ (void) (&__a == &__b); \ __a > __b ? (__a - __b) : (__b - __a); }) -struct drm_atomic_state; struct drm_device; struct drm_i915_private; +struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; struct intel_encoder; @@ -110,35 +111,59 @@ enum intel_dpll_id { /** - * @DPLL_ID_ICL_DPLL0: ICL combo PHY DPLL0 + * @DPLL_ID_ICL_DPLL0: ICL/TGL combo PHY DPLL0 */ DPLL_ID_ICL_DPLL0 = 0, /** - * @DPLL_ID_ICL_DPLL1: ICL combo PHY DPLL1 + * @DPLL_ID_ICL_DPLL1: ICL/TGL combo PHY DPLL1 */ DPLL_ID_ICL_DPLL1 = 1, /** - * @DPLL_ID_ICL_TBTPLL: ICL TBT PLL + * @DPLL_ID_EHL_DPLL4: EHL combo PHY DPLL4 + */ + DPLL_ID_EHL_DPLL4 = 2, + /** + * @DPLL_ID_ICL_TBTPLL: ICL/TGL TBT PLL */ DPLL_ID_ICL_TBTPLL = 2, /** - * @DPLL_ID_ICL_MGPLL1: ICL MG PLL 1 port 1 (C) + * @DPLL_ID_ICL_MGPLL1: ICL MG PLL 1 port 1 (C), + * TGL TC PLL 1 port 1 (TC1) */ DPLL_ID_ICL_MGPLL1 = 3, /** * @DPLL_ID_ICL_MGPLL2: ICL MG PLL 1 port 2 (D) + * TGL TC PLL 1 port 2 (TC2) */ DPLL_ID_ICL_MGPLL2 = 4, /** * @DPLL_ID_ICL_MGPLL3: ICL MG PLL 1 port 3 (E) + * TGL TC PLL 1 port 3 (TC3) */ DPLL_ID_ICL_MGPLL3 = 5, /** * @DPLL_ID_ICL_MGPLL4: ICL MG PLL 1 port 4 (F) + * TGL TC PLL 1 port 4 (TC4) */ DPLL_ID_ICL_MGPLL4 = 6, + /** + * @DPLL_ID_TGL_MGPLL5: TGL TC PLL port 5 (TC5) + */ + DPLL_ID_TGL_MGPLL5 = 7, + /** + * @DPLL_ID_TGL_MGPLL6: TGL TC PLL port 6 (TC6) + */ + DPLL_ID_TGL_MGPLL6 = 8, +}; + +#define I915_NUM_PLLS 9 + +enum icl_port_dpll_id { + ICL_PORT_DPLL_DEFAULT, + ICL_PORT_DPLL_MG_PHY, + + ICL_PORT_DPLL_COUNT, }; -#define I915_NUM_PLLS 7 struct intel_dpll_hw_state { /* i9xx, pch plls */ @@ -195,7 +220,7 @@ struct intel_dpll_hw_state { * future state which would be applied by an atomic mode set (stored in * a struct &intel_atomic_state). * - * See also intel_get_shared_dpll() and intel_release_shared_dpll(). + * See also intel_reserve_shared_dplls() and intel_release_shared_dplls(). 
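
The header changes above intentionally let DPLL_ID_EHL_DPLL4 and DPLL_ID_ICL_TBTPLL share the value 2 (EHL has no TBT PLL and TGL has no DPLL4), extend the ID space to nine PLLs for TGL's TC5/TC6, and add a two-slot per-port enum. A compile-time restatement of those invariants, using toy names but the values from the diff:

enum toy_dpll_id {
	TOY_DPLL0 = 0,
	TOY_DPLL1 = 1,
	TOY_EHL_DPLL4 = 2,   /* EHL only: combo PHY DPLL4           */
	TOY_TBTPLL = 2,      /* ICL/TGL only: shares the ID with it */
	TOY_MGPLL1 = 3,
	TOY_MGPLL2 = 4,
	TOY_MGPLL3 = 5,
	TOY_MGPLL4 = 6,
	TOY_TGL_MGPLL5 = 7,
	TOY_TGL_MGPLL6 = 8,
};

#define TOY_NUM_PLLS 9

enum toy_port_dpll_slot {
	TOY_PORT_DPLL_DEFAULT,  /* combo or TBT candidate          */
	TOY_PORT_DPLL_MG_PHY,   /* MG (ICL) / DKL (TGL) candidate  */
	TOY_PORT_DPLL_COUNT,
};

/* The aliased IDs never coexist on one platform, and the count covers them all. */
_Static_assert(TOY_EHL_DPLL4 == TOY_TBTPLL, "IDs intentionally overlap");
_Static_assert(TOY_TGL_MGPLL6 + 1 == TOY_NUM_PLLS, "ID space sized for TGL");
_Static_assert(TOY_PORT_DPLL_COUNT == 2, "one default + one MG PHY slot per port");

int main(void) { return 0; }
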
*/ struct intel_shared_dpll_state { /** @@ -312,6 +337,12 @@ struct intel_shared_dpll { * @info: platform specific info */ const struct dpll_info *info; + + /** + * @wakeref: In some platforms a device-level runtime pm reference may + * need to be grabbed to disable DC states while this DPLL is enabled + */ + intel_wakeref_t wakeref; }; #define SKL_DPLL0 0 @@ -331,15 +362,20 @@ void assert_shared_dpll(struct drm_i915_private *dev_priv, bool state); #define assert_shared_dpll_enabled(d, p) assert_shared_dpll(d, p, true) #define assert_shared_dpll_disabled(d, p) assert_shared_dpll(d, p, false) -struct intel_shared_dpll *intel_get_shared_dpll(struct intel_crtc_state *state, - struct intel_encoder *encoder); -void intel_release_shared_dpll(struct intel_shared_dpll *dpll, - struct intel_crtc *crtc, - struct drm_atomic_state *state); +bool intel_reserve_shared_dplls(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder); +void intel_release_shared_dplls(struct intel_atomic_state *state, + struct intel_crtc *crtc); +void icl_set_active_port_dpll(struct intel_crtc_state *crtc_state, + enum icl_port_dpll_id port_dpll_id); +void intel_update_active_dpll(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder); void intel_prepare_shared_dpll(const struct intel_crtc_state *crtc_state); void intel_enable_shared_dpll(const struct intel_crtc_state *crtc_state); void intel_disable_shared_dpll(const struct intel_crtc_state *crtc_state); -void intel_shared_dpll_swap_state(struct drm_atomic_state *state); +void intel_shared_dpll_swap_state(struct intel_atomic_state *state); void intel_shared_dpll_init(struct drm_device *dev); void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c new file mode 100644 index 000000000000..ada006a690df --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + * + */ + +#include "i915_drv.h" +#include "intel_display_types.h" + +#define DSB_BUF_SIZE (2 * PAGE_SIZE) + +/** + * DOC: DSB + * + * A DSB (Display State Buffer) is a queue of MMIO instructions in the memory + * which can be offloaded to DSB HW in Display Controller. DSB HW is a DMA + * engine that can be programmed to download the DSB from memory. + * It allows driver to batch submit display HW programming. This helps to + * reduce loading time and CPU activity, thereby making the context switch + * faster. DSB Support added from Gen12 Intel graphics based platform. + * + * DSB's can access only the pipe, plane, and transcoder Data Island Packet + * registers. + * + * DSB HW can support only register writes (both indexed and direct MMIO + * writes). There are no registers reads possible with DSB HW engine. + */ + +/* DSB opcodes. 
*/ +#define DSB_OPCODE_SHIFT 24 +#define DSB_OPCODE_MMIO_WRITE 0x1 +#define DSB_OPCODE_INDEXED_WRITE 0x9 +#define DSB_BYTE_EN 0xF +#define DSB_BYTE_EN_SHIFT 20 +#define DSB_REG_VALUE_MASK 0xfffff + +static inline bool is_dsb_busy(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + + return DSB_STATUS & I915_READ(DSB_CTRL(pipe, dsb->id)); +} + +static inline bool intel_dsb_enable_engine(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 dsb_ctrl; + + dsb_ctrl = I915_READ(DSB_CTRL(pipe, dsb->id)); + if (DSB_STATUS & dsb_ctrl) { + DRM_DEBUG_KMS("DSB engine is busy.\n"); + return false; + } + + dsb_ctrl |= DSB_ENABLE; + I915_WRITE(DSB_CTRL(pipe, dsb->id), dsb_ctrl); + + POSTING_READ(DSB_CTRL(pipe, dsb->id)); + return true; +} + +static inline bool intel_dsb_disable_engine(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 dsb_ctrl; + + dsb_ctrl = I915_READ(DSB_CTRL(pipe, dsb->id)); + if (DSB_STATUS & dsb_ctrl) { + DRM_DEBUG_KMS("DSB engine is busy.\n"); + return false; + } + + dsb_ctrl &= ~DSB_ENABLE; + I915_WRITE(DSB_CTRL(pipe, dsb->id), dsb_ctrl); + + POSTING_READ(DSB_CTRL(pipe, dsb->id)); + return true; +} + +/** + * intel_dsb_get() - Allocate DSB context and return a DSB instance. + * @crtc: intel_crtc structure to get pipe info. + * + * This function provides handle of a DSB instance, for the further DSB + * operations. + * + * Returns: address of Intel_dsb instance requested for. + * Failure: Returns the same DSB instance, but without a command buffer. + */ + +struct intel_dsb * +intel_dsb_get(struct intel_crtc *crtc) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *i915 = to_i915(dev); + struct intel_dsb *dsb = &crtc->dsb; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *buf; + intel_wakeref_t wakeref; + + if (!HAS_DSB(i915)) + return dsb; + + if (dsb->refcount++ != 0) + return dsb; + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + obj = i915_gem_object_create_internal(i915, DSB_BUF_SIZE); + if (IS_ERR(obj)) { + DRM_ERROR("Gem object creation failed\n"); + goto out; + } + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + DRM_ERROR("Vma creation failed\n"); + i915_gem_object_put(obj); + goto out; + } + + buf = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + if (IS_ERR(buf)) { + DRM_ERROR("Command buffer creation failed\n"); + goto out; + } + + dsb->id = DSB1; + dsb->vma = vma; + dsb->cmd_buf = buf; + +out: + /* + * On error dsb->cmd_buf will continue to be NULL, making the writes + * pass-through. Leave the dangling ref to be removed later by the + * corresponding intel_dsb_put(): the important error message will + * already be logged above. + */ + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + + return dsb; +} + +/** + * intel_dsb_put() - To destroy DSB context. + * @dsb: intel_dsb structure. + * + * This function destroys the DSB context allocated by a dsb_get(), by + * unpinning and releasing the VMA object associated with it. 
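
intel_dsb_get() above is refcounted per CRTC and deliberately leaves dsb->cmd_buf NULL on any allocation failure so the write helpers fall back to plain MMIO. A toy model of that get/put and pass-through behaviour, with made-up structures and no GEM objects involved:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_dsb {
	long refcount;
	unsigned int *cmd_buf;  /* NULL => every write goes straight to MMIO */
	int free_pos;
};

static unsigned int fake_mmio[16];

static void toy_dsb_get(struct toy_dsb *dsb, bool alloc_ok)
{
	if (dsb->refcount++ != 0)
		return;                           /* already set up */
	if (alloc_ok)
		dsb->cmd_buf = calloc(64, sizeof(*dsb->cmd_buf));
	/* on failure cmd_buf stays NULL and writes become pass-through */
}

static void toy_dsb_put(struct toy_dsb *dsb)
{
	if (--dsb->refcount == 0) {
		free(dsb->cmd_buf);
		dsb->cmd_buf = NULL;
		dsb->free_pos = 0;
	}
}

static void toy_dsb_reg_write(struct toy_dsb *dsb, int reg, unsigned int val)
{
	if (!dsb->cmd_buf) {
		fake_mmio[reg] = val;             /* MMIO fallback */
		return;
	}
	dsb->cmd_buf[dsb->free_pos++] = val;      /* queued for the DSB engine */
}

int main(void)
{
	struct toy_dsb dsb = { 0 };

	toy_dsb_get(&dsb, false);                 /* pretend allocation failed */
	toy_dsb_reg_write(&dsb, 3, 0xabcd);
	printf("mmio[3]=0x%x, queued=%d\n", fake_mmio[3], dsb.free_pos);
	toy_dsb_put(&dsb);
	return 0;
}
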
+ */ + +void intel_dsb_put(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + + if (!HAS_DSB(i915)) + return; + + if (WARN_ON(dsb->refcount == 0)) + return; + + if (--dsb->refcount == 0) { + i915_vma_unpin_and_release(&dsb->vma, I915_VMA_RELEASE_MAP); + dsb->cmd_buf = NULL; + dsb->free_pos = 0; + dsb->ins_start_offset = 0; + } +} + +/** + * intel_dsb_indexed_reg_write() -Write to the DSB context for auto + * increment register. + * @dsb: intel_dsb structure. + * @reg: register address. + * @val: value. + * + * This function is used for writing register-value pair in command + * buffer of DSB for auto-increment register. During command buffer overflow, + * a warning is thrown and rest all erroneous condition register programming + * is done through mmio write. + */ + +void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, + u32 val) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 *buf = dsb->cmd_buf; + u32 reg_val; + + if (!buf) { + I915_WRITE(reg, val); + return; + } + + if (WARN_ON(dsb->free_pos >= DSB_BUF_SIZE)) { + DRM_DEBUG_KMS("DSB buffer overflow\n"); + return; + } + + /* + * For example the buffer will look like below for 3 dwords for auto + * increment register: + * +--------------------------------------------------------+ + * | size = 3 | offset &| value1 | value2 | value3 | zero | + * | | opcode | | | | | + * +--------------------------------------------------------+ + * + + + + + + + + * 0 4 8 12 16 20 24 + * Byte + * + * As every instruction is 8 byte aligned the index of dsb instruction + * will start always from even number while dealing with u32 array. If + * we are writing odd no of dwords, Zeros will be added in the end for + * padding. + */ + reg_val = buf[dsb->ins_start_offset + 1] & DSB_REG_VALUE_MASK; + if (reg_val != i915_mmio_reg_offset(reg)) { + /* Every instruction should be 8 byte aligned. */ + dsb->free_pos = ALIGN(dsb->free_pos, 2); + + dsb->ins_start_offset = dsb->free_pos; + + /* Update the size. */ + buf[dsb->free_pos++] = 1; + + /* Update the opcode and reg. */ + buf[dsb->free_pos++] = (DSB_OPCODE_INDEXED_WRITE << + DSB_OPCODE_SHIFT) | + i915_mmio_reg_offset(reg); + + /* Update the value. */ + buf[dsb->free_pos++] = val; + } else { + /* Update the new value. */ + buf[dsb->free_pos++] = val; + + /* Update the size. */ + buf[dsb->ins_start_offset]++; + } + + /* if number of data words is odd, then the last dword should be 0.*/ + if (dsb->free_pos & 0x1) + buf[dsb->free_pos] = 0; +} + +/** + * intel_dsb_reg_write() -Write to the DSB context for normal + * register. + * @dsb: intel_dsb structure. + * @reg: register address. + * @val: value. + * + * This function is used for writing register-value pair in command + * buffer of DSB. During command buffer overflow, a warning is thrown + * and rest all erroneous condition register programming is done + * through mmio write. 
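
The indexed-write packing above stores a size dword, an opcode-plus-offset dword, the values, and a zero pad so every instruction stays 8-byte aligned. A stand-alone sketch of that layout using the opcode and shift values from the diff; the register offset in the example is purely illustrative:

#include <stdint.h>
#include <stdio.h>

#define OPCODE_SHIFT          24
#define OPCODE_INDEXED_WRITE  0x9
#define REG_OFFSET_MASK       0xfffff   /* low 20 bits carry the MMIO offset */

/*
 * Pack one indexed-write instruction:
 *   dword0: number of data dwords that follow
 *   dword1: (opcode << 24) | register offset
 *   dword2..n: values; a trailing zero pads odd counts so the next
 *              instruction starts 8-byte (even-dword) aligned.
 */
static int pack_indexed_write(uint32_t *buf, uint32_t reg,
			      const uint32_t *vals, int n)
{
	int pos = 0;

	buf[pos++] = n;
	buf[pos++] = (OPCODE_INDEXED_WRITE << OPCODE_SHIFT) |
		     (reg & REG_OFFSET_MASK);
	for (int i = 0; i < n; i++)
		buf[pos++] = vals[i];
	if (pos & 1)
		buf[pos++] = 0;                 /* zero padding, as in the diagram above */
	return pos;                             /* dwords consumed */
}

int main(void)
{
	uint32_t buf[8];
	const uint32_t lut[3] = { 0x111, 0x222, 0x333 };
	int used = pack_indexed_write(buf, 0x49510 /* illustrative offset */, lut, 3);

	for (int i = 0; i < used; i++)
		printf("dword%d: 0x%08x\n", i, buf[i]);
	return 0;
}
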
+ */ +void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 *buf = dsb->cmd_buf; + + if (!buf) { + I915_WRITE(reg, val); + return; + } + + if (WARN_ON(dsb->free_pos >= DSB_BUF_SIZE)) { + DRM_DEBUG_KMS("DSB buffer overflow\n"); + return; + } + + dsb->ins_start_offset = dsb->free_pos; + buf[dsb->free_pos++] = val; + buf[dsb->free_pos++] = (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) | + (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | + i915_mmio_reg_offset(reg); +} + +/** + * intel_dsb_commit() - Trigger workload execution of DSB. + * @dsb: intel_dsb structure. + * + * This function is used to do actual write to hardware using DSB. + * On errors, fall back to MMIO. Also this function help to reset the context. + */ +void intel_dsb_commit(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + enum pipe pipe = crtc->pipe; + u32 tail; + + if (!dsb->free_pos) + return; + + if (!intel_dsb_enable_engine(dsb)) + goto reset; + + if (is_dsb_busy(dsb)) { + DRM_ERROR("HEAD_PTR write failed - dsb engine is busy.\n"); + goto reset; + } + I915_WRITE(DSB_HEAD(pipe, dsb->id), i915_ggtt_offset(dsb->vma)); + + tail = ALIGN(dsb->free_pos * 4, CACHELINE_BYTES); + if (tail > dsb->free_pos * 4) + memset(&dsb->cmd_buf[dsb->free_pos], 0, + (tail - dsb->free_pos * 4)); + + if (is_dsb_busy(dsb)) { + DRM_ERROR("TAIL_PTR write failed - dsb engine is busy.\n"); + goto reset; + } + DRM_DEBUG_KMS("DSB execution started - head 0x%x, tail 0x%x\n", + i915_ggtt_offset(dsb->vma), tail); + I915_WRITE(DSB_TAIL(pipe, dsb->id), i915_ggtt_offset(dsb->vma) + tail); + if (wait_for(!is_dsb_busy(dsb), 1)) { + DRM_ERROR("Timed out waiting for DSB workload completion.\n"); + goto reset; + } + +reset: + dsb->free_pos = 0; + dsb->ins_start_offset = 0; + intel_dsb_disable_engine(dsb); +} diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h new file mode 100644 index 000000000000..395ef9ce558e --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef _INTEL_DSB_H +#define _INTEL_DSB_H + +#include <linux/types.h> + +#include "i915_reg.h" + +struct intel_crtc; +struct i915_vma; + +enum dsb_id { + INVALID_DSB = -1, + DSB1, + DSB2, + DSB3, + MAX_DSB_PER_PIPE +}; + +struct intel_dsb { + long refcount; + enum dsb_id id; + u32 *cmd_buf; + struct i915_vma *vma; + + /* + * free_pos will point the first free entry position + * and help in calculating tail of command buffer. + */ + int free_pos; + + /* + * ins_start_offset will help to store start address of the dsb + * instuction and help in identifying the batch of auto-increment + * register. 
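
A direct DSB write above is two dwords (value, then opcode | byte enables | offset), and intel_dsb_commit() rounds the tail up to a cacheline and zero-fills the gap before handing the buffer to the engine. A minimal model of both steps, assuming i915's usual 64-byte CACHELINE_BYTES; the register offset is again illustrative:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define OPCODE_SHIFT      24
#define OPCODE_MMIO_WRITE 0x1
#define BYTE_EN           0xf
#define BYTE_EN_SHIFT     20
#define CACHELINE         64            /* bytes; assumed CACHELINE_BYTES value */

/* One direct write is two dwords: the value, then opcode | byte-enables | offset. */
static int pack_mmio_write(uint32_t *buf, int pos, uint32_t reg, uint32_t val)
{
	buf[pos++] = val;
	buf[pos++] = (OPCODE_MMIO_WRITE << OPCODE_SHIFT) |
		     (BYTE_EN << BYTE_EN_SHIFT) | reg;
	return pos;
}

/* Commit-time tail: round the used bytes up to a cacheline and zero the gap. */
static unsigned int align_tail(uint32_t *buf, int free_pos)
{
	unsigned int used = free_pos * 4;
	unsigned int tail = (used + CACHELINE - 1) & ~(CACHELINE - 1);

	if (tail > used)
		memset((uint8_t *)buf + used, 0, tail - used);
	return tail;
}

int main(void)
{
	uint32_t buf[32] = { 0 };
	int pos = 0;

	pos = pack_mmio_write(buf, pos, 0x70180 /* illustrative offset */, 0x12345678);
	printf("tail = %u bytes\n", align_tail(buf, pos));     /* 64 */
	return 0;
}
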
+ */ + u32 ins_start_offset; +}; + +struct intel_dsb * +intel_dsb_get(struct intel_crtc *crtc); +void intel_dsb_put(struct intel_dsb *dsb); +void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val); +void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, + u32 val); +void intel_dsb_commit(struct intel_dsb *dsb); + +#endif diff --git a/drivers/gpu/drm/i915/display/intel_dsi.c b/drivers/gpu/drm/i915/display/intel_dsi.c index 5fec02aceaed..a2a937109a5a 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.c +++ b/drivers/gpu/drm/i915/display/intel_dsi.c @@ -55,6 +55,7 @@ int intel_dsi_get_modes(struct drm_connector *connector) enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); const struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; @@ -73,7 +74,7 @@ enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector, return MODE_CLOCK_HIGH; } - return MODE_OK; + return intel_mode_valid_max_plane_size(dev_priv, mode); } struct intel_dsi_host *intel_dsi_host_init(struct intel_dsi *intel_dsi, diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h index 6d20434636cd..19f78a4022d3 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.h +++ b/drivers/gpu/drm/i915/display/intel_dsi.h @@ -26,7 +26,8 @@ #include <drm/drm_crtc.h> #include <drm/drm_mipi_dsi.h> -#include "intel_drv.h" + +#include "intel_display_types.h" #define INTEL_DSI_VIDEO_MODE 0 #define INTEL_DSI_COMMAND_MODE 1 @@ -44,13 +45,17 @@ struct intel_dsi { struct intel_dsi_host *dsi_hosts[I915_MAX_PORTS]; intel_wakeref_t io_wakeref[I915_MAX_PORTS]; - /* GPIO Desc for CRC based Panel control */ + /* GPIO Desc for panel and backlight control */ struct gpio_desc *gpio_panel; + struct gpio_desc *gpio_backlight; struct intel_connector *attached_connector; - /* bit mask of ports being driven */ - u16 ports; + /* bit mask of ports (vlv dsi) or phys (icl dsi) being driven */ + union { + u16 ports; /* VLV DSI */ + u16 phys; /* ICL DSI */ + }; /* if true, use HS mode, otherwise LP */ bool hs; @@ -64,6 +69,9 @@ struct intel_dsi { /* number of DSI lanes */ unsigned int lane_count; + /* i2c bus associated with the slave device */ + int i2c_bus_num; + /* * video mode pixel format * @@ -132,11 +140,14 @@ static inline struct intel_dsi_host *to_intel_dsi_host(struct mipi_dsi_host *h) return container_of(h, struct intel_dsi_host, base); } -#define for_each_dsi_port(__port, __ports_mask) for_each_port_masked(__port, __ports_mask) +#define for_each_dsi_port(__port, __ports_mask) \ + for_each_port_masked(__port, __ports_mask) +#define for_each_dsi_phy(__phy, __phys_mask) \ + for_each_phy_masked(__phy, __phys_mask) -static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder) +static inline struct intel_dsi *enc_to_intel_dsi(struct intel_encoder *encoder) { - return container_of(encoder, struct intel_dsi, base.base); + return container_of(&encoder->base, struct intel_dsi, base.base); } static inline bool is_vid_mode(struct intel_dsi *intel_dsi) @@ -151,7 +162,7 @@ static inline bool is_cmd_mode(struct intel_dsi *intel_dsi) static inline u16 intel_dsi_encoder_ports(struct intel_encoder *encoder) { - return enc_to_intel_dsi(&encoder->base)->ports; + return enc_to_intel_dsi(encoder)->ports; } /* 
icl_dsi.c */ @@ -196,6 +207,8 @@ void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); /* intel_dsi_vbt.c */ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id); +void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on); +void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi); void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, enum mipi_seq seq_id); void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec); diff --git a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c index 8c33262cb0b2..c87838843d0b 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c @@ -27,7 +27,7 @@ #include <video/mipi_display.h> #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dsi.h" #include "intel_dsi_dcs_backlight.h" @@ -46,7 +46,7 @@ static u32 dcs_get_backlight(struct intel_connector *connector) { struct intel_encoder *encoder = connector->encoder; - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct mipi_dsi_device *dsi_device; u8 data = 0; enum port port; @@ -64,7 +64,7 @@ static u32 dcs_get_backlight(struct intel_connector *connector) static void dcs_set_backlight(const struct drm_connector_state *conn_state, u32 level) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder)); struct mipi_dsi_device *dsi_device; u8 data = level; enum port port; @@ -79,7 +79,7 @@ static void dcs_set_backlight(const struct drm_connector_state *conn_state, u32 static void dcs_disable_backlight(const struct drm_connector_state *conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder)); struct mipi_dsi_device *dsi_device; enum port port; @@ -113,7 +113,7 @@ static void dcs_disable_backlight(const struct drm_connector_state *conn_state) static void dcs_enable_backlight(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder)); struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel; struct mipi_dsi_device *dsi_device; enum port port; diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index e5b178660408..04f953ba8f00 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -25,7 +25,10 @@ */ #include <linux/gpio/consumer.h> +#include <linux/gpio/machine.h> #include <linux/mfd/intel_soc_pmic.h> +#include <linux/pinctrl/consumer.h> +#include <linux/pinctrl/machine.h> #include <linux/slab.h> #include <asm/intel-mid.h> @@ -38,7 +41,7 @@ #include <video/mipi_display.h> #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dsi.h" #include "intel_sideband.h" @@ -83,6 +86,12 @@ static struct gpio_map vlv_gpio_table[] = { { VLV_GPIO_NC_11_PANEL1_BKLTCTL }, }; +struct i2c_adapter_lookup { + u16 slave_addr; + struct intel_dsi *intel_dsi; + acpi_handle dev_handle; +}; + #define CHV_GPIO_IDX_START_N 0 #define 
CHV_GPIO_IDX_START_E 73 #define CHV_GPIO_IDX_START_SW 100 @@ -375,11 +384,112 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) return data; } +#ifdef CONFIG_ACPI +static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) +{ + struct i2c_adapter_lookup *lookup = data; + struct intel_dsi *intel_dsi = lookup->intel_dsi; + struct acpi_resource_i2c_serialbus *sb; + struct i2c_adapter *adapter; + acpi_handle adapter_handle; + acpi_status status; + + if (!i2c_acpi_get_i2c_resource(ares, &sb)) + return 1; + + if (lookup->slave_addr != sb->slave_address) + return 1; + + status = acpi_get_handle(lookup->dev_handle, + sb->resource_source.string_ptr, + &adapter_handle); + if (ACPI_FAILURE(status)) + return 1; + + adapter = i2c_acpi_find_adapter_by_handle(adapter_handle); + if (adapter) + intel_dsi->i2c_bus_num = adapter->nr; + + return 1; +} + +static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, + const u16 slave_addr) +{ + struct drm_device *drm_dev = intel_dsi->base.base.dev; + struct device *dev = &drm_dev->pdev->dev; + struct acpi_device *acpi_dev; + struct list_head resource_list; + struct i2c_adapter_lookup lookup; + + acpi_dev = ACPI_COMPANION(dev); + if (acpi_dev) { + memset(&lookup, 0, sizeof(lookup)); + lookup.slave_addr = slave_addr; + lookup.intel_dsi = intel_dsi; + lookup.dev_handle = acpi_device_handle(acpi_dev); + + INIT_LIST_HEAD(&resource_list); + acpi_dev_get_resources(acpi_dev, &resource_list, + i2c_adapter_lookup, + &lookup); + acpi_dev_free_resource_list(&resource_list); + } +} +#else +static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, + const u16 slave_addr) +{ +} +#endif + static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) { - DRM_DEBUG_KMS("Skipping I2C element execution\n"); + struct drm_device *drm_dev = intel_dsi->base.base.dev; + struct device *dev = &drm_dev->pdev->dev; + struct i2c_adapter *adapter; + struct i2c_msg msg; + int ret; + u8 vbt_i2c_bus_num = *(data + 2); + u16 slave_addr = *(u16 *)(data + 3); + u8 reg_offset = *(data + 5); + u8 payload_size = *(data + 6); + u8 *payload_data; + + if (intel_dsi->i2c_bus_num < 0) { + intel_dsi->i2c_bus_num = vbt_i2c_bus_num; + i2c_acpi_find_adapter(intel_dsi, slave_addr); + } + + adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); + if (!adapter) { + DRM_DEV_ERROR(dev, "Cannot find a valid i2c bus for xfer\n"); + goto err_bus; + } - return data + *(data + 6) + 7; + payload_data = kzalloc(payload_size + 1, GFP_KERNEL); + if (!payload_data) + goto err_alloc; + + payload_data[0] = reg_offset; + memcpy(&payload_data[1], (data + 7), payload_size); + + msg.addr = slave_addr; + msg.flags = 0; + msg.len = payload_size + 1; + msg.buf = payload_data; + + ret = i2c_transfer(adapter, &msg, 1); + if (ret < 0) + DRM_DEV_ERROR(dev, + "Failed to xfer payload of size (%u) to reg (%u)\n", + payload_size, reg_offset); + + kfree(payload_data); +err_alloc: + i2c_put_adapter(adapter); +err_bus: + return data + payload_size + 7; } static const u8 *mipi_exec_spi(struct intel_dsi *intel_dsi, const u8 *data) @@ -453,8 +563,8 @@ static const char *sequence_name(enum mipi_seq seq_id) return "(unknown)"; } -void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, - enum mipi_seq seq_id) +static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id) { struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); const u8 *data; @@ -519,6 +629,22 @@ void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, } } +void 
intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id) +{ + if (seq_id == MIPI_SEQ_POWER_ON && intel_dsi->gpio_panel) + gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); + if (seq_id == MIPI_SEQ_BACKLIGHT_ON && intel_dsi->gpio_backlight) + gpiod_set_value_cansleep(intel_dsi->gpio_backlight, 1); + + intel_dsi_vbt_exec(intel_dsi, seq_id); + + if (seq_id == MIPI_SEQ_POWER_OFF && intel_dsi->gpio_panel) + gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0); + if (seq_id == MIPI_SEQ_BACKLIGHT_OFF && intel_dsi->gpio_backlight) + gpiod_set_value_cansleep(intel_dsi->gpio_backlight, 0); +} + void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec) { struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); @@ -664,6 +790,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) intel_dsi->panel_off_delay = pps->panel_off_delay / 10; intel_dsi->panel_pwr_cycle_delay = pps->panel_power_cycle_delay / 10; + intel_dsi->i2c_bus_num = -1; + /* a regular driver would get the device in probe */ for_each_dsi_port(port, intel_dsi->ports) { mipi_dsi_attach(intel_dsi->dsi_hosts[port]->device); @@ -671,3 +799,110 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) return true; } + +/* + * On some BYT/CHT devs some sequences are incomplete and we need to manually + * control some GPIOs. We need to add a GPIO lookup table before we get these. + * If the GOP did not initialize the panel (HDMI inserted) we may need to also + * change the pinmux for the SoC's PWM0 pin from GPIO to PWM. + */ +static struct gpiod_lookup_table pmic_panel_gpio_table = { + /* Intel GFX is consumer */ + .dev_id = "0000:00:02.0", + .table = { + /* Panel EN/DISABLE */ + GPIO_LOOKUP("gpio_crystalcove", 94, "panel", GPIO_ACTIVE_HIGH), + { } + }, +}; + +static struct gpiod_lookup_table soc_panel_gpio_table = { + .dev_id = "0000:00:02.0", + .table = { + GPIO_LOOKUP("INT33FC:01", 10, "backlight", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("INT33FC:01", 11, "panel", GPIO_ACTIVE_HIGH), + { } + }, +}; + +static const struct pinctrl_map soc_pwm_pinctrl_map[] = { + PIN_MAP_MUX_GROUP("0000:00:02.0", "soc_pwm0", "INT33FC:00", + "pwm0_grp", "pwm"), +}; + +void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on) +{ + struct drm_device *dev = intel_dsi->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct mipi_config *mipi_config = dev_priv->vbt.dsi.config; + enum gpiod_flags flags = panel_is_on ? 
GPIOD_OUT_HIGH : GPIOD_OUT_LOW; + bool want_backlight_gpio = false; + bool want_panel_gpio = false; + struct pinctrl *pinctrl; + int ret; + + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + mipi_config->pwm_blc == PPS_BLC_PMIC) { + gpiod_add_lookup_table(&pmic_panel_gpio_table); + want_panel_gpio = true; + } + + if (IS_VALLEYVIEW(dev_priv) && mipi_config->pwm_blc == PPS_BLC_SOC) { + gpiod_add_lookup_table(&soc_panel_gpio_table); + want_panel_gpio = true; + want_backlight_gpio = true; + + /* Ensure PWM0 pin is muxed as PWM instead of GPIO */ + ret = pinctrl_register_mappings(soc_pwm_pinctrl_map, + ARRAY_SIZE(soc_pwm_pinctrl_map)); + if (ret) + DRM_ERROR("Failed to register pwm0 pinmux mapping\n"); + + pinctrl = devm_pinctrl_get_select(dev->dev, "soc_pwm0"); + if (IS_ERR(pinctrl)) + DRM_ERROR("Failed to set pinmux to PWM\n"); + } + + if (want_panel_gpio) { + intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", flags); + if (IS_ERR(intel_dsi->gpio_panel)) { + DRM_ERROR("Failed to own gpio for panel control\n"); + intel_dsi->gpio_panel = NULL; + } + } + + if (want_backlight_gpio) { + intel_dsi->gpio_backlight = + gpiod_get(dev->dev, "backlight", flags); + if (IS_ERR(intel_dsi->gpio_backlight)) { + DRM_ERROR("Failed to own gpio for backlight control\n"); + intel_dsi->gpio_backlight = NULL; + } + } +} + +void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi) +{ + struct drm_device *dev = intel_dsi->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct mipi_config *mipi_config = dev_priv->vbt.dsi.config; + + if (intel_dsi->gpio_panel) { + gpiod_put(intel_dsi->gpio_panel); + intel_dsi->gpio_panel = NULL; + } + + if (intel_dsi->gpio_backlight) { + gpiod_put(intel_dsi->gpio_backlight); + intel_dsi->gpio_backlight = NULL; + } + + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + mipi_config->pwm_blc == PPS_BLC_PMIC) + gpiod_remove_lookup_table(&pmic_panel_gpio_table); + + if (IS_VALLEYVIEW(dev_priv) && mipi_config->pwm_blc == PPS_BLC_SOC) { + pinctrl_unregister_mappings(soc_pwm_pinctrl_map); + gpiod_remove_lookup_table(&soc_panel_gpio_table); + } +} diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 22666d28f4aa..86a337c9d85d 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -34,7 +34,7 @@ #include "i915_drv.h" #include "intel_connector.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dvo.h" #include "intel_dvo_dev.h" #include "intel_gmbus.h" @@ -125,7 +125,7 @@ static struct intel_dvo *enc_to_dvo(struct intel_encoder *encoder) return container_of(encoder, struct intel_dvo, base); } -static struct intel_dvo *intel_attached_dvo(struct drm_connector *connector) +static struct intel_dvo *intel_attached_dvo(struct intel_connector *connector) { return enc_to_dvo(intel_attached_encoder(connector)); } @@ -134,7 +134,7 @@ static bool intel_dvo_connector_get_hw_state(struct intel_connector *connector) { struct drm_device *dev = connector->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dvo *intel_dvo = intel_attached_dvo(&connector->base); + struct intel_dvo *intel_dvo = intel_attached_dvo(connector); u32 tmp; tmp = I915_READ(intel_dvo->dev.dvo_reg); @@ -178,9 +178,9 @@ static void intel_dvo_get_config(struct intel_encoder *encoder, else flags |= DRM_MODE_FLAG_NVSYNC; - pipe_config->base.adjusted_mode.flags |= flags; + pipe_config->hw.adjusted_mode.flags |= flags; - 
pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; + pipe_config->hw.adjusted_mode.crtc_clock = pipe_config->port_clock; } static void intel_disable_dvo(struct intel_encoder *encoder, @@ -207,8 +207,8 @@ static void intel_enable_dvo(struct intel_encoder *encoder, u32 temp = I915_READ(dvo_reg); intel_dvo->dev.dev_ops->mode_set(&intel_dvo->dev, - &pipe_config->base.mode, - &pipe_config->base.adjusted_mode); + &pipe_config->hw.mode, + &pipe_config->hw.adjusted_mode); I915_WRITE(dvo_reg, temp | DVO_ENABLE); I915_READ(dvo_reg); @@ -220,7 +220,7 @@ static enum drm_mode_status intel_dvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_dvo *intel_dvo = intel_attached_dvo(connector); + struct intel_dvo *intel_dvo = intel_attached_dvo(to_intel_connector(connector)); const struct drm_display_mode *fixed_mode = to_intel_connector(connector)->panel.fixed_mode; int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; @@ -253,7 +253,7 @@ static int intel_dvo_compute_config(struct intel_encoder *encoder, struct intel_dvo *intel_dvo = enc_to_dvo(encoder); const struct drm_display_mode *fixed_mode = intel_dvo->attached_connector->panel.fixed_mode; - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; /* * If we have timings from the BIOS for the panel, put them in @@ -277,10 +277,10 @@ static void intel_dvo_pre_enable(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); - const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct intel_dvo *intel_dvo = enc_to_dvo(encoder); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; u32 dvo_val; i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg; i915_reg_t dvo_srcdim_reg = intel_dvo->dev.dvo_srcdim_reg; @@ -311,7 +311,7 @@ static void intel_dvo_pre_enable(struct intel_encoder *encoder, static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector, bool force) { - struct intel_dvo *intel_dvo = intel_attached_dvo(connector); + struct intel_dvo *intel_dvo = intel_attached_dvo(to_intel_connector(connector)); DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); return intel_dvo->dev.dev_ops->detect(&intel_dvo->dev); @@ -505,7 +505,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_DVO; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->pipe_mask = ~0; switch (dvo->type) { case INTEL_DVO_CHIP_TMDS: diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index d36cada2cc7d..a1048ece541e 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -41,7 +41,7 @@ #include <drm/drm_fourcc.h> #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" @@ -50,11 +50,6 @@ static inline bool fbc_supported(struct drm_i915_private *dev_priv) return HAS_FBC(dev_priv); } -static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv) -{ - return 
INTEL_GEN(dev_priv) <= 3; -} - /* * In some platforms where the CRTC's x:0/y:0 coordinates doesn't match the * frontbuffer's x:0/y:0 coordinates we lie to the hardware about the plane's @@ -73,7 +68,7 @@ static unsigned int get_crtc_fence_y_offset(struct intel_fbc *fbc) * write to the PLANE_SIZE register. For BDW-, the hardware looks at the value * we wrote to PIPESRC. */ -static void intel_fbc_get_plane_source_size(struct intel_fbc_state_cache *cache, +static void intel_fbc_get_plane_source_size(const struct intel_fbc_state_cache *cache, int *width, int *height) { if (width) @@ -83,7 +78,7 @@ static void intel_fbc_get_plane_source_size(struct intel_fbc_state_cache *cache, } static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv, - struct intel_fbc_state_cache *cache) + const struct intel_fbc_state_cache *cache) { int lines; @@ -110,9 +105,8 @@ static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv) I915_WRITE(FBC_CONTROL, fbc_ctl); /* Wait for compressing bit to clear */ - if (intel_wait_for_register(&dev_priv->uncore, - FBC_STATUS, FBC_STAT_COMPRESSING, 0, - 10)) { + if (intel_de_wait_for_clear(dev_priv, FBC_STATUS, + FBC_STAT_COMPRESSING, 10)) { DRM_DEBUG_KMS("FBC idle timed out\n"); return; } @@ -144,8 +138,10 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) u32 fbc_ctl2; /* Set it up... */ - fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE; + fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM; fbc_ctl2 |= FBC_CTL_PLANE(params->crtc.i9xx_plane); + if (params->fence_id >= 0) + fbc_ctl2 |= FBC_CTL_CPU_FENCE; I915_WRITE(FBC_CONTROL2, fbc_ctl2); I915_WRITE(FBC_FENCE_OFF, params->crtc.fence_y_offset); } @@ -157,7 +153,8 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) if (IS_I945GM(dev_priv)) fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */ fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; - fbc_ctl |= params->vma->fence->id; + if (params->fence_id >= 0) + fbc_ctl |= params->fence_id; I915_WRITE(FBC_CONTROL, fbc_ctl); } @@ -177,8 +174,8 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv) else dpfc_ctl |= DPFC_CTL_LIMIT_1X; - if (params->flags & PLANE_HAS_FENCE) { - dpfc_ctl |= DPFC_CTL_FENCE_EN | params->vma->fence->id; + if (params->fence_id >= 0) { + dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fence_id; I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset); } else { I915_WRITE(DPFC_FENCE_YOFF, 0); @@ -235,14 +232,14 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) break; } - if (params->flags & PLANE_HAS_FENCE) { + if (params->fence_id >= 0) { dpfc_ctl |= DPFC_CTL_FENCE_EN; if (IS_GEN(dev_priv, 5)) - dpfc_ctl |= params->vma->fence->id; + dpfc_ctl |= params->fence_id; if (IS_GEN(dev_priv, 6)) { I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | - params->vma->fence->id); + params->fence_id); I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); } @@ -254,8 +251,6 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) } I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset); - I915_WRITE(ILK_FBC_RT_BASE, - i915_ggtt_offset(params->vma) | ILK_FBC_RT_VALID); /* enable it... */ I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); @@ -286,13 +281,12 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) int threshold = dev_priv->fbc.threshold; /* Display WA #0529: skl, kbl, bxt. 
*/ - if (IS_GEN(dev_priv, 9) && !IS_GEMINILAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { u32 val = I915_READ(CHICKEN_MISC_4); val &= ~(FBC_STRIDE_OVERRIDE | FBC_STRIDE_MASK); - if (i915_gem_object_get_tiling(params->vma->obj) != - I915_TILING_X) + if (params->gen9_wa_cfb_stride) val |= FBC_STRIDE_OVERRIDE | params->gen9_wa_cfb_stride; I915_WRITE(CHICKEN_MISC_4, val); @@ -318,11 +312,11 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) break; } - if (params->flags & PLANE_HAS_FENCE) { + if (params->fence_id >= 0) { dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | - params->vma->fence->id); + params->fence_id); I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); } else { I915_WRITE(SNB_DPFC_CTL_SA,0); @@ -344,8 +338,8 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) HSW_FBCQ_DIS); } - if (IS_GEN(dev_priv, 11)) - /* Wa_1409120013:icl,ehl */ + if (INTEL_GEN(dev_priv) >= 11) + /* Wa_1409120013:icl,ehl,tgl */ I915_WRITE(ILK_DPFC_CHICKEN, ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); @@ -368,6 +362,7 @@ static void intel_fbc_hw_activate(struct drm_i915_private *dev_priv) struct intel_fbc *fbc = &dev_priv->fbc; fbc->active = true; + fbc->activated = true; if (INTEL_GEN(dev_priv) >= 7) gen7_fbc_activate(dev_priv); @@ -420,29 +415,10 @@ static void intel_fbc_deactivate(struct drm_i915_private *dev_priv, fbc->no_fbc_reason = reason; } -static bool multiple_pipes_ok(struct intel_crtc *crtc, - struct intel_plane_state *plane_state) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_fbc *fbc = &dev_priv->fbc; - enum pipe pipe = crtc->pipe; - - /* Don't even bother tracking anything we don't need. 
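
The FBC rework above replaces the cached vma/flags pair with a plain fence_id, where a negative value means "no fence". A toy sketch of how the activate paths key off that sign check; the control-register bit positions here are illustrative stand-ins, not copied from i915_reg.h:

#include <stdint.h>
#include <stdio.h>

/* The reworked FBC params carry just the fence number, -1 when unfenced. */
struct toy_fbc_params { int fence_id; uint32_t fence_y_offset; };

#define TOY_DPFC_CTL_EN        (1u << 31)   /* illustrative bit positions */
#define TOY_DPFC_CTL_FENCE_EN  (1u << 29)

/* Shape of the g4x/ilk activate paths above: fence bits only when fence_id >= 0. */
static uint32_t build_dpfc_ctl(const struct toy_fbc_params *p)
{
	uint32_t ctl = TOY_DPFC_CTL_EN;

	if (p->fence_id >= 0)
		ctl |= TOY_DPFC_CTL_FENCE_EN | (uint32_t)p->fence_id;
	return ctl;
}

int main(void)
{
	struct toy_fbc_params fenced = { .fence_id = 5 };
	struct toy_fbc_params unfenced = { .fence_id = -1 };

	printf("fenced:   0x%08x\n", build_dpfc_ctl(&fenced));
	printf("unfenced: 0x%08x\n", build_dpfc_ctl(&unfenced));
	return 0;
}
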
*/ - if (!no_fbc_on_multiple_pipes(dev_priv)) - return true; - - if (plane_state->base.visible) - fbc->visible_pipes_mask |= (1 << pipe); - else - fbc->visible_pipes_mask &= ~(1 << pipe); - - return (fbc->visible_pipes_mask & ~(1 << pipe)) != 0; -} - static int find_compression_threshold(struct drm_i915_private *dev_priv, struct drm_mm_node *node, - int size, - int fb_cpp) + unsigned int size, + unsigned int fb_cpp) { int compression_threshold = 1; int ret; @@ -488,18 +464,15 @@ again: } } -static int intel_fbc_alloc_cfb(struct intel_crtc *crtc) +static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, + unsigned int size, unsigned int fb_cpp) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_fbc *fbc = &dev_priv->fbc; struct drm_mm_node *uninitialized_var(compressed_llb); - int size, fb_cpp, ret; + int ret; WARN_ON(drm_mm_node_allocated(&fbc->compressed_fb)); - size = intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache); - fb_cpp = fbc->state_cache.fb.format->cpp[0]; - ret = find_compression_threshold(dev_priv, &fbc->compressed_fb, size, fb_cpp); if (!ret) @@ -657,46 +630,55 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) } static void intel_fbc_update_state_cache(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct intel_plane_state *plane_state) + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_fbc *fbc = &dev_priv->fbc; struct intel_fbc_state_cache *cache = &fbc->state_cache; - struct drm_framebuffer *fb = plane_state->base.fb; + struct drm_framebuffer *fb = plane_state->hw.fb; - cache->vma = NULL; - cache->flags = 0; + cache->plane.visible = plane_state->uapi.visible; + if (!cache->plane.visible) + return; - cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags; + cache->crtc.mode_flags = crtc_state->hw.adjusted_mode.flags; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate; - cache->plane.rotation = plane_state->base.rotation; + cache->plane.rotation = plane_state->hw.rotation; /* * Src coordinates are already rotated by 270 degrees for * the 90/270 degree plane rotation cases (to match the * GTT mapping), hence no need to account for rotation here. 
*/ - cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; - cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16; - cache->plane.visible = plane_state->base.visible; + cache->plane.src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + cache->plane.src_h = drm_rect_height(&plane_state->uapi.src) >> 16; cache->plane.adjusted_x = plane_state->color_plane[0].x; cache->plane.adjusted_y = plane_state->color_plane[0].y; - cache->plane.y = plane_state->base.src.y1 >> 16; + cache->plane.y = plane_state->uapi.src.y1 >> 16; - cache->plane.pixel_blend_mode = plane_state->base.pixel_blend_mode; - - if (!cache->plane.visible) - return; + cache->plane.pixel_blend_mode = plane_state->hw.pixel_blend_mode; cache->fb.format = fb->format; cache->fb.stride = fb->pitches[0]; - cache->vma = plane_state->vma; - cache->flags = plane_state->flags; - if (WARN_ON(cache->flags & PLANE_HAS_FENCE && !cache->vma->fence)) - cache->flags &= ~PLANE_HAS_FENCE; + WARN_ON(plane_state->flags & PLANE_HAS_FENCE && + !plane_state->vma->fence); + + if (plane_state->flags & PLANE_HAS_FENCE && + plane_state->vma->fence) + cache->fence_id = plane_state->vma->fence->id; + else + cache->fence_id = -1; +} + +static bool intel_fbc_cfb_size_changed(struct drm_i915_private *dev_priv) +{ + struct intel_fbc *fbc = &dev_priv->fbc; + + return intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) > + fbc->compressed_fb.size * fbc->threshold; } static bool intel_fbc_can_activate(struct intel_crtc *crtc) @@ -705,6 +687,11 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) struct intel_fbc *fbc = &dev_priv->fbc; struct intel_fbc_state_cache *cache = &fbc->state_cache; + if (!cache->plane.visible) { + fbc->no_fbc_reason = "primary plane not visible"; + return false; + } + /* We don't need to use a state cache here since this information is * global for all CRTC. */ @@ -713,11 +700,6 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } - if (!cache->vma) { - fbc->no_fbc_reason = "primary plane not visible"; - return false; - } - if (cache->crtc.mode_flags & DRM_MODE_FLAG_INTERLACE) { fbc->no_fbc_reason = "incompatible mode"; return false; @@ -741,7 +723,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) * For now this will effecively disable FBC with 90/270 degree * rotation. */ - if (!(cache->flags & PLANE_HAS_FENCE)) { + if (cache->fence_id < 0) { fbc->no_fbc_reason = "framebuffer not tiled or fenced"; return false; } @@ -784,8 +766,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) * we didn't get any invalidate/deactivate calls, but this would require * a lot of tracking just for a specific case. If we conclude it's an * important case, we can implement it later. */ - if (intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) > - fbc->compressed_fb.size * fbc->threshold) { + if (intel_fbc_cfb_size_changed(dev_priv)) { fbc->no_fbc_reason = "CFB requirements changed"; return false; } @@ -795,7 +776,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) * having a Y offset that isn't divisible by 4 causes FIFO underrun * and screen flicker. */ - if (IS_GEN_RANGE(dev_priv, 9, 10) && + if (INTEL_GEN(dev_priv) >= 9 && (fbc->state_cache.plane.adjusted_y & 3)) { fbc->no_fbc_reason = "plane Y offset is misaligned"; return false; @@ -838,8 +819,7 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, * zero. 
*/ memset(params, 0, sizeof(*params)); - params->vma = cache->vma; - params->flags = cache->flags; + params->fence_id = cache->fence_id; params->crtc.pipe = crtc->pipe; params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane; @@ -850,39 +830,88 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache); - if (IS_GEN(dev_priv, 9) && !IS_GEMINILAKE(dev_priv)) - params->gen9_wa_cfb_stride = DIV_ROUND_UP(cache->plane.src_w, - 32 * fbc->threshold) * 8; + params->gen9_wa_cfb_stride = cache->gen9_wa_cfb_stride; + + params->plane_visible = cache->plane.visible; +} + +static bool intel_fbc_can_flip_nuke(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct intel_fbc *fbc = &dev_priv->fbc; + const struct intel_fbc_state_cache *cache = &fbc->state_cache; + const struct intel_fbc_reg_params *params = &fbc->params; + + if (drm_atomic_crtc_needs_modeset(&crtc_state->uapi)) + return false; + + if (!params->plane_visible) + return false; + + if (!intel_fbc_can_activate(crtc)) + return false; + + if (params->fb.format != cache->fb.format) + return false; + + if (params->fb.stride != cache->fb.stride) + return false; + + if (params->cfb_size != intel_fbc_calculate_cfb_size(dev_priv, cache)) + return false; + + if (params->gen9_wa_cfb_stride != cache->gen9_wa_cfb_stride) + return false; + + return true; } -void intel_fbc_pre_update(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct intel_plane_state *plane_state) +bool intel_fbc_pre_update(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_fbc *fbc = &dev_priv->fbc; const char *reason = "update pending"; + bool need_vblank_wait = false; if (!fbc_supported(dev_priv)) - return; + return need_vblank_wait; mutex_lock(&fbc->lock); - if (!multiple_pipes_ok(crtc, plane_state)) { - reason = "more than one pipe active"; - goto deactivate; - } - - if (!fbc->enabled || fbc->crtc != crtc) + if (fbc->crtc != crtc) goto unlock; intel_fbc_update_state_cache(crtc, crtc_state, plane_state); fbc->flip_pending = true; -deactivate: - intel_fbc_deactivate(dev_priv, reason); + if (!intel_fbc_can_flip_nuke(crtc_state)) { + intel_fbc_deactivate(dev_priv, reason); + + /* + * Display WA #1198: glk+ + * Need an extra vblank wait between FBC disable and most plane + * updates. Bspec says this is only needed for plane disable, but + * that is not true. Touching most plane registers will cause the + * corruption to appear. Also SKL/derivatives do not seem to be + * affected. + * + * TODO: could optimize this a bit by sampling the frame + * counter when we disable FBC (if it was already done earlier) + * and skipping the extra vblank wait before the plane update + * if at least one frame has already passed. 
+ */ + if (fbc->activated && + (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))) + need_vblank_wait = true; + fbc->activated = false; + } unlock: mutex_unlock(&fbc->lock); + + return need_vblank_wait; } /** @@ -898,14 +927,13 @@ static void __intel_fbc_disable(struct drm_i915_private *dev_priv) struct intel_crtc *crtc = fbc->crtc; WARN_ON(!mutex_is_locked(&fbc->lock)); - WARN_ON(!fbc->enabled); + WARN_ON(!fbc->crtc); WARN_ON(fbc->active); DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe)); __intel_fbc_cleanup_cfb(dev_priv); - fbc->enabled = false; fbc->crtc = NULL; } @@ -916,11 +944,10 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) WARN_ON(!mutex_is_locked(&fbc->lock)); - if (!fbc->enabled || fbc->crtc != crtc) + if (fbc->crtc != crtc) return; fbc->flip_pending = false; - WARN_ON(fbc->active); if (!i915_modparams.enable_fbc) { intel_fbc_deactivate(dev_priv, "disabled at runtime per module param"); @@ -934,10 +961,9 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) if (!intel_fbc_can_activate(crtc)) return; - if (!fbc->busy_bits) { - intel_fbc_deactivate(dev_priv, "FBC enabled (active or scheduled)"); + if (!fbc->busy_bits) intel_fbc_hw_activate(dev_priv); - } else + else intel_fbc_deactivate(dev_priv, "frontbuffer write"); } @@ -956,7 +982,7 @@ void intel_fbc_post_update(struct intel_crtc *crtc) static unsigned int intel_fbc_get_frontbuffer_bit(struct intel_fbc *fbc) { - if (fbc->enabled) + if (fbc->crtc) return to_intel_plane(fbc->crtc->base.primary)->frontbuffer_bit; else return fbc->possible_framebuffer_bits; @@ -978,7 +1004,7 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv, fbc->busy_bits |= intel_fbc_get_frontbuffer_bit(fbc) & frontbuffer_bits; - if (fbc->enabled && fbc->busy_bits) + if (fbc->crtc && fbc->busy_bits) intel_fbc_deactivate(dev_priv, "frontbuffer write"); mutex_unlock(&fbc->lock); @@ -999,7 +1025,7 @@ void intel_fbc_flush(struct drm_i915_private *dev_priv, if (origin == ORIGIN_GTT || origin == ORIGIN_FLIP) goto out; - if (!fbc->busy_bits && fbc->enabled && + if (!fbc->busy_bits && fbc->crtc && (frontbuffer_bits & intel_fbc_get_frontbuffer_bit(fbc))) { if (fbc->active) intel_fbc_recompress(dev_priv); @@ -1048,12 +1074,12 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, * to pipe or plane A. */ for_each_new_intel_plane_in_state(state, plane, plane_state, i) { struct intel_crtc_state *crtc_state; - struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc); if (!plane->has_fbc) continue; - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) continue; crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -1082,42 +1108,53 @@ out: * intel_fbc_disable in the middle, as long as it is deactivated. 
*/ void intel_fbc_enable(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct intel_plane_state *plane_state) + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_fbc *fbc = &dev_priv->fbc; + struct intel_fbc_state_cache *cache = &fbc->state_cache; + const struct drm_framebuffer *fb = plane_state->hw.fb; if (!fbc_supported(dev_priv)) return; mutex_lock(&fbc->lock); - if (fbc->enabled) { - WARN_ON(fbc->crtc == NULL); - if (fbc->crtc == crtc) { - WARN_ON(!crtc_state->enable_fbc); - WARN_ON(fbc->active); - } - goto out; - } + if (fbc->crtc) { + if (fbc->crtc != crtc || + !intel_fbc_cfb_size_changed(dev_priv)) + goto out; - if (!crtc_state->enable_fbc) - goto out; + __intel_fbc_disable(dev_priv); + } WARN_ON(fbc->active); - WARN_ON(fbc->crtc != NULL); intel_fbc_update_state_cache(crtc, crtc_state, plane_state); - if (intel_fbc_alloc_cfb(crtc)) { + + /* FIXME crtc_state->enable_fbc lies :( */ + if (!cache->plane.visible) + goto out; + + if (intel_fbc_alloc_cfb(dev_priv, + intel_fbc_calculate_cfb_size(dev_priv, cache), + fb->format->cpp[0])) { + cache->plane.visible = false; fbc->no_fbc_reason = "not enough stolen memory"; goto out; } + if ((IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) && + fb->modifier != I915_FORMAT_MOD_X_TILED) + cache->gen9_wa_cfb_stride = + DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->threshold) * 8; + else + cache->gen9_wa_cfb_stride = 0; + DRM_DEBUG_KMS("Enabling FBC on pipe %c\n", pipe_name(crtc->pipe)); fbc->no_fbc_reason = "FBC enabled but not active yet\n"; - fbc->enabled = true; fbc->crtc = crtc; out: mutex_unlock(&fbc->lock); @@ -1157,7 +1194,7 @@ void intel_fbc_global_disable(struct drm_i915_private *dev_priv) return; mutex_lock(&fbc->lock); - if (fbc->enabled) { + if (fbc->crtc) { WARN_ON(fbc->crtc->active); __intel_fbc_disable(dev_priv); } @@ -1173,7 +1210,7 @@ static void intel_fbc_underrun_work_fn(struct work_struct *work) mutex_lock(&fbc->lock); /* Maybe we were scheduled twice. */ - if (fbc->underrun_detected || !fbc->enabled) + if (fbc->underrun_detected || !fbc->crtc) goto out; DRM_DEBUG_KMS("Disabling FBC due to FIFO underrun.\n"); @@ -1245,28 +1282,6 @@ void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *dev_priv) schedule_work(&fbc->underrun_work); } -/** - * intel_fbc_init_pipe_state - initialize FBC's CRTC visibility tracking - * @dev_priv: i915 device instance - * - * The FBC code needs to track CRTC visibility since the older platforms can't - * have FBC enabled while multiple pipes are used. This function does the - * initial setup at driver load to make sure FBC is matching the real hardware. - */ -void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv) -{ - struct intel_crtc *crtc; - - /* Don't even bother tracking anything if we don't need. 
*/ - if (!no_fbc_on_multiple_pipes(dev_priv)) - return; - - for_each_intel_crtc(&dev_priv->drm, crtc) - if (intel_crtc_active(crtc) && - crtc->base.primary->state->visible) - dev_priv->fbc.visible_pipes_mask |= (1 << crtc->pipe); -} - /* * The DDX driver changes its behavior depending on the value it reads from * i915.enable_fbc, so sanitize it by translating the default value into either @@ -1284,10 +1299,6 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) if (!HAS_FBC(dev_priv)) return 0; - /* https://bugs.freedesktop.org/show_bug.cgi?id=108085 */ - if (IS_GEMINILAKE(dev_priv)) - return 0; - if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) return 1; @@ -1318,9 +1329,11 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn); mutex_init(&fbc->lock); - fbc->enabled = false; fbc->active = false; + if (!drm_mm_initialized(&dev_priv->mm.stolen)) + mkwrite_device_info(dev_priv)->display.has_fbc = false; + if (need_fbc_vtd_wa(dev_priv)) mkwrite_device_info(dev_priv)->display.has_fbc = false; diff --git a/drivers/gpu/drm/i915/display/intel_fbc.h b/drivers/gpu/drm/i915/display/intel_fbc.h index 50272eda8d43..c8a5e5098687 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.h +++ b/drivers/gpu/drm/i915/display/intel_fbc.h @@ -19,15 +19,14 @@ struct intel_plane_state; void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, struct intel_atomic_state *state); bool intel_fbc_is_active(struct drm_i915_private *dev_priv); -void intel_fbc_pre_update(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct intel_plane_state *plane_state); +bool intel_fbc_pre_update(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); void intel_fbc_post_update(struct intel_crtc *crtc); void intel_fbc_init(struct drm_i915_private *dev_priv); -void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv); void intel_fbc_enable(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct intel_plane_state *plane_state); + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); void intel_fbc_disable(struct intel_crtc *crtc); void intel_fbc_global_disable(struct drm_i915_private *dev_priv); void intel_fbc_invalidate(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 1edd44ee32b2..1e98e432c9fa 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -43,17 +43,18 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_fbdev.h" #include "intel_frontbuffer.h" -static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev) +static struct intel_frontbuffer *to_frontbuffer(struct intel_fbdev *ifbdev) { - struct drm_i915_gem_object *obj = intel_fb_obj(&ifbdev->fb->base); - unsigned int origin = - ifbdev->vma_flags & PLANE_HAS_FENCE ? 
ORIGIN_GTT : ORIGIN_CPU; + return ifbdev->fb->frontbuffer; +} - intel_fb_obj_invalidate(obj, origin); +static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev) +{ + intel_frontbuffer_invalidate(to_frontbuffer(ifbdev), ORIGIN_CPU); } static int intel_fbdev_set_par(struct fb_info *info) @@ -99,7 +100,7 @@ static int intel_fbdev_pan_display(struct fb_var_screeninfo *var, return ret; } -static struct fb_ops intelfb_ops = { +static const struct fb_ops intelfb_ops = { .owner = THIS_MODULE, DRM_FB_HELPER_DEFAULT_OPS, .fb_set_par = intel_fbdev_set_par, @@ -120,7 +121,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_mode_fb_cmd2 mode_cmd = {}; struct drm_i915_gem_object *obj; - int size, ret; + int size; /* we don't do packed 24bpp */ if (sizes->surface_bpp == 24) @@ -140,31 +141,23 @@ static int intelfb_alloc(struct drm_fb_helper *helper, /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. */ - obj = NULL; + obj = ERR_PTR(-ENODEV); if (size * 2 < dev_priv->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); - if (obj == NULL) + if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(dev_priv, size); if (IS_ERR(obj)) { DRM_ERROR("failed to allocate framebuffer\n"); - ret = PTR_ERR(obj); - goto err; + return PTR_ERR(obj); } fb = intel_framebuffer_create(obj, &mode_cmd); - if (IS_ERR(fb)) { - ret = PTR_ERR(fb); - goto err_obj; - } + i915_gem_object_put(obj); + if (IS_ERR(fb)) + return PTR_ERR(fb); ifbdev->fb = to_intel_framebuffer(fb); - return 0; - -err_obj: - i915_gem_object_put(obj); -err: - return ret; } static int intelfb_create(struct drm_fb_helper *helper, @@ -180,7 +173,6 @@ static int intelfb_create(struct drm_fb_helper *helper, const struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_NORMAL, }; - struct drm_framebuffer *fb; intel_wakeref_t wakeref; struct fb_info *info; struct i915_vma *vma; @@ -212,7 +204,6 @@ static int intelfb_create(struct drm_fb_helper *helper, sizes->fb_height = intel_fb->base.height; } - mutex_lock(&dev->struct_mutex); wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); /* Pin the GGTT vma for our access via info->screen_base. @@ -226,8 +217,7 @@ static int intelfb_create(struct drm_fb_helper *helper, goto out_unlock; } - fb = &ifbdev->fb->base; - intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_DIRTYFB); + intel_frontbuffer_flush(to_frontbuffer(ifbdev), ORIGIN_DIRTYFB); info = drm_fb_helper_alloc_fbi(helper); if (IS_ERR(info)) { @@ -236,15 +226,17 @@ static int intelfb_create(struct drm_fb_helper *helper, goto out_unpin; } - ifbdev->helper.fb = fb; + ifbdev->helper.fb = &ifbdev->fb->base; info->fbops = &intelfb_ops; /* setup aperture base/size for vesafb takeover */ - info->apertures->ranges[0].base = dev->mode_config.fb_base; + info->apertures->ranges[0].base = ggtt->gmadr.start; info->apertures->ranges[0].size = ggtt->mappable_end; - info->fix.smem_start = dev->mode_config.fb_base + i915_ggtt_offset(vma); + /* Our framebuffer is the entirety of fbdev's system memory */ + info->fix.smem_start = + (unsigned long)(ggtt->gmadr.start + vma->node.start); info->fix.smem_len = vma->node.size; vaddr = i915_vma_pin_iomap(vma); @@ -262,18 +254,18 @@ static int intelfb_create(struct drm_fb_helper *helper, * If the object is stolen however, it will be full of whatever * garbage was left in there. 
*/ - if (intel_fb_obj(fb)->stolen && !prealloc) + if (vma->obj->stolen && !prealloc) memset_io(info->screen_base, 0, info->screen_size); /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n", - fb->width, fb->height, i915_ggtt_offset(vma)); + ifbdev->fb->base.width, ifbdev->fb->base.height, + i915_ggtt_offset(vma)); ifbdev->vma = vma; ifbdev->vma_flags = flags; intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev->struct_mutex); vga_switcheroo_client_fb_set(pdev, info); return 0; @@ -281,7 +273,6 @@ out_unpin: intel_unpin_fb_vma(vma, flags); out_unlock: intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev->struct_mutex); return ret; } @@ -298,11 +289,8 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) drm_fb_helper_fini(&ifbdev->helper); - if (ifbdev->vma) { - mutex_lock(&ifbdev->helper.dev->struct_mutex); + if (ifbdev->vma) intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags); - mutex_unlock(&ifbdev->helper.dev->struct_mutex); - } if (ifbdev->fb) drm_framebuffer_remove(&ifbdev->fb->base); @@ -451,7 +439,7 @@ int intel_fbdev_init(struct drm_device *dev) struct intel_fbdev *ifbdev; int ret; - if (WARN_ON(!HAS_DISPLAY(dev_priv))) + if (WARN_ON(!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv))) return -ENODEV; ifbdev = kzalloc(sizeof(struct intel_fbdev), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c index 8545ad32bb50..6c83b350525d 100644 --- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c @@ -26,7 +26,8 @@ */ #include "i915_drv.h" -#include "intel_drv.h" +#include "i915_trace.h" +#include "intel_display_types.h" #include "intel_fbc.h" #include "intel_fifo_underrun.h" @@ -125,8 +126,8 @@ static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev, } } -static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev, - enum pipe pipe, bool enable) +static void ilk_set_fifo_underrun_reporting(struct drm_device *dev, + enum pipe pipe, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); u32 bit = (pipe == PIPE_A) ? 
@@ -138,7 +139,7 @@ static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev, ilk_disable_display_irq(dev_priv, bit); } -static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) +static void ivb_check_fifo_underruns(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; @@ -156,9 +157,9 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe)); } -static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev, - enum pipe pipe, - bool enable, bool old) +static void ivb_set_fifo_underrun_reporting(struct drm_device *dev, + enum pipe pipe, bool enable, + bool old) { struct drm_i915_private *dev_priv = to_i915(dev); if (enable) { @@ -179,8 +180,8 @@ static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev, } } -static void broadwell_set_fifo_underrun_reporting(struct drm_device *dev, - enum pipe pipe, bool enable) +static void bdw_set_fifo_underrun_reporting(struct drm_device *dev, + enum pipe pipe, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -263,11 +264,11 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, if (HAS_GMCH(dev_priv)) i9xx_set_fifo_underrun_reporting(dev, pipe, enable, old); else if (IS_GEN_RANGE(dev_priv, 5, 6)) - ironlake_set_fifo_underrun_reporting(dev, pipe, enable); + ilk_set_fifo_underrun_reporting(dev, pipe, enable); else if (IS_GEN(dev_priv, 7)) - ivybridge_set_fifo_underrun_reporting(dev, pipe, enable, old); + ivb_set_fifo_underrun_reporting(dev, pipe, enable, old); else if (INTEL_GEN(dev_priv) >= 8) - broadwell_set_fifo_underrun_reporting(dev, pipe, enable); + bdw_set_fifo_underrun_reporting(dev, pipe, enable); return old; } @@ -426,7 +427,7 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv) if (HAS_GMCH(dev_priv)) i9xx_check_fifo_underruns(crtc); else if (IS_GEN(dev_priv, 7)) - ivybridge_check_fifo_underruns(crtc); + ivb_check_fifo_underruns(crtc); } spin_unlock_irq(&dev_priv->irq_lock); diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 44273c10cea5..6cb02c912acc 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -30,11 +30,11 @@ * Many features require us to track changes to the currently active * frontbuffer, especially rendering targeted at the frontbuffer. * - * To be able to do so GEM tracks frontbuffers using a bitmask for all possible - * frontbuffer slots through i915_gem_track_fb(). The function in this file are - * then called when the contents of the frontbuffer are invalidated, when - * frontbuffer rendering has stopped again to flush out all the changes and when - * the frontbuffer is exchanged with a flip. Subsystems interested in + * To be able to do so we track frontbuffers using a bitmask for all possible + * frontbuffer slots through intel_frontbuffer_track(). The functions in this + * file are then called when the contents of the frontbuffer are invalidated, + * when frontbuffer rendering has stopped again to flush out all the changes + * and when the frontbuffer is exchanged with a flip. Subsystems interested in * frontbuffer changes (e.g. PSR, FBC, DRRS) should directly put their callbacks * into the relevant places and filter for the frontbuffer slots that they are * interested int. 
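[Illustrative aside, not part of the patch: the tracking scheme the comment above describes reduces to moving a set of per-plane slot bits from the old buffer's mask to the new buffer's mask. A minimal C sketch follows; the names struct frontbuffer and track() are simplifications assumed for illustration, mirroring what intel_frontbuffer_track() does further down in this diff rather than defining any real driver API.

#include <linux/atomic.h>

struct frontbuffer {
	atomic_t bits;	/* which frontbuffer slots this buffer currently occupies */
};

static void track(struct frontbuffer *old, struct frontbuffer *new,
		  unsigned int frontbuffer_bits)
{
	/* Atomics, since several planes may update their slots concurrently. */
	if (old)
		atomic_andnot(frontbuffer_bits, &old->bits);	/* release slots on the outgoing buffer */
	if (new)
		atomic_or(frontbuffer_bits, &new->bits);	/* claim slots on the incoming buffer */
}

Consumers such as PSR, FBC and DRRS then sample the mask on invalidate/flush and react only to the slots they are interested in.]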
@@ -58,33 +58,14 @@ #include "display/intel_dp.h" #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" #include "intel_psr.h" -void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin, - unsigned int frontbuffer_bits) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - - if (origin == ORIGIN_CS) { - spin_lock(&dev_priv->fb_tracking.lock); - dev_priv->fb_tracking.busy_bits |= frontbuffer_bits; - dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits; - spin_unlock(&dev_priv->fb_tracking.lock); - } - - might_sleep(); - intel_psr_invalidate(dev_priv, frontbuffer_bits, origin); - intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits); - intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin); -} - /** - * intel_frontbuffer_flush - flush frontbuffer - * @dev_priv: i915 device + * frontbuffer_flush - flush frontbuffer + * @i915: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * @origin: which operation caused the flush * @@ -94,45 +75,27 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, * * Can be called without any locks held. */ -static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, - unsigned frontbuffer_bits, - enum fb_op_origin origin) +static void frontbuffer_flush(struct drm_i915_private *i915, + unsigned int frontbuffer_bits, + enum fb_op_origin origin) { /* Delay flushing when rings are still busy.*/ - spin_lock(&dev_priv->fb_tracking.lock); - frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits; - spin_unlock(&dev_priv->fb_tracking.lock); + spin_lock(&i915->fb_tracking.lock); + frontbuffer_bits &= ~i915->fb_tracking.busy_bits; + spin_unlock(&i915->fb_tracking.lock); if (!frontbuffer_bits) return; might_sleep(); - intel_edp_drrs_flush(dev_priv, frontbuffer_bits); - intel_psr_flush(dev_priv, frontbuffer_bits, origin); - intel_fbc_flush(dev_priv, frontbuffer_bits, origin); -} - -void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - enum fb_op_origin origin, - unsigned int frontbuffer_bits) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - - if (origin == ORIGIN_CS) { - spin_lock(&dev_priv->fb_tracking.lock); - /* Filter out new bits since rendering started. */ - frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; - dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - spin_unlock(&dev_priv->fb_tracking.lock); - } - - if (frontbuffer_bits) - intel_frontbuffer_flush(dev_priv, frontbuffer_bits, origin); + intel_edp_drrs_flush(i915, frontbuffer_bits); + intel_psr_flush(i915, frontbuffer_bits, origin); + intel_fbc_flush(i915, frontbuffer_bits, origin); } /** * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip - * @dev_priv: i915 device + * @i915: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. The actual @@ -142,19 +105,19 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv, +void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, unsigned frontbuffer_bits) { - spin_lock(&dev_priv->fb_tracking.lock); - dev_priv->fb_tracking.flip_bits |= frontbuffer_bits; + spin_lock(&i915->fb_tracking.lock); + i915->fb_tracking.flip_bits |= frontbuffer_bits; /* Remove stale busy bits due to the old buffer. 
*/ - dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - spin_unlock(&dev_priv->fb_tracking.lock); + i915->fb_tracking.busy_bits &= ~frontbuffer_bits; + spin_unlock(&i915->fb_tracking.lock); } /** * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip - * @dev_priv: i915 device + * @i915: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after the flip has been latched and will complete @@ -162,23 +125,22 @@ void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv, +void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, unsigned frontbuffer_bits) { - spin_lock(&dev_priv->fb_tracking.lock); + spin_lock(&i915->fb_tracking.lock); /* Mask any cancelled flips. */ - frontbuffer_bits &= dev_priv->fb_tracking.flip_bits; - dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits; - spin_unlock(&dev_priv->fb_tracking.lock); + frontbuffer_bits &= i915->fb_tracking.flip_bits; + i915->fb_tracking.flip_bits &= ~frontbuffer_bits; + spin_unlock(&i915->fb_tracking.lock); if (frontbuffer_bits) - intel_frontbuffer_flush(dev_priv, - frontbuffer_bits, ORIGIN_FLIP); + frontbuffer_flush(i915, frontbuffer_bits, ORIGIN_FLIP); } /** * intel_frontbuffer_flip - synchronous frontbuffer flip - * @dev_priv: i915 device + * @i915: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. This is for @@ -187,13 +149,165 @@ void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv, * * Can be called without any locks held. */ -void intel_frontbuffer_flip(struct drm_i915_private *dev_priv, +void intel_frontbuffer_flip(struct drm_i915_private *i915, unsigned frontbuffer_bits) { - spin_lock(&dev_priv->fb_tracking.lock); + spin_lock(&i915->fb_tracking.lock); /* Remove stale busy bits due to the old buffer. */ - dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - spin_unlock(&dev_priv->fb_tracking.lock); + i915->fb_tracking.busy_bits &= ~frontbuffer_bits; + spin_unlock(&i915->fb_tracking.lock); + + frontbuffer_flush(i915, frontbuffer_bits, ORIGIN_FLIP); +} + +void __intel_fb_invalidate(struct intel_frontbuffer *front, + enum fb_op_origin origin, + unsigned int frontbuffer_bits) +{ + struct drm_i915_private *i915 = to_i915(front->obj->base.dev); + + if (origin == ORIGIN_CS) { + spin_lock(&i915->fb_tracking.lock); + i915->fb_tracking.busy_bits |= frontbuffer_bits; + i915->fb_tracking.flip_bits &= ~frontbuffer_bits; + spin_unlock(&i915->fb_tracking.lock); + } + + might_sleep(); + intel_psr_invalidate(i915, frontbuffer_bits, origin); + intel_edp_drrs_invalidate(i915, frontbuffer_bits); + intel_fbc_invalidate(i915, frontbuffer_bits, origin); +} + +void __intel_fb_flush(struct intel_frontbuffer *front, + enum fb_op_origin origin, + unsigned int frontbuffer_bits) +{ + struct drm_i915_private *i915 = to_i915(front->obj->base.dev); + + if (origin == ORIGIN_CS) { + spin_lock(&i915->fb_tracking.lock); + /* Filter out new bits since rendering started. 
*/ + frontbuffer_bits &= i915->fb_tracking.busy_bits; + i915->fb_tracking.busy_bits &= ~frontbuffer_bits; + spin_unlock(&i915->fb_tracking.lock); + } + + if (frontbuffer_bits) + frontbuffer_flush(i915, frontbuffer_bits, origin); +} + +static int frontbuffer_active(struct i915_active *ref) +{ + struct intel_frontbuffer *front = + container_of(ref, typeof(*front), write); + + kref_get(&front->ref); + return 0; +} + +__i915_active_call +static void frontbuffer_retire(struct i915_active *ref) +{ + struct intel_frontbuffer *front = + container_of(ref, typeof(*front), write); + + intel_frontbuffer_flush(front, ORIGIN_CS); + intel_frontbuffer_put(front); +} + +static void frontbuffer_release(struct kref *ref) + __releases(&to_i915(front->obj->base.dev)->fb_tracking.lock) +{ + struct intel_frontbuffer *front = + container_of(ref, typeof(*front), ref); + struct drm_i915_gem_object *obj = front->obj; + struct i915_vma *vma; + + spin_lock(&obj->vma.lock); + for_each_ggtt_vma(vma, obj) + vma->display_alignment = I915_GTT_MIN_ALIGNMENT; + spin_unlock(&obj->vma.lock); + + RCU_INIT_POINTER(obj->frontbuffer, NULL); + spin_unlock(&to_i915(obj->base.dev)->fb_tracking.lock); + + i915_gem_object_put(obj); + kfree_rcu(front, rcu); +} + +struct intel_frontbuffer * +intel_frontbuffer_get(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_frontbuffer *front; + + front = __intel_frontbuffer_get(obj); + if (front) + return front; - intel_frontbuffer_flush(dev_priv, frontbuffer_bits, ORIGIN_FLIP); + front = kmalloc(sizeof(*front), GFP_KERNEL); + if (!front) + return NULL; + + front->obj = obj; + kref_init(&front->ref); + atomic_set(&front->bits, 0); + i915_active_init(&front->write, + frontbuffer_active, + i915_active_may_sleep(frontbuffer_retire)); + + spin_lock(&i915->fb_tracking.lock); + if (rcu_access_pointer(obj->frontbuffer)) { + kfree(front); + front = rcu_dereference_protected(obj->frontbuffer, true); + kref_get(&front->ref); + } else { + i915_gem_object_get(obj); + rcu_assign_pointer(obj->frontbuffer, front); + } + spin_unlock(&i915->fb_tracking.lock); + + return front; +} + +void intel_frontbuffer_put(struct intel_frontbuffer *front) +{ + kref_put_lock(&front->ref, + frontbuffer_release, + &to_i915(front->obj->base.dev)->fb_tracking.lock); +} + +/** + * intel_frontbuffer_track - update frontbuffer tracking + * @old: current buffer for the frontbuffer slots + * @new: new buffer for the frontbuffer slots + * @frontbuffer_bits: bitmask of frontbuffer slots + * + * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them + * from @old and setting them in @new. Both @old and @new can be NULL. + */ +void intel_frontbuffer_track(struct intel_frontbuffer *old, + struct intel_frontbuffer *new, + unsigned int frontbuffer_bits) +{ + /* + * Control of individual bits within the mask are guarded by + * the owning plane->mutex, i.e. we can never see concurrent + * manipulation of individual bits. But since the bitfield as a whole + * is updated using RMW, we need to use atomics in order to update + * the bits. 
+ */ + BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > + BITS_PER_TYPE(atomic_t)); + + if (old) { + WARN_ON(!(atomic_read(&old->bits) & frontbuffer_bits)); + atomic_andnot(frontbuffer_bits, &old->bits); + } + + if (new) { + WARN_ON(atomic_read(&new->bits) & frontbuffer_bits); + atomic_or(frontbuffer_bits, &new->bits); + } } diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.h b/drivers/gpu/drm/i915/display/intel_frontbuffer.h index 5727320c8084..6d41f5394425 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.h @@ -24,10 +24,13 @@ #ifndef __INTEL_FRONTBUFFER_H__ #define __INTEL_FRONTBUFFER_H__ -#include "gem/i915_gem_object.h" +#include <linux/atomic.h> +#include <linux/kref.h> + +#include "gem/i915_gem_object_types.h" +#include "i915_active.h" struct drm_i915_private; -struct drm_i915_gem_object; enum fb_op_origin { ORIGIN_GTT, @@ -37,23 +40,60 @@ enum fb_op_origin { ORIGIN_DIRTYFB, }; -void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv, +struct intel_frontbuffer { + struct kref ref; + atomic_t bits; + struct i915_active write; + struct drm_i915_gem_object *obj; + struct rcu_head rcu; +}; + +void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, unsigned frontbuffer_bits); -void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv, +void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, unsigned frontbuffer_bits); -void intel_frontbuffer_flip(struct drm_i915_private *dev_priv, +void intel_frontbuffer_flip(struct drm_i915_private *i915, unsigned frontbuffer_bits); -void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin, - unsigned int frontbuffer_bits); -void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - enum fb_op_origin origin, - unsigned int frontbuffer_bits); +void intel_frontbuffer_put(struct intel_frontbuffer *front); + +static inline struct intel_frontbuffer * +__intel_frontbuffer_get(const struct drm_i915_gem_object *obj) +{ + struct intel_frontbuffer *front; + + if (likely(!rcu_access_pointer(obj->frontbuffer))) + return NULL; + + rcu_read_lock(); + do { + front = rcu_dereference(obj->frontbuffer); + if (!front) + break; + + if (unlikely(!kref_get_unless_zero(&front->ref))) + continue; + + if (likely(front == rcu_access_pointer(obj->frontbuffer))) + break; + + intel_frontbuffer_put(front); + } while (1); + rcu_read_unlock(); + + return front; +} + +struct intel_frontbuffer * +intel_frontbuffer_get(struct drm_i915_gem_object *obj); + +void __intel_fb_invalidate(struct intel_frontbuffer *front, + enum fb_op_origin origin, + unsigned int frontbuffer_bits); /** - * intel_fb_obj_invalidate - invalidate frontbuffer object - * @obj: GEM object to invalidate + * intel_frontbuffer_invalidate - invalidate frontbuffer object + * @front: GEM object to invalidate * @origin: which operation caused the invalidation * * This function gets called every time rendering on the given object starts and @@ -62,37 +102,51 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, * until the rendering completes or a flip on this frontbuffer plane is * scheduled. 
*/ -static inline bool intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin) +static inline bool intel_frontbuffer_invalidate(struct intel_frontbuffer *front, + enum fb_op_origin origin) { unsigned int frontbuffer_bits; - frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (!front) + return false; + + frontbuffer_bits = atomic_read(&front->bits); if (!frontbuffer_bits) return false; - __intel_fb_obj_invalidate(obj, origin, frontbuffer_bits); + __intel_fb_invalidate(front, origin, frontbuffer_bits); return true; } +void __intel_fb_flush(struct intel_frontbuffer *front, + enum fb_op_origin origin, + unsigned int frontbuffer_bits); + /** - * intel_fb_obj_flush - flush frontbuffer object - * @obj: GEM object to flush + * intel_frontbuffer_flush - flush frontbuffer object + * @front: GEM object to flush * @origin: which operation caused the flush * * This function gets called every time rendering on the given object has * completed and frontbuffer caching can be started again. */ -static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, - enum fb_op_origin origin) +static inline void intel_frontbuffer_flush(struct intel_frontbuffer *front, + enum fb_op_origin origin) { unsigned int frontbuffer_bits; - frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (!front) + return; + + frontbuffer_bits = atomic_read(&front->bits); if (!frontbuffer_bits) return; - __intel_fb_obj_flush(obj, origin, frontbuffer_bits); + __intel_fb_flush(front, origin, frontbuffer_bits); } +void intel_frontbuffer_track(struct intel_frontbuffer *old, + struct intel_frontbuffer *new, + unsigned int frontbuffer_bits); + #endif /* __INTEL_FRONTBUFFER_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 4f6a9bd5af47..3d4d19ac1d14 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -35,7 +35,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_gmbus.h" struct gmbus_pin { @@ -82,25 +82,20 @@ static const struct gmbus_pin gmbus_pins_cnp[] = { static const struct gmbus_pin gmbus_pins_icp[] = { [GMBUS_PIN_1_BXT] = { "dpa", GPIOB }, [GMBUS_PIN_2_BXT] = { "dpb", GPIOC }, + [GMBUS_PIN_3_BXT] = { "dpc", GPIOD }, [GMBUS_PIN_9_TC1_ICP] = { "tc1", GPIOJ }, [GMBUS_PIN_10_TC2_ICP] = { "tc2", GPIOK }, [GMBUS_PIN_11_TC3_ICP] = { "tc3", GPIOL }, [GMBUS_PIN_12_TC4_ICP] = { "tc4", GPIOM }, -}; - -static const struct gmbus_pin gmbus_pins_mcc[] = { - [GMBUS_PIN_1_BXT] = { "dpa", GPIOB }, - [GMBUS_PIN_2_BXT] = { "dpb", GPIOC }, - [GMBUS_PIN_9_TC1_ICP] = { "dpc", GPIOJ }, + [GMBUS_PIN_13_TC5_TGP] = { "tc5", GPION }, + [GMBUS_PIN_14_TC6_TGP] = { "tc6", GPIOO }, }; /* pin is expected to be valid */ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, unsigned int pin) { - if (HAS_PCH_MCC(dev_priv)) - return &gmbus_pins_mcc[pin]; - else if (HAS_PCH_ICP(dev_priv)) + if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) return &gmbus_pins_icp[pin]; else if (HAS_PCH_CNP(dev_priv)) return &gmbus_pins_cnp[pin]; @@ -119,9 +114,7 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, { unsigned int size; - if (HAS_PCH_MCC(dev_priv)) - size = ARRAY_SIZE(gmbus_pins_mcc); - else if (HAS_PCH_ICP(dev_priv)) + if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) size = ARRAY_SIZE(gmbus_pins_icp); else if (HAS_PCH_CNP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_cnp); @@ -843,7 +836,7 @@ int 
intel_gmbus_setup(struct drm_i915_private *dev_priv) unsigned int pin; int ret; - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return 0; if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.h b/drivers/gpu/drm/i915/display/intel_gmbus.h index d989085b8d22..b96212b85425 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.h +++ b/drivers/gpu/drm/i915/display/intel_gmbus.h @@ -11,6 +11,28 @@ struct drm_i915_private; struct i2c_adapter; +#define GMBUS_PIN_DISABLED 0 +#define GMBUS_PIN_SSC 1 +#define GMBUS_PIN_VGADDC 2 +#define GMBUS_PIN_PANEL 3 +#define GMBUS_PIN_DPD_CHV 3 /* HDMID_CHV */ +#define GMBUS_PIN_DPC 4 /* HDMIC */ +#define GMBUS_PIN_DPB 5 /* SDVO, HDMIB */ +#define GMBUS_PIN_DPD 6 /* HDMID */ +#define GMBUS_PIN_RESERVED 7 /* 7 reserved */ +#define GMBUS_PIN_1_BXT 1 /* BXT+ (atom) and CNP+ (big core) */ +#define GMBUS_PIN_2_BXT 2 +#define GMBUS_PIN_3_BXT 3 +#define GMBUS_PIN_4_CNP 4 +#define GMBUS_PIN_9_TC1_ICP 9 +#define GMBUS_PIN_10_TC2_ICP 10 +#define GMBUS_PIN_11_TC3_ICP 11 +#define GMBUS_PIN_12_TC4_ICP 12 +#define GMBUS_PIN_13_TC5_TGP 13 +#define GMBUS_PIN_14_TC6_TGP 14 + +#define GMBUS_NUM_PINS 15 /* including 0 */ + int intel_gmbus_setup(struct drm_i915_private *dev_priv); void intel_gmbus_teardown(struct drm_i915_private *dev_priv); bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 27bd7276a82d..0fdbd39f6641 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -1,9 +1,11 @@ /* SPDX-License-Identifier: MIT */ /* * Copyright (C) 2017 Google, Inc. + * Copyright _ 2017-2019, Intel Corporation. 
* * Authors: * Sean Paul <seanpaul@chromium.org> + * Ramalingam C <ramalingam.c@intel.com> */ #include <linux/component.h> @@ -14,9 +16,11 @@ #include <drm/i915_component.h> #include "i915_reg.h" -#include "intel_drv.h" +#include "intel_display_power.h" +#include "intel_display_types.h" #include "intel_hdcp.h" #include "intel_sideband.h" +#include "intel_connector.h" #define KEY_LOAD_TRIES 5 #define ENCRYPT_STATUS_CHANGE_TIMEOUT_MS 50 @@ -104,24 +108,20 @@ bool intel_hdcp2_capable(struct intel_connector *connector) return capable; } -static inline bool intel_hdcp_in_use(struct intel_connector *connector) +static inline +bool intel_hdcp_in_use(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - enum port port = connector->encoder->port; - u32 reg; - - reg = I915_READ(PORT_HDCP_STATUS(port)); - return reg & HDCP_STATUS_ENC; + return I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) & + HDCP_STATUS_ENC; } -static inline bool intel_hdcp2_in_use(struct intel_connector *connector) +static inline +bool intel_hdcp2_in_use(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - enum port port = connector->encoder->port; - u32 reg; - - reg = I915_READ(HDCP2_STATUS_DDI(port)); - return reg & LINK_ENCRYPTION_STATUS; + return I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_ENCRYPTION_STATUS; } static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *intel_dig_port, @@ -244,8 +244,7 @@ static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv) static int intel_write_sha_text(struct drm_i915_private *dev_priv, u32 sha_text) { I915_WRITE(HDCP_SHA_TEXT, sha_text); - if (intel_wait_for_register(&dev_priv->uncore, HDCP_REP_CTL, - HDCP_SHA1_READY, HDCP_SHA1_READY, 1)) { + if (intel_de_wait_for_set(dev_priv, HDCP_REP_CTL, HDCP_SHA1_READY, 1)) { DRM_ERROR("Timed out waiting for SHA1 ready\n"); return -ETIMEDOUT; } @@ -253,9 +252,29 @@ static int intel_write_sha_text(struct drm_i915_private *dev_priv, u32 sha_text) } static -u32 intel_hdcp_get_repeater_ctl(struct intel_digital_port *intel_dig_port) +u32 intel_hdcp_get_repeater_ctl(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { - enum port port = intel_dig_port->base.port; + if (INTEL_GEN(dev_priv) >= 12) { + switch (cpu_transcoder) { + case TRANSCODER_A: + return HDCP_TRANSA_REP_PRESENT | + HDCP_TRANSA_SHA1_M0; + case TRANSCODER_B: + return HDCP_TRANSB_REP_PRESENT | + HDCP_TRANSB_SHA1_M0; + case TRANSCODER_C: + return HDCP_TRANSC_REP_PRESENT | + HDCP_TRANSC_SHA1_M0; + case TRANSCODER_D: + return HDCP_TRANSD_REP_PRESENT | + HDCP_TRANSD_SHA1_M0; + default: + DRM_ERROR("Unknown transcoder %d\n", cpu_transcoder); + return -EINVAL; + } + } + switch (port) { case PORT_A: return HDCP_DDIA_REP_PRESENT | HDCP_DDIA_SHA1_M0; @@ -268,18 +287,20 @@ u32 intel_hdcp_get_repeater_ctl(struct intel_digital_port *intel_dig_port) case PORT_E: return HDCP_DDIE_REP_PRESENT | HDCP_DDIE_SHA1_M0; default: - break; + DRM_ERROR("Unknown port %d\n", port); + return -EINVAL; } - DRM_ERROR("Unknown port %d\n", port); - return -EINVAL; } static -int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port, +int intel_hdcp_validate_v_prime(struct intel_connector *connector, const struct intel_hdcp_shim *shim, u8 *ksv_fifo, u8 num_downstream, u8 *bstatus) { + struct intel_digital_port *intel_dig_port = 
conn_to_dig_port(connector); struct drm_i915_private *dev_priv; + enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; + enum port port = intel_dig_port->base.port; u32 vprime, sha_text, sha_leftovers, rep_ctl; int ret, i, j, sha_idx; @@ -306,7 +327,7 @@ int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port, sha_idx = 0; sha_text = 0; sha_leftovers = 0; - rep_ctl = intel_hdcp_get_repeater_ctl(intel_dig_port); + rep_ctl = intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder, port); I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); for (i = 0; i < num_downstream; i++) { unsigned int sha_empty; @@ -475,9 +496,8 @@ int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port, /* Tell the HW we're done with the hash and wait for it to ACK */ I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_COMPLETE_HASH); - if (intel_wait_for_register(&dev_priv->uncore, HDCP_REP_CTL, - HDCP_SHA1_COMPLETE, - HDCP_SHA1_COMPLETE, 1)) { + if (intel_de_wait_for_set(dev_priv, HDCP_REP_CTL, + HDCP_SHA1_COMPLETE, 1)) { DRM_ERROR("Timed out waiting for SHA1 complete\n"); return -ETIMEDOUT; } @@ -523,12 +543,16 @@ int intel_hdcp_auth_downstream(struct intel_connector *connector) * authentication. */ num_downstream = DRM_HDCP_NUM_DOWNSTREAM(bstatus[0]); - if (num_downstream == 0) + if (num_downstream == 0) { + DRM_DEBUG_KMS("Repeater with zero downstream devices\n"); return -EINVAL; + } ksv_fifo = kcalloc(DRM_HDCP_KSV_LEN, num_downstream, GFP_KERNEL); - if (!ksv_fifo) + if (!ksv_fifo) { + DRM_DEBUG_KMS("Out of mem: ksv_fifo\n"); return -ENOMEM; + } ret = shim->read_ksv_fifo(intel_dig_port, num_downstream, ksv_fifo); if (ret) @@ -545,7 +569,7 @@ int intel_hdcp_auth_downstream(struct intel_connector *connector) * V prime atleast twice. */ for (i = 0; i < tries; i++) { - ret = intel_hdcp_validate_v_prime(intel_dig_port, shim, + ret = intel_hdcp_validate_v_prime(connector, shim, ksv_fifo, num_downstream, bstatus); if (!ret) @@ -573,6 +597,7 @@ static int intel_hdcp_auth(struct intel_connector *connector) struct drm_device *dev = connector->base.dev; const struct intel_hdcp_shim *shim = hdcp->shim; struct drm_i915_private *dev_priv; + enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; enum port port; unsigned long r0_prime_gen_start; int ret, i, tries = 2; @@ -612,19 +637,21 @@ static int intel_hdcp_auth(struct intel_connector *connector) /* Initialize An with 2 random values and acquire it */ for (i = 0; i < 2; i++) - I915_WRITE(PORT_HDCP_ANINIT(port), get_random_u32()); - I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_CAPTURE_AN); + I915_WRITE(HDCP_ANINIT(dev_priv, cpu_transcoder, port), + get_random_u32()); + I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port), + HDCP_CONF_CAPTURE_AN); /* Wait for An to be acquired */ - if (intel_wait_for_register(&dev_priv->uncore, PORT_HDCP_STATUS(port), - HDCP_STATUS_AN_READY, - HDCP_STATUS_AN_READY, 1)) { + if (intel_de_wait_for_set(dev_priv, + HDCP_STATUS(dev_priv, cpu_transcoder, port), + HDCP_STATUS_AN_READY, 1)) { DRM_ERROR("Timed out waiting for An\n"); return -ETIMEDOUT; } - an.reg[0] = I915_READ(PORT_HDCP_ANLO(port)); - an.reg[1] = I915_READ(PORT_HDCP_ANHI(port)); + an.reg[0] = I915_READ(HDCP_ANLO(dev_priv, cpu_transcoder, port)); + an.reg[1] = I915_READ(HDCP_ANHI(dev_priv, cpu_transcoder, port)); ret = shim->write_an_aksv(intel_dig_port, an.shim); if (ret) return ret; @@ -642,24 +669,26 @@ static int intel_hdcp_auth(struct intel_connector *connector) return -EPERM; } - I915_WRITE(PORT_HDCP_BKSVLO(port), bksv.reg[0]); - 
I915_WRITE(PORT_HDCP_BKSVHI(port), bksv.reg[1]); + I915_WRITE(HDCP_BKSVLO(dev_priv, cpu_transcoder, port), bksv.reg[0]); + I915_WRITE(HDCP_BKSVHI(dev_priv, cpu_transcoder, port), bksv.reg[1]); ret = shim->repeater_present(intel_dig_port, &repeater_present); if (ret) return ret; if (repeater_present) I915_WRITE(HDCP_REP_CTL, - intel_hdcp_get_repeater_ctl(intel_dig_port)); + intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder, + port)); ret = shim->toggle_signalling(intel_dig_port, true); if (ret) return ret; - I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_AUTH_AND_ENC); + I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port), + HDCP_CONF_AUTH_AND_ENC); /* Wait for R0 ready */ - if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + if (wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) & (HDCP_STATUS_R0_READY | HDCP_STATUS_ENC), 1)) { DRM_ERROR("Timed out waiting for R0 ready\n"); return -ETIMEDOUT; @@ -687,24 +716,27 @@ static int intel_hdcp_auth(struct intel_connector *connector) ret = shim->read_ri_prime(intel_dig_port, ri.shim); if (ret) return ret; - I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + I915_WRITE(HDCP_RPRIME(dev_priv, cpu_transcoder, port), ri.reg); /* Wait for Ri prime match */ - if (!wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + if (!wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port)) & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) break; } if (i == tries) { DRM_DEBUG_KMS("Timed out waiting for Ri prime match (%x)\n", - I915_READ(PORT_HDCP_STATUS(port))); + I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port))); return -ETIMEDOUT; } /* Wait for encryption confirmation */ - if (intel_wait_for_register(&dev_priv->uncore, PORT_HDCP_STATUS(port), - HDCP_STATUS_ENC, HDCP_STATUS_ENC, - ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { + if (intel_de_wait_for_set(dev_priv, + HDCP_STATUS(dev_priv, cpu_transcoder, port), + HDCP_STATUS_ENC, + ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { DRM_ERROR("Timed out waiting for encryption\n"); return -ETIMEDOUT; } @@ -727,16 +759,17 @@ static int _intel_hdcp_disable(struct intel_connector *connector) struct drm_i915_private *dev_priv = connector->base.dev->dev_private; struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector); enum port port = intel_dig_port->base.port; + enum transcoder cpu_transcoder = hdcp->cpu_transcoder; int ret; DRM_DEBUG_KMS("[%s:%d] HDCP is being disabled...\n", connector->base.name, connector->base.base.id); hdcp->hdcp_encrypted = false; - I915_WRITE(PORT_HDCP_CONF(port), 0); - if (intel_wait_for_register(&dev_priv->uncore, - PORT_HDCP_STATUS(port), ~0, 0, - ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { + I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port), 0); + if (intel_de_wait_for_clear(dev_priv, + HDCP_STATUS(dev_priv, cpu_transcoder, port), + ~0, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { DRM_ERROR("Failed to disable HDCP, timeout clearing status\n"); return -ETIMEDOUT; } @@ -807,9 +840,11 @@ static int intel_hdcp_check_link(struct intel_connector *connector) struct drm_i915_private *dev_priv = connector->base.dev->dev_private; struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector); enum port port = intel_dig_port->base.port; + enum transcoder cpu_transcoder; int ret = 0; mutex_lock(&hdcp->mutex); + cpu_transcoder = hdcp->cpu_transcoder; /* Check_link valid only when HDCP1.4 is enabled */ if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_ENABLED || @@ -818,10 +853,11 @@ static int intel_hdcp_check_link(struct intel_connector *connector) goto out; } - if (WARN_ON(!intel_hdcp_in_use(connector))) 
{ + if (WARN_ON(!intel_hdcp_in_use(dev_priv, cpu_transcoder, port))) { DRM_ERROR("%s:%d HDCP link stopped encryption,%x\n", connector->base.name, connector->base.base.id, - I915_READ(PORT_HDCP_STATUS(port))); + I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port))); ret = -ENXIO; hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; schedule_work(&hdcp->prop_work); @@ -866,7 +902,6 @@ static void intel_hdcp_prop_work(struct work_struct *work) prop_work); struct intel_connector *connector = intel_hdcp_to_connector(hdcp); struct drm_device *dev = connector->base.dev; - struct drm_connector_state *state; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); mutex_lock(&hdcp->mutex); @@ -876,10 +911,9 @@ static void intel_hdcp_prop_work(struct work_struct *work) * those to UNDESIRED is handled by core. If value == UNDESIRED, * we're running just after hdcp has been disabled, so just exit */ - if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { - state = connector->base.state; - state->content_protection = hdcp->value; - } + if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) + drm_hdcp_update_content_protection(&connector->base, + hdcp->value); mutex_unlock(&hdcp->mutex); drm_modeset_unlock(&dev->mode_config.connection_mutex); @@ -888,7 +922,7 @@ static void intel_hdcp_prop_work(struct work_struct *work) bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port) { /* PORT E doesn't have HDCP, and PORT F is disabled */ - return INTEL_GEN(dev_priv) >= 9 && port < PORT_E; + return INTEL_INFO(dev_priv)->display.has_hdcp && port < PORT_E; } static int @@ -1207,8 +1241,10 @@ static int hdcp2_authentication_key_exchange(struct intel_connector *connector) if (ret < 0) return ret; - if (msgs.send_cert.rx_caps[0] != HDCP_2_2_RX_CAPS_VERSION_VAL) + if (msgs.send_cert.rx_caps[0] != HDCP_2_2_RX_CAPS_VERSION_VAL) { + DRM_DEBUG_KMS("cert.rx_caps dont claim HDCP2.2\n"); return -EINVAL; + } hdcp->is_repeater = HDCP_2_2_RX_REPEATER(msgs.send_cert.rx_caps[2]); @@ -1492,10 +1528,11 @@ static int hdcp2_enable_encryption(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; enum port port = connector->encoder->port; + enum transcoder cpu_transcoder = hdcp->cpu_transcoder; int ret; - WARN_ON(I915_READ(HDCP2_STATUS_DDI(port)) & LINK_ENCRYPTION_STATUS); - + WARN_ON(I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_ENCRYPTION_STATUS); if (hdcp->shim->toggle_signalling) { ret = hdcp->shim->toggle_signalling(intel_dig_port, true); if (ret) { @@ -1505,17 +1542,20 @@ static int hdcp2_enable_encryption(struct intel_connector *connector) } } - if (I915_READ(HDCP2_STATUS_DDI(port)) & LINK_AUTH_STATUS) { + if (I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_AUTH_STATUS) { /* Link is Authenticated. 
Now set for Encryption */ - I915_WRITE(HDCP2_CTL_DDI(port), - I915_READ(HDCP2_CTL_DDI(port)) | + I915_WRITE(HDCP2_CTL(dev_priv, cpu_transcoder, port), + I915_READ(HDCP2_CTL(dev_priv, cpu_transcoder, + port)) | CTL_LINK_ENCRYPTION_REQ); } - ret = intel_wait_for_register(&dev_priv->uncore, HDCP2_STATUS_DDI(port), - LINK_ENCRYPTION_STATUS, - LINK_ENCRYPTION_STATUS, - ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); + ret = intel_de_wait_for_set(dev_priv, + HDCP2_STATUS(dev_priv, cpu_transcoder, + port), + LINK_ENCRYPTION_STATUS, + ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); return ret; } @@ -1526,15 +1566,20 @@ static int hdcp2_disable_encryption(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; enum port port = connector->encoder->port; + enum transcoder cpu_transcoder = hdcp->cpu_transcoder; int ret; - WARN_ON(!(I915_READ(HDCP2_STATUS_DDI(port)) & LINK_ENCRYPTION_STATUS)); + WARN_ON(!(I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_ENCRYPTION_STATUS)); - I915_WRITE(HDCP2_CTL_DDI(port), - I915_READ(HDCP2_CTL_DDI(port)) & ~CTL_LINK_ENCRYPTION_REQ); + I915_WRITE(HDCP2_CTL(dev_priv, cpu_transcoder, port), + I915_READ(HDCP2_CTL(dev_priv, cpu_transcoder, port)) & + ~CTL_LINK_ENCRYPTION_REQ); - ret = intel_wait_for_register(&dev_priv->uncore, HDCP2_STATUS_DDI(port), - LINK_ENCRYPTION_STATUS, 0x0, + ret = intel_de_wait_for_clear(dev_priv, + HDCP2_STATUS(dev_priv, cpu_transcoder, + port), + LINK_ENCRYPTION_STATUS, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); if (ret == -ETIMEDOUT) DRM_DEBUG_KMS("Disable Encryption Timedout"); @@ -1632,9 +1677,11 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; enum port port = connector->encoder->port; + enum transcoder cpu_transcoder; int ret = 0; mutex_lock(&hdcp->mutex); + cpu_transcoder = hdcp->cpu_transcoder; /* hdcp2_check_link is expected only when HDCP2.2 is Enabled */ if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_ENABLED || @@ -1643,9 +1690,10 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) goto out; } - if (WARN_ON(!intel_hdcp2_in_use(connector))) { + if (WARN_ON(!intel_hdcp2_in_use(dev_priv, cpu_transcoder, port))) { DRM_ERROR("HDCP2.2 link stopped the encryption, %x\n", - I915_READ(HDCP2_STATUS_DDI(port))); + I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, + port))); ret = -ENXIO; hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; schedule_work(&hdcp->prop_work); @@ -1749,14 +1797,56 @@ static const struct component_ops i915_hdcp_component_ops = { .unbind = i915_hdcp_component_unbind, }; -static inline int initialize_hdcp_port_data(struct intel_connector *connector) +static inline +enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port) { + switch (port) { + case PORT_A: + return MEI_DDI_A; + case PORT_B ... PORT_F: + return (enum mei_fw_ddi)port; + default: + return MEI_DDI_INVALID_PORT; + } +} + +static inline +enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder) +{ + switch (cpu_transcoder) { + case TRANSCODER_A ... 
TRANSCODER_D: + return (enum mei_fw_tc)(cpu_transcoder | 0x10); + default: /* eDP, DSI TRANSCODERS are non HDCP capable */ + return MEI_INVALID_TRANSCODER; + } +} + +static inline int initialize_hdcp_port_data(struct intel_connector *connector, + const struct intel_hdcp_shim *shim) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; struct hdcp_port_data *data = &hdcp->port_data; - data->port = connector->encoder->port; + if (INTEL_GEN(dev_priv) < 12) + data->fw_ddi = + intel_get_mei_fw_ddi_index(connector->encoder->port); + else + /* + * As per ME FW API expectation, for GEN 12+, fw_ddi is filled + * with zero(INVALID PORT index). + */ + data->fw_ddi = MEI_DDI_INVALID_PORT; + + /* + * As associated transcoder is set and modified at modeset, here fw_tc + * is initialized to zero (invalid transcoder index). This will be + * retained for <Gen12 forever. + */ + data->fw_tc = MEI_INVALID_TRANSCODER; + data->port_type = (u8)HDCP_PORT_TYPE_INTEGRATED; - data->protocol = (u8)hdcp->shim->protocol; + data->protocol = (u8)shim->protocol; data->k = 1; if (!data->streams) @@ -1780,7 +1870,7 @@ static bool is_hdcp2_supported(struct drm_i915_private *dev_priv) return false; return (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) || - IS_KABYLAKE(dev_priv)); + IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)); } void intel_hdcp_component_init(struct drm_i915_private *dev_priv) @@ -1806,12 +1896,13 @@ void intel_hdcp_component_init(struct drm_i915_private *dev_priv) } } -static void intel_hdcp2_init(struct intel_connector *connector) +static void intel_hdcp2_init(struct intel_connector *connector, + const struct intel_hdcp_shim *shim) { struct intel_hdcp *hdcp = &connector->hdcp; int ret; - ret = initialize_hdcp_port_data(connector); + ret = initialize_hdcp_port_data(connector, shim); if (ret) { DRM_DEBUG_KMS("Mei hdcp data init failed\n"); return; @@ -1830,24 +1921,31 @@ int intel_hdcp_init(struct intel_connector *connector, if (!shim) return -EINVAL; - ret = drm_connector_attach_content_protection_property(&connector->base); - if (ret) + if (is_hdcp2_supported(dev_priv)) + intel_hdcp2_init(connector, shim); + + ret = + drm_connector_attach_content_protection_property(&connector->base, + hdcp->hdcp2_supported); + if (ret) { + hdcp->hdcp2_supported = false; + kfree(hdcp->port_data.streams); return ret; + } hdcp->shim = shim; mutex_init(&hdcp->mutex); INIT_DELAYED_WORK(&hdcp->check_work, intel_hdcp_check_work); INIT_WORK(&hdcp->prop_work, intel_hdcp_prop_work); - - if (is_hdcp2_supported(dev_priv)) - intel_hdcp2_init(connector); init_waitqueue_head(&hdcp->cp_irq_queue); return 0; } -int intel_hdcp_enable(struct intel_connector *connector) +int intel_hdcp_enable(struct intel_connector *connector, + enum transcoder cpu_transcoder, u8 content_type) { + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; unsigned long check_link_interval = DRM_HDCP_CHECK_PERIOD_MS; int ret = -EINVAL; @@ -1857,6 +1955,12 @@ int intel_hdcp_enable(struct intel_connector *connector) mutex_lock(&hdcp->mutex); WARN_ON(hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED); + hdcp->content_type = content_type; + + if (INTEL_GEN(dev_priv) >= 12) { + hdcp->cpu_transcoder = cpu_transcoder; + hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder); + } /* * Considering that HDCP2.2 is more secure than HDCP1.4, If the setup @@ -1868,8 +1972,12 @@ int intel_hdcp_enable(struct intel_connector *connector) 
check_link_interval = DRM_HDCP2_CHECK_PERIOD_MS; } - /* When HDCP2.2 fails, HDCP1.4 will be attempted */ - if (ret && intel_hdcp_capable(connector)) { + /* + * When HDCP2.2 fails and Content Type is not Type1, HDCP1.4 will + * be attempted. + */ + if (ret && intel_hdcp_capable(connector) && + hdcp->content_type != DRM_MODE_HDCP_CONTENT_TYPE1) { ret = _intel_hdcp_enable(connector); } @@ -1951,12 +2059,15 @@ void intel_hdcp_atomic_check(struct drm_connector *connector, /* * Nothing to do if the state didn't change, or HDCP was activated since - * the last commit + * the last commit. And also no change in hdcp content type. */ if (old_cp == new_cp || (old_cp == DRM_MODE_CONTENT_PROTECTION_DESIRED && - new_cp == DRM_MODE_CONTENT_PROTECTION_ENABLED)) - return; + new_cp == DRM_MODE_CONTENT_PROTECTION_ENABLED)) { + if (old_state->hdcp_content_type == + new_state->hdcp_content_type) + return; + } crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h index be8da85c866a..f3c3272e712a 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp.h @@ -15,13 +15,16 @@ struct drm_connector_state; struct drm_i915_private; struct intel_connector; struct intel_hdcp_shim; +enum port; +enum transcoder; void intel_hdcp_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state); int intel_hdcp_init(struct intel_connector *connector, const struct intel_hdcp_shim *hdcp_shim); -int intel_hdcp_enable(struct intel_connector *connector); +int intel_hdcp_enable(struct intel_connector *connector, + enum transcoder cpu_transcoder, u8 content_type); int intel_hdcp_disable(struct intel_connector *connector); bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port); bool intel_hdcp_capable(struct intel_connector *connector); diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 0ebec69bbbfc..93ac0f296852 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -45,17 +45,17 @@ #include "intel_audio.h" #include "intel_connector.h" #include "intel_ddi.h" +#include "intel_display_types.h" #include "intel_dp.h" #include "intel_dpio_phy.h" -#include "intel_drv.h" #include "intel_fifo_underrun.h" #include "intel_gmbus.h" #include "intel_hdcp.h" #include "intel_hdmi.h" #include "intel_hotplug.h" #include "intel_lspcon.h" -#include "intel_sdvo.h" #include "intel_panel.h" +#include "intel_sdvo.h" #include "intel_sideband.h" static struct drm_device *intel_hdmi_to_dev(struct intel_hdmi *intel_hdmi) @@ -85,16 +85,17 @@ assert_hdmi_transcoder_func_disabled(struct drm_i915_private *dev_priv, "HDMI transcoder function enabled, expecting disabled\n"); } -struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder) +struct intel_hdmi *enc_to_intel_hdmi(struct intel_encoder *encoder) { struct intel_digital_port *intel_dig_port = - container_of(encoder, struct intel_digital_port, base.base); + container_of(&encoder->base, struct intel_digital_port, + base.base); return &intel_dig_port->hdmi; } -static struct intel_hdmi *intel_attached_hdmi(struct drm_connector *connector) +static struct intel_hdmi *intel_attached_hdmi(struct intel_connector *connector) { - return enc_to_intel_hdmi(&intel_attached_encoder(connector)->base); + return enc_to_intel_hdmi(intel_attached_encoder(connector)); } static 
u32 g4x_infoframe_index(unsigned int type) @@ -189,13 +190,19 @@ hsw_dip_data_reg(struct drm_i915_private *dev_priv, } } -static int hsw_dip_data_size(unsigned int type) +static int hsw_dip_data_size(struct drm_i915_private *dev_priv, + unsigned int type) { switch (type) { case DP_SDP_VSC: return VIDEO_DIP_VSC_DATA_SIZE; case DP_SDP_PPS: return VIDEO_DIP_PPS_DATA_SIZE; + case HDMI_PACKET_TYPE_GAMUT_METADATA: + if (INTEL_GEN(dev_priv) >= 11) + return VIDEO_DIP_GMP_DATA_SIZE; + else + return VIDEO_DIP_DATA_SIZE; default: return VIDEO_DIP_DATA_SIZE; } @@ -279,7 +286,7 @@ static void ibx_write_infoframe(struct intel_encoder *encoder, { const u32 *data = frame; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); int i; @@ -315,7 +322,7 @@ static void ibx_read_infoframe(struct intel_encoder *encoder, void *frame, ssize_t len) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); u32 val, *data = frame; int i; @@ -334,7 +341,7 @@ static u32 ibx_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe; + enum pipe pipe = to_intel_crtc(pipe_config->uapi.crtc)->pipe; i915_reg_t reg = TVIDEO_DIP_CTL(pipe); u32 val = I915_READ(reg); @@ -356,7 +363,7 @@ static void cpt_write_infoframe(struct intel_encoder *encoder, { const u32 *data = frame; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); int i; @@ -395,7 +402,7 @@ static void cpt_read_infoframe(struct intel_encoder *encoder, void *frame, ssize_t len) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); u32 val, *data = frame; int i; @@ -414,7 +421,7 @@ static u32 cpt_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe; + enum pipe pipe = to_intel_crtc(pipe_config->uapi.crtc)->pipe; u32 val = I915_READ(TVIDEO_DIP_CTL(pipe)); if ((val & VIDEO_DIP_ENABLE) == 0) @@ -432,7 +439,7 @@ static void vlv_write_infoframe(struct intel_encoder *encoder, { const u32 *data = frame; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); int i; @@ -468,7 +475,7 @@ static void vlv_read_infoframe(struct intel_encoder *encoder, void *frame, ssize_t len) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); u32 val, *data = 
frame; int i; @@ -487,7 +494,7 @@ static u32 vlv_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe; + enum pipe pipe = to_intel_crtc(pipe_config->uapi.crtc)->pipe; u32 val = I915_READ(VLV_TVIDEO_DIP_CTL(pipe)); if ((val & VIDEO_DIP_ENABLE) == 0) @@ -514,7 +521,9 @@ static void hsw_write_infoframe(struct intel_encoder *encoder, int i; u32 val = I915_READ(ctl_reg); - data_size = hsw_dip_data_size(type); + data_size = hsw_dip_data_size(dev_priv, type); + + WARN_ON(len > data_size); val &= ~hsw_infoframe_enable(type); I915_WRITE(ctl_reg, val); @@ -594,7 +603,7 @@ u32 intel_hdmi_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); u32 val, ret = 0; int i; @@ -638,7 +647,7 @@ static void intel_write_infoframe(struct intel_encoder *encoder, enum hdmi_infoframe_type type, const union hdmi_infoframe *frame) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); u8 buffer[VIDEO_DIP_DATA_SIZE]; ssize_t len; @@ -667,7 +676,7 @@ void intel_read_infoframe(struct intel_encoder *encoder, enum hdmi_infoframe_type type, union hdmi_infoframe *frame) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); u8 buffer[VIDEO_DIP_DATA_SIZE]; int ret; @@ -700,7 +709,7 @@ intel_hdmi_compute_avi_infoframe(struct intel_encoder *encoder, { struct hdmi_avi_infoframe *frame = &crtc_state->infoframes.avi.avi; const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; struct drm_connector *connector = conn_state->connector; int ret; @@ -724,11 +733,20 @@ intel_hdmi_compute_avi_infoframe(struct intel_encoder *encoder, drm_hdmi_avi_infoframe_colorspace(frame, conn_state); - drm_hdmi_avi_infoframe_quant_range(frame, connector, - adjusted_mode, - crtc_state->limited_color_range ? - HDMI_QUANTIZATION_RANGE_LIMITED : - HDMI_QUANTIZATION_RANGE_FULL); + /* nonsense combination */ + WARN_ON(crtc_state->limited_color_range && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); + + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_RGB) { + drm_hdmi_avi_infoframe_quant_range(frame, connector, + adjusted_mode, + crtc_state->limited_color_range ? 
+ HDMI_QUANTIZATION_RANGE_LIMITED : + HDMI_QUANTIZATION_RANGE_FULL); + } else { + frame->quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT; + frame->ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_LIMITED; + } drm_hdmi_avi_infoframe_content_type(frame, conn_state); @@ -787,7 +805,7 @@ intel_hdmi_compute_hdmi_infoframe(struct intel_encoder *encoder, ret = drm_hdmi_vendor_infoframe_from_display_mode(frame, conn_state->connector, - &crtc_state->base.adjusted_mode); + &crtc_state->hw.adjusted_mode); if (WARN_ON(ret)) return false; @@ -838,7 +856,7 @@ static void g4x_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; i915_reg_t reg = VIDEO_DIP_CTL; u32 val = I915_READ(reg); @@ -948,7 +966,7 @@ static bool intel_hdmi_set_gcp_infoframe(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); i915_reg_t reg; if ((crtc_state->infoframes.enable & @@ -973,7 +991,7 @@ void intel_hdmi_read_gcp_infoframe(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); i915_reg_t reg; if ((crtc_state->infoframes.enable & @@ -1010,7 +1028,7 @@ static void intel_hdmi_compute_gcp_infoframe(struct intel_encoder *encoder, /* Enable default_phase whenever the display mode is suitably aligned */ if (gcp_default_phase_possible(crtc_state->pipe_bpp, - &crtc_state->base.adjusted_mode)) + &crtc_state->hw.adjusted_mode)) crtc_state->infoframes.gcp |= GCP_DEFAULT_PHASE_ENABLE; } @@ -1020,8 +1038,8 @@ static void ibx_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); @@ -1079,8 +1097,8 @@ static void cpt_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); @@ -1128,8 +1146,8 @@ static void vlv_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - 
struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); u32 port = VIDEO_DIP_PORT(encoder->port); @@ -1491,7 +1509,10 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) { struct drm_i915_private *dev_priv = intel_dig_port->base.base.dev->dev_private; + struct intel_connector *connector = + intel_dig_port->hdmi.attached_connector; enum port port = intel_dig_port->base.port; + enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; int ret; union { u32 reg; @@ -1502,41 +1523,31 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) if (ret) return false; - I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + I915_WRITE(HDCP_RPRIME(dev_priv, cpu_transcoder, port), ri.reg); /* Wait for Ri prime match */ - if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + if (wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { DRM_ERROR("Ri' mismatch detected, link check failed (%x)\n", - I915_READ(PORT_HDCP_STATUS(port))); + I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port))); return false; } return true; } -static struct hdcp2_hdmi_msg_data { +struct hdcp2_hdmi_msg_timeout { u8 msg_id; - u32 timeout; - u32 timeout2; - } hdcp2_msg_data[] = { - {HDCP_2_2_AKE_INIT, 0, 0}, - {HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, 0}, - {HDCP_2_2_AKE_NO_STORED_KM, 0, 0}, - {HDCP_2_2_AKE_STORED_KM, 0, 0}, - {HDCP_2_2_AKE_SEND_HPRIME, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS, - HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS}, - {HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, - 0}, - {HDCP_2_2_LC_INIT, 0, 0}, - {HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, 0}, - {HDCP_2_2_SKE_SEND_EKS, 0, 0}, - {HDCP_2_2_REP_SEND_RECVID_LIST, - HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0}, - {HDCP_2_2_REP_SEND_ACK, 0, 0}, - {HDCP_2_2_REP_STREAM_MANAGE, 0, 0}, - {HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, - 0}, - }; + u16 timeout; +}; + +static const struct hdcp2_hdmi_msg_timeout hdcp2_msg_timeout[] = { + { HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, }, + { HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, }, + { HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, }, + { HDCP_2_2_REP_SEND_RECVID_LIST, HDCP_2_2_RECVID_LIST_TIMEOUT_MS, }, + { HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, }, +}; static int intel_hdmi_hdcp2_read_rx_status(struct intel_digital_port *intel_dig_port, @@ -1552,12 +1563,17 @@ static int get_hdcp2_msg_timeout(u8 msg_id, bool is_paired) { int i; - for (i = 0; i < ARRAY_SIZE(hdcp2_msg_data); i++) - if (hdcp2_msg_data[i].msg_id == msg_id && - (msg_id != HDCP_2_2_AKE_SEND_HPRIME || is_paired)) - return hdcp2_msg_data[i].timeout; - else if (hdcp2_msg_data[i].msg_id == msg_id) - return hdcp2_msg_data[i].timeout2; + if (msg_id == HDCP_2_2_AKE_SEND_HPRIME) { + if (is_paired) + return HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS; + else + return HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS; + } + + for (i = 0; i < ARRAY_SIZE(hdcp2_msg_timeout); i++) { + if (hdcp2_msg_timeout[i].msg_id == msg_id) + return hdcp2_msg_timeout[i].timeout; + } return -EINVAL; } @@ -1721,9 +1737,9 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct 
intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); - const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; u32 hdmi_val; intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); @@ -1759,7 +1775,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); intel_wakeref_t wakeref; bool ret; @@ -1778,7 +1794,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, static void intel_hdmi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); u32 tmp, flags = 0; @@ -1814,7 +1830,7 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, tmp & HDMI_COLOR_RANGE_16_235) pipe_config->limited_color_range = true; - pipe_config->base.adjusted_mode.flags |= flags; + pipe_config->hw.adjusted_mode.flags |= flags; if ((tmp & SDVO_COLOR_FORMAT_MASK) == HDMI_COLOR_FORMAT_12bpc) dotclock = pipe_config->port_clock * 2 / 3; @@ -1824,7 +1840,7 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, if (pipe_config->pixel_multiplier) dotclock /= pipe_config->pixel_multiplier; - pipe_config->base.adjusted_mode.crtc_clock = dotclock; + pipe_config->hw.adjusted_mode.crtc_clock = dotclock; pipe_config->lane_count = 4; @@ -1845,7 +1861,7 @@ static void intel_enable_hdmi_audio(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); WARN_ON(!pipe_config->has_hdmi_sink); DRM_DEBUG_DRIVER("Enabling HDMI audio on pipe %c\n", @@ -1859,7 +1875,7 @@ static void g4x_enable_hdmi(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); u32 temp; temp = I915_READ(intel_hdmi->hdmi_reg); @@ -1881,7 +1897,7 @@ static void ibx_enable_hdmi(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); u32 temp; temp = I915_READ(intel_hdmi->hdmi_reg); @@ -1931,8 +1947,8 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); enum pipe pipe = crtc->pipe; u32 temp; @@ -1992,10 +2008,10 
@@ static void intel_disable_hdmi(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct intel_digital_port *intel_dig_port = hdmi_to_dig_port(intel_hdmi); - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); u32 temp; temp = I915_READ(intel_hdmi->hdmi_reg); @@ -2145,7 +2161,7 @@ static enum drm_mode_status intel_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_hdmi *hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector)); struct drm_device *dev = intel_hdmi_to_dev(hdmi); struct drm_i915_private *dev_priv = to_i915(dev); enum drm_mode_status status; @@ -2185,20 +2201,22 @@ intel_hdmi_mode_valid(struct drm_connector *connector, status = hdmi_port_clock_valid(hdmi, clock * 5 / 4, true, force_dvi); } + if (status != MODE_OK) + return status; - return status; + return intel_mode_valid_max_plane_size(dev_priv, mode); } static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, int bpc) { struct drm_i915_private *dev_priv = - to_i915(crtc_state->base.crtc->dev); - struct drm_atomic_state *state = crtc_state->base.state; + to_i915(crtc_state->uapi.crtc->dev); + struct drm_atomic_state *state = crtc_state->uapi.state; struct drm_connector_state *connector_state; struct drm_connector *connector; const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; int i; if (HAS_GMCH(dev_priv)) @@ -2223,7 +2241,7 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, for_each_new_connector_in_state(state, connector, connector_state, i) { const struct drm_display_info *info = &connector->display_info; - if (connector_state->crtc != crtc_state->base.crtc) + if (connector_state->crtc != crtc_state->uapi.crtc) continue; if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) { @@ -2262,22 +2280,15 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, static bool intel_hdmi_ycbcr420_config(struct drm_connector *connector, - struct intel_crtc_state *config, - int *clock_12bpc, int *clock_10bpc, - int *clock_8bpc) + struct intel_crtc_state *config) { - struct intel_crtc *intel_crtc = to_intel_crtc(config->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(config->uapi.crtc); if (!connector->ycbcr_420_allowed) { DRM_ERROR("Platform doesn't support YCBCR420 output\n"); return false; } - /* YCBCR420 TMDS rate requirement is half the pixel clock */ - config->port_clock /= 2; - *clock_12bpc /= 2; - *clock_10bpc /= 2; - *clock_8bpc /= 2; config->output_format = INTEL_OUTPUT_FORMAT_YCBCR420; /* YCBCR 420 output conversion needs a scaler */ @@ -2292,22 +2303,117 @@ intel_hdmi_ycbcr420_config(struct drm_connector *connector, return true; } +static int intel_hdmi_port_clock(int clock, int bpc) +{ + /* + * Need to adjust the port link by: + * 1.5x for 12bpc + * 1.25x for 10bpc + */ + return clock * bpc / 8; +} + +static int intel_hdmi_compute_bpc(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + int clock, bool force_dvi) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + int bpc; + + for (bpc = 12; bpc >= 10; bpc -= 2) { + if 
(hdmi_deep_color_possible(crtc_state, bpc) && + hdmi_port_clock_valid(intel_hdmi, + intel_hdmi_port_clock(clock, bpc), + true, force_dvi) == MODE_OK) + return bpc; + } + + return 8; +} + +static int intel_hdmi_compute_clock(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + bool force_dvi) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + int bpc, clock = adjusted_mode->crtc_clock; + + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) + clock *= 2; + + /* YCBCR420 TMDS rate requirement is half the pixel clock */ + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + clock /= 2; + + bpc = intel_hdmi_compute_bpc(encoder, crtc_state, + clock, force_dvi); + + crtc_state->port_clock = intel_hdmi_port_clock(clock, bpc); + + /* + * pipe_bpp could already be below 8bpc due to + * FDI bandwidth constraints. We shouldn't bump it + * back up to 8bpc in that case. + */ + if (crtc_state->pipe_bpp > bpc * 3) + crtc_state->pipe_bpp = bpc * 3; + + DRM_DEBUG_KMS("picking %d bpc for HDMI output (pipe bpp: %d)\n", + bpc, crtc_state->pipe_bpp); + + if (hdmi_port_clock_valid(intel_hdmi, crtc_state->port_clock, + false, force_dvi) != MODE_OK) { + DRM_DEBUG_KMS("unsupported HDMI clock (%d kHz), rejecting mode\n", + crtc_state->port_clock); + return -EINVAL; + } + + return 0; +} + +static bool intel_hdmi_limited_color_range(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + const struct intel_digital_connector_state *intel_conn_state = + to_intel_digital_connector_state(conn_state); + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + + /* + * Our YCbCr output is always limited range. + * crtc_state->limited_color_range only applies to RGB, + * and it must never be set for YCbCr or we risk setting + * some conflicting bits in PIPECONF which will mess up + * the colors on the monitor. 
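As a rough illustration of the decision the new intel_hdmi_limited_color_range() helper above encodes, here is a minimal user-space sketch. Everything in it is invented for the example: the enum names, the cea_mode_defaults_limited flag standing in for drm_default_rgb_quant_range(), and the main() inputs; it is not the driver code, only the same "YCbCr is never limited-RGB, auto follows the CEA-861 default, otherwise obey the user property" logic.

#include <stdbool.h>
#include <stdio.h>

enum output_format { FORMAT_RGB, FORMAT_YCBCR444, FORMAT_YCBCR420 };
enum broadcast_rgb { BROADCAST_AUTO, BROADCAST_FULL, BROADCAST_LIMITED };

/*
 * Decide whether RGB output should use limited (16-235) quantization.
 * YCbCr output is always limited range in the hardware, so the RGB flag
 * must stay clear for it; "auto" falls back to the CEA-861 default for
 * the current mode.
 */
static bool use_limited_rgb_range(enum output_format fmt,
                                  enum broadcast_rgb pref,
                                  bool has_hdmi_sink,
                                  bool cea_mode_defaults_limited)
{
        if (fmt != FORMAT_RGB)
                return false;

        if (pref == BROADCAST_AUTO)
                return has_hdmi_sink && cea_mode_defaults_limited;

        return pref == BROADCAST_LIMITED;
}

int main(void)
{
        printf("auto on an HDMI sink, CEA mode: %s range\n",
               use_limited_rgb_range(FORMAT_RGB, BROADCAST_AUTO, true, true) ?
               "limited" : "full");
        printf("user forced full: %s range\n",
               use_limited_rgb_range(FORMAT_RGB, BROADCAST_FULL, true, true) ?
               "limited" : "full");
        return 0;
}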
+ */ + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + return false; + + if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { + /* See CEA-861-E - 5.1 Default Encoding Parameters */ + return crtc_state->has_hdmi_sink && + drm_default_rgb_quant_range(adjusted_mode) == + HDMI_QUANTIZATION_RANGE_LIMITED; + } else { + return intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; + } +} + int intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct drm_connector *connector = conn_state->connector; struct drm_scdc *scdc = &connector->display_info.hdmi.scdc; struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); - int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock; - int clock_10bpc = clock_8bpc * 5 / 4; - int clock_12bpc = clock_8bpc * 3 / 2; - int desired_bpp; bool force_dvi = intel_conn_state->force_audio == HDMI_AUDIO_OFF_DVI; + int ret; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; @@ -2318,33 +2424,19 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, if (pipe_config->has_hdmi_sink) pipe_config->has_infoframe = true; - if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { - /* See CEA-861-E - 5.1 Default Encoding Parameters */ - pipe_config->limited_color_range = - pipe_config->has_hdmi_sink && - drm_default_rgb_quant_range(adjusted_mode) == - HDMI_QUANTIZATION_RANGE_LIMITED; - } else { - pipe_config->limited_color_range = - intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; - } - - if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) { + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) pipe_config->pixel_multiplier = 2; - clock_8bpc *= 2; - clock_10bpc *= 2; - clock_12bpc *= 2; - } if (drm_mode_is_420_only(&connector->display_info, adjusted_mode)) { - if (!intel_hdmi_ycbcr420_config(connector, pipe_config, - &clock_12bpc, &clock_10bpc, - &clock_8bpc)) { + if (!intel_hdmi_ycbcr420_config(connector, pipe_config)) { DRM_ERROR("Can't support YCBCR420 output\n"); return -EINVAL; } } + pipe_config->limited_color_range = + intel_hdmi_limited_color_range(pipe_config, conn_state); + if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv)) pipe_config->has_pch_encoder = true; @@ -2356,46 +2448,13 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, intel_conn_state->force_audio == HDMI_AUDIO_ON; } - /* - * Note that g4x/vlv don't support 12bpc hdmi outputs. We also need - * to check that the higher clock still fits within limits. - */ - if (hdmi_deep_color_possible(pipe_config, 12) && - hdmi_port_clock_valid(intel_hdmi, clock_12bpc, - true, force_dvi) == MODE_OK) { - DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); - desired_bpp = 12*3; - - /* Need to adjust the port link by 1.5x for 12bpc. */ - pipe_config->port_clock = clock_12bpc; - } else if (hdmi_deep_color_possible(pipe_config, 10) && - hdmi_port_clock_valid(intel_hdmi, clock_10bpc, - true, force_dvi) == MODE_OK) { - DRM_DEBUG_KMS("picking bpc to 10 for HDMI output\n"); - desired_bpp = 10 * 3; - - /* Need to adjust the port link by 1.25x for 10bpc. 
*/ - pipe_config->port_clock = clock_10bpc; - } else { - DRM_DEBUG_KMS("picking bpc to 8 for HDMI output\n"); - desired_bpp = 8*3; - - pipe_config->port_clock = clock_8bpc; - } - - if (!pipe_config->bw_constrained) { - DRM_DEBUG_KMS("forcing pipe bpp to %i for HDMI\n", desired_bpp); - pipe_config->pipe_bpp = desired_bpp; - } - - if (hdmi_port_clock_valid(intel_hdmi, pipe_config->port_clock, - false, force_dvi) != MODE_OK) { - DRM_DEBUG_KMS("unsupported HDMI clock, rejecting mode\n"); - return -EINVAL; - } + ret = intel_hdmi_compute_clock(encoder, pipe_config, force_dvi); + if (ret) + return ret; - /* Set user selected PAR to incoming mode's member */ - adjusted_mode->picture_aspect_ratio = conn_state->picture_aspect_ratio; + if (conn_state->picture_aspect_ratio) + adjusted_mode->picture_aspect_ratio = + conn_state->picture_aspect_ratio; pipe_config->lane_count = 4; @@ -2438,7 +2497,7 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, static void intel_hdmi_unset_edid(struct drm_connector *connector) { - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector)); intel_hdmi->has_hdmi_sink = false; intel_hdmi->has_audio = false; @@ -2454,7 +2513,7 @@ static void intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector, bool has_edid) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_hdmi *hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector)); enum port port = hdmi_to_dig_port(hdmi)->base.port; struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus); @@ -2501,7 +2560,7 @@ static bool intel_hdmi_set_edid(struct drm_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector)); intel_wakeref_t wakeref; struct edid *edid; bool connected = false; @@ -2542,7 +2601,7 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) { enum drm_connector_status status = connector_status_disconnected; struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector)); struct intel_encoder *encoder = &hdmi_to_dig_port(intel_hdmi)->base; intel_wakeref_t wakeref; @@ -2566,6 +2625,12 @@ out: if (status != connector_status_connected) cec_notifier_phys_addr_invalidate(intel_hdmi->cec_notifier); + /* + * Make sure the refs for power wells enabled during detect are + * dropped to avoid a new detect cycle triggered by HPD polling. 
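The hunks above replace the open-coded 12/10/8 bpc ladder with the intel_hdmi_compute_bpc()/intel_hdmi_compute_clock() helpers, whose core is the "port clock scales as clock * bpc / 8" relation. A small user-space sketch of that selection follows; the function names, the deep_color_possible flag and the max_tmds_clock_khz limit are simplified stand-ins (not the driver's hdmi_port_clock_valid()), and the YCbCr 4:2:0 halving and double-clock doubling that the real helper applies are omitted for brevity.

#include <stdbool.h>
#include <stdio.h>

/*
 * TMDS clock needed for a pixel clock (kHz) at a given colour depth:
 * 8 bpc is 1x, 10 bpc 1.25x, 12 bpc 1.5x -- i.e. clock * bpc / 8.
 */
static int hdmi_port_clock(int clock_khz, int bpc)
{
        return clock_khz * bpc / 8;
}

/*
 * Pick the deepest colour depth whose TMDS clock the platform and sink
 * can still carry; fall back to 8 bpc otherwise.
 */
static int pick_hdmi_bpc(int clock_khz, bool deep_color_possible,
                         int max_tmds_clock_khz)
{
        int bpc;

        for (bpc = 12; bpc >= 10; bpc -= 2) {
                if (deep_color_possible &&
                    hdmi_port_clock(clock_khz, bpc) <= max_tmds_clock_khz)
                        return bpc;
        }

        return 8;
}

int main(void)
{
        int clock = 148500; /* 1080p60 pixel clock in kHz */
        int bpc = pick_hdmi_bpc(clock, true, 340000);

        printf("bpc=%d, port clock=%d kHz\n",
               bpc, hdmi_port_clock(clock, bpc));
        return 0;
}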
+ */ + intel_display_power_flush_work(dev_priv); + return status; } @@ -2599,7 +2664,7 @@ static void intel_hdmi_pre_enable(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct intel_digital_port *intel_dig_port = - enc_to_dig_port(&encoder->base); + enc_to_dig_port(encoder); intel_hdmi_prepare(encoder, pipe_config); @@ -2612,7 +2677,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); vlv_phy_pre_encoder_enable(encoder, pipe_config); @@ -2682,7 +2747,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -2708,7 +2773,7 @@ static struct i2c_adapter * intel_hdmi_get_i2c_adapter(struct drm_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector)); return intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); } @@ -2752,8 +2817,9 @@ intel_hdmi_connector_register(struct drm_connector *connector) static void intel_hdmi_destroy(struct drm_connector *connector) { - if (intel_attached_hdmi(connector)->cec_notifier) - cec_notifier_put(intel_attached_hdmi(connector)->cec_notifier); + struct cec_notifier *n = intel_attached_hdmi(to_intel_connector(connector))->cec_notifier; + + cec_notifier_conn_unregister(n); intel_connector_destroy(connector); } @@ -2808,7 +2874,6 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c intel_attach_colorspace_property(connector); drm_connector_attach_content_type_property(connector); - connector->state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) drm_object_attach_property(&connector->base, @@ -2842,7 +2907,7 @@ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, bool scrambling) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct drm_scrambling *sink_scrambling = &connector->display_info.hdmi.scdc.scrambling; struct i2c_adapter *adapter = @@ -2930,51 +2995,34 @@ static u8 cnp_port_to_ddc_pin(struct drm_i915_private *dev_priv, static u8 icl_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port) { - u8 ddc_pin; + enum phy phy = intel_port_to_phy(dev_priv, port); - switch (port) { - case PORT_A: - ddc_pin = GMBUS_PIN_1_BXT; - break; - case PORT_B: - ddc_pin = GMBUS_PIN_2_BXT; - break; - case PORT_C: - ddc_pin = GMBUS_PIN_9_TC1_ICP; - break; - case PORT_D: - ddc_pin = GMBUS_PIN_10_TC2_ICP; - break; - case PORT_E: - ddc_pin = GMBUS_PIN_11_TC3_ICP; - break; - case PORT_F: - ddc_pin = GMBUS_PIN_12_TC4_ICP; - break; - default: - MISSING_CASE(port); - ddc_pin = GMBUS_PIN_2_BXT; - break; - } - return ddc_pin; + if (intel_phy_is_combo(dev_priv, phy)) + return 
GMBUS_PIN_1_BXT + port; + else if (intel_phy_is_tc(dev_priv, phy)) + return GMBUS_PIN_9_TC1_ICP + intel_port_to_tc(dev_priv, port); + + WARN(1, "Unknown port:%c\n", port_name(port)); + return GMBUS_PIN_2_BXT; } static u8 mcc_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port) { + enum phy phy = intel_port_to_phy(dev_priv, port); u8 ddc_pin; - switch (port) { - case PORT_A: + switch (phy) { + case PHY_A: ddc_pin = GMBUS_PIN_1_BXT; break; - case PORT_B: + case PHY_B: ddc_pin = GMBUS_PIN_2_BXT; break; - case PORT_C: + case PHY_C: ddc_pin = GMBUS_PIN_9_TC1_ICP; break; default: - MISSING_CASE(port); + MISSING_CASE(phy); ddc_pin = GMBUS_PIN_1_BXT; break; } @@ -3019,7 +3067,7 @@ static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv, if (HAS_PCH_MCC(dev_priv)) ddc_pin = mcc_port_to_ddc_pin(dev_priv, port); - else if (HAS_PCH_ICP(dev_priv)) + else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) ddc_pin = icl_port_to_ddc_pin(dev_priv, port); else if (HAS_PCH_CNP(dev_priv)) ddc_pin = cnp_port_to_ddc_pin(dev_priv, port); @@ -3084,18 +3132,29 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); + struct i2c_adapter *ddc; enum port port = intel_encoder->port; + struct cec_connector_info conn_info; + + DRM_DEBUG_KMS("Adding HDMI connector on [ENCODER:%d:%s]\n", + intel_encoder->base.base.id, intel_encoder->base.name); - DRM_DEBUG_KMS("Adding HDMI connector on port %c\n", - port_name(port)); + if (INTEL_GEN(dev_priv) < 12 && WARN_ON(port == PORT_A)) + return; if (WARN(intel_dig_port->max_lanes < 4, - "Not enough lanes (%d) for HDMI on port %c\n", - intel_dig_port->max_lanes, port_name(port))) + "Not enough lanes (%d) for HDMI on [ENCODER:%d:%s]\n", + intel_dig_port->max_lanes, intel_encoder->base.base.id, + intel_encoder->base.name)) return; - drm_connector_init(dev, connector, &intel_hdmi_connector_funcs, - DRM_MODE_CONNECTOR_HDMIA); + intel_hdmi->ddc_bus = intel_hdmi_ddc_pin(dev_priv, port); + ddc = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); + + drm_connector_init_with_ddc(dev, connector, + &intel_hdmi_connector_funcs, + DRM_MODE_CONNECTOR_HDMIA, + ddc); drm_connector_helper_add(connector, &intel_hdmi_connector_helper_funcs); connector->interlace_allowed = 1; @@ -3105,10 +3164,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) connector->ycbcr_420_allowed = true; - intel_hdmi->ddc_bus = intel_hdmi_ddc_pin(dev_priv, port); - - if (WARN_ON(port == PORT_A)) - return; intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); if (HAS_DDI(dev_priv)) @@ -3137,12 +3192,41 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, I915_WRITE(PEG_BAND_GAP_DATA, (temp & ~0xf) | 0xd); } - intel_hdmi->cec_notifier = cec_notifier_get_conn(dev->dev, - port_identifier(port)); + cec_fill_conn_info_from_drm(&conn_info, connector); + + intel_hdmi->cec_notifier = + cec_notifier_conn_register(dev->dev, port_identifier(port), + &conn_info); if (!intel_hdmi->cec_notifier) DRM_DEBUG_KMS("CEC notifier get failed\n"); } +static enum intel_hotplug_state +intel_hdmi_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector, bool irq_received) +{ + enum intel_hotplug_state state; + + state = intel_encoder_hotplug(encoder, connector, irq_received); + + /* + * On many platforms the HDMI live state 
signal is known to be + * unreliable, so we can't use it to detect if a sink is connected or + * not. Instead we detect if it's connected based on whether we can + * read the EDID or not. That in turn has a problem during disconnect, + * since the HPD interrupt may be raised before the DDC lines get + * disconnected (due to how the required length of DDC vs. HPD + * connector pins are specified) and so we'll still be able to get a + * valid EDID. To solve this schedule another detection cycle if this + * time around we didn't detect any change in the sink's connection + * status. + */ + if (state == INTEL_HOTPLUG_UNCHANGED && irq_received) + state = INTEL_HOTPLUG_RETRY; + + return state; +} + void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port) { @@ -3166,7 +3250,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, "HDMI %c", port_name(port)); - intel_encoder->hotplug = intel_encoder_hotplug; + intel_encoder->hotplug = intel_hdmi_hotplug; intel_encoder->compute_config = intel_hdmi_compute_config; if (HAS_PCH_SPLIT(dev_priv)) { intel_encoder->disable = pch_disable_hdmi; @@ -3202,11 +3286,11 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, intel_encoder->port = port; if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) - intel_encoder->crtc_mask = 1 << 2; + intel_encoder->pipe_mask = BIT(PIPE_C); else - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->pipe_mask = ~0; } intel_encoder->cloneable = 1 << INTEL_OUTPUT_ANALOG; /* diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h index 106c2e0bc3c9..d3659d0b408b 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.h +++ b/drivers/gpu/drm/i915/display/intel_hdmi.h @@ -23,12 +23,13 @@ struct intel_crtc_state; struct intel_hdmi; struct drm_connector_state; union hdmi_infoframe; +enum port; void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port); void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector); -struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder); +struct intel_hdmi *enc_to_intel_hdmi(struct intel_encoder *encoder); int intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state); diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index ea3de4acc850..99d3a3c7989e 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -26,7 +26,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_hotplug.h" /** @@ -104,6 +104,12 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, if (IS_CNL_WITH_PORT_F(dev_priv)) return HPD_PORT_E; return HPD_PORT_F; + case PORT_G: + return HPD_PORT_G; + case PORT_H: + return HPD_PORT_H; + case PORT_I: + return HPD_PORT_I; default: MISSING_CASE(port); return HPD_NONE; @@ -112,6 +118,7 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, #define HPD_STORM_DETECT_PERIOD 1000 #define HPD_STORM_REENABLE_DELAY (2 * 60 * 1000) +#define HPD_RETRY_DELAY 1000 /** * intel_hpd_irq_storm_detect - gather stats and detect HPD IRQ storm on a pin @@ 
-266,8 +273,10 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); } -bool intel_encoder_hotplug(struct intel_encoder *encoder, - struct intel_connector *connector) +enum intel_hotplug_state +intel_encoder_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector, + bool irq_received) { struct drm_device *dev = connector->base.dev; enum drm_connector_status old_status; @@ -279,7 +288,7 @@ bool intel_encoder_hotplug(struct intel_encoder *encoder, drm_helper_probe_detect(&connector->base, NULL, false); if (old_status == connector->base.status) - return false; + return INTEL_HOTPLUG_UNCHANGED; DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n", connector->base.base.id, @@ -287,13 +296,13 @@ bool intel_encoder_hotplug(struct intel_encoder *encoder, drm_get_connector_status_name(old_status), drm_get_connector_status_name(connector->base.status)); - return true; + return INTEL_HOTPLUG_CHANGED; } static bool intel_encoder_has_hpd_pulse(struct intel_encoder *encoder) { return intel_encoder_is_dig_port(encoder) && - enc_to_dig_port(&encoder->base)->hpd_pulse != NULL; + enc_to_dig_port(encoder)->hpd_pulse != NULL; } static void i915_digport_work_func(struct work_struct *work) @@ -326,7 +335,7 @@ static void i915_digport_work_func(struct work_struct *work) if (!long_hpd && !short_hpd) continue; - dig_port = enc_to_dig_port(&encoder->base); + dig_port = enc_to_dig_port(encoder); ret = dig_port->hpd_pulse(dig_port, long_hpd); if (ret == IRQ_NONE) { @@ -339,7 +348,7 @@ static void i915_digport_work_func(struct work_struct *work) spin_lock_irq(&dev_priv->irq_lock); dev_priv->hotplug.event_bits |= old_bits; spin_unlock_irq(&dev_priv->irq_lock); - schedule_work(&dev_priv->hotplug.hotplug_work); + queue_delayed_work(system_wq, &dev_priv->hotplug.hotplug_work, 0); } } @@ -349,14 +358,16 @@ static void i915_digport_work_func(struct work_struct *work) static void i915_hotplug_work_func(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, hotplug.hotplug_work); + container_of(work, struct drm_i915_private, + hotplug.hotplug_work.work); struct drm_device *dev = &dev_priv->drm; struct intel_connector *intel_connector; struct intel_encoder *intel_encoder; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; - bool changed = false; + u32 changed = 0, retry = 0; u32 hpd_event_bits; + u32 hpd_retry_bits; mutex_lock(&dev->mode_config.mutex); DRM_DEBUG_KMS("running encoder hotplug functions\n"); @@ -365,6 +376,8 @@ static void i915_hotplug_work_func(struct work_struct *work) hpd_event_bits = dev_priv->hotplug.event_bits; dev_priv->hotplug.event_bits = 0; + hpd_retry_bits = dev_priv->hotplug.retry_bits; + dev_priv->hotplug.retry_bits = 0; /* Enable polling for connectors which had HPD IRQ storms */ intel_hpd_irq_storm_switch_to_polling(dev_priv); @@ -373,16 +386,29 @@ static void i915_hotplug_work_func(struct work_struct *work) drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { + u32 hpd_bit; + intel_connector = to_intel_connector(connector); if (!intel_connector->encoder) continue; intel_encoder = intel_connector->encoder; - if (hpd_event_bits & (1 << intel_encoder->hpd_pin)) { + hpd_bit = BIT(intel_encoder->hpd_pin); + if ((hpd_event_bits | hpd_retry_bits) & hpd_bit) { DRM_DEBUG_KMS("Connector %s (pin %i) received hotplug event.\n", connector->name, intel_encoder->hpd_pin); - changed 
|= intel_encoder->hotplug(intel_encoder, - intel_connector); + switch (intel_encoder->hotplug(intel_encoder, + intel_connector, + hpd_event_bits & hpd_bit)) { + case INTEL_HOTPLUG_UNCHANGED: + break; + case INTEL_HOTPLUG_CHANGED: + changed |= hpd_bit; + break; + case INTEL_HOTPLUG_RETRY: + retry |= hpd_bit; + break; + } } } drm_connector_list_iter_end(&conn_iter); @@ -390,6 +416,17 @@ static void i915_hotplug_work_func(struct work_struct *work) if (changed) drm_kms_helper_hotplug_event(dev); + + /* Remove shared HPD pins that have changed */ + retry &= ~changed; + if (retry) { + spin_lock_irq(&dev_priv->irq_lock); + dev_priv->hotplug.retry_bits |= retry; + spin_unlock_irq(&dev_priv->irq_lock); + + mod_delayed_work(system_wq, &dev_priv->hotplug.hotplug_work, + msecs_to_jiffies(HPD_RETRY_DELAY)); + } } @@ -444,7 +481,8 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, long_hpd = long_mask & BIT(pin); - DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port), + DRM_DEBUG_DRIVER("digital hpd on [ENCODER:%d:%s] - %s\n", + encoder->base.base.id, encoder->base.name, long_hpd ? "long" : "short"); queue_dig = true; @@ -516,7 +554,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, if (queue_dig) queue_work(dev_priv->hotplug.dp_wq, &dev_priv->hotplug.dig_port_work); if (queue_hp) - schedule_work(&dev_priv->hotplug.hotplug_work); + queue_delayed_work(system_wq, &dev_priv->hotplug.hotplug_work, 0); } /** @@ -636,7 +674,8 @@ void intel_hpd_poll_init(struct drm_i915_private *dev_priv) void intel_hpd_init_work(struct drm_i915_private *dev_priv) { - INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func); + INIT_DELAYED_WORK(&dev_priv->hotplug.hotplug_work, + i915_hotplug_work_func); INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func); INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work); INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work, @@ -650,11 +689,12 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv) dev_priv->hotplug.long_port_mask = 0; dev_priv->hotplug.short_port_mask = 0; dev_priv->hotplug.event_bits = 0; + dev_priv->hotplug.retry_bits = 0; spin_unlock_irq(&dev_priv->irq_lock); cancel_work_sync(&dev_priv->hotplug.dig_port_work); - cancel_work_sync(&dev_priv->hotplug.hotplug_work); + cancel_delayed_work_sync(&dev_priv->hotplug.hotplug_work); cancel_work_sync(&dev_priv->hotplug.poll_init_work); cancel_delayed_work_sync(&dev_priv->hotplug.reenable_work); } diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.h b/drivers/gpu/drm/i915/display/intel_hotplug.h index 805f897dbb7a..087b5f57b321 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.h +++ b/drivers/gpu/drm/i915/display/intel_hotplug.h @@ -13,10 +13,12 @@ struct drm_i915_private; struct intel_connector; struct intel_encoder; +enum port; void intel_hpd_poll_init(struct drm_i915_private *dev_priv); -bool intel_encoder_hotplug(struct intel_encoder *encoder, - struct intel_connector *connector); +enum intel_hotplug_state intel_encoder_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector, + bool irq_received); void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 pin_mask, u32 long_mask); void intel_hpd_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index b19800b58442..0b67f7887cd0 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -114,7 +114,7 
@@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) pinfo.size_data = sizeof(*pdata); pinfo.dma_mask = DMA_BIT_MASK(32); - pdata->num_pipes = INTEL_INFO(dev_priv)->num_pipes; + pdata->num_pipes = INTEL_NUM_PIPES(dev_priv); pdata->num_ports = IS_CHERRYVIEW(dev_priv) ? 3 : 2; /* B,C,D or B,C */ pdata->port[0].pipe = -1; pdata->port[1].pipe = -1; diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index 7028d0cf3bb1..d807c5648c87 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -27,8 +27,8 @@ #include <drm/drm_dp_dual_mode_helper.h> #include <drm/drm_edid.h> +#include "intel_display_types.h" #include "intel_dp.h" -#include "intel_drv.h" #include "intel_lspcon.h" /* LSPCON OUI Vendor ID(signatures) */ @@ -189,7 +189,7 @@ void lspcon_ycbcr420_config(struct drm_connector *connector, { const struct drm_display_info *info = &connector->display_info; const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; if (drm_mode_is_420_only(info, adjusted_mode) && connector->ycbcr_420_allowed) { @@ -434,8 +434,8 @@ void lspcon_write_infoframe(struct intel_encoder *encoder, const void *frame, ssize_t len) { bool ret; - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct intel_lspcon *lspcon = enc_to_intel_lspcon(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder); /* LSPCON only needs AVI IF */ if (type != HDMI_INFOFRAME_TYPE_AVI) @@ -472,10 +472,10 @@ void lspcon_set_infoframes(struct intel_encoder *encoder, ssize_t ret; union hdmi_infoframe frame; u8 buf[VIDEO_DIP_DATA_SIZE]; - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_lspcon *lspcon = &dig_port->lspcon; const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; if (!lspcon->active) { DRM_ERROR("Writing infoframes while LSPCON disabled ?\n"); @@ -522,7 +522,7 @@ u32 lspcon_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { /* FIXME actually read this from the hw */ - return enc_to_intel_lspcon(&encoder->base)->active; + return enc_to_intel_lspcon(encoder)->active; } void lspcon_resume(struct intel_lspcon *lspcon) diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index efefed62a7f8..10696bb99dcf 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -42,7 +42,7 @@ #include "i915_drv.h" #include "intel_atomic.h" #include "intel_connector.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_gmbus.h" #include "intel_lvds.h" #include "intel_panel.h" @@ -135,7 +135,7 @@ static void intel_lvds_get_config(struct intel_encoder *encoder, else flags |= DRM_MODE_FLAG_PVSYNC; - pipe_config->base.adjusted_mode.flags |= flags; + pipe_config->hw.adjusted_mode.flags |= flags; if (INTEL_GEN(dev_priv) < 5) pipe_config->gmch_pfit.lvds_border_bits = @@ -148,7 +148,7 @@ static void intel_lvds_get_config(struct intel_encoder *encoder, pipe_config->gmch_pfit.control |= tmp & PANEL_8TO6_DITHER_ENABLE; } - pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; + pipe_config->hw.adjusted_mode.crtc_clock = pipe_config->port_clock; } static void 
intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv, @@ -230,9 +230,9 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder, { struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); - const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - int pipe = crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; + enum pipe pipe = crtc->pipe; u32 temp; if (HAS_PCH_SPLIT(dev_priv)) { @@ -318,8 +318,7 @@ static void intel_enable_lvds(struct intel_encoder *encoder, I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON); POSTING_READ(lvds_encoder->reg); - if (intel_wait_for_register(&dev_priv->uncore, - PP_STATUS(0), PP_ON, PP_ON, 5000)) + if (intel_de_wait_for_set(dev_priv, PP_STATUS(0), PP_ON, 5000)) DRM_ERROR("timed out waiting for panel to power on\n"); intel_panel_enable_backlight(pipe_config, conn_state); @@ -333,8 +332,7 @@ static void intel_disable_lvds(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) & ~PANEL_POWER_ON); - if (intel_wait_for_register(&dev_priv->uncore, - PP_STATUS(0), PP_ON, 0, 1000)) + if (intel_de_wait_for_clear(dev_priv, PP_STATUS(0), PP_ON, 1000)) DRM_ERROR("timed out waiting for panel to power off\n"); I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) & ~LVDS_PORT_EN); @@ -394,8 +392,8 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder, to_lvds_encoder(&intel_encoder->base); struct intel_connector *intel_connector = lvds_encoder->attached_connector; - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); unsigned int lvds_bpp; /* Should never happen!! 
*/ @@ -901,12 +899,10 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; - if (HAS_PCH_SPLIT(dev_priv)) - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); - else if (IS_GEN(dev_priv, 4)) - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + if (INTEL_GEN(dev_priv) < 4) + intel_encoder->pipe_mask = BIT(PIPE_B); else - intel_encoder->crtc_mask = (1 << 1); + intel_encoder->pipe_mask = ~0; drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs); connector->display_info.subpixel_order = SubPixelHorizontalRGB; diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 824881271351..e59b4992ba1b 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -35,7 +35,7 @@ #include "display/intel_panel.h" #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_opregion.h" #define OPREGION_HEADER_OFFSET 0 @@ -941,6 +941,13 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) if (mboxes & MBOX_ACPI) { DRM_DEBUG_DRIVER("Public ACPI methods supported\n"); opregion->acpi = base + OPREGION_ACPI_OFFSET; + /* + * Indicate we handle monitor hotplug events ourselves so we do + * not need ACPI notifications for them. Disabling these avoids + * triggering the AML code doing the notifation, which may be + * broken as Windows also seems to disable these. + */ + opregion->acpi->chpd = 1; } if (mboxes & MBOX_SWSCI) { diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 21339b7f6a3e..e40c3a0e2cd7 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -30,10 +30,11 @@ #include <drm/i915_drm.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "i915_reg.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_frontbuffer.h" #include "intel_overlay.h" @@ -175,6 +176,7 @@ struct overlay_registers { struct intel_overlay { struct drm_i915_private *i915; + struct intel_context *context; struct intel_crtc *crtc; struct i915_vma *vma; struct i915_vma *old_vma; @@ -190,7 +192,8 @@ struct intel_overlay { struct overlay_registers __iomem *regs; u32 flip_addr; /* flip handling */ - struct i915_active_request last_flip; + struct i915_active last_flip; + void (*flip_complete)(struct intel_overlay *ovl); }; static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv, @@ -216,32 +219,25 @@ static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv, PCI_DEVFN(0, 0), I830_CLOCK_GATE, val); } -static void intel_overlay_submit_request(struct intel_overlay *overlay, - struct i915_request *rq, - i915_active_retire_fn retire) +static struct i915_request * +alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *)) { - GEM_BUG_ON(i915_active_request_peek(&overlay->last_flip, - &overlay->i915->drm.struct_mutex)); - i915_active_request_set_retire_fn(&overlay->last_flip, retire, - &overlay->i915->drm.struct_mutex); - __i915_active_request_set(&overlay->last_flip, rq); - i915_request_add(rq); -} + struct i915_request *rq; + int err; -static int intel_overlay_do_wait_request(struct intel_overlay *overlay, - struct i915_request *rq, - i915_active_retire_fn retire) -{ - intel_overlay_submit_request(overlay, rq, retire); - return 
i915_active_request_retire(&overlay->last_flip, - &overlay->i915->drm.struct_mutex); -} + overlay->flip_complete = fn; -static struct i915_request *alloc_request(struct intel_overlay *overlay) -{ - struct intel_engine_cs *engine = overlay->i915->engine[RCS0]; + rq = i915_request_create(overlay->context); + if (IS_ERR(rq)) + return rq; - return i915_request_create(engine->kernel_context); + err = i915_active_add_request(&overlay->last_flip, rq); + if (err) { + i915_request_add(rq); + return ERR_PTR(err); + } + + return rq; } /* overlay needs to be disable in OCMD reg */ @@ -253,7 +249,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) WARN_ON(overlay->active); - rq = alloc_request(overlay); + rq = alloc_request(overlay, NULL); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -274,19 +270,30 @@ static int intel_overlay_on(struct intel_overlay *overlay) *cs++ = MI_NOOP; intel_ring_advance(rq, cs); - return intel_overlay_do_wait_request(overlay, rq, NULL); + i915_request_add(rq); + + return i915_active_wait(&overlay->last_flip); } static void intel_overlay_flip_prepare(struct intel_overlay *overlay, struct i915_vma *vma) { enum pipe pipe = overlay->crtc->pipe; + struct intel_frontbuffer *from = NULL, *to = NULL; WARN_ON(overlay->old_vma); - i915_gem_track_fb(overlay->vma ? overlay->vma->obj : NULL, - vma ? vma->obj : NULL, - INTEL_FRONTBUFFER_OVERLAY(pipe)); + if (overlay->vma) + from = intel_frontbuffer_get(overlay->vma->obj); + if (vma) + to = intel_frontbuffer_get(vma->obj); + + intel_frontbuffer_track(from, to, INTEL_FRONTBUFFER_OVERLAY(pipe)); + + if (to) + intel_frontbuffer_put(to); + if (from) + intel_frontbuffer_put(from); intel_frontbuffer_flip_prepare(overlay->i915, INTEL_FRONTBUFFER_OVERLAY(pipe)); @@ -318,7 +325,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (tmp & (1 << 17)) DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); - rq = alloc_request(overlay); + rq = alloc_request(overlay, NULL); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -333,8 +340,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, intel_ring_advance(rq, cs); intel_overlay_flip_prepare(overlay, vma); - - intel_overlay_submit_request(overlay, rq, NULL); + i915_request_add(rq); return 0; } @@ -355,20 +361,13 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) } static void -intel_overlay_release_old_vid_tail(struct i915_active_request *active, - struct i915_request *rq) +intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) { - struct intel_overlay *overlay = - container_of(active, typeof(*overlay), last_flip); - intel_overlay_release_old_vma(overlay); } -static void intel_overlay_off_tail(struct i915_active_request *active, - struct i915_request *rq) +static void intel_overlay_off_tail(struct intel_overlay *overlay) { - struct intel_overlay *overlay = - container_of(active, typeof(*overlay), last_flip); struct drm_i915_private *dev_priv = overlay->i915; intel_overlay_release_old_vma(overlay); @@ -381,6 +380,16 @@ static void intel_overlay_off_tail(struct i915_active_request *active, i830_overlay_clock_gating(dev_priv, true); } +static void +intel_overlay_last_flip_retire(struct i915_active *active) +{ + struct intel_overlay *overlay = + container_of(active, typeof(*overlay), last_flip); + + if (overlay->flip_complete) + overlay->flip_complete(overlay); +} + /* overlay needs to be disabled in OCMD reg */ static int intel_overlay_off(struct intel_overlay *overlay) { @@ -395,7 +404,7 @@ static int intel_overlay_off(struct intel_overlay 
*overlay) * of the hw. Do it in both cases */ flip_addr |= OFC_UPDATE; - rq = alloc_request(overlay); + rq = alloc_request(overlay, intel_overlay_off_tail); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -418,17 +427,16 @@ static int intel_overlay_off(struct intel_overlay *overlay) intel_ring_advance(rq, cs); intel_overlay_flip_prepare(overlay, NULL); + i915_request_add(rq); - return intel_overlay_do_wait_request(overlay, rq, - intel_overlay_off_tail); + return i915_active_wait(&overlay->last_flip); } /* recover from an interruption due to a signal * We have to be careful not to repeat work forever an make forward progess. */ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) { - return i915_active_request_retire(&overlay->last_flip, - &overlay->i915->drm.struct_mutex); + return i915_active_wait(&overlay->last_flip); } /* Wait for pending overlay flip and release old frame. @@ -438,43 +446,38 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; + struct i915_request *rq; u32 *cs; - int ret; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - /* Only wait if there is actually an old frame to release to + /* + * Only wait if there is actually an old frame to release to * guarantee forward progress. */ if (!overlay->old_vma) return 0; - if (I915_READ(GEN2_ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { - /* synchronous slowpath */ - struct i915_request *rq; + if (!(I915_READ(GEN2_ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT)) { + intel_overlay_release_old_vid_tail(overlay); + return 0; + } - rq = alloc_request(overlay); - if (IS_ERR(rq)) - return PTR_ERR(rq); + rq = alloc_request(overlay, intel_overlay_release_old_vid_tail); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) { - i915_request_add(rq); - return PTR_ERR(cs); - } + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } - *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); - ret = intel_overlay_do_wait_request(overlay, rq, - intel_overlay_release_old_vid_tail); - if (ret) - return ret; - } else - intel_overlay_release_old_vid_tail(&overlay->last_flip, NULL); + i915_request_add(rq); - return 0; + return i915_active_wait(&overlay->last_flip); } void intel_overlay_reset(struct drm_i915_private *dev_priv) @@ -674,8 +677,8 @@ static void update_colorkey(struct intel_overlay *overlay, if (overlay->color_key_enabled) flags |= DST_KEY_ENABLE; - if (state->base.visible) - format = state->base.fb->format->format; + if (state->uapi.visible) + format = state->hw.fb->format->format; switch (format) { case DRM_FORMAT_C8: @@ -756,7 +759,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, struct i915_vma *vma; int ret, tmp_width; - lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); ret = intel_overlay_release_old_vid(overlay); @@ -765,19 +767,13 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, atomic_inc(&dev_priv->gpu_error.pending_fb_pin); - i915_gem_object_lock(new_bo); vma = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, PIN_MAPPABLE); - i915_gem_object_unlock(new_bo); if (IS_ERR(vma)) { ret = 
PTR_ERR(vma); goto out_pin_section; } - intel_fb_obj_flush(new_bo, ORIGIN_DIRTYFB); - - ret = i915_vma_put_fence(vma); - if (ret) - goto out_unpin; + i915_gem_object_flush_frontbuffer(new_bo, ORIGIN_DIRTYFB); if (!overlay->active) { u32 oconfig; @@ -861,7 +857,6 @@ int intel_overlay_switch_off(struct intel_overlay *overlay) struct drm_i915_private *dev_priv = overlay->i915; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); ret = intel_overlay_recover_from_interrupt(overlay); @@ -1077,11 +1072,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, if (!(params->flags & I915_OVERLAY_ENABLE)) { drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); - ret = intel_overlay_switch_off(overlay); - - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); return ret; @@ -1097,7 +1088,6 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, return -ENOENT; drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); if (i915_gem_object_is_tiled(new_bo)) { DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n"); @@ -1161,14 +1151,12 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, if (ret != 0) goto out_unlock; - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); i915_gem_object_put(new_bo); return 0; out_unlock: - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); i915_gem_object_put(new_bo); @@ -1242,7 +1230,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, } drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); ret = -EINVAL; if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) { @@ -1299,7 +1286,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, ret = 0; out_unlock: - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); return ret; @@ -1312,15 +1298,11 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) struct i915_vma *vma; int err; - mutex_lock(&i915->drm.struct_mutex); - obj = i915_gem_object_create_stolen(i915, PAGE_SIZE); - if (obj == NULL) + if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) { @@ -1341,29 +1323,33 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) } overlay->reg_bo = obj; - mutex_unlock(&i915->drm.struct_mutex); return 0; err_put_bo: i915_gem_object_put(obj); -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } void intel_overlay_setup(struct drm_i915_private *dev_priv) { struct intel_overlay *overlay; + struct intel_engine_cs *engine; int ret; if (!HAS_OVERLAY(dev_priv)) return; + engine = dev_priv->engine[RCS0]; + if (!engine || !engine->kernel_context) + return; + overlay = kzalloc(sizeof(*overlay), GFP_KERNEL); if (!overlay) return; overlay->i915 = dev_priv; + overlay->context = engine->kernel_context; + GEM_BUG_ON(!overlay->context); overlay->color_key = 0x0101fe; overlay->color_key_enabled = true; @@ -1371,7 +1357,8 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) overlay->contrast = 75; overlay->saturation = 146; - INIT_ACTIVE_REQUEST(&overlay->last_flip); + i915_active_init(&overlay->last_flip, + NULL, intel_overlay_last_flip_retire); ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv)); if (ret) @@ -1405,6 +1392,7 @@ void 
intel_overlay_cleanup(struct drm_i915_private *dev_priv) WARN_ON(overlay->active); i915_gem_object_put(overlay->reg_bo); + i915_active_fini(&overlay->last_flip); kfree(overlay); } diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 39d742094065..7b3ec6eb3382 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -35,8 +35,8 @@ #include <linux/pwm.h> #include "intel_connector.h" +#include "intel_display_types.h" #include "intel_dp_aux_backlight.h" -#include "intel_drv.h" #include "intel_dsi_dcs_backlight.h" #include "intel_panel.h" @@ -178,7 +178,7 @@ intel_pch_panel_fitting(struct intel_crtc *intel_crtc, struct intel_crtc_state *pipe_config, int fitting_mode) { - const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; int x = 0, y = 0, width = 0, height = 0; /* Native modes don't need fitting */ @@ -300,7 +300,7 @@ static inline u32 panel_fitter_scaling(u32 source, u32 target) static void i965_scale_aspect(struct intel_crtc_state *pipe_config, u32 *pfit_control) { - const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; u32 scaled_width = adjusted_mode->crtc_hdisplay * pipe_config->pipe_src_h; u32 scaled_height = pipe_config->pipe_src_w * @@ -321,7 +321,7 @@ static void i9xx_scale_aspect(struct intel_crtc_state *pipe_config, u32 *pfit_control, u32 *pfit_pgm_ratios, u32 *border) { - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; u32 scaled_width = adjusted_mode->crtc_hdisplay * pipe_config->pipe_src_h; u32 scaled_height = pipe_config->pipe_src_w * @@ -380,7 +380,7 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc, { struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); u32 pfit_control = 0, pfit_pgm_ratios = 0, border = 0; - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; /* Native modes don't need fitting */ if (adjusted_mode->crtc_hdisplay == pipe_config->pipe_src_w && @@ -1047,7 +1047,7 @@ static void vlv_enable_backlight(const struct intel_crtc_state *crtc_state, struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe; u32 ctl, ctl2; ctl2 = I915_READ(VLV_BLC_PWM_CTL2(pipe)); @@ -1077,7 +1077,7 @@ static void bxt_enable_backlight(const struct intel_crtc_state *crtc_state, struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe; u32 pwm_ctl, val; /* Controller 1 uses the utility pin. 
*/ @@ -1189,7 +1189,7 @@ void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe; if (!panel->backlight.present) return; @@ -1840,13 +1840,22 @@ static int pwm_setup_backlight(struct intel_connector *connector, enum pipe pipe) { struct drm_device *dev = connector->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_panel *panel = &connector->panel; + const char *desc; int retval; - /* Get the PWM chip for backlight control */ - panel->backlight.pwm = pwm_get(dev->dev, "pwm_backlight"); + /* Get the right PWM chip for DSI backlight according to VBT */ + if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) { + panel->backlight.pwm = pwm_get(dev->dev, "pwm_pmic_backlight"); + desc = "PMIC"; + } else { + panel->backlight.pwm = pwm_get(dev->dev, "pwm_soc_backlight"); + desc = "SoC"; + } + if (IS_ERR(panel->backlight.pwm)) { - DRM_ERROR("Failed to own the pwm chip\n"); + DRM_ERROR("Failed to get the %s PWM chip\n", desc); panel->backlight.pwm = NULL; return -ENODEV; } @@ -1873,6 +1882,7 @@ static int pwm_setup_backlight(struct intel_connector *connector, CRC_PMIC_PWM_PERIOD_NS); panel->backlight.enabled = panel->backlight.level != 0; + DRM_INFO("Using %s PWM for LCD backlight control\n", desc); return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_pipe_crc.c b/drivers/gpu/drm/i915/display/intel_pipe_crc.c index 1e2c4307d05a..520408e83681 100644 --- a/drivers/gpu/drm/i915/display/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.c @@ -30,7 +30,7 @@ #include <linux/seq_file.h> #include "intel_atomic.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_pipe_crc.h" static const char * const pipe_crc_sources[] = { @@ -98,7 +98,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv, break; case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: - dig_port = enc_to_dig_port(&encoder->base); + dig_port = enc_to_dig_port(encoder); switch (dig_port->base.port) { case PORT_B: *source = INTEL_PIPE_CRC_SOURCE_DP_B; @@ -309,13 +309,13 @@ retry: goto put_state; } - pipe_config->base.mode_changed = pipe_config->has_psr; + pipe_config->uapi.mode_changed = pipe_config->has_psr; pipe_config->crc_enabled = enable; if (IS_HASWELL(dev_priv) && - pipe_config->base.active && crtc->pipe == PIPE_A && + pipe_config->hw.active && crtc->pipe == PIPE_A && pipe_config->cpu_transcoder == TRANSCODER_EDP) - pipe_config->base.mode_changed = true; + pipe_config->uapi.mode_changed = true; ret = drm_atomic_commit(state); @@ -667,5 +667,5 @@ void intel_crtc_disable_pipe_crc(struct intel_crtc *intel_crtc) I915_WRITE(PIPE_CRC_CTL(crtc->index), 0); POSTING_READ(PIPE_CRC_CTL(crtc->index)); - synchronize_irq(dev_priv->drm.irq); + intel_synchronize_irq(dev_priv); } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 69d908e6a050..89c9cf5f38d2 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -26,7 +26,8 @@ #include "display/intel_dp.h" #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_atomic.h" +#include "intel_display_types.h" #include "intel_psr.h" #include "intel_sprite.h" @@ -76,7 +77,7 @@ static bool 
intel_psr2_enabled(struct drm_i915_private *dev_priv, const struct intel_crtc_state *crtc_state) { /* Cannot enable DSC and PSR2 simultaneously */ - WARN_ON(crtc_state->dsc_params.compression_enable && + WARN_ON(crtc_state->dsc.compression_enable && crtc_state->has_psr2); switch (dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK) { @@ -88,48 +89,35 @@ static bool intel_psr2_enabled(struct drm_i915_private *dev_priv, } } -static int edp_psr_shift(enum transcoder cpu_transcoder) +static void psr_irq_control(struct drm_i915_private *dev_priv) { - switch (cpu_transcoder) { - case TRANSCODER_A: - return EDP_PSR_TRANSCODER_A_SHIFT; - case TRANSCODER_B: - return EDP_PSR_TRANSCODER_B_SHIFT; - case TRANSCODER_C: - return EDP_PSR_TRANSCODER_C_SHIFT; - default: - MISSING_CASE(cpu_transcoder); - /* fallthrough */ - case TRANSCODER_EDP: - return EDP_PSR_TRANSCODER_EDP_SHIFT; - } -} + enum transcoder trans_shift; + u32 mask, val; + i915_reg_t imr_reg; -void intel_psr_irq_control(struct drm_i915_private *dev_priv, u32 debug) -{ - u32 debug_mask, mask; - enum transcoder cpu_transcoder; - u32 transcoders = BIT(TRANSCODER_EDP); - - if (INTEL_GEN(dev_priv) >= 8) - transcoders |= BIT(TRANSCODER_A) | - BIT(TRANSCODER_B) | - BIT(TRANSCODER_C); - - debug_mask = 0; - mask = 0; - for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { - int shift = edp_psr_shift(cpu_transcoder); - - mask |= EDP_PSR_ERROR(shift); - debug_mask |= EDP_PSR_POST_EXIT(shift) | - EDP_PSR_PRE_ENTRY(shift); + /* + * gen12+ has registers relative to transcoder and one per transcoder + * using the same bit definition: handle it as TRANSCODER_EDP to force + * 0 shift in bit definition + */ + if (INTEL_GEN(dev_priv) >= 12) { + trans_shift = 0; + imr_reg = TRANS_PSR_IMR(dev_priv->psr.transcoder); + } else { + trans_shift = dev_priv->psr.transcoder; + imr_reg = EDP_PSR_IMR; } - if (debug & I915_PSR_DEBUG_IRQ) - mask |= debug_mask; + mask = EDP_PSR_ERROR(trans_shift); + if (dev_priv->psr.debug & I915_PSR_DEBUG_IRQ) + mask |= EDP_PSR_POST_EXIT(trans_shift) | + EDP_PSR_PRE_ENTRY(trans_shift); - I915_WRITE(EDP_PSR_IMR, ~mask); + /* Warning: it is masking/setting reserved bits too */ + val = I915_READ(imr_reg); + val &= ~EDP_PSR_TRANS_MASK(trans_shift); + val |= ~mask; + I915_WRITE(imr_reg, val); } static void psr_event_print(u32 val, bool psr2_enabled) @@ -171,60 +159,58 @@ static void psr_event_print(u32 val, bool psr2_enabled) void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) { - u32 transcoders = BIT(TRANSCODER_EDP); - enum transcoder cpu_transcoder; + enum transcoder cpu_transcoder = dev_priv->psr.transcoder; + enum transcoder trans_shift; + i915_reg_t imr_reg; ktime_t time_ns = ktime_get(); - u32 mask = 0; - - if (INTEL_GEN(dev_priv) >= 8) - transcoders |= BIT(TRANSCODER_A) | - BIT(TRANSCODER_B) | - BIT(TRANSCODER_C); - for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { - int shift = edp_psr_shift(cpu_transcoder); + if (INTEL_GEN(dev_priv) >= 12) { + trans_shift = 0; + imr_reg = TRANS_PSR_IMR(dev_priv->psr.transcoder); + } else { + trans_shift = dev_priv->psr.transcoder; + imr_reg = EDP_PSR_IMR; + } - if (psr_iir & EDP_PSR_ERROR(shift)) { - DRM_WARN("[transcoder %s] PSR aux error\n", - transcoder_name(cpu_transcoder)); + if (psr_iir & EDP_PSR_PRE_ENTRY(trans_shift)) { + dev_priv->psr.last_entry_attempt = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", + transcoder_name(cpu_transcoder)); + } - dev_priv->psr.irq_aux_error = true; + if (psr_iir & 
EDP_PSR_POST_EXIT(trans_shift)) { + dev_priv->psr.last_exit = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", + transcoder_name(cpu_transcoder)); - /* - * If this interruption is not masked it will keep - * interrupting so fast that it prevents the scheduled - * work to run. - * Also after a PSR error, we don't want to arm PSR - * again so we don't care about unmask the interruption - * or unset irq_aux_error. - */ - mask |= EDP_PSR_ERROR(shift); - } + if (INTEL_GEN(dev_priv) >= 9) { + u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); + bool psr2_enabled = dev_priv->psr.psr2_enabled; - if (psr_iir & EDP_PSR_PRE_ENTRY(shift)) { - dev_priv->psr.last_entry_attempt = time_ns; - DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", - transcoder_name(cpu_transcoder)); + I915_WRITE(PSR_EVENT(cpu_transcoder), val); + psr_event_print(val, psr2_enabled); } + } - if (psr_iir & EDP_PSR_POST_EXIT(shift)) { - dev_priv->psr.last_exit = time_ns; - DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", - transcoder_name(cpu_transcoder)); + if (psr_iir & EDP_PSR_ERROR(trans_shift)) { + u32 val; - if (INTEL_GEN(dev_priv) >= 9) { - u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); - bool psr2_enabled = dev_priv->psr.psr2_enabled; + DRM_WARN("[transcoder %s] PSR aux error\n", + transcoder_name(cpu_transcoder)); - I915_WRITE(PSR_EVENT(cpu_transcoder), val); - psr_event_print(val, psr2_enabled); - } - } - } + dev_priv->psr.irq_aux_error = true; - if (mask) { - mask |= I915_READ(EDP_PSR_IMR); - I915_WRITE(EDP_PSR_IMR, mask); + /* + * If this interruption is not masked it will keep + * interrupting so fast that it prevents the scheduled + * work to run. + * Also after a PSR error, we don't want to arm PSR + * again so we don't care about unmask the interruption + * or unset irq_aux_error. 
+ */ + val = I915_READ(imr_reg); + val |= EDP_PSR_ERROR(trans_shift); + I915_WRITE(imr_reg, val); schedule_work(&dev_priv->psr.work); } @@ -283,6 +269,11 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dp_to_dig_port(intel_dp)->base.base.dev); + if (dev_priv->psr.dp) { + DRM_WARN("More than one eDP panel found, PSR support should be extended\n"); + return; + } + drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, sizeof(intel_dp->psr_dpcd)); @@ -305,7 +296,6 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) dev_priv->psr.sink_sync_latency = intel_dp_get_sink_sync_latency(intel_dp); - WARN_ON(dev_priv->psr.dp); dev_priv->psr.dp = intel_dp; if (INTEL_GEN(dev_priv) >= 9 && @@ -390,7 +380,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp) BUILD_BUG_ON(sizeof(aux_msg) > 20); for (i = 0; i < sizeof(aux_msg); i += 4) - I915_WRITE(EDP_PSR_AUX_DATA(i >> 2), + I915_WRITE(EDP_PSR_AUX_DATA(dev_priv->psr.transcoder, i >> 2), intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i)); aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0); @@ -401,7 +391,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp) /* Select only valid bits for SRD_AUX_CTL */ aux_ctl &= psr_aux_mask; - I915_WRITE(EDP_PSR_AUX_CTL, aux_ctl); + I915_WRITE(EDP_PSR_AUX_CTL(dev_priv->psr.transcoder), aux_ctl); } static void intel_psr_enable_sink(struct intel_dp *intel_dp) @@ -412,7 +402,9 @@ static void intel_psr_enable_sink(struct intel_dp *intel_dp) /* Enable ALPM at sink for psr2 */ if (dev_priv->psr.psr2_enabled) { drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG, - DP_ALPM_ENABLE); + DP_ALPM_ENABLE | + DP_ALPM_LOCK_ERROR_IRQ_HPD_ENABLE); + dpcd_val |= DP_PSR_ENABLE_PSR2 | DP_PSR_IRQ_HPD_WITH_CRC_ERRORS; } else { if (dev_priv->psr.link_standby) @@ -491,8 +483,9 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) if (INTEL_GEN(dev_priv) >= 8) val |= EDP_PSR_CRC_ENABLE; - val |= I915_READ(EDP_PSR_CTL) & EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK; - I915_WRITE(EDP_PSR_CTL, val); + val |= (I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)) & + EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK); + I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), val); } static void hsw_activate_psr2(struct intel_dp *intel_dp) @@ -528,38 +521,128 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) * PSR2 HW is incorrectly using EDP_PSR_TP1_TP3_SEL and BSpec is * recommending keep this bit unset while PSR2 is enabled. 
*/ - I915_WRITE(EDP_PSR_CTL, 0); + I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), 0); + + I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); +} + +static bool +transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder trans) +{ + if (INTEL_GEN(dev_priv) < 9) + return false; + else if (INTEL_GEN(dev_priv) >= 12) + return trans == TRANSCODER_A; + else + return trans == TRANSCODER_EDP; +} + +static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate) +{ + if (!cstate || !cstate->hw.active) + return 0; + + return DIV_ROUND_UP(1000 * 1000, + drm_mode_vrefresh(&cstate->hw.adjusted_mode)); +} + +static void psr2_program_idle_frames(struct drm_i915_private *dev_priv, + u32 idle_frames) +{ + u32 val; + + idle_frames <<= EDP_PSR2_IDLE_FRAME_SHIFT; + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); + val &= ~EDP_PSR2_IDLE_FRAME_MASK; + val |= idle_frames; + I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); +} + +static void tgl_psr2_enable_dc3co(struct drm_i915_private *dev_priv) +{ + psr2_program_idle_frames(dev_priv, 0); + intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO); +} + +static void tgl_psr2_disable_dc3co(struct drm_i915_private *dev_priv) +{ + int idle_frames; + + intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + /* + * Restore PSR2 idle frame let's use 6 as the minimum to cover all known + * cases including the off-by-one issue that HW has in some cases. + */ + idle_frames = max(6, dev_priv->vbt.psr.idle_frames); + idle_frames = max(idle_frames, dev_priv->psr.sink_sync_latency + 1); + psr2_program_idle_frames(dev_priv, idle_frames); +} + +static void tgl_dc5_idle_thread(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), psr.idle_work.work); + + mutex_lock(&dev_priv->psr.lock); + /* If delayed work is pending, it is not idle */ + if (delayed_work_pending(&dev_priv->psr.idle_work)) + goto unlock; + + DRM_DEBUG_KMS("DC5/6 idle thread\n"); + tgl_psr2_disable_dc3co(dev_priv); +unlock: + mutex_unlock(&dev_priv->psr.lock); +} + +static void tgl_disallow_dc3co_on_psr2_exit(struct drm_i915_private *dev_priv) +{ + if (!dev_priv->psr.dc3co_enabled) + return; - I915_WRITE(EDP_PSR2_CTL, val); + cancel_delayed_work(&dev_priv->psr.idle_work); + /* Before PSR2 exit disallow dc3co*/ + tgl_psr2_disable_dc3co(dev_priv); } static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - int crtc_hdisplay = crtc_state->base.adjusted_mode.crtc_hdisplay; - int crtc_vdisplay = crtc_state->base.adjusted_mode.crtc_vdisplay; - int psr_max_h = 0, psr_max_v = 0; + int crtc_hdisplay = crtc_state->hw.adjusted_mode.crtc_hdisplay; + int crtc_vdisplay = crtc_state->hw.adjusted_mode.crtc_vdisplay; + int psr_max_h = 0, psr_max_v = 0, max_bpp = 0; if (!dev_priv->psr.sink_psr2_support) return false; + if (!transcoder_has_psr2(dev_priv, crtc_state->cpu_transcoder)) { + DRM_DEBUG_KMS("PSR2 not supported in transcoder %s\n", + transcoder_name(crtc_state->cpu_transcoder)); + return false; + } + /* * DSC and PSR2 cannot be enabled simultaneously. If a requested * resolution requires DSC to be enabled, priority is given to DSC * over PSR2. 
*/ - if (crtc_state->dsc_params.compression_enable) { + if (crtc_state->dsc.compression_enable) { DRM_DEBUG_KMS("PSR2 cannot be enabled since DSC is enabled\n"); return false; } - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 12) { + psr_max_h = 5120; + psr_max_v = 3200; + max_bpp = 30; + } else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { psr_max_h = 4096; psr_max_v = 2304; + max_bpp = 24; } else if (IS_GEN(dev_priv, 9)) { psr_max_h = 3640; psr_max_v = 2304; + max_bpp = 24; } if (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v) { @@ -569,6 +652,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } + if (crtc_state->pipe_bpp > max_bpp) { + DRM_DEBUG_KMS("PSR2 not enabled, pipe bpp %d > max supported %d\n", + crtc_state->pipe_bpp, max_bpp); + return false; + } + /* * HW sends SU blocks of size four scan lines, which means the starting * X coordinate and Y granularity requirements will always be met. We @@ -595,7 +684,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; int psr_setup_time; if (!CAN_PSR(dev_priv)) @@ -606,10 +695,9 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, /* * HSW spec explicitly says PSR is tied to port A. - * BDW+ platforms with DDI implementation of PSR have different - * PSR registers per transcoder and we only implement transcoder EDP - * ones. Since by Display design transcoder EDP is tied to port A - * we can safely escape based on the port A. + * BDW+ platforms have a instance of PSR registers per transcoder but + * for now it only supports one instance of PSR, so lets keep it + * hardcoded to PORT_A */ if (dig_port->base.port != PORT_A) { DRM_DEBUG_KMS("PSR condition failed: Port not supported\n"); @@ -648,9 +736,10 @@ static void intel_psr_activate(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (INTEL_GEN(dev_priv) >= 9) - WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); - WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); + if (transcoder_has_psr2(dev_priv, dev_priv->psr.transcoder)) + WARN_ON(I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)) & EDP_PSR2_ENABLE); + + WARN_ON(I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)) & EDP_PSR_ENABLE); WARN_ON(dev_priv->psr.active); lockdep_assert_held(&dev_priv->psr.lock); @@ -663,25 +752,6 @@ static void intel_psr_activate(struct intel_dp *intel_dp) dev_priv->psr.active = true; } -static i915_reg_t gen9_chicken_trans_reg(struct drm_i915_private *dev_priv, - enum transcoder cpu_transcoder) -{ - static const i915_reg_t regs[] = { - [TRANSCODER_A] = CHICKEN_TRANS_A, - [TRANSCODER_B] = CHICKEN_TRANS_B, - [TRANSCODER_C] = CHICKEN_TRANS_C, - [TRANSCODER_EDP] = CHICKEN_TRANS_EDP, - }; - - WARN_ON(INTEL_GEN(dev_priv) < 9); - - if (WARN_ON(cpu_transcoder >= ARRAY_SIZE(regs) || - !regs[cpu_transcoder].reg)) - cpu_transcoder = TRANSCODER_A; - - return regs[cpu_transcoder]; -} - static void intel_psr_enable_source(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { @@ -697,8 +767,7 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, if (dev_priv->psr.psr2_enabled && (IS_GEN(dev_priv, 9) && !IS_GEMINILAKE(dev_priv))) { - i915_reg_t reg = gen9_chicken_trans_reg(dev_priv, - cpu_transcoder); + i915_reg_t 
reg = CHICKEN_TRANS(cpu_transcoder); u32 chicken = I915_READ(reg); chicken |= PSR2_VSC_ENABLE_PROG_HEADER | @@ -720,19 +789,46 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, if (INTEL_GEN(dev_priv) < 11) mask |= EDP_PSR_DEBUG_MASK_DISP_REG_WRITE; - I915_WRITE(EDP_PSR_DEBUG, mask); + I915_WRITE(EDP_PSR_DEBUG(dev_priv->psr.transcoder), mask); + + psr_irq_control(dev_priv); } static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, const struct intel_crtc_state *crtc_state) { struct intel_dp *intel_dp = dev_priv->psr.dp; + u32 val; WARN_ON(dev_priv->psr.enabled); dev_priv->psr.psr2_enabled = intel_psr2_enabled(dev_priv, crtc_state); dev_priv->psr.busy_frontbuffer_bits = 0; - dev_priv->psr.pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + dev_priv->psr.pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe; + dev_priv->psr.dc3co_enabled = !!crtc_state->dc3co_exitline; + dev_priv->psr.dc3co_exit_delay = intel_get_frame_time_us(crtc_state); + dev_priv->psr.transcoder = crtc_state->cpu_transcoder; + + /* + * If a PSR error happened and the driver is reloaded, the EDP_PSR_IIR + * will still keep the error set even after the reset done in the + * irq_preinstall and irq_uninstall hooks. + * And enabling in this situation cause the screen to freeze in the + * first time that PSR HW tries to activate so lets keep PSR disabled + * to avoid any rendering problems. + */ + if (INTEL_GEN(dev_priv) >= 12) { + val = I915_READ(TRANS_PSR_IIR(dev_priv->psr.transcoder)); + val &= EDP_PSR_ERROR(0); + } else { + val = I915_READ(EDP_PSR_IIR); + val &= EDP_PSR_ERROR(dev_priv->psr.transcoder); + } + if (val) { + dev_priv->psr.sink_not_reliable = true; + DRM_DEBUG_KMS("PSR interruption error set, not enabling PSR\n"); + return; + } DRM_DEBUG_KMS("Enabling PSR%s\n", dev_priv->psr.psr2_enabled ? 
"2" : "1"); @@ -782,20 +878,28 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) u32 val; if (!dev_priv->psr.active) { - if (INTEL_GEN(dev_priv) >= 9) - WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); - WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); + if (transcoder_has_psr2(dev_priv, dev_priv->psr.transcoder)) { + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); + WARN_ON(val & EDP_PSR2_ENABLE); + } + + val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)); + WARN_ON(val & EDP_PSR_ENABLE); + return; } if (dev_priv->psr.psr2_enabled) { - val = I915_READ(EDP_PSR2_CTL); + tgl_disallow_dc3co_on_psr2_exit(dev_priv); + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); WARN_ON(!(val & EDP_PSR2_ENABLE)); - I915_WRITE(EDP_PSR2_CTL, val & ~EDP_PSR2_ENABLE); + val &= ~EDP_PSR2_ENABLE; + I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); } else { - val = I915_READ(EDP_PSR_CTL); + val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)); WARN_ON(!(val & EDP_PSR_ENABLE)); - I915_WRITE(EDP_PSR_CTL, val & ~EDP_PSR_ENABLE); + val &= ~EDP_PSR_ENABLE; + I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), val); } dev_priv->psr.active = false; } @@ -817,21 +921,24 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) intel_psr_exit(dev_priv); if (dev_priv->psr.psr2_enabled) { - psr_status = EDP_PSR2_STATUS; + psr_status = EDP_PSR2_STATUS(dev_priv->psr.transcoder); psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; } else { - psr_status = EDP_PSR_STATUS; + psr_status = EDP_PSR_STATUS(dev_priv->psr.transcoder); psr_status_mask = EDP_PSR_STATUS_STATE_MASK; } /* Wait till PSR is idle */ - if (intel_wait_for_register(&dev_priv->uncore, - psr_status, psr_status_mask, 0, 2000)) + if (intel_de_wait_for_clear(dev_priv, psr_status, + psr_status_mask, 2000)) DRM_ERROR("Timed out waiting PSR idle state\n"); /* Disable PSR on Sink */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); + if (dev_priv->psr.psr2_enabled) + drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG, 0); + dev_priv->psr.enabled = false; } @@ -859,6 +966,7 @@ void intel_psr_disable(struct intel_dp *intel_dp, mutex_unlock(&dev_priv->psr.lock); cancel_work_sync(&dev_priv->psr.work); + cancel_delayed_work_sync(&dev_priv->psr.idle_work); } static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv) @@ -946,7 +1054,7 @@ unlock: int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state, u32 *out_value) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); if (!dev_priv->psr.enabled || !new_crtc_state->has_psr) @@ -963,7 +1071,8 @@ int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state, * defensive enough to cover everything. 
 */
-	return __intel_wait_for_register(&dev_priv->uncore, EDP_PSR_STATUS,
+	return __intel_wait_for_register(&dev_priv->uncore,
+					 EDP_PSR_STATUS(dev_priv->psr.transcoder),
 					 EDP_PSR_STATUS_STATE_MASK,
 					 EDP_PSR_STATUS_STATE_IDLE, 2, 50,
 					 out_value);
@@ -979,16 +1088,16 @@ static bool __psr_wait_for_idle_locked(struct drm_i915_private *dev_priv)
 		return false;
 
 	if (dev_priv->psr.psr2_enabled) {
-		reg = EDP_PSR2_STATUS;
+		reg = EDP_PSR2_STATUS(dev_priv->psr.transcoder);
 		mask = EDP_PSR2_STATUS_STATE_MASK;
 	} else {
-		reg = EDP_PSR_STATUS;
+		reg = EDP_PSR_STATUS(dev_priv->psr.transcoder);
 		mask = EDP_PSR_STATUS_STATE_MASK;
 	}
 
 	mutex_unlock(&dev_priv->psr.lock);
 
-	err = intel_wait_for_register(&dev_priv->uncore, reg, mask, 0, 50);
+	err = intel_de_wait_for_clear(dev_priv, reg, mask, 50);
 	if (err)
 		DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n");
 
@@ -1002,7 +1111,7 @@ static int intel_psr_fastset_force(struct drm_i915_private *dev_priv)
 	struct drm_device *dev = &dev_priv->drm;
 	struct drm_modeset_acquire_ctx ctx;
 	struct drm_atomic_state *state;
-	struct drm_crtc *crtc;
+	struct intel_crtc *crtc;
 	int err;
 
 	state = drm_atomic_state_alloc(dev);
@@ -1013,21 +1122,18 @@ static int intel_psr_fastset_force(struct drm_i915_private *dev_priv)
 	state->acquire_ctx = &ctx;
 
 retry:
-	drm_for_each_crtc(crtc, dev) {
-		struct drm_crtc_state *crtc_state;
-		struct intel_crtc_state *intel_crtc_state;
+	for_each_intel_crtc(dev, crtc) {
+		struct intel_crtc_state *crtc_state =
+			intel_atomic_get_crtc_state(state, crtc);
 
-		crtc_state = drm_atomic_get_crtc_state(state, crtc);
 		if (IS_ERR(crtc_state)) {
 			err = PTR_ERR(crtc_state);
 			goto error;
 		}
 
-		intel_crtc_state = to_intel_crtc_state(crtc_state);
-
-		if (crtc_state->active && intel_crtc_state->has_psr) {
+		if (crtc_state->hw.active && crtc_state->has_psr) {
 			/* Mark mode as changed to trigger a pipe->update() */
-			crtc_state->mode_changed = true;
+			crtc_state->uapi.mode_changed = true;
 			break;
 		}
 	}
@@ -1067,7 +1173,13 @@ int intel_psr_debug_set(struct drm_i915_private *dev_priv, u64 val)
 	old_mode = dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK;
 	dev_priv->psr.debug = val;
-	intel_psr_irq_control(dev_priv, dev_priv->psr.debug);
+
+	/*
+	 * Do it right away if it's already enabled, otherwise it will be done
+	 * when enabling the source.
+	 */
+	if (dev_priv->psr.enabled)
+		psr_irq_control(dev_priv);
 
 	mutex_unlock(&dev_priv->psr.lock);
 
@@ -1159,6 +1271,44 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv,
 	mutex_unlock(&dev_priv->psr.lock);
 }
 
+/*
+ * Once we rely completely on PSR2 S/W tracking in the future,
+ * intel_psr_flush() will also invalidate and flush the PSR for the
+ * ORIGIN_FLIP event, so tgl_dc3co_flush() will therefore need to be
+ * changed accordingly.
+ */
+static void
+tgl_dc3co_flush(struct drm_i915_private *dev_priv,
+		unsigned int frontbuffer_bits, enum fb_op_origin origin)
+{
+	u32 delay;
+
+	mutex_lock(&dev_priv->psr.lock);
+
+	if (!dev_priv->psr.dc3co_enabled)
+		goto unlock;
+
+	if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active)
+		goto unlock;
+
+	/*
+	 * Every frontbuffer flush flip event modifies the delay of the delayed
+	 * work; when the delayed work finally runs, the display has been idle.
+	 */
+	if (!(frontbuffer_bits &
+	      INTEL_FRONTBUFFER_ALL_MASK(dev_priv->psr.pipe)))
+		goto unlock;
+
+	tgl_psr2_enable_dc3co(dev_priv);
+	/* DC5/DC6 required idle frames = 6 */
+	delay = 6 * dev_priv->psr.dc3co_exit_delay;
+	mod_delayed_work(system_wq, &dev_priv->psr.idle_work,
+			 usecs_to_jiffies(delay));
+
+unlock:
+	mutex_unlock(&dev_priv->psr.lock);
+}
+
 /**
  * intel_psr_flush - Flush PSR
  * @dev_priv: i915 device
@@ -1178,8 +1328,10 @@ void intel_psr_flush(struct drm_i915_private *dev_priv,
 	if (!CAN_PSR(dev_priv))
 		return;
 
-	if (origin == ORIGIN_FLIP)
+	if (origin == ORIGIN_FLIP) {
+		tgl_dc3co_flush(dev_priv, frontbuffer_bits, origin);
 		return;
+	}
 
 	mutex_lock(&dev_priv->psr.lock);
 	if (!dev_priv->psr.enabled) {
@@ -1208,53 +1360,111 @@ void intel_psr_flush(struct drm_i915_private *dev_priv,
  */
 void intel_psr_init(struct drm_i915_private *dev_priv)
 {
-	u32 val;
-
 	if (!HAS_PSR(dev_priv))
 		return;
 
-	dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ?
-		HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE;
-
 	if (!dev_priv->psr.sink_support)
 		return;
 
+	if (IS_HASWELL(dev_priv))
+		/*
+		 * HSW doesn't have PSR registers in the same space as the transcoders,
+		 * so set this to a value that, when subtracted from the register in
+		 * transcoder space, results in the right offset for HSW.
+		 */
+		dev_priv->hsw_psr_mmio_adjust = _SRD_CTL_EDP - _HSW_EDP_PSR_BASE;
+
 	if (i915_modparams.enable_psr == -1)
 		if (INTEL_GEN(dev_priv) < 9 || !dev_priv->vbt.psr.enable)
 			i915_modparams.enable_psr = 0;
 
-	/*
-	 * If a PSR error happened and the driver is reloaded, the EDP_PSR_IIR
-	 * will still keep the error set even after the reset done in the
-	 * irq_preinstall and irq_uninstall hooks.
-	 * And enabling in this situation cause the screen to freeze in the
-	 * first time that PSR HW tries to activate so lets keep PSR disabled
-	 * to avoid any rendering problems.
-	 */
-	val = I915_READ(EDP_PSR_IIR);
-	val &= EDP_PSR_ERROR(edp_psr_shift(TRANSCODER_EDP));
-	if (val) {
-		DRM_DEBUG_KMS("PSR interruption error set\n");
-		dev_priv->psr.sink_not_reliable = true;
-	}
-
 	/* Set link_standby x link_off defaults */
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		/* HSW and BDW require workarounds that we don't implement.
*/ dev_priv->psr.link_standby = false; - else - /* For new platforms let's respect VBT back again */ + else if (INTEL_GEN(dev_priv) < 12) + /* For new platforms up to TGL let's respect VBT back again */ dev_priv->psr.link_standby = dev_priv->vbt.psr.full_link; INIT_WORK(&dev_priv->psr.work, intel_psr_work); + INIT_DELAYED_WORK(&dev_priv->psr.idle_work, tgl_dc5_idle_thread); mutex_init(&dev_priv->psr.lock); } -void intel_psr_short_pulse(struct intel_dp *intel_dp) +static int psr_get_status_and_error_status(struct intel_dp *intel_dp, + u8 *status, u8 *error_status) +{ + struct drm_dp_aux *aux = &intel_dp->aux; + int ret; + + ret = drm_dp_dpcd_readb(aux, DP_PSR_STATUS, status); + if (ret != 1) + return ret; + + ret = drm_dp_dpcd_readb(aux, DP_PSR_ERROR_STATUS, error_status); + if (ret != 1) + return ret; + + *status = *status & DP_PSR_SINK_STATE_MASK; + + return 0; +} + +static void psr_alpm_check(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct drm_dp_aux *aux = &intel_dp->aux; + struct i915_psr *psr = &dev_priv->psr; + u8 val; + int r; + + if (!psr->psr2_enabled) + return; + + r = drm_dp_dpcd_readb(aux, DP_RECEIVER_ALPM_STATUS, &val); + if (r != 1) { + DRM_ERROR("Error reading ALPM status\n"); + return; + } + + if (val & DP_ALPM_LOCK_TIMEOUT_ERROR) { + intel_psr_disable_locked(intel_dp); + psr->sink_not_reliable = true; + DRM_DEBUG_KMS("ALPM lock timeout error, disabling PSR\n"); + + /* Clearing error */ + drm_dp_dpcd_writeb(aux, DP_RECEIVER_ALPM_STATUS, val); + } +} + +static void psr_capability_changed_check(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct i915_psr *psr = &dev_priv->psr; u8 val; + int r; + + r = drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_ESI, &val); + if (r != 1) { + DRM_ERROR("Error reading DP_PSR_ESI\n"); + return; + } + + if (val & DP_PSR_CAPS_CHANGE) { + intel_psr_disable_locked(intel_dp); + psr->sink_not_reliable = true; + DRM_DEBUG_KMS("Sink PSR capability changed, disabling PSR\n"); + + /* Clearing it */ + drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_ESI, val); + } +} + +void intel_psr_short_pulse(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct i915_psr *psr = &dev_priv->psr; + u8 status, error_status; const u8 errors = DP_PSR_RFB_STORAGE_ERROR | DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR | DP_PSR_LINK_CRC_ERROR; @@ -1267,38 +1477,34 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp) if (!psr->enabled || psr->dp != intel_dp) goto exit; - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_STATUS, &val) != 1) { - DRM_ERROR("PSR_STATUS dpcd read failed\n"); + if (psr_get_status_and_error_status(intel_dp, &status, &error_status)) { + DRM_ERROR("Error reading PSR status or error status\n"); goto exit; } - if ((val & DP_PSR_SINK_STATE_MASK) == DP_PSR_SINK_INTERNAL_ERROR) { - DRM_DEBUG_KMS("PSR sink internal error, disabling PSR\n"); + if (status == DP_PSR_SINK_INTERNAL_ERROR || (error_status & errors)) { intel_psr_disable_locked(intel_dp); psr->sink_not_reliable = true; } - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_ERROR_STATUS, &val) != 1) { - DRM_ERROR("PSR_ERROR_STATUS dpcd read failed\n"); - goto exit; - } - - if (val & DP_PSR_RFB_STORAGE_ERROR) + if (status == DP_PSR_SINK_INTERNAL_ERROR && !error_status) + DRM_DEBUG_KMS("PSR sink internal error, disabling PSR\n"); + if (error_status & DP_PSR_RFB_STORAGE_ERROR) DRM_DEBUG_KMS("PSR RFB storage error, disabling PSR\n"); - if (val & DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR) + if (error_status 
& DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR) DRM_DEBUG_KMS("PSR VSC SDP uncorrectable error, disabling PSR\n"); - if (val & DP_PSR_LINK_CRC_ERROR) - DRM_ERROR("PSR Link CRC error, disabling PSR\n"); + if (error_status & DP_PSR_LINK_CRC_ERROR) + DRM_DEBUG_KMS("PSR Link CRC error, disabling PSR\n"); - if (val & ~errors) + if (error_status & ~errors) DRM_ERROR("PSR_ERROR_STATUS unhandled errors %x\n", - val & ~errors); - if (val & errors) { - intel_psr_disable_locked(intel_dp); - psr->sink_not_reliable = true; - } + error_status & ~errors); /* clear status register */ - drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_ERROR_STATUS, val); + drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_ERROR_STATUS, error_status); + + psr_alpm_check(intel_dp); + psr_capability_changed_check(intel_dp); + exit: mutex_unlock(&psr->lock); } @@ -1317,3 +1523,27 @@ bool intel_psr_enabled(struct intel_dp *intel_dp) return ret; } + +void intel_psr_atomic_check(struct drm_connector *connector, + struct drm_connector_state *old_state, + struct drm_connector_state *new_state) +{ + struct drm_i915_private *dev_priv = to_i915(connector->dev); + struct intel_connector *intel_connector; + struct intel_digital_port *dig_port; + struct drm_crtc_state *crtc_state; + + if (!CAN_PSR(dev_priv) || !new_state->crtc || + dev_priv->psr.initially_probed) + return; + + intel_connector = to_intel_connector(connector); + dig_port = enc_to_dig_port(intel_connector->encoder); + if (dev_priv->psr.dp != &dig_port->dp) + return; + + crtc_state = drm_atomic_get_new_crtc_state(new_state->state, + new_state->crtc); + crtc_state->mode_changed = true; + dev_priv->psr.initially_probed = true; +} diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index dc818826f36d..c58a1d438808 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -8,6 +8,8 @@ #include "intel_frontbuffer.h" +struct drm_connector; +struct drm_connector_state; struct drm_i915_private; struct intel_crtc_state; struct intel_dp; @@ -30,11 +32,13 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, void intel_psr_init(struct drm_i915_private *dev_priv); void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state); -void intel_psr_irq_control(struct drm_i915_private *dev_priv, u32 debug); void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir); void intel_psr_short_pulse(struct intel_dp *intel_dp); int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state, u32 *out_value); bool intel_psr_enabled(struct intel_dp *intel_dp); +void intel_psr_atomic_check(struct drm_connector *connector, + struct drm_connector_state *old_state, + struct drm_connector_state *new_state); #endif /* __INTEL_PSR_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c index 0b749c28541f..399b1542509f 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -5,7 +5,7 @@ #include <linux/dmi.h> -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_quirks.h" /* diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index ceda03e5a3d4..e8819fd21e03 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -39,7 +39,7 @@ #include "i915_drv.h" #include "intel_atomic.h" #include "intel_connector.h" -#include "intel_drv.h" +#include "intel_display_types.h" 
#include "intel_fifo_underrun.h" #include "intel_gmbus.h" #include "intel_hdmi.h" @@ -180,7 +180,7 @@ static struct intel_sdvo *to_sdvo(struct intel_encoder *encoder) return container_of(encoder, struct intel_sdvo, base); } -static struct intel_sdvo *intel_attached_sdvo(struct drm_connector *connector) +static struct intel_sdvo *intel_attached_sdvo(struct intel_connector *connector) { return to_sdvo(intel_attached_encoder(connector)); } @@ -274,130 +274,145 @@ static bool intel_sdvo_read_byte(struct intel_sdvo *intel_sdvo, u8 addr, u8 *ch) return false; } -#define SDVO_CMD_NAME_ENTRY(cmd) {cmd, #cmd} +#define SDVO_CMD_NAME_ENTRY(cmd_) { .cmd = SDVO_CMD_ ## cmd_, .name = #cmd_ } + /** Mapping of command numbers to names, for debug output */ -static const struct _sdvo_cmd_name { +static const struct { u8 cmd; const char *name; } __attribute__ ((packed)) sdvo_cmd_names[] = { - SDVO_CMD_NAME_ENTRY(SDVO_CMD_RESET), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DEVICE_CAPS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FIRMWARE_REV), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TRAINED_INPUTS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_OUTPUTS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_OUTPUTS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_IN_OUT_MAP), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_IN_OUT_MAP), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ATTACHED_DISPLAYS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HOT_PLUG_SUPPORT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_HOT_PLUG), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_HOT_PLUG), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INTERRUPT_EVENT_SOURCE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_INPUT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_OUTPUT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART1), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART2), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART2), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART1), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART2), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART1), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART2), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_CREATE_PREFERRED_INPUT_TIMING), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART1), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART2), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_PIXEL_CLOCK_RANGE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_PIXEL_CLOCK_RANGE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_CLOCK_RATE_MULTS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CLOCK_RATE_MULT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CLOCK_RATE_MULT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_TV_FORMATS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_FORMAT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_FORMAT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_POWER_STATES), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POWER_STATE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODER_POWER_STATE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DISPLAY_POWER_STATE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTROL_BUS_SWITCH), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS), + SDVO_CMD_NAME_ENTRY(RESET), + SDVO_CMD_NAME_ENTRY(GET_DEVICE_CAPS), + SDVO_CMD_NAME_ENTRY(GET_FIRMWARE_REV), + SDVO_CMD_NAME_ENTRY(GET_TRAINED_INPUTS), + SDVO_CMD_NAME_ENTRY(GET_ACTIVE_OUTPUTS), + SDVO_CMD_NAME_ENTRY(SET_ACTIVE_OUTPUTS), + SDVO_CMD_NAME_ENTRY(GET_IN_OUT_MAP), 
+ SDVO_CMD_NAME_ENTRY(SET_IN_OUT_MAP), + SDVO_CMD_NAME_ENTRY(GET_ATTACHED_DISPLAYS), + SDVO_CMD_NAME_ENTRY(GET_HOT_PLUG_SUPPORT), + SDVO_CMD_NAME_ENTRY(SET_ACTIVE_HOT_PLUG), + SDVO_CMD_NAME_ENTRY(GET_ACTIVE_HOT_PLUG), + SDVO_CMD_NAME_ENTRY(GET_INTERRUPT_EVENT_SOURCE), + SDVO_CMD_NAME_ENTRY(SET_TARGET_INPUT), + SDVO_CMD_NAME_ENTRY(SET_TARGET_OUTPUT), + SDVO_CMD_NAME_ENTRY(GET_INPUT_TIMINGS_PART1), + SDVO_CMD_NAME_ENTRY(GET_INPUT_TIMINGS_PART2), + SDVO_CMD_NAME_ENTRY(SET_INPUT_TIMINGS_PART1), + SDVO_CMD_NAME_ENTRY(SET_INPUT_TIMINGS_PART2), + SDVO_CMD_NAME_ENTRY(SET_OUTPUT_TIMINGS_PART1), + SDVO_CMD_NAME_ENTRY(SET_OUTPUT_TIMINGS_PART2), + SDVO_CMD_NAME_ENTRY(GET_OUTPUT_TIMINGS_PART1), + SDVO_CMD_NAME_ENTRY(GET_OUTPUT_TIMINGS_PART2), + SDVO_CMD_NAME_ENTRY(CREATE_PREFERRED_INPUT_TIMING), + SDVO_CMD_NAME_ENTRY(GET_PREFERRED_INPUT_TIMING_PART1), + SDVO_CMD_NAME_ENTRY(GET_PREFERRED_INPUT_TIMING_PART2), + SDVO_CMD_NAME_ENTRY(GET_INPUT_PIXEL_CLOCK_RANGE), + SDVO_CMD_NAME_ENTRY(GET_OUTPUT_PIXEL_CLOCK_RANGE), + SDVO_CMD_NAME_ENTRY(GET_SUPPORTED_CLOCK_RATE_MULTS), + SDVO_CMD_NAME_ENTRY(GET_CLOCK_RATE_MULT), + SDVO_CMD_NAME_ENTRY(SET_CLOCK_RATE_MULT), + SDVO_CMD_NAME_ENTRY(GET_SUPPORTED_TV_FORMATS), + SDVO_CMD_NAME_ENTRY(GET_TV_FORMAT), + SDVO_CMD_NAME_ENTRY(SET_TV_FORMAT), + SDVO_CMD_NAME_ENTRY(GET_SUPPORTED_POWER_STATES), + SDVO_CMD_NAME_ENTRY(GET_POWER_STATE), + SDVO_CMD_NAME_ENTRY(SET_ENCODER_POWER_STATE), + SDVO_CMD_NAME_ENTRY(SET_DISPLAY_POWER_STATE), + SDVO_CMD_NAME_ENTRY(SET_CONTROL_BUS_SWITCH), + SDVO_CMD_NAME_ENTRY(GET_SDTV_RESOLUTION_SUPPORT), + SDVO_CMD_NAME_ENTRY(GET_SCALED_HDTV_RESOLUTION_SUPPORT), + SDVO_CMD_NAME_ENTRY(GET_SUPPORTED_ENHANCEMENTS), /* Add the op code for SDVO enhancements */ - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HPOS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HPOS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HPOS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_VPOS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_VPOS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_VPOS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER_ADAPTIVE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER_ADAPTIVE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER_ADAPTIVE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER_2D), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER_2D), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER_2D), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SHARPNESS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SHARPNESS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SHARPNESS), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DOT_CRAWL), - 
SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DOT_CRAWL), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_TV_CHROMA_FILTER), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_CHROMA_FILTER), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_CHROMA_FILTER), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_TV_LUMA_FILTER), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_LUMA_FILTER), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_LUMA_FILTER), + SDVO_CMD_NAME_ENTRY(GET_MAX_HPOS), + SDVO_CMD_NAME_ENTRY(GET_HPOS), + SDVO_CMD_NAME_ENTRY(SET_HPOS), + SDVO_CMD_NAME_ENTRY(GET_MAX_VPOS), + SDVO_CMD_NAME_ENTRY(GET_VPOS), + SDVO_CMD_NAME_ENTRY(SET_VPOS), + SDVO_CMD_NAME_ENTRY(GET_MAX_SATURATION), + SDVO_CMD_NAME_ENTRY(GET_SATURATION), + SDVO_CMD_NAME_ENTRY(SET_SATURATION), + SDVO_CMD_NAME_ENTRY(GET_MAX_HUE), + SDVO_CMD_NAME_ENTRY(GET_HUE), + SDVO_CMD_NAME_ENTRY(SET_HUE), + SDVO_CMD_NAME_ENTRY(GET_MAX_CONTRAST), + SDVO_CMD_NAME_ENTRY(GET_CONTRAST), + SDVO_CMD_NAME_ENTRY(SET_CONTRAST), + SDVO_CMD_NAME_ENTRY(GET_MAX_BRIGHTNESS), + SDVO_CMD_NAME_ENTRY(GET_BRIGHTNESS), + SDVO_CMD_NAME_ENTRY(SET_BRIGHTNESS), + SDVO_CMD_NAME_ENTRY(GET_MAX_OVERSCAN_H), + SDVO_CMD_NAME_ENTRY(GET_OVERSCAN_H), + SDVO_CMD_NAME_ENTRY(SET_OVERSCAN_H), + SDVO_CMD_NAME_ENTRY(GET_MAX_OVERSCAN_V), + SDVO_CMD_NAME_ENTRY(GET_OVERSCAN_V), + SDVO_CMD_NAME_ENTRY(SET_OVERSCAN_V), + SDVO_CMD_NAME_ENTRY(GET_MAX_FLICKER_FILTER), + SDVO_CMD_NAME_ENTRY(GET_FLICKER_FILTER), + SDVO_CMD_NAME_ENTRY(SET_FLICKER_FILTER), + SDVO_CMD_NAME_ENTRY(GET_MAX_FLICKER_FILTER_ADAPTIVE), + SDVO_CMD_NAME_ENTRY(GET_FLICKER_FILTER_ADAPTIVE), + SDVO_CMD_NAME_ENTRY(SET_FLICKER_FILTER_ADAPTIVE), + SDVO_CMD_NAME_ENTRY(GET_MAX_FLICKER_FILTER_2D), + SDVO_CMD_NAME_ENTRY(GET_FLICKER_FILTER_2D), + SDVO_CMD_NAME_ENTRY(SET_FLICKER_FILTER_2D), + SDVO_CMD_NAME_ENTRY(GET_MAX_SHARPNESS), + SDVO_CMD_NAME_ENTRY(GET_SHARPNESS), + SDVO_CMD_NAME_ENTRY(SET_SHARPNESS), + SDVO_CMD_NAME_ENTRY(GET_DOT_CRAWL), + SDVO_CMD_NAME_ENTRY(SET_DOT_CRAWL), + SDVO_CMD_NAME_ENTRY(GET_MAX_TV_CHROMA_FILTER), + SDVO_CMD_NAME_ENTRY(GET_TV_CHROMA_FILTER), + SDVO_CMD_NAME_ENTRY(SET_TV_CHROMA_FILTER), + SDVO_CMD_NAME_ENTRY(GET_MAX_TV_LUMA_FILTER), + SDVO_CMD_NAME_ENTRY(GET_TV_LUMA_FILTER), + SDVO_CMD_NAME_ENTRY(SET_TV_LUMA_FILTER), /* HDMI op code */ - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_PIXEL_REPLI), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PIXEL_REPLI), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY_CAP), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_COLORIMETRY), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_AUDIO_STAT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_STAT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INDEX), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_INDEX), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INFO), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_AV_SPLIT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_AV_SPLIT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_TXRATE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_TXRATE), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_DATA), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_DATA), + SDVO_CMD_NAME_ENTRY(GET_SUPP_ENCODE), + SDVO_CMD_NAME_ENTRY(GET_ENCODE), + SDVO_CMD_NAME_ENTRY(SET_ENCODE), + SDVO_CMD_NAME_ENTRY(SET_PIXEL_REPLI), + SDVO_CMD_NAME_ENTRY(GET_PIXEL_REPLI), + SDVO_CMD_NAME_ENTRY(GET_COLORIMETRY_CAP), + SDVO_CMD_NAME_ENTRY(SET_COLORIMETRY), + SDVO_CMD_NAME_ENTRY(GET_COLORIMETRY), + SDVO_CMD_NAME_ENTRY(GET_AUDIO_ENCRYPT_PREFER), + 
SDVO_CMD_NAME_ENTRY(SET_AUDIO_STAT), + SDVO_CMD_NAME_ENTRY(GET_AUDIO_STAT), + SDVO_CMD_NAME_ENTRY(GET_HBUF_INDEX), + SDVO_CMD_NAME_ENTRY(SET_HBUF_INDEX), + SDVO_CMD_NAME_ENTRY(GET_HBUF_INFO), + SDVO_CMD_NAME_ENTRY(GET_HBUF_AV_SPLIT), + SDVO_CMD_NAME_ENTRY(SET_HBUF_AV_SPLIT), + SDVO_CMD_NAME_ENTRY(GET_HBUF_TXRATE), + SDVO_CMD_NAME_ENTRY(SET_HBUF_TXRATE), + SDVO_CMD_NAME_ENTRY(SET_HBUF_DATA), + SDVO_CMD_NAME_ENTRY(GET_HBUF_DATA), }; +#undef SDVO_CMD_NAME_ENTRY + +static const char *sdvo_cmd_name(u8 cmd) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sdvo_cmd_names); i++) { + if (cmd == sdvo_cmd_names[i].cmd) + return sdvo_cmd_names[i].name; + } + + return NULL; +} + #define SDVO_NAME(svdo) ((svdo)->port == PORT_B ? "SDVOB" : "SDVOC") static void intel_sdvo_debug_write(struct intel_sdvo *intel_sdvo, u8 cmd, const void *args, int args_len) { + const char *cmd_name; int i, pos = 0; #define BUF_LEN 256 char buffer[BUF_LEN]; @@ -412,15 +427,12 @@ static void intel_sdvo_debug_write(struct intel_sdvo *intel_sdvo, u8 cmd, for (; i < 8; i++) { BUF_PRINT(" "); } - for (i = 0; i < ARRAY_SIZE(sdvo_cmd_names); i++) { - if (cmd == sdvo_cmd_names[i].cmd) { - BUF_PRINT("(%s)", sdvo_cmd_names[i].name); - break; - } - } - if (i == ARRAY_SIZE(sdvo_cmd_names)) { + + cmd_name = sdvo_cmd_name(cmd); + if (cmd_name) + BUF_PRINT("(%s)", cmd_name); + else BUF_PRINT("(%02X)", cmd); - } BUG_ON(pos >= BUF_LEN - 1); #undef BUF_PRINT #undef BUF_LEN @@ -429,15 +441,23 @@ static void intel_sdvo_debug_write(struct intel_sdvo *intel_sdvo, u8 cmd, } static const char * const cmd_status_names[] = { - "Power on", - "Success", - "Not supported", - "Invalid arg", - "Pending", - "Target not specified", - "Scaling not supported" + [SDVO_CMD_STATUS_POWER_ON] = "Power on", + [SDVO_CMD_STATUS_SUCCESS] = "Success", + [SDVO_CMD_STATUS_NOTSUPP] = "Not supported", + [SDVO_CMD_STATUS_INVALID_ARG] = "Invalid arg", + [SDVO_CMD_STATUS_PENDING] = "Pending", + [SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED] = "Target not specified", + [SDVO_CMD_STATUS_SCALING_NOT_SUPP] = "Scaling not supported", }; +static const char *sdvo_cmd_status(u8 status) +{ + if (status < ARRAY_SIZE(cmd_status_names)) + return cmd_status_names[status]; + else + return NULL; +} + static bool __intel_sdvo_write_cmd(struct intel_sdvo *intel_sdvo, u8 cmd, const void *args, int args_len, bool unlocked) @@ -516,6 +536,7 @@ static bool intel_sdvo_write_cmd(struct intel_sdvo *intel_sdvo, u8 cmd, static bool intel_sdvo_read_response(struct intel_sdvo *intel_sdvo, void *response, int response_len) { + const char *cmd_status; u8 retry = 15; /* 5 quick checks, followed by 10 long checks */ u8 status; int i, pos = 0; @@ -562,8 +583,9 @@ static bool intel_sdvo_read_response(struct intel_sdvo *intel_sdvo, #define BUF_PRINT(args...) \ pos += snprintf(buffer + pos, max_t(int, BUF_LEN - pos, 0), args) - if (status <= SDVO_CMD_STATUS_SCALING_NOT_SUPP) - BUF_PRINT("(%s)", cmd_status_names[status]); + cmd_status = sdvo_cmd_status(status); + if (cmd_status) + BUF_PRINT("(%s)", cmd_status); else BUF_PRINT("(??? %d)", status); @@ -929,6 +951,20 @@ static bool intel_sdvo_set_audio_state(struct intel_sdvo *intel_sdvo, &audio_state, 1); } +static bool intel_sdvo_get_hbuf_size(struct intel_sdvo *intel_sdvo, + u8 *hbuf_size) +{ + if (!intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_HBUF_INFO, + hbuf_size, 1)) + return false; + + /* Buffer size is 0 based, hooray! However zero means zero. 
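The BUF_PRINT() helper used by the debug paths above accumulates into a fixed buffer by advancing pos with snprintf()'s return value; since snprintf() returns the length that would have been written, the size argument has to be clamped to zero once the buffer fills up, and the trailing BUG_ON catches truncation. A small userspace sketch of that accumulation pattern (buffer size and strings are arbitrary, not the driver's):

  #include <stdio.h>

  #define BUF_LEN 64

  int main(void)
  {
      char buffer[BUF_LEN];
      int pos = 0;

  /* Clamp the remaining space to 0 so snprintf() never sees a negative
   * size; pos keeps growing past BUF_LEN when the output is truncated. */
  #define BUF_PRINT(...) \
      pos += snprintf(buffer + pos, pos < BUF_LEN ? BUF_LEN - pos : 0, __VA_ARGS__)

      BUF_PRINT("%02X ", 0x0b);
      BUF_PRINT("(%s)", "GET_ATTACHED_DISPLAYS");

      if (pos >= BUF_LEN - 1)
          fprintf(stderr, "truncated (needed %d bytes)\n", pos + 1);
      printf("%s\n", buffer);
  #undef BUF_PRINT
      return 0;
  }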
*/ + if (*hbuf_size) + (*hbuf_size)++; + + return true; +} + #if 0 static void intel_sdvo_dump_hdmi_buf(struct intel_sdvo *intel_sdvo) { @@ -972,14 +1008,10 @@ static bool intel_sdvo_write_infoframe(struct intel_sdvo *intel_sdvo, set_buf_index, 2)) return false; - if (!intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_HBUF_INFO, - &hbuf_size, 1)) + if (!intel_sdvo_get_hbuf_size(intel_sdvo, &hbuf_size)) return false; - /* Buffer size is 0 based, hooray! */ - hbuf_size++; - - DRM_DEBUG_KMS("writing sdvo hbuf: %i, hbuf_size %i, hbuf_size: %i\n", + DRM_DEBUG_KMS("writing sdvo hbuf: %i, length %u, hbuf_size: %i\n", if_index, length, hbuf_size); if (hbuf_size < length) @@ -1030,14 +1062,10 @@ static ssize_t intel_sdvo_read_infoframe(struct intel_sdvo *intel_sdvo, if (tx_rate == SDVO_HBUF_TX_DISABLED) return 0; - if (!intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_HBUF_INFO, - &hbuf_size, 1)) - return -ENXIO; - - /* Buffer size is 0 based, hooray! */ - hbuf_size++; + if (!intel_sdvo_get_hbuf_size(intel_sdvo, &hbuf_size)) + return false; - DRM_DEBUG_KMS("reading sdvo hbuf: %i, hbuf_size %i, hbuf_size: %i\n", + DRM_DEBUG_KMS("reading sdvo hbuf: %i, length %u, hbuf_size: %i\n", if_index, length, hbuf_size); hbuf_size = min_t(unsigned int, length, hbuf_size); @@ -1059,7 +1087,7 @@ static bool intel_sdvo_compute_avi_infoframe(struct intel_sdvo *intel_sdvo, { struct hdmi_avi_infoframe *frame = &crtc_state->infoframes.avi.avi; const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; int ret; if (!crtc_state->has_hdmi_sink) @@ -1248,8 +1276,8 @@ static int intel_sdvo_compute_config(struct intel_encoder *encoder, to_intel_sdvo_connector_state(conn_state); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(conn_state->connector); - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - struct drm_display_mode *mode = &pipe_config->base.mode; + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; + struct drm_display_mode *mode = &pipe_config->hw.mode; DRM_DEBUG_KMS("forcing bpc to 8 for SDVO\n"); pipe_config->pipe_bpp = 8*3; @@ -1321,9 +1349,9 @@ static int intel_sdvo_compute_config(struct intel_encoder *encoder, if (IS_TV(intel_sdvo_connector)) i9xx_adjust_sdvo_tv_clock(pipe_config); - /* Set user selected PAR to incoming mode's member */ - if (intel_sdvo_connector->is_hdmi) - adjusted_mode->picture_aspect_ratio = conn_state->picture_aspect_ratio; + if (conn_state->picture_aspect_ratio) + adjusted_mode->picture_aspect_ratio = + conn_state->picture_aspect_ratio; if (!intel_sdvo_compute_avi_infoframe(intel_sdvo, pipe_config, conn_state)) { @@ -1401,13 +1429,13 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; const struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(conn_state); const struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(conn_state->connector); - const struct drm_display_mode *mode = &crtc_state->base.mode; + const struct drm_display_mode *mode = &crtc_state->hw.mode; struct intel_sdvo *intel_sdvo 
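intel_sdvo_get_hbuf_size() centralises the quirky encoding returned by GET_HBUF_INFO: the size is zero-based (a raw value of N means N + 1 bytes), except that a raw 0 really does mean an empty buffer. A tiny sketch of that decode with a few example values (the helper name below is illustrative):

  #include <stdio.h>

  /* Raw 0 -> 0 bytes; raw N (N > 0) -> N + 1 bytes. */
  static unsigned int decode_hbuf_size(unsigned char raw)
  {
      return raw ? raw + 1u : 0u;
  }

  int main(void)
  {
      /* Prints "0 2 32": raw 0x1f describes a 32-byte infoframe buffer. */
      printf("%u %u %u\n", decode_hbuf_size(0x00),
             decode_hbuf_size(0x01), decode_hbuf_size(0x1f));
      return 0;
  }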
= to_sdvo(intel_encoder); u32 sdvox; struct intel_sdvo_in_out_map in_out; @@ -1523,7 +1551,7 @@ static bool intel_sdvo_connector_get_hw_state(struct intel_connector *connector) { struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(&connector->base); - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(&connector->base); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); u16 active_outputs = 0; intel_sdvo_get_active_outputs(intel_sdvo, &active_outputs); @@ -1601,7 +1629,7 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder, flags |= DRM_MODE_FLAG_NVSYNC; } - pipe_config->base.adjusted_mode.flags |= flags; + pipe_config->hw.adjusted_mode.flags |= flags; /* * pixel multiplier readout is tricky: Only on i915g/gm it is stored in @@ -1621,7 +1649,7 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder, if (pipe_config->pixel_multiplier) dotclock /= pipe_config->pixel_multiplier; - pipe_config->base.adjusted_mode.crtc_clock = dotclock; + pipe_config->hw.adjusted_mode.crtc_clock = dotclock; /* Cross check the port pixel multiplier with the sdvo encoder state. */ if (intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_CLOCK_RATE_MULT, @@ -1673,7 +1701,7 @@ static void intel_sdvo_enable_audio(struct intel_sdvo *intel_sdvo, const struct drm_connector_state *conn_state) { const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; struct drm_connector *connector = conn_state->connector; u8 *eld = connector->eld; @@ -1695,7 +1723,7 @@ static void intel_disable_sdvo(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); u32 temp; if (old_crtc_state->has_audio) @@ -1757,7 +1785,7 @@ static void intel_enable_sdvo(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); u32 temp; bool input1, input2; int i; @@ -1795,7 +1823,7 @@ static enum drm_mode_status intel_sdvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; @@ -1893,12 +1921,14 @@ static void intel_sdvo_enable_hotplug(struct intel_encoder *encoder) &intel_sdvo->hotplug_active, 2); } -static bool intel_sdvo_hotplug(struct intel_encoder *encoder, - struct intel_connector *connector) +static enum intel_hotplug_state +intel_sdvo_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector, + bool irq_received) { intel_sdvo_enable_hotplug(encoder); - return intel_encoder_hotplug(encoder, connector); + return intel_encoder_hotplug(encoder, connector, irq_received); } static bool @@ -1911,7 +1941,7 @@ intel_sdvo_multifunc_encoder(struct intel_sdvo *intel_sdvo) static struct edid * intel_sdvo_get_edid(struct drm_connector *connector) { - struct intel_sdvo *sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *sdvo 
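The call sites above now take struct intel_connector * explicitly and wrap the legacy pointer with to_intel_connector(), which, like to_sdvo() earlier in the file, is a container_of()-style conversion from an embedded base object back to its wrapper. A self-contained sketch of that idiom (the struct names are stand-ins, not the driver's types):

  #include <stdio.h>
  #include <stddef.h>

  /* Simplified userspace container_of(): wrapper = base ptr - member offset. */
  #define container_of(ptr, type, member) \
      ((type *)((char *)(ptr) - offsetof(type, member)))

  struct base_connector { int id; };

  struct wrapped_connector {
      struct base_connector base;   /* embedded base object */
      const char *name;
  };

  static struct wrapped_connector *to_wrapped(struct base_connector *c)
  {
      return container_of(c, struct wrapped_connector, base);
  }

  int main(void)
  {
      struct wrapped_connector wc = { .base = { .id = 7 }, .name = "SDVOB" };
      struct base_connector *b = &wc.base;

      /* Recover the wrapper from the embedded base pointer: "SDVOB (id 7)". */
      printf("%s (id %d)\n", to_wrapped(b)->name, to_wrapped(b)->base.id);
      return 0;
  }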
= intel_attached_sdvo(to_intel_connector(connector)); return drm_get_edid(connector, &sdvo->ddc); } @@ -1929,7 +1959,7 @@ intel_sdvo_get_analog_edid(struct drm_connector *connector) static enum drm_connector_status intel_sdvo_tmds_sink_detect(struct drm_connector *connector) { - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); enum drm_connector_status status; @@ -1998,7 +2028,7 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector, bool force) { u16 response; - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); enum drm_connector_status ret; @@ -2145,7 +2175,7 @@ static const struct drm_display_mode sdvo_tv_modes[] = { static void intel_sdvo_get_tv_modes(struct drm_connector *connector) { - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); const struct drm_connector_state *conn_state = connector->state; struct intel_sdvo_sdtv_resolution_request tv_res; u32 reply = 0, format_map = 0; @@ -2185,7 +2215,7 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector) static void intel_sdvo_get_lvds_modes(struct drm_connector *connector) { - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct drm_i915_private *dev_priv = to_i915(connector->dev); struct drm_display_mode *newmode; @@ -2349,7 +2379,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector, static int intel_sdvo_connector_register(struct drm_connector *connector) { - struct intel_sdvo *sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector)); int ret; ret = intel_connector_register(connector); @@ -2364,7 +2394,7 @@ intel_sdvo_connector_register(struct drm_connector *connector) static void intel_sdvo_connector_unregister(struct drm_connector *connector) { - struct intel_sdvo *sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector)); sysfs_remove_link(&connector->kdev->kobj, sdvo->ddc.dev.kobj.name); @@ -2624,7 +2654,6 @@ intel_sdvo_add_hdmi_properties(struct intel_sdvo *intel_sdvo, intel_attach_broadcast_rgb_property(&connector->base.base); } intel_attach_aspect_ratio_property(&connector->base.base); - connector->base.base.state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; } static struct intel_sdvo_connector *intel_sdvo_connector_alloc(void) @@ -2891,7 +2920,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) bytes[0], bytes[1]); return false; } - intel_sdvo->base.crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_sdvo->base.pipe_mask = ~0; return true; } @@ -2903,7 +2932,7 @@ static void intel_sdvo_output_cleanup(struct intel_sdvo *intel_sdvo) list_for_each_entry_safe(connector, tmp, &dev->mode_config.connector_list, head) { - if (intel_attached_encoder(connector) == &intel_sdvo->base) { + if (intel_attached_encoder(to_intel_connector(connector)) == &intel_sdvo->base) { drm_connector_unregister(connector); intel_connector_destroy(connector); } diff --git 
a/drivers/gpu/drm/i915/display/intel_sdvo.h b/drivers/gpu/drm/i915/display/intel_sdvo.h index c9e05bcdd141..a66f224aa17d 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.h +++ b/drivers/gpu/drm/i915/display/intel_sdvo.h @@ -14,6 +14,7 @@ struct drm_i915_private; enum pipe; +enum port; bool intel_sdvo_port_enabled(struct drm_i915_private *dev_priv, i915_reg_t sdvo_reg, enum pipe *pipe); diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 004b52027ae8..fca77ec1e0dd 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -40,26 +40,14 @@ #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_trace.h" #include "intel_atomic_plane.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_frontbuffer.h" #include "intel_pm.h" #include "intel_psr.h" #include "intel_sprite.h" -bool is_planar_yuv_format(u32 pixelformat) -{ - switch (pixelformat) { - case DRM_FORMAT_NV12: - case DRM_FORMAT_P010: - case DRM_FORMAT_P012: - case DRM_FORMAT_P016: - return true; - default: - return false; - } -} - int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, int usecs) { @@ -93,9 +81,9 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, */ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const struct drm_display_mode *adjusted_mode = &new_crtc_state->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &new_crtc_state->hw.adjusted_mode; long timeout = msecs_to_jiffies_timeout(1); int scanline, min, max, vblank_start; wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base); @@ -132,7 +120,7 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) crtc->debug.min_vbl = min; crtc->debug.max_vbl = max; - trace_i915_pipe_update_start(crtc); + trace_intel_pipe_update_start(crtc); for (;;) { /* @@ -185,7 +173,7 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) crtc->debug.start_vbl_time = ktime_get(); crtc->debug.start_vbl_count = intel_crtc_get_vblank_counter(crtc); - trace_i915_pipe_update_vblank_evaded(crtc); + trace_intel_pipe_update_vblank_evaded(crtc); return; irq_disable: @@ -202,27 +190,28 @@ irq_disable: */ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); enum pipe pipe = crtc->pipe; int scanline_end = intel_get_crtc_scanline(crtc); u32 end_vbl_count = intel_crtc_get_vblank_counter(crtc); ktime_t end_vbl_time = ktime_get(); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - trace_i915_pipe_update_end(crtc, end_vbl_count, scanline_end); + trace_intel_pipe_update_end(crtc, end_vbl_count, scanline_end); /* We're still in the vblank-evade critical section, this can't race. * Would be slightly nice to just grab the vblank count and arm the * event outside of the critical section - the spinlock might spin for a * while ... 
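intel_pipe_update_start()/intel_pipe_update_end() bracket the plane register writes with a vblank-evasion critical section: writes only begin once the scanline counter is outside a small window ending at vblank start, and the window size comes from intel_usecs_to_scanlines() applied to a ~100 us evasion budget. The conversion itself sits in unchanged context, so the sketch below uses the commonly used form (ceil of usecs * pixel clock / (1000 * htotal)) as an assumption rather than a quote from this hunk:

  #include <stdio.h>

  /* Assumed form: ceil(usecs * clock_khz / (1000 * htotal)). */
  static int usecs_to_scanlines(int clock_khz, int htotal, int usecs)
  {
      long long num = (long long)usecs * clock_khz;
      long long den = 1000LL * htotal;

      return (int)((num + den - 1) / den);
  }

  int main(void)
  {
      /* 1080p-class timing: 148.5 MHz pixel clock, htotal 2200. */
      int evade = usecs_to_scanlines(148500, 2200, 100);

      /* A 100 us budget maps to 7 scanlines, so the evasion window is
       * roughly the last 7 lines before vblank start. */
      printf("%d scanlines\n", evade);
      return 0;
  }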
*/ - if (new_crtc_state->base.event) { + if (new_crtc_state->uapi.event) { WARN_ON(drm_crtc_vblank_get(&crtc->base) != 0); spin_lock(&crtc->base.dev->event_lock); - drm_crtc_arm_vblank_event(&crtc->base, new_crtc_state->base.event); + drm_crtc_arm_vblank_event(&crtc->base, + new_crtc_state->uapi.event); spin_unlock(&crtc->base.dev->event_lock); - new_crtc_state->base.event = NULL; + new_crtc_state->uapi.event = NULL; } local_irq_enable(); @@ -251,9 +240,9 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) int intel_plane_check_stride(const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; u32 stride, max_stride; /* @@ -263,7 +252,7 @@ int intel_plane_check_stride(const struct intel_plane_state *plane_state) * kick in due the plane being invisible. */ if (intel_plane_can_remap(plane_state) && - !plane_state->base.visible) + !plane_state->uapi.visible) return 0; /* FIXME other color planes? */ @@ -283,10 +272,10 @@ int intel_plane_check_stride(const struct intel_plane_state *plane_state) int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_rect *src = &plane_state->base.src; + const struct drm_framebuffer *fb = plane_state->hw.fb; + struct drm_rect *src = &plane_state->uapi.src; u32 src_x, src_y, src_w, src_h, hsub, vsub; - bool rotated = drm_rotation_90_or_270(plane_state->base.rotation); + bool rotated = drm_rotation_90_or_270(plane_state->hw.rotation); /* * Hardware doesn't handle subpixel coordinates. 
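The drm_rect_init() change that follows is purely a tidier way of rounding the 16.16 fixed-point source rectangle to whole pixels: the origin and size are truncated to integers and shifted back up, discarding any subpixel remainder the hardware cannot handle. A standalone sketch of that rounding (plain struct, not the drm_rect API):

  #include <stdio.h>

  struct rect { int x1, y1, x2, y2; };   /* 16.16 fixed point */

  static int width(const struct rect *r)  { return r->x2 - r->x1; }
  static int height(const struct rect *r) { return r->y2 - r->y1; }

  static void rect_init(struct rect *r, int x, int y, int w, int h)
  {
      r->x1 = x;
      r->x2 = x + w;
      r->y1 = y;
      r->y2 = y + h;
  }

  int main(void)
  {
      /* x = 10.5, y = 4.25, w = 100.75, h = 50.5 in 16.16 fixed point. */
      struct rect src;
      int src_x, src_y, src_w, src_h;

      rect_init(&src, (10 << 16) + 0x8000, (4 << 16) + 0x4000,
                (100 << 16) + 0xc000, (50 << 16) + 0x8000);

      /* Truncate to whole pixels, then shift back up: subpixels are dropped. */
      src_x = src.x1 >> 16;
      src_y = src.y1 >> 16;
      src_w = width(&src) >> 16;
      src_h = height(&src) >> 16;
      rect_init(&src, src_x << 16, src_y << 16, src_w << 16, src_h << 16);

      /* Prints "10x4 100x50" (was 10.5x4.25, 100.75x50.5). */
      printf("%dx%d %dx%d\n", src.x1 >> 16, src.y1 >> 16,
             width(&src) >> 16, height(&src) >> 16);
      return 0;
  }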
@@ -299,10 +288,8 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state) src_y = src->y1 >> 16; src_h = drm_rect_height(src) >> 16; - src->x1 = src_x << 16; - src->x2 = (src_x + src_w) << 16; - src->y1 = src_y << 16; - src->y2 = (src_y + src_h) << 16; + drm_rect_init(src, src_x << 16, src_y << 16, + src_w << 16, src_h << 16); if (!fb->format->is_yuv) return 0; @@ -330,6 +317,61 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state) return 0; } +bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id) +{ + return INTEL_GEN(dev_priv) >= 11 && + icl_hdr_plane_mask() & BIT(plane_id); +} + +static void +skl_plane_ratio(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + + if (fb->format->cpp[0] == 8) { + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + *num = 10; + *den = 8; + } else { + *num = 9; + *den = 8; + } + } else { + *num = 1; + *den = 1; + } +} + +static int skl_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); + unsigned int pixel_rate = crtc_state->pixel_rate; + unsigned int src_w, src_h, dst_w, dst_h; + unsigned int num, den; + + skl_plane_ratio(crtc_state, plane_state, &num, &den); + + /* two pixels per clock on glk+ */ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + den *= 2; + + src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + src_h = drm_rect_height(&plane_state->uapi.src) >> 16; + dst_w = drm_rect_width(&plane_state->uapi.dst); + dst_h = drm_rect_height(&plane_state->uapi.dst); + + /* Downscaling limits the maximum pixel rate */ + dst_w = min(src_w, dst_w); + dst_h = min(src_h, dst_h); + + return DIV64_U64_ROUND_UP(mul_u32_u32(pixel_rate * num, src_w * src_h), + mul_u32_u32(den, dst_w * dst_h)); +} + static unsigned int skl_plane_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, @@ -354,27 +396,28 @@ skl_program_scaler(struct intel_plane *plane, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; int scaler_id = plane_state->scaler_id; const struct intel_scaler *scaler = &crtc_state->scaler_state.scalers[scaler_id]; - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; - u32 crtc_w = drm_rect_width(&plane_state->base.dst); - u32 crtc_h = drm_rect_height(&plane_state->base.dst); + int crtc_x = plane_state->uapi.dst.x1; + int crtc_y = plane_state->uapi.dst.y1; + u32 crtc_w = drm_rect_width(&plane_state->uapi.dst); + u32 crtc_h = drm_rect_height(&plane_state->uapi.dst); u16 y_hphase, uv_rgb_hphase; u16 y_vphase, uv_rgb_vphase; int hscale, vscale; - hscale = drm_rect_calc_hscale(&plane_state->base.src, - &plane_state->base.dst, + hscale = drm_rect_calc_hscale(&plane_state->uapi.src, + &plane_state->uapi.dst, 0, INT_MAX); - vscale = drm_rect_calc_vscale(&plane_state->base.src, - &plane_state->base.dst, + vscale = drm_rect_calc_vscale(&plane_state->uapi.src, + &plane_state->uapi.dst, 0, INT_MAX); /* TODO: handle sub-pixel coordinates */ - if (is_planar_yuv_format(plane_state->base.fb->format->format) && + if 
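skl_plane_min_cdclk() turns the plane's worst-case pixel demand into a cdclk floor: the cpp-dependent num/den ratio covers the extra cost of 64bpp formats, glk+ gets den doubled because it fetches two pixels per clock, and downscaling (source larger than destination) inflates the requirement by the area ratio. A worked example, assuming a gen10+ part and the illustrative numbers below:

  #include <stdio.h>
  #include <stdint.h>

  static uint64_t div_round_up(uint64_t a, uint64_t b)
  {
      return (a + b - 1) / b;
  }

  int main(void)
  {
      /* Assumed inputs: ~4k pipe, 64bpp (FP16) plane, 2x2 downscale. */
      uint64_t pixel_rate = 533250;          /* kHz, crtc_state->pixel_rate */
      uint64_t num = 10, den = 8;            /* cpp == 8 on gen10+ => 10/8 */
      uint64_t src_w = 3840, src_h = 2160;
      uint64_t dst_w = 1920, dst_h = 1080;

      den *= 2;                              /* two pixels per clock on glk+ */

      /* Downscaling limits the maximum pixel rate. */
      if (dst_w > src_w) dst_w = src_w;
      if (dst_h > src_h) dst_h = src_h;

      /* ceil(533250 * 10 * 3840*2160 / (16 * 1920*1080)) = 1333125 kHz */
      printf("min cdclk %llu kHz\n",
             (unsigned long long)div_round_up(pixel_rate * num * src_w * src_h,
                                              den * dst_w * dst_h));
      return 0;
  }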
(intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) && !icl_is_hdr_plane(dev_priv, plane->id)) { y_hphase = skl_scaler_calc_phase(1, hscale, false); y_vphase = skl_scaler_calc_phase(1, vscale, false); @@ -441,9 +484,21 @@ icl_program_input_csc(struct intel_plane *plane, */ [DRM_COLOR_YCBCR_BT709] = { 0x7C98, 0x7800, 0x0, - 0x9EF8, 0x7800, 0xABF8, + 0x9EF8, 0x7800, 0xAC00, 0x0, 0x7800, 0x7ED8, }, + /* + * BT.2020 full range YCbCr -> full range RGB + * The matrix required is : + * [1.000, 0.000, 1.474, + * 1.000, -0.1645, -0.5713, + * 1.000, 1.8814, 0.0000] + */ + [DRM_COLOR_YCBCR_BT2020] = { + 0x7BC8, 0x7800, 0x0, + 0x8928, 0x7800, 0xAA88, + 0x0, 0x7800, 0x7F10, + }, }; /* Matrix for Limited Range to Full Range Conversion */ @@ -451,34 +506,46 @@ icl_program_input_csc(struct intel_plane *plane, /* * BT.601 Limted range YCbCr -> full range RGB * The matrix required is : - * [1.164384, 0.000, 1.596370, - * 1.138393, -0.382500, -0.794598, - * 1.138393, 1.971696, 0.0000] + * [1.164384, 0.000, 1.596027, + * 1.164384, -0.39175, -0.812813, + * 1.164384, 2.017232, 0.0000] */ [DRM_COLOR_YCBCR_BT601] = { 0x7CC8, 0x7950, 0x0, - 0x8CB8, 0x7918, 0x9C40, - 0x0, 0x7918, 0x7FC8, + 0x8D00, 0x7950, 0x9C88, + 0x0, 0x7950, 0x6810, }, /* * BT.709 Limited range YCbCr -> full range RGB * The matrix required is : - * [1.164, 0.000, 1.833671, - * 1.138393, -0.213249, -0.532909, - * 1.138393, 2.112402, 0.0000] + * [1.164384, 0.000, 1.792741, + * 1.164384, -0.213249, -0.532909, + * 1.164384, 2.112402, 0.0000] */ [DRM_COLOR_YCBCR_BT709] = { - 0x7EA8, 0x7950, 0x0, - 0x8888, 0x7918, 0xADA8, - 0x0, 0x7918, 0x6870, + 0x7E58, 0x7950, 0x0, + 0x8888, 0x7950, 0xADA8, + 0x0, 0x7950, 0x6870, + }, + /* + * BT.2020 Limited range YCbCr -> full range RGB + * The matrix required is : + * [1.164, 0.000, 1.678, + * 1.164, -0.1873, -0.6504, + * 1.164, 2.1417, 0.0000] + */ + [DRM_COLOR_YCBCR_BT2020] = { + 0x7D70, 0x7950, 0x0, + 0x8A68, 0x7950, 0xAC00, + 0x0, 0x7950, 0x6890, }, }; const u16 *csc; - if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE) - csc = input_csc_matrix[plane_state->base.color_encoding]; + if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE) + csc = input_csc_matrix[plane_state->hw.color_encoding]; else - csc = input_csc_matrix_lr[plane_state->base.color_encoding]; + csc = input_csc_matrix_lr[plane_state->hw.color_encoding]; I915_WRITE_FW(PLANE_INPUT_CSC_COEFF(pipe, plane_id, 0), ROFF(csc[0]) | GOFF(csc[1])); @@ -492,8 +559,11 @@ icl_program_input_csc(struct intel_plane *plane, I915_WRITE_FW(PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 0), PREOFF_YUV_TO_RGB_HI); - I915_WRITE_FW(PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 1), - PREOFF_YUV_TO_RGB_ME); + if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE) + I915_WRITE_FW(PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 1), 0); + else + I915_WRITE_FW(PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 1), + PREOFF_YUV_TO_RGB_ME); I915_WRITE_FW(PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 2), PREOFF_YUV_TO_RGB_LO); I915_WRITE_FW(PLANE_INPUT_CSC_POSTOFF(pipe, plane_id, 0), 0x0); @@ -505,7 +575,7 @@ static void skl_program_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, - int color_plane, bool slave, u32 plane_ctl) + int color_plane) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum plane_id plane_id = plane->id; @@ -513,19 +583,21 @@ skl_program_plane(struct intel_plane *plane, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 surf_addr = 
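The limited-range matrices in icl_program_input_csc() are the familiar YCbCr-to-RGB conversions with the 1.164384 (255/219) luma expansion folded in; the PLANE_INPUT_CSC_PREOFF writes carry the matching -16/-128 input offsets, with the luma pre-offset zeroed for full-range input as the hunk above does. A plain floating-point sketch of the BT.709 limited-range case, using the coefficients quoted in the comment (the hardware of course uses its fixed-point register encoding, not floats):

  #include <stdio.h>

  /* BT.709 limited-range YCbCr (8 bit) -> full-range RGB, per the comment. */
  static void bt709_ycbcr_to_rgb(int y, int cb, int cr, double rgb[3])
  {
      double yp = y - 16, cbp = cb - 128, crp = cr - 128;   /* pre-offsets */

      rgb[0] = 1.164384 * yp + 1.792741 * crp;                      /* R */
      rgb[1] = 1.164384 * yp - 0.213249 * cbp - 0.532909 * crp;     /* G */
      rgb[2] = 1.164384 * yp + 2.112402 * cbp;                      /* B */
  }

  int main(void)
  {
      double rgb[3];

      /* Nominal white (235, 128, 128) expands to roughly (255, 255, 255). */
      bt709_ycbcr_to_rgb(235, 128, 128, rgb);
      printf("%.1f %.1f %.1f\n", rgb[0], rgb[1], rgb[2]);
      return 0;
  }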
plane_state->color_plane[color_plane].offset; u32 stride = skl_plane_stride(plane_state, color_plane); - u32 aux_stride = skl_plane_stride(plane_state, 1); - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; + const struct drm_framebuffer *fb = plane_state->hw.fb; + int aux_plane = intel_main_to_aux_plane(fb, color_plane); + u32 aux_dist = plane_state->color_plane[aux_plane].offset - surf_addr; + u32 aux_stride = skl_plane_stride(plane_state, aux_plane); + int crtc_x = plane_state->uapi.dst.x1; + int crtc_y = plane_state->uapi.dst.y1; u32 x = plane_state->color_plane[color_plane].x; u32 y = plane_state->color_plane[color_plane].y; - u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; - u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; - struct intel_plane *linked = plane_state->linked_plane; - const struct drm_framebuffer *fb = plane_state->base.fb; - u8 alpha = plane_state->base.alpha >> 8; + u32 src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + u32 src_h = drm_rect_height(&plane_state->uapi.src) >> 16; + u8 alpha = plane_state->hw.alpha >> 8; u32 plane_color_ctl = 0; unsigned long irqflags; u32 keymsk, keymax; + u32 plane_ctl = plane_state->ctl; plane_ctl |= skl_plane_ctl_crtc(crtc_state); @@ -554,29 +626,13 @@ skl_program_plane(struct intel_plane *plane, I915_WRITE_FW(PLANE_STRIDE(pipe, plane_id), stride); I915_WRITE_FW(PLANE_POS(pipe, plane_id), (crtc_y << 16) | crtc_x); I915_WRITE_FW(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); - I915_WRITE_FW(PLANE_AUX_DIST(pipe, plane_id), - (plane_state->color_plane[1].offset - surf_addr) | aux_stride); - if (icl_is_hdr_plane(dev_priv, plane_id)) { - u32 cus_ctl = 0; - - if (linked) { - /* Enable and use MPEG-2 chroma siting */ - cus_ctl = PLANE_CUS_ENABLE | - PLANE_CUS_HPHASE_0 | - PLANE_CUS_VPHASE_SIGN_NEGATIVE | - PLANE_CUS_VPHASE_0_25; - - if (linked->id == PLANE_SPRITE5) - cus_ctl |= PLANE_CUS_PLANE_7; - else if (linked->id == PLANE_SPRITE4) - cus_ctl |= PLANE_CUS_PLANE_6; - else - MISSING_CASE(linked->id); - } + if (INTEL_GEN(dev_priv) < 12) + aux_dist |= aux_stride; + I915_WRITE_FW(PLANE_AUX_DIST(pipe, plane_id), aux_dist); - I915_WRITE_FW(PLANE_CUS_CTL(pipe, plane_id), cus_ctl); - } + if (icl_is_hdr_plane(dev_priv, plane_id)) + I915_WRITE_FW(PLANE_CUS_CTL(pipe, plane_id), plane_state->cus_ctl); if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), plane_color_ctl); @@ -606,7 +662,7 @@ skl_program_plane(struct intel_plane *plane, I915_WRITE_FW(PLANE_SURF(pipe, plane_id), intel_plane_ggtt_offset(plane_state) + surf_addr); - if (!slave && plane_state->scaler_id >= 0) + if (plane_state->scaler_id >= 0) skl_program_scaler(plane, crtc_state, plane_state); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); @@ -619,24 +675,12 @@ skl_update_plane(struct intel_plane *plane, { int color_plane = 0; - if (plane_state->linked_plane) { - /* Program the UV plane */ + if (plane_state->planar_linked_plane && !plane_state->planar_slave) + /* Program the UV plane on planar master */ color_plane = 1; - } - skl_program_plane(plane, crtc_state, plane_state, - color_plane, false, plane_state->ctl); -} - -static void -icl_update_slave(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - skl_program_plane(plane, crtc_state, plane_state, 0, true, - plane_state->ctl | PLANE_CTL_YUV420_Y_PLANE); + skl_program_plane(plane, crtc_state, plane_state, color_plane); } - static void 
skl_disable_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state) @@ -683,12 +727,22 @@ skl_plane_get_hw_state(struct intel_plane *plane, return ret; } +static void i9xx_plane_linear_gamma(u16 gamma[8]) +{ + /* The points are not evenly spaced. */ + static const u8 in[8] = { 0, 1, 2, 4, 8, 16, 24, 32 }; + int i; + + for (i = 0; i < 8; i++) + gamma[i] = (in[i] << 8) / 32; +} + static void chv_update_csc(const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_framebuffer *fb = plane_state->hw.fb; enum plane_id plane_id = plane->id; /* * |r| | c0 c1 c2 | |cr| @@ -714,7 +768,7 @@ chv_update_csc(const struct intel_plane_state *plane_state) 0, 4096, 7601, }, }; - const s16 *csc = csc_matrix[plane_state->base.color_encoding]; + const s16 *csc = csc_matrix[plane_state->hw.color_encoding]; /* Seems RGB data bypasses the CSC always */ if (!fb->format->is_yuv) @@ -745,15 +799,15 @@ chv_update_csc(const struct intel_plane_state *plane_state) static void vlv_update_clrc(const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; int contrast, brightness, sh_scale, sh_sin, sh_cos; if (fb->format->is_yuv && - plane_state->base.color_range == DRM_COLOR_YCBCR_LIMITED_RANGE) { + plane_state->hw.color_range == DRM_COLOR_YCBCR_LIMITED_RANGE) { /* * Expand limited range to full range: * Contrast is applied first and is used to expand Y range. @@ -781,6 +835,85 @@ vlv_update_clrc(const struct intel_plane_state *plane_state) SP_SH_SIN(sh_sin) | SP_SH_COS(sh_cos)); } +static void +vlv_plane_ratio(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int cpp = fb->format->cpp[0]; + + /* + * VLV bspec only considers cases where all three planes are + * enabled, and cases where the primary and one sprite is enabled. + * Let's assume the case with just two sprites enabled also + * maps to the latter case. + */ + if (hweight8(active_planes) == 3) { + switch (cpp) { + case 8: + *num = 11; + *den = 8; + break; + case 4: + *num = 18; + *den = 16; + break; + default: + *num = 1; + *den = 1; + break; + } + } else if (hweight8(active_planes) == 2) { + switch (cpp) { + case 8: + *num = 10; + *den = 8; + break; + case 4: + *num = 17; + *den = 16; + break; + default: + *num = 1; + *den = 1; + break; + } + } else { + switch (cpp) { + case 8: + *num = 10; + *den = 8; + break; + default: + *num = 1; + *den = 1; + break; + } + } +} + +int vlv_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int pixel_rate; + unsigned int num, den; + + /* + * Note that crtc_state->pixel_rate accounts for both + * horizontal and vertical panel fitter downscaling factors. 
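i9xx_plane_linear_gamma() produces a unit ramp sampled at the hardware's unevenly spaced control points (denser near black), scaled so that the 32/32 input maps to 256; the VLV/CHV and G4X write loops then skip the implicit 0.0 and 1.0 end points and replicate each value into the three colour channels. A quick check of the arithmetic:

  #include <stdio.h>

  int main(void)
  {
      /* The points are not evenly spaced. */
      static const unsigned char in[8] = { 0, 1, 2, 4, 8, 16, 24, 32 };
      unsigned short gamma[8];

      /* Prints "0 8 16 32 64 128 192 256". */
      for (int i = 0; i < 8; i++) {
          gamma[i] = (in[i] << 8) / 32;
          printf("%u ", gamma[i]);
      }
      printf("\n");
      return 0;
  }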
+ * Pre-HSW bspec tells us to only consider the horizontal + * downscaling factor here. We ignore that and just consider + * both for simplicity. + */ + pixel_rate = crtc_state->pixel_rate; + + vlv_plane_ratio(crtc_state, plane_state, &num, &den); + + return DIV_ROUND_UP(pixel_rate * num, den); +} + static u32 vlv_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) { u32 sprctl = 0; @@ -794,8 +927,8 @@ static u32 vlv_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 sprctl; @@ -814,6 +947,9 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_VYUY: sprctl |= SP_FORMAT_YUV422 | SP_YUV_ORDER_VYUY; break; + case DRM_FORMAT_C8: + sprctl |= SP_FORMAT_8BPP; + break; case DRM_FORMAT_RGB565: sprctl |= SP_FORMAT_BGR565; break; @@ -829,6 +965,12 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_ABGR2101010: sprctl |= SP_FORMAT_RGBA1010102; break; + case DRM_FORMAT_XRGB2101010: + sprctl |= SP_FORMAT_BGRX1010102; + break; + case DRM_FORMAT_ARGB2101010: + sprctl |= SP_FORMAT_BGRA1010102; + break; case DRM_FORMAT_XBGR8888: sprctl |= SP_FORMAT_RGBX8888; break; @@ -840,7 +982,7 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, return 0; } - if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) + if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709) sprctl |= SP_YUV_FORMAT_BT709; if (fb->modifier == I915_FORMAT_MOD_X_TILED) @@ -858,6 +1000,31 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, return sprctl; } +static void vlv_update_gamma(const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + enum pipe pipe = plane->pipe; + enum plane_id plane_id = plane->id; + u16 gamma[8]; + int i; + + /* Seems RGB data bypasses the gamma always */ + if (!fb->format->is_yuv) + return; + + i9xx_plane_linear_gamma(gamma); + + /* FIXME these register are single buffered :( */ + /* The two end points are implicit (0.0 and 1.0) */ + for (i = 1; i < 8 - 1; i++) + I915_WRITE_FW(SPGAMC(pipe, plane_id, i - 1), + gamma[i] << 16 | + gamma[i] << 8 | + gamma[i]); +} + static void vlv_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, @@ -869,10 +1036,10 @@ vlv_update_plane(struct intel_plane *plane, u32 sprsurf_offset = plane_state->color_plane[0].offset; u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; - u32 crtc_w = drm_rect_width(&plane_state->base.dst); - u32 crtc_h = drm_rect_height(&plane_state->base.dst); + int crtc_x = plane_state->uapi.dst.x1; + int crtc_y = plane_state->uapi.dst.y1; + u32 crtc_w = drm_rect_width(&plane_state->uapi.dst); + u32 crtc_h = drm_rect_height(&plane_state->uapi.dst); u32 x = plane_state->color_plane[0].x; u32 y = plane_state->color_plane[0].y; unsigned long irqflags; @@ -916,6 +1083,7 @@ vlv_update_plane(struct intel_plane 
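vlv_plane_min_cdclk() applies the bspec-derived overhead ratio for the number of active planes and the plane's cpp on top of the pipe pixel rate (which already includes the panel fitter downscale). For example, with all three planes enabled and a 32bpp plane the ratio is 18/16, so a 200 MHz pipe needs at least 225 MHz of cdclk; a minimal check of that calculation with illustrative numbers:

  #include <stdio.h>

  static unsigned int div_round_up(unsigned int a, unsigned int b)
  {
      return (a + b - 1) / b;
  }

  int main(void)
  {
      unsigned int pixel_rate = 200000;   /* kHz, includes pfit downscale */
      unsigned int num = 18, den = 16;    /* 3 active planes, cpp == 4 */

      /* Prints "min cdclk 225000 kHz". */
      printf("min cdclk %u kHz\n", div_round_up(pixel_rate * num, den));
      return 0;
  }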
*plane, intel_plane_ggtt_offset(plane_state) + sprsurf_offset); vlv_update_clrc(plane_state); + vlv_update_gamma(plane_state); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -961,6 +1129,164 @@ vlv_plane_get_hw_state(struct intel_plane *plane, return ret; } +static void ivb_plane_ratio(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int cpp = fb->format->cpp[0]; + + if (hweight8(active_planes) == 2) { + switch (cpp) { + case 8: + *num = 10; + *den = 8; + break; + case 4: + *num = 17; + *den = 16; + break; + default: + *num = 1; + *den = 1; + break; + } + } else { + switch (cpp) { + case 8: + *num = 9; + *den = 8; + break; + default: + *num = 1; + *den = 1; + break; + } + } +} + +static void ivb_plane_ratio_scaling(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int cpp = fb->format->cpp[0]; + + switch (cpp) { + case 8: + *num = 12; + *den = 8; + break; + case 4: + *num = 19; + *den = 16; + break; + case 2: + *num = 33; + *den = 32; + break; + default: + *num = 1; + *den = 1; + break; + } +} + +int ivb_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int pixel_rate; + unsigned int num, den; + + /* + * Note that crtc_state->pixel_rate accounts for both + * horizontal and vertical panel fitter downscaling factors. + * Pre-HSW bspec tells us to only consider the horizontal + * downscaling factor here. We ignore that and just consider + * both for simplicity. + */ + pixel_rate = crtc_state->pixel_rate; + + ivb_plane_ratio(crtc_state, plane_state, &num, &den); + + return DIV_ROUND_UP(pixel_rate * num, den); +} + +static int ivb_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int src_w, dst_w, pixel_rate; + unsigned int num, den; + + /* + * Note that crtc_state->pixel_rate accounts for both + * horizontal and vertical panel fitter downscaling factors. + * Pre-HSW bspec tells us to only consider the horizontal + * downscaling factor here. We ignore that and just consider + * both for simplicity. 
+ */ + pixel_rate = crtc_state->pixel_rate; + + src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + dst_w = drm_rect_width(&plane_state->uapi.dst); + + if (src_w != dst_w) + ivb_plane_ratio_scaling(crtc_state, plane_state, &num, &den); + else + ivb_plane_ratio(crtc_state, plane_state, &num, &den); + + /* Horizontal downscaling limits the maximum pixel rate */ + dst_w = min(src_w, dst_w); + + return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_rate, num * src_w), + den * dst_w); +} + +static void hsw_plane_ratio(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int cpp = fb->format->cpp[0]; + + if (hweight8(active_planes) == 2) { + switch (cpp) { + case 8: + *num = 10; + *den = 8; + break; + default: + *num = 1; + *den = 1; + break; + } + } else { + switch (cpp) { + case 8: + *num = 9; + *den = 8; + break; + default: + *num = 1; + *den = 1; + break; + } + } +} + +int hsw_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int pixel_rate = crtc_state->pixel_rate; + unsigned int num, den; + + hsw_plane_ratio(crtc_state, plane_state, &num, &den); + + return DIV_ROUND_UP(pixel_rate * num, den); +} + static u32 ivb_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) { u32 sprctl = 0; @@ -974,13 +1300,23 @@ static u32 ivb_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) return sprctl; } +static bool ivb_need_sprite_gamma(const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + + return fb->format->cpp[0] == 8 && + (IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)); +} + static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; + to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 sprctl; @@ -996,6 +1332,18 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_XRGB8888: sprctl |= SPRITE_FORMAT_RGBX888; break; + case DRM_FORMAT_XBGR2101010: + sprctl |= SPRITE_FORMAT_RGBX101010 | SPRITE_RGB_ORDER_RGBX; + break; + case DRM_FORMAT_XRGB2101010: + sprctl |= SPRITE_FORMAT_RGBX101010; + break; + case DRM_FORMAT_XBGR16161616F: + sprctl |= SPRITE_FORMAT_RGBX161616 | SPRITE_RGB_ORDER_RGBX; + break; + case DRM_FORMAT_XRGB16161616F: + sprctl |= SPRITE_FORMAT_RGBX161616; + break; case DRM_FORMAT_YUYV: sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_YUYV; break; @@ -1013,10 +1361,13 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, return 0; } - if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) + if (!ivb_need_sprite_gamma(plane_state)) + sprctl |= SPRITE_INT_GAMMA_DISABLE; + + if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709) sprctl |= SPRITE_YUV_TO_RGB_CSC_FORMAT_BT709; - if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE) + if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE) sprctl 
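ivb_sprite_min_cdclk() adds a second penalty for horizontal downscaling: when src_w differs from dst_w the larger "scaling" ratio table is used and the result is further multiplied by src_w/dst_w, with dst_w clamped so upscaling never lowers the requirement. Worked through for a 32bpp sprite scaled down 2x horizontally on a 148.5 MHz pipe (numbers chosen for illustration):

  #include <stdio.h>
  #include <stdint.h>

  static uint64_t div_round_up(uint64_t a, uint64_t b)
  {
      return (a + b - 1) / b;
  }

  int main(void)
  {
      uint64_t pixel_rate = 148500;       /* kHz */
      uint64_t num = 19, den = 16;        /* cpp == 4, scaling ratio */
      uint64_t src_w = 3840, dst_w = 1920;

      /* Horizontal downscaling limits the maximum pixel rate. */
      if (dst_w > src_w)
          dst_w = src_w;

      /* ceil(148500 * 19 * 3840 / (16 * 1920)) = 352688 kHz */
      printf("min cdclk %llu kHz\n",
             (unsigned long long)div_round_up(pixel_rate * num * src_w,
                                              den * dst_w));
      return 0;
  }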
|= SPRITE_YUV_RANGE_CORRECTION_DISABLE; if (fb->modifier == I915_FORMAT_MOD_X_TILED) @@ -1033,6 +1384,62 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, return sprctl; } +static void ivb_sprite_linear_gamma(const struct intel_plane_state *plane_state, + u16 gamma[18]) +{ + int scale, i; + + /* + * WaFP16GammaEnabling:ivb,hsw + * "Workaround : When using the 64-bit format, the sprite output + * on each color channel has one quarter amplitude. It can be + * brought up to full amplitude by using sprite internal gamma + * correction, pipe gamma correction, or pipe color space + * conversion to multiply the sprite output by four." + */ + scale = 4; + + for (i = 0; i < 16; i++) + gamma[i] = min((scale * i << 10) / 16, (1 << 10) - 1); + + gamma[i] = min((scale * i << 10) / 16, 1 << 10); + i++; + + gamma[i] = 3 << 10; + i++; +} + +static void ivb_update_gamma(const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + u16 gamma[18]; + int i; + + if (!ivb_need_sprite_gamma(plane_state)) + return; + + ivb_sprite_linear_gamma(plane_state, gamma); + + /* FIXME these register are single buffered :( */ + for (i = 0; i < 16; i++) + I915_WRITE_FW(SPRGAMC(pipe, i), + gamma[i] << 20 | + gamma[i] << 10 | + gamma[i]); + + I915_WRITE_FW(SPRGAMC16(pipe, 0), gamma[i]); + I915_WRITE_FW(SPRGAMC16(pipe, 1), gamma[i]); + I915_WRITE_FW(SPRGAMC16(pipe, 2), gamma[i]); + i++; + + I915_WRITE_FW(SPRGAMC17(pipe, 0), gamma[i]); + I915_WRITE_FW(SPRGAMC17(pipe, 1), gamma[i]); + I915_WRITE_FW(SPRGAMC17(pipe, 2), gamma[i]); + i++; +} + static void ivb_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, @@ -1043,14 +1450,14 @@ ivb_update_plane(struct intel_plane *plane, u32 sprsurf_offset = plane_state->color_plane[0].offset; u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; - u32 crtc_w = drm_rect_width(&plane_state->base.dst); - u32 crtc_h = drm_rect_height(&plane_state->base.dst); + int crtc_x = plane_state->uapi.dst.x1; + int crtc_y = plane_state->uapi.dst.y1; + u32 crtc_w = drm_rect_width(&plane_state->uapi.dst); + u32 crtc_h = drm_rect_height(&plane_state->uapi.dst); u32 x = plane_state->color_plane[0].x; u32 y = plane_state->color_plane[0].y; - u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; - u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + u32 src_h = drm_rect_height(&plane_state->uapi.src) >> 16; u32 sprctl, sprscale = 0; unsigned long irqflags; @@ -1099,6 +1506,8 @@ ivb_update_plane(struct intel_plane *plane, I915_WRITE_FW(SPRSURF(pipe), intel_plane_ggtt_offset(plane_state) + sprsurf_offset); + ivb_update_gamma(plane_state); + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -1144,6 +1553,53 @@ ivb_plane_get_hw_state(struct intel_plane *plane, return ret; } +static int g4x_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int hscale, pixel_rate; + unsigned int limit, decimate; + + /* + * Note that crtc_state->pixel_rate accounts for both + * horizontal and vertical panel fitter downscaling factors. 
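The WaFP16GammaEnabling ramp compensates for the quarter-amplitude FP16 sprite output by programming a gamma curve with a 4x slope that clips at full scale: the first quarter of the input range climbs to maximum, everything above it stays saturated, and the two extra entries hold the >= 1.0 end points. The resulting table values:

  #include <stdio.h>

  static unsigned int min_u(unsigned int a, unsigned int b)
  {
      return a < b ? a : b;
  }

  int main(void)
  {
      unsigned int scale = 4;   /* WaFP16GammaEnabling: restore 4x amplitude */
      unsigned int gamma[18];
      int i;

      for (i = 0; i < 16; i++)
          gamma[i] = min_u((scale * i << 10) / 16, (1 << 10) - 1);
      gamma[i] = min_u((scale * i << 10) / 16, 1 << 10);
      i++;
      gamma[i] = 3 << 10;

      /* Prints: 0 256 512 768 1023 1023 ... 1023 1024 3072 */
      for (i = 0; i < 18; i++)
          printf("%u ", gamma[i]);
      printf("\n");
      return 0;
  }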
+ * Pre-HSW bspec tells us to only consider the horizontal + * downscaling factor here. We ignore that and just consider + * both for simplicity. + */ + pixel_rate = crtc_state->pixel_rate; + + /* Horizontal downscaling limits the maximum pixel rate */ + hscale = drm_rect_calc_hscale(&plane_state->uapi.src, + &plane_state->uapi.dst, + 0, INT_MAX); + if (hscale < 0x10000) + return pixel_rate; + + /* Decimation steps at 2x,4x,8x,16x */ + decimate = ilog2(hscale >> 16); + hscale >>= decimate; + + /* Starting limit is 90% of cdclk */ + limit = 9; + + /* -10% per decimation step */ + limit -= decimate; + + /* -10% for RGB */ + if (fb->format->cpp[0] >= 4) + limit--; /* -10% for RGB */ + + /* + * We should also do -10% if sprite scaling is enabled + * on the other pipe, but we can't really check for that, + * so we ignore it. + */ + + return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_rate, 10 * hscale), + limit << 16); +} + static unsigned int g4x_sprite_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, @@ -1169,9 +1625,9 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; + to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 dvscntr; @@ -1187,6 +1643,18 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_XRGB8888: dvscntr |= DVS_FORMAT_RGBX888; break; + case DRM_FORMAT_XBGR2101010: + dvscntr |= DVS_FORMAT_RGBX101010 | DVS_RGB_ORDER_XBGR; + break; + case DRM_FORMAT_XRGB2101010: + dvscntr |= DVS_FORMAT_RGBX101010; + break; + case DRM_FORMAT_XBGR16161616F: + dvscntr |= DVS_FORMAT_RGBX161616 | DVS_RGB_ORDER_XBGR; + break; + case DRM_FORMAT_XRGB16161616F: + dvscntr |= DVS_FORMAT_RGBX161616; + break; case DRM_FORMAT_YUYV: dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_YUYV; break; @@ -1204,10 +1672,10 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, return 0; } - if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) + if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709) dvscntr |= DVS_YUV_FORMAT_BT709; - if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE) + if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE) dvscntr |= DVS_YUV_RANGE_CORRECTION_DISABLE; if (fb->modifier == I915_FORMAT_MOD_X_TILED) @@ -1224,6 +1692,66 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, return dvscntr; } +static void g4x_update_gamma(const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + enum pipe pipe = plane->pipe; + u16 gamma[8]; + int i; + + /* Seems RGB data bypasses the gamma always */ + if (!fb->format->is_yuv) + return; + + i9xx_plane_linear_gamma(gamma); + + /* FIXME these register are single buffered :( */ + /* The two end points are implicit (0.0 and 1.0) */ + for (i = 1; i < 8 - 1; i++) + I915_WRITE_FW(DVSGAMC_G4X(pipe, i - 1), + gamma[i] << 16 | + gamma[i] << 8 | + gamma[i]); +} + +static void ilk_sprite_linear_gamma(u16 gamma[17]) +{ + int i; + + for (i = 0; i < 17; i++) + 
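g4x_sprite_min_cdclk() encodes the limits commented above (start at 90% of cdclk, -10% per decimation step, -10% for RGB) as an integer limit out of 10, then scales the pipe pixel rate by the residual horizontal downscale left after decimation. For a 2x horizontal downscale of an RGB sprite: decimate = 1, limit = 9 - 1 - 1 = 7, so cdclk must be at least 10/7 of the pixel rate; a worked example with an assumed 148.5 MHz pipe:

  #include <stdio.h>
  #include <stdint.h>

  static unsigned int ilog2_u(unsigned int v)
  {
      unsigned int r = 0;

      while (v >>= 1)
          r++;
      return r;
  }

  int main(void)
  {
      uint64_t pixel_rate = 148500;       /* kHz */
      unsigned int hscale = 2 << 16;      /* 2x downscale in 16.16 */
      unsigned int limit = 9;             /* starting limit: 90% of cdclk */
      unsigned int decimate;

      decimate = ilog2_u(hscale >> 16);   /* decimation steps at 2x,4x,8x,16x */
      hscale >>= decimate;                /* residual scale after decimation */
      limit -= decimate;                  /* -10% per decimation step */
      limit--;                            /* -10% for RGB (cpp >= 4) */

      /* ceil(148500 * 10 * 0x10000 / (7 << 16)) = 212143 kHz */
      printf("min cdclk %llu kHz\n",
             (unsigned long long)((pixel_rate * 10 * hscale +
                                   (limit << 16) - 1) / (limit << 16)));
      return 0;
  }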
gamma[i] = (i << 10) / 16; +} + +static void ilk_update_gamma(const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + enum pipe pipe = plane->pipe; + u16 gamma[17]; + int i; + + /* Seems RGB data bypasses the gamma always */ + if (!fb->format->is_yuv) + return; + + ilk_sprite_linear_gamma(gamma); + + /* FIXME these register are single buffered :( */ + for (i = 0; i < 16; i++) + I915_WRITE_FW(DVSGAMC_ILK(pipe, i), + gamma[i] << 20 | + gamma[i] << 10 | + gamma[i]); + + I915_WRITE_FW(DVSGAMCMAX_ILK(pipe, 0), gamma[i]); + I915_WRITE_FW(DVSGAMCMAX_ILK(pipe, 1), gamma[i]); + I915_WRITE_FW(DVSGAMCMAX_ILK(pipe, 2), gamma[i]); + i++; +} + static void g4x_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, @@ -1234,14 +1762,14 @@ g4x_update_plane(struct intel_plane *plane, u32 dvssurf_offset = plane_state->color_plane[0].offset; u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; - u32 crtc_w = drm_rect_width(&plane_state->base.dst); - u32 crtc_h = drm_rect_height(&plane_state->base.dst); + int crtc_x = plane_state->uapi.dst.x1; + int crtc_y = plane_state->uapi.dst.y1; + u32 crtc_w = drm_rect_width(&plane_state->uapi.dst); + u32 crtc_h = drm_rect_height(&plane_state->uapi.dst); u32 x = plane_state->color_plane[0].x; u32 y = plane_state->color_plane[0].y; - u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; - u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + u32 src_h = drm_rect_height(&plane_state->uapi.src) >> 16; u32 dvscntr, dvsscale = 0; unsigned long irqflags; @@ -1283,6 +1811,11 @@ g4x_update_plane(struct intel_plane *plane, I915_WRITE_FW(DVSSURF(pipe), intel_plane_ggtt_offset(plane_state) + dvssurf_offset); + if (IS_G4X(dev_priv)) + g4x_update_gamma(plane_state); + else + ilk_update_gamma(plane_state); + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -1335,6 +1868,11 @@ static bool intel_fb_scalable(const struct drm_framebuffer *fb) switch (fb->format->format) { case DRM_FORMAT_C8: return false; + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + return INTEL_GEN(to_i915(fb->dev)) >= 11; default: return true; } @@ -1344,12 +1882,13 @@ static int g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; - const struct drm_rect *src = &plane_state->base.src; - const struct drm_rect *dst = &plane_state->base.dst; - int src_x, src_y, src_w, src_h, crtc_w, crtc_h; + const struct drm_framebuffer *fb = plane_state->hw.fb; + const struct drm_rect *src = &plane_state->uapi.src; + const struct drm_rect *dst = &plane_state->uapi.dst; + int src_x, src_w, src_h, crtc_w, crtc_h; const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; + unsigned int stride = plane_state->color_plane[0].stride; unsigned int cpp = fb->format->cpp[0]; unsigned int width_bytes; int min_width, min_height; @@ -1358,7 +1897,6 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, crtc_h = drm_rect_height(dst); src_x = src->x1 >> 16; - src_y = src->y1 >> 16; src_w 
= drm_rect_width(src) >> 16; src_h = drm_rect_height(src) >> 16; @@ -1392,9 +1930,9 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, return -EINVAL; } - if (width_bytes > 4096 || fb->pitches[0] > 4096) { + if (stride > 4096) { DRM_DEBUG_KMS("Stride (%u) exceeds hardware max with scaling (%u)\n", - fb->pitches[0], 4096); + stride, 4096); return -EINVAL; } @@ -1405,13 +1943,13 @@ static int g4x_sprite_check(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); int min_scale = DRM_PLANE_HELPER_NO_SCALING; int max_scale = DRM_PLANE_HELPER_NO_SCALING; int ret; - if (intel_fb_scalable(plane_state->base.fb)) { + if (intel_fb_scalable(plane_state->hw.fb)) { if (INTEL_GEN(dev_priv) < 7) { min_scale = 1; max_scale = 16 << 16; @@ -1421,8 +1959,8 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, } } - ret = drm_atomic_helper_check_plane_state(&plane_state->base, - &crtc_state->base, + ret = drm_atomic_helper_check_plane_state(&plane_state->uapi, + &crtc_state->uapi, min_scale, max_scale, true, true); if (ret) @@ -1432,7 +1970,7 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) return 0; ret = intel_plane_check_src_coordinates(plane_state); @@ -1453,9 +1991,9 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, int chv_plane_check_rotation(const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - unsigned int rotation = plane_state->base.rotation; + unsigned int rotation = plane_state->hw.rotation; /* CHV ignores the mirror bit when the rotate bit is set :( */ if (IS_CHERRYVIEW(dev_priv) && @@ -1478,8 +2016,8 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - ret = drm_atomic_helper_check_plane_state(&plane_state->base, - &crtc_state->base, + ret = drm_atomic_helper_check_plane_state(&plane_state->uapi, + &crtc_state->uapi, DRM_PLANE_HELPER_NO_SCALING, DRM_PLANE_HELPER_NO_SCALING, true, true); @@ -1490,7 +2028,7 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) return 0; ret = intel_plane_check_src_coordinates(plane_state); @@ -1505,10 +2043,10 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state, static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; struct drm_format_name_buf format_name; if (!fb) @@ -1563,12 +2101,14 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state, } /* Y-tiling is not supported in IF-ID Interlace mode */ - if (crtc_state->base.enable && - crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE && + if 
(crtc_state->hw.enable && + crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE && (fb->modifier == I915_FORMAT_MOD_Y_TILED || fb->modifier == I915_FORMAT_MOD_Yf_TILED || fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || - fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS)) { + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS || + fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS)) { DRM_DEBUG_KMS("Y/Yf tiling not supported in IF-ID mode\n"); return -EINVAL; } @@ -1580,9 +2120,9 @@ static int skl_plane_check_dst_coordinates(const struct intel_crtc_state *crtc_s const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - int crtc_x = plane_state->base.dst.x1; - int crtc_w = drm_rect_width(&plane_state->base.dst); + to_i915(plane_state->uapi.plane->dev); + int crtc_x = plane_state->uapi.dst.x1; + int crtc_w = drm_rect_width(&plane_state->uapi.dst); int pipe_src_w = crtc_state->pipe_src_w; /* @@ -1608,12 +2148,13 @@ static int skl_plane_check_dst_coordinates(const struct intel_crtc_state *crtc_s static int skl_plane_check_nv12_rotation(const struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; - int src_w = drm_rect_width(&plane_state->base.src) >> 16; + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; + int src_w = drm_rect_width(&plane_state->uapi.src) >> 16; /* Display WA #1106 */ - if (is_planar_yuv_format(fb->format->format) && src_w & 3 && + if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) && + src_w & 3 && (rotation == DRM_MODE_ROTATE_270 || rotation == (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90))) { DRM_DEBUG_KMS("src width must be multiple of 4 for rotated planar YUV\n"); @@ -1623,12 +2164,28 @@ static int skl_plane_check_nv12_rotation(const struct intel_plane_state *plane_s return 0; } +static int skl_plane_max_scale(struct drm_i915_private *dev_priv, + const struct drm_framebuffer *fb) +{ + /* + * We don't yet know the final source width nor + * whether we can use the HQ scaler mode. Assume + * the best case. + * FIXME need to properly check this later. 
+ */ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) || + !intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier)) + return 0x30000 - 1; + else + return 0x20000 - 1; +} + static int skl_plane_check(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_framebuffer *fb = plane_state->hw.fb; int min_scale = DRM_PLANE_HELPER_NO_SCALING; int max_scale = DRM_PLANE_HELPER_NO_SCALING; int ret; @@ -1640,11 +2197,11 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, /* use scaler when colorkey is not required */ if (!plane_state->ckey.flags && intel_fb_scalable(fb)) { min_scale = 1; - max_scale = skl_max_scale(crtc_state, fb->format->format); + max_scale = skl_plane_max_scale(dev_priv, fb); } - ret = drm_atomic_helper_check_plane_state(&plane_state->base, - &crtc_state->base, + ret = drm_atomic_helper_check_plane_state(&plane_state->uapi, + &crtc_state->uapi, min_scale, max_scale, true, true); if (ret) @@ -1654,7 +2211,7 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - if (!plane_state->base.visible) + if (!plane_state->uapi.visible) return 0; ret = skl_plane_check_dst_coordinates(crtc_state, plane_state); @@ -1670,8 +2227,8 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, return ret; /* HW only has 8 bits pixel precision, disable plane if invisible */ - if (!(plane_state->base.alpha >> 8)) - plane_state->base.visible = false; + if (!(plane_state->hw.alpha >> 8)) + plane_state->uapi.visible = false; plane_state->ctl = skl_plane_ctl(crtc_state, plane_state); @@ -1679,6 +2236,15 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, plane_state->color_ctl = glk_plane_color_ctl(crtc_state, plane_state); + if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) && + icl_is_hdr_plane(dev_priv, plane->id)) + /* Enable and use MPEG-2 chroma siting */ + plane_state->cus_ctl = PLANE_CUS_ENABLE | + PLANE_CUS_HPHASE_0 | + PLANE_CUS_VPHASE_SIGN_NEGATIVE | PLANE_CUS_VPHASE_0_25; + else + plane_state->cus_ctl = 0; + return 0; } @@ -1690,7 +2256,7 @@ static bool has_dst_key_in_primary_plane(struct drm_i915_private *dev_priv) static void intel_plane_set_ckey(struct intel_plane_state *plane_state, const struct drm_intel_sprite_colorkey *set) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -1815,8 +2381,12 @@ static const u64 i9xx_plane_format_modifiers[] = { }; static const u32 snb_plane_formats[] = { - DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_XRGB16161616F, + DRM_FORMAT_XBGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, @@ -1824,11 +2394,12 @@ static const u32 snb_plane_formats[] = { }; static const u32 vlv_plane_formats[] = { + DRM_FORMAT_C8, DRM_FORMAT_RGB565, - DRM_FORMAT_ABGR8888, - DRM_FORMAT_ARGB8888, - DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_ABGR8888, DRM_FORMAT_XBGR2101010, DRM_FORMAT_ABGR2101010, DRM_FORMAT_YUYV, 
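
The scale factors used in these helpers are 16.16 fixed point: 0x10000 is 1.0, so skl_plane_max_scale() returning 0x30000 - 1 means "anything short of 3x downscaling". The standalone sketch below (not part of this patch) walks g4x_sprite_min_cdclk()'s arithmetic for one assumed case, a 3840-wide source downscaled to 1920 at a 148500 kHz pixel rate with a 32bpp RGB framebuffer, and arrives at 212143 kHz, i.e. 10/7 of the pixel rate once the decimation and RGB penalties are applied.

#include <stdio.h>
#include <stdint.h>

static unsigned int ilog2_u32(uint32_t v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	uint32_t pixel_rate = 148500;	/* kHz, assumed */
	uint32_t src_w = 3840, dst_w = 1920, cpp = 4;
	uint64_t hscale, num, den;
	unsigned int decimate, limit;

	/* 16.16 horizontal downscale factor: 0x20000 == 2.0 */
	hscale = ((uint64_t)src_w << 16) / dst_w;

	decimate = ilog2_u32(hscale >> 16);	/* one 2x decimation step */
	hscale >>= decimate;

	limit = 9;		/* start at 90% of cdclk */
	limit -= decimate;	/* -10% per decimation step */
	if (cpp >= 4)
		limit--;	/* -10% for RGB */

	num = (uint64_t)pixel_rate * 10 * hscale;
	den = (uint64_t)limit << 16;
	printf("min cdclk = %llu kHz\n",
	       (unsigned long long)((num + den - 1) / den));
	return 0;
}
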
@@ -1837,7 +2408,7 @@ static const u32 vlv_plane_formats[] = { DRM_FORMAT_VYUY, }; -static const u32 skl_plane_formats[] = { +static const u32 chv_pipe_b_sprite_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1846,13 +2417,15 @@ static const u32 skl_plane_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_ABGR2101010, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, }; -static const u32 icl_plane_formats[] = { +static const u32 skl_plane_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1861,19 +2434,15 @@ static const u32 icl_plane_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_XRGB16161616F, + DRM_FORMAT_XBGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, - DRM_FORMAT_Y210, - DRM_FORMAT_Y212, - DRM_FORMAT_Y216, - DRM_FORMAT_XVYU2101010, - DRM_FORMAT_XVYU12_16161616, - DRM_FORMAT_XVYU16161616, }; -static const u32 icl_hdr_plane_formats[] = { +static const u32 skl_planar_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1884,21 +2453,14 @@ static const u32 icl_hdr_plane_formats[] = { DRM_FORMAT_XBGR2101010, DRM_FORMAT_XRGB16161616F, DRM_FORMAT_XBGR16161616F, - DRM_FORMAT_ARGB16161616F, - DRM_FORMAT_ABGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, - DRM_FORMAT_Y210, - DRM_FORMAT_Y212, - DRM_FORMAT_Y216, - DRM_FORMAT_XVYU2101010, - DRM_FORMAT_XVYU12_16161616, - DRM_FORMAT_XVYU16161616, + DRM_FORMAT_NV12, }; -static const u32 skl_planar_formats[] = { +static const u32 glk_planar_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1907,14 +2469,19 @@ static const u32 skl_planar_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_XRGB16161616F, + DRM_FORMAT_XBGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_NV12, + DRM_FORMAT_P010, + DRM_FORMAT_P012, + DRM_FORMAT_P016, }; -static const u32 glk_planar_formats[] = { +static const u32 icl_sdr_y_plane_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1923,17 +2490,21 @@ static const u32 glk_planar_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_ABGR2101010, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, - DRM_FORMAT_NV12, - DRM_FORMAT_P010, - DRM_FORMAT_P012, - DRM_FORMAT_P016, + DRM_FORMAT_Y210, + DRM_FORMAT_Y212, + DRM_FORMAT_Y216, + DRM_FORMAT_XVYU2101010, + DRM_FORMAT_XVYU12_16161616, + DRM_FORMAT_XVYU16161616, }; -static const u32 icl_planar_formats[] = { +static const u32 icl_sdr_uv_plane_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1942,6 +2513,8 @@ static const u32 icl_planar_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_ABGR2101010, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, @@ -1958,7 +2531,7 @@ static const u32 icl_planar_formats[] = { DRM_FORMAT_XVYU16161616, }; -static const u32 icl_hdr_planar_formats[] = { +static const u32 icl_hdr_plane_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1967,6 +2540,8 @@ static const u32 icl_hdr_planar_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_ABGR2101010, DRM_FORMAT_XRGB16161616F, 
DRM_FORMAT_XBGR16161616F, DRM_FORMAT_ARGB16161616F, @@ -2005,6 +2580,23 @@ static const u64 skl_plane_format_modifiers_ccs[] = { DRM_FORMAT_MOD_INVALID }; +static const u64 gen12_plane_format_modifiers_mc_ccs[] = { + I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS, + I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS, + I915_FORMAT_MOD_Y_TILED, + I915_FORMAT_MOD_X_TILED, + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + +static const u64 gen12_plane_format_modifiers_rc_ccs[] = { + I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS, + I915_FORMAT_MOD_Y_TILED, + I915_FORMAT_MOD_X_TILED, + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + static bool g4x_sprite_format_mod_supported(struct drm_plane *_plane, u32 format, u64 modifier) { @@ -2045,6 +2637,10 @@ static bool snb_sprite_format_mod_supported(struct drm_plane *_plane, switch (format) { case DRM_FORMAT_XRGB8888: case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_XBGR16161616F: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -2070,6 +2666,7 @@ static bool vlv_sprite_format_mod_supported(struct drm_plane *_plane, } switch (format) { + case DRM_FORMAT_C8: case DRM_FORMAT_RGB565: case DRM_FORMAT_ABGR8888: case DRM_FORMAT_ARGB8888: @@ -2077,6 +2674,8 @@ static bool vlv_sprite_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_XRGB8888: case DRM_FORMAT_XBGR2101010: case DRM_FORMAT_ABGR2101010: + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_ARGB2101010: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -2121,6 +2720,8 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_RGB565: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ARGB2101010: + case DRM_FORMAT_ABGR2101010: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -2153,6 +2754,75 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, } } +static bool gen12_plane_supports_mc_ccs(enum plane_id plane_id) +{ + return plane_id < PLANE_SPRITE4; +} + +static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, + u32 format, u64 modifier) +{ + struct intel_plane *plane = to_intel_plane(_plane); + + switch (modifier) { + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: + if (!gen12_plane_supports_mc_ccs(plane->id)) + return false; + /* fall through */ + case DRM_FORMAT_MOD_LINEAR: + case I915_FORMAT_MOD_X_TILED: + case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: + break; + default: + return false; + } + + switch (format) { + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_ABGR8888: + if (is_ccs_modifier(modifier)) + return true; + /* fall through */ + case DRM_FORMAT_YUYV: + case DRM_FORMAT_YVYU: + case DRM_FORMAT_UYVY: + case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: + case DRM_FORMAT_P010: + case DRM_FORMAT_P012: + case DRM_FORMAT_P016: + if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS) + return true; + /* fall through */ + case DRM_FORMAT_RGB565: + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ARGB2101010: + case DRM_FORMAT_ABGR2101010: + case DRM_FORMAT_XVYU2101010: + case DRM_FORMAT_C8: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_Y210: + case DRM_FORMAT_Y212: + case DRM_FORMAT_Y216: + case DRM_FORMAT_XVYU12_16161616: + case DRM_FORMAT_XVYU16161616: + if (modifier 
== DRM_FORMAT_MOD_LINEAR || + modifier == I915_FORMAT_MOD_X_TILED || + modifier == I915_FORMAT_MOD_Y_TILED) + return true; + /* fall through */ + default: + return false; + } +} + static const struct drm_plane_funcs g4x_sprite_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -2189,6 +2859,15 @@ static const struct drm_plane_funcs skl_plane_funcs = { .format_mod_supported = skl_plane_format_mod_supported, }; +static const struct drm_plane_funcs gen12_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = intel_plane_destroy, + .atomic_duplicate_state = intel_plane_duplicate_state, + .atomic_destroy_state = intel_plane_destroy_state, + .format_mod_supported = gen12_plane_format_mod_supported, +}; + static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { @@ -2201,9 +2880,6 @@ static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, static bool skl_plane_has_planar(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { - if (INTEL_GEN(dev_priv) >= 11) - return plane_id <= PLANE_SPRITE3; - /* Display WA #0870: skl, bxt */ if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) return false; @@ -2217,6 +2893,56 @@ static bool skl_plane_has_planar(struct drm_i915_private *dev_priv, return true; } +static const u32 *skl_get_plane_formats(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id, + int *num_formats) +{ + if (skl_plane_has_planar(dev_priv, pipe, plane_id)) { + *num_formats = ARRAY_SIZE(skl_planar_formats); + return skl_planar_formats; + } else { + *num_formats = ARRAY_SIZE(skl_plane_formats); + return skl_plane_formats; + } +} + +static const u32 *glk_get_plane_formats(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id, + int *num_formats) +{ + if (skl_plane_has_planar(dev_priv, pipe, plane_id)) { + *num_formats = ARRAY_SIZE(glk_planar_formats); + return glk_planar_formats; + } else { + *num_formats = ARRAY_SIZE(skl_plane_formats); + return skl_plane_formats; + } +} + +static const u32 *icl_get_plane_formats(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id, + int *num_formats) +{ + if (icl_is_hdr_plane(dev_priv, plane_id)) { + *num_formats = ARRAY_SIZE(icl_hdr_plane_formats); + return icl_hdr_plane_formats; + } else if (icl_is_nv12_y_plane(plane_id)) { + *num_formats = ARRAY_SIZE(icl_sdr_y_plane_formats); + return icl_sdr_y_plane_formats; + } else { + *num_formats = ARRAY_SIZE(icl_sdr_uv_plane_formats); + return icl_sdr_uv_plane_formats; + } +} + +static const u64 *gen12_get_plane_modifiers(enum plane_id plane_id) +{ + if (gen12_plane_supports_mc_ccs(plane_id)) + return gen12_plane_format_modifiers_mc_ccs; + else + return gen12_plane_format_modifiers_rc_ccs; +} + static bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { @@ -2238,6 +2964,7 @@ struct intel_plane * skl_universal_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { + const struct drm_plane_funcs *plane_funcs; struct intel_plane *plane; enum drm_plane_type plane_type; unsigned int supported_rotations; @@ -2267,39 +2994,29 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, plane->disable_plane = skl_disable_plane; plane->get_hw_state = skl_plane_get_hw_state; plane->check_plane = skl_plane_check; - if (icl_is_nv12_y_plane(plane_id)) - 
plane->update_slave = icl_update_slave; + plane->min_cdclk = skl_plane_min_cdclk; - if (skl_plane_has_planar(dev_priv, pipe, plane_id)) { - if (icl_is_hdr_plane(dev_priv, plane_id)) { - formats = icl_hdr_planar_formats; - num_formats = ARRAY_SIZE(icl_hdr_planar_formats); - } else if (INTEL_GEN(dev_priv) >= 11) { - formats = icl_planar_formats; - num_formats = ARRAY_SIZE(icl_planar_formats); - } else if (INTEL_GEN(dev_priv) == 10 || IS_GEMINILAKE(dev_priv)) { - formats = glk_planar_formats; - num_formats = ARRAY_SIZE(glk_planar_formats); - } else { - formats = skl_planar_formats; - num_formats = ARRAY_SIZE(skl_planar_formats); - } - } else if (icl_is_hdr_plane(dev_priv, plane_id)) { - formats = icl_hdr_plane_formats; - num_formats = ARRAY_SIZE(icl_hdr_plane_formats); - } else if (INTEL_GEN(dev_priv) >= 11) { - formats = icl_plane_formats; - num_formats = ARRAY_SIZE(icl_plane_formats); - } else { - formats = skl_plane_formats; - num_formats = ARRAY_SIZE(skl_plane_formats); - } + if (INTEL_GEN(dev_priv) >= 11) + formats = icl_get_plane_formats(dev_priv, pipe, + plane_id, &num_formats); + else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + formats = glk_get_plane_formats(dev_priv, pipe, + plane_id, &num_formats); + else + formats = skl_get_plane_formats(dev_priv, pipe, + plane_id, &num_formats); plane->has_ccs = skl_plane_has_ccs(dev_priv, pipe, plane_id); - if (plane->has_ccs) - modifiers = skl_plane_format_modifiers_ccs; - else - modifiers = skl_plane_format_modifiers_noccs; + if (INTEL_GEN(dev_priv) >= 12) { + modifiers = gen12_get_plane_modifiers(plane_id); + plane_funcs = &gen12_plane_funcs; + } else { + if (plane->has_ccs) + modifiers = skl_plane_format_modifiers_ccs; + else + modifiers = skl_plane_format_modifiers_noccs; + plane_funcs = &skl_plane_funcs; + } if (plane_id == PLANE_PRIMARY) plane_type = DRM_PLANE_TYPE_PRIMARY; @@ -2309,7 +3026,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, possible_crtcs = BIT(pipe); ret = drm_universal_plane_init(&dev_priv->drm, &plane->base, - possible_crtcs, &skl_plane_funcs, + possible_crtcs, plane_funcs, formats, num_formats, modifiers, plane_type, "plane %d%c", plane_id + 1, @@ -2342,6 +3059,8 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, BIT(DRM_MODE_BLEND_PREMULTI) | BIT(DRM_MODE_BLEND_COVERAGE)); + drm_plane_create_zpos_immutable_property(&plane->base, plane_id); + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); return plane; @@ -2363,7 +3082,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, const u64 *modifiers; const u32 *formats; int num_formats; - int ret; + int ret, zpos; if (INTEL_GEN(dev_priv) >= 9) return skl_universal_plane_create(dev_priv, pipe, @@ -2379,9 +3098,15 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, plane->disable_plane = vlv_disable_plane; plane->get_hw_state = vlv_plane_get_hw_state; plane->check_plane = vlv_sprite_check; + plane->min_cdclk = vlv_plane_min_cdclk; - formats = vlv_plane_formats; - num_formats = ARRAY_SIZE(vlv_plane_formats); + if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) { + formats = chv_pipe_b_sprite_formats; + num_formats = ARRAY_SIZE(chv_pipe_b_sprite_formats); + } else { + formats = vlv_plane_formats; + num_formats = ARRAY_SIZE(vlv_plane_formats); + } modifiers = i9xx_plane_format_modifiers; plane_funcs = &vlv_sprite_funcs; @@ -2392,6 +3117,11 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, plane->get_hw_state = ivb_plane_get_hw_state; plane->check_plane = g4x_sprite_check; + if 
(IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + plane->min_cdclk = hsw_plane_min_cdclk; + else + plane->min_cdclk = ivb_sprite_min_cdclk; + formats = snb_plane_formats; num_formats = ARRAY_SIZE(snb_plane_formats); modifiers = i9xx_plane_format_modifiers; @@ -2403,6 +3133,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, plane->disable_plane = g4x_disable_plane; plane->get_hw_state = g4x_plane_get_hw_state; plane->check_plane = g4x_sprite_check; + plane->min_cdclk = g4x_sprite_min_cdclk; modifiers = i9xx_plane_format_modifiers; if (IS_GEN(dev_priv, 6)) { @@ -2453,6 +3184,9 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); + zpos = sprite + 1; + drm_plane_create_zpos_immutable_property(&plane->base, zpos); + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); return plane; diff --git a/drivers/gpu/drm/i915/display/intel_sprite.h b/drivers/gpu/drm/i915/display/intel_sprite.h index 500f6bffb139..5eeaa92420d1 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.h +++ b/drivers/gpu/drm/i915/display/intel_sprite.h @@ -8,7 +8,6 @@ #include <linux/types.h> -#include "i915_drv.h" #include "intel_display.h" struct drm_device; @@ -18,7 +17,6 @@ struct drm_i915_private; struct intel_crtc_state; struct intel_plane_state; -bool is_planar_yuv_format(u32 pixelformat); int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, int usecs); struct intel_plane *intel_sprite_plane_create(struct drm_i915_private *dev_priv, @@ -49,11 +47,13 @@ static inline u8 icl_hdr_plane_mask(void) BIT(PLANE_SPRITE0) | BIT(PLANE_SPRITE1); } -static inline bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, - enum plane_id plane_id) -{ - return INTEL_GEN(dev_priv) >= 11 && - icl_hdr_plane_mask() & BIT(plane_id); -} +bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id); + +int ivb_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); +int hsw_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); +int vlv_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); #endif /* __INTEL_SPRITE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c new file mode 100644 index 000000000000..7773169b7331 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -0,0 +1,563 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_display.h" +#include "intel_display_types.h" +#include "intel_dp_mst.h" +#include "intel_tc.h" + +static const char *tc_port_mode_name(enum tc_port_mode mode) +{ + static const char * const names[] = { + [TC_PORT_TBT_ALT] = "tbt-alt", + [TC_PORT_DP_ALT] = "dp-alt", + [TC_PORT_LEGACY] = "legacy", + }; + + if (WARN_ON(mode >= ARRAY_SIZE(names))) + mode = TC_PORT_TBT_ALT; + + return names[mode]; +} + +static void +tc_port_load_fia_params(struct drm_i915_private *i915, + struct intel_digital_port *dig_port) +{ + enum port port = dig_port->base.port; + enum tc_port tc_port = intel_port_to_tc(i915, port); + u32 modular_fia; + + if (INTEL_INFO(i915)->display.has_modular_fia) { + modular_fia = intel_uncore_read(&i915->uncore, + PORT_TX_DFLEXDPSP(FIA1)); + modular_fia &= MODULAR_FIA_MASK; + } else { + modular_fia = 0; + } + + /* + * Each Modular FIA instance houses 2 TC ports. 
In SOC that has more + * than two TC ports, there are multiple instances of Modular FIA. + */ + if (modular_fia) { + dig_port->tc_phy_fia = tc_port / 2; + dig_port->tc_phy_fia_idx = tc_port % 2; + } else { + dig_port->tc_phy_fia = FIA1; + dig_port->tc_phy_fia_idx = tc_port; + } +} + +u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_uncore *uncore = &i915->uncore; + u32 lane_mask; + + lane_mask = intel_uncore_read(uncore, + PORT_TX_DFLEXDPSP(dig_port->tc_phy_fia)); + + WARN_ON(lane_mask == 0xffffffff); + + lane_mask &= DP_LANE_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx); + return lane_mask >> DP_LANE_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx); +} + +u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_uncore *uncore = &i915->uncore; + u32 pin_mask; + + pin_mask = intel_uncore_read(uncore, + PORT_TX_DFLEXPA1(dig_port->tc_phy_fia)); + + WARN_ON(pin_mask == 0xffffffff); + + return (pin_mask & DP_PIN_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx)) >> + DP_PIN_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx); +} + +int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + intel_wakeref_t wakeref; + u32 lane_mask; + + if (dig_port->tc_mode != TC_PORT_DP_ALT) + return 4; + + lane_mask = 0; + with_intel_display_power(i915, POWER_DOMAIN_DISPLAY_CORE, wakeref) + lane_mask = intel_tc_port_get_lane_mask(dig_port); + + switch (lane_mask) { + default: + MISSING_CASE(lane_mask); + /* fall-through */ + case 0x1: + case 0x2: + case 0x4: + case 0x8: + return 1; + case 0x3: + case 0xc: + return 2; + case 0xf: + return 4; + } +} + +void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, + int required_lanes) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + bool lane_reversal = dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; + struct intel_uncore *uncore = &i915->uncore; + u32 val; + + WARN_ON(lane_reversal && dig_port->tc_mode != TC_PORT_LEGACY); + + val = intel_uncore_read(uncore, + PORT_TX_DFLEXDPMLE1(dig_port->tc_phy_fia)); + val &= ~DFLEXDPMLE1_DPMLETC_MASK(dig_port->tc_phy_fia_idx); + + switch (required_lanes) { + case 1: + val |= lane_reversal ? + DFLEXDPMLE1_DPMLETC_ML3(dig_port->tc_phy_fia_idx) : + DFLEXDPMLE1_DPMLETC_ML0(dig_port->tc_phy_fia_idx); + break; + case 2: + val |= lane_reversal ? + DFLEXDPMLE1_DPMLETC_ML3_2(dig_port->tc_phy_fia_idx) : + DFLEXDPMLE1_DPMLETC_ML1_0(dig_port->tc_phy_fia_idx); + break; + case 4: + val |= DFLEXDPMLE1_DPMLETC_ML3_0(dig_port->tc_phy_fia_idx); + break; + default: + MISSING_CASE(required_lanes); + } + + intel_uncore_write(uncore, + PORT_TX_DFLEXDPMLE1(dig_port->tc_phy_fia), val); +} + +static void tc_port_fixup_legacy_flag(struct intel_digital_port *dig_port, + u32 live_status_mask) +{ + u32 valid_hpd_mask; + + if (dig_port->tc_legacy_port) + valid_hpd_mask = BIT(TC_PORT_LEGACY); + else + valid_hpd_mask = BIT(TC_PORT_DP_ALT) | + BIT(TC_PORT_TBT_ALT); + + if (!(live_status_mask & ~valid_hpd_mask)) + return; + + /* If live status mismatches the VBT flag, trust the live status. 
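
Two of the mappings above are easy to sanity-check in isolation: the modular-FIA bookkeeping, where each FIA instance serves two TC ports so port N lands on instance N/2 at index N%2 (without modular FIA everything sits on FIA1 at index N), and the DFLEXDPSP lane-assignment decode, where only a handful of 4-bit patterns are expected in DP-alt mode, which is why the code switches on the mask rather than counting bits. A standalone sketch (not part of this patch):

#include <stdio.h>

/* Mirrors the lane_mask switch above; like the driver, unexpected masks
 * fall back to a single lane (the driver also warns via MISSING_CASE). */
static int max_lanes_from_mask(unsigned int lane_mask)
{
	switch (lane_mask) {
	default:
	case 0x1: case 0x2: case 0x4: case 0x8:
		return 1;
	case 0x3: case 0xc:
		return 2;
	case 0xf:
		return 4;
	}
}

int main(void)
{
	unsigned int masks[] = { 0x1, 0x3, 0xc, 0xf };
	unsigned int i;
	int tc_port;

	for (tc_port = 0; tc_port < 6; tc_port++) {
		int fia = tc_port / 2;	/* assuming FIA1 == 0 in the enum */
		int idx = tc_port % 2;

		printf("TC#%d -> FIA%d index %d\n", tc_port + 1, fia + 1, idx);
	}

	for (i = 0; i < sizeof(masks) / sizeof(masks[0]); i++)
		printf("lane mask 0x%x -> %d lane(s)\n",
		       masks[i], max_lanes_from_mask(masks[i]));
	return 0;
}
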
*/ + DRM_ERROR("Port %s: live status %08x mismatches the legacy port flag, fix flag\n", + dig_port->tc_port_name, live_status_mask); + + dig_port->tc_legacy_port = !dig_port->tc_legacy_port; +} + +static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); + struct intel_uncore *uncore = &i915->uncore; + u32 mask = 0; + u32 val; + + val = intel_uncore_read(uncore, + PORT_TX_DFLEXDPSP(dig_port->tc_phy_fia)); + + if (val == 0xffffffff) { + DRM_DEBUG_KMS("Port %s: PHY in TCCOLD, nothing connected\n", + dig_port->tc_port_name); + return mask; + } + + if (val & TC_LIVE_STATE_TBT(dig_port->tc_phy_fia_idx)) + mask |= BIT(TC_PORT_TBT_ALT); + if (val & TC_LIVE_STATE_TC(dig_port->tc_phy_fia_idx)) + mask |= BIT(TC_PORT_DP_ALT); + + if (intel_uncore_read(uncore, SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port)) + mask |= BIT(TC_PORT_LEGACY); + + /* The sink can be connected only in a single mode. */ + if (!WARN_ON(hweight32(mask) > 1)) + tc_port_fixup_legacy_flag(dig_port, mask); + + return mask; +} + +static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_uncore *uncore = &i915->uncore; + u32 val; + + val = intel_uncore_read(uncore, + PORT_TX_DFLEXDPPMS(dig_port->tc_phy_fia)); + if (val == 0xffffffff) { + DRM_DEBUG_KMS("Port %s: PHY in TCCOLD, assuming not complete\n", + dig_port->tc_port_name); + return false; + } + + return val & DP_PHY_MODE_STATUS_COMPLETED(dig_port->tc_phy_fia_idx); +} + +static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port, + bool enable) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_uncore *uncore = &i915->uncore; + u32 val; + + val = intel_uncore_read(uncore, + PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia)); + if (val == 0xffffffff) { + DRM_DEBUG_KMS("Port %s: PHY in TCCOLD, can't set safe-mode to %s\n", + dig_port->tc_port_name, + enableddisabled(enable)); + + return false; + } + + val &= ~DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx); + if (!enable) + val |= DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx); + + intel_uncore_write(uncore, + PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia), val); + + if (enable && wait_for(!icl_tc_phy_status_complete(dig_port), 10)) + DRM_DEBUG_KMS("Port %s: PHY complete clear timed out\n", + dig_port->tc_port_name); + + return true; +} + +static bool icl_tc_phy_is_in_safe_mode(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_uncore *uncore = &i915->uncore; + u32 val; + + val = intel_uncore_read(uncore, + PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia)); + if (val == 0xffffffff) { + DRM_DEBUG_KMS("Port %s: PHY in TCCOLD, assume safe mode\n", + dig_port->tc_port_name); + return true; + } + + return !(val & DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx)); +} + +/* + * This function implements the first part of the Connect Flow described by our + * specification, Gen11 TypeC Programming chapter. The rest of the flow (reading + * lanes, EDID, etc) is done as needed in the typical places. + * + * Unlike the other ports, type-C ports are not available to use as soon as we + * get a hotplug. The type-C PHYs can be shared between multiple controllers: + * display, USB, etc. 
As a result, handshaking through FIA is required around + * connect and disconnect to cleanly transfer ownership with the controller and + * set the type-C power state. + */ +static void icl_tc_phy_connect(struct intel_digital_port *dig_port, + int required_lanes) +{ + int max_lanes; + + if (!icl_tc_phy_status_complete(dig_port)) { + DRM_DEBUG_KMS("Port %s: PHY not ready\n", + dig_port->tc_port_name); + goto out_set_tbt_alt_mode; + } + + if (!icl_tc_phy_set_safe_mode(dig_port, false) && + !WARN_ON(dig_port->tc_legacy_port)) + goto out_set_tbt_alt_mode; + + max_lanes = intel_tc_port_fia_max_lane_count(dig_port); + if (dig_port->tc_legacy_port) { + WARN_ON(max_lanes != 4); + dig_port->tc_mode = TC_PORT_LEGACY; + + return; + } + + /* + * Now we have to re-check the live state, in case the port recently + * became disconnected. Not necessary for legacy mode. + */ + if (!(tc_port_live_status_mask(dig_port) & BIT(TC_PORT_DP_ALT))) { + DRM_DEBUG_KMS("Port %s: PHY sudden disconnect\n", + dig_port->tc_port_name); + goto out_set_safe_mode; + } + + if (max_lanes < required_lanes) { + DRM_DEBUG_KMS("Port %s: PHY max lanes %d < required lanes %d\n", + dig_port->tc_port_name, + max_lanes, required_lanes); + goto out_set_safe_mode; + } + + dig_port->tc_mode = TC_PORT_DP_ALT; + + return; + +out_set_safe_mode: + icl_tc_phy_set_safe_mode(dig_port, true); +out_set_tbt_alt_mode: + dig_port->tc_mode = TC_PORT_TBT_ALT; +} + +/* + * See the comment at the connect function. This implements the Disconnect + * Flow. + */ +static void icl_tc_phy_disconnect(struct intel_digital_port *dig_port) +{ + switch (dig_port->tc_mode) { + case TC_PORT_LEGACY: + /* Nothing to do, we never disconnect from legacy mode */ + break; + case TC_PORT_DP_ALT: + icl_tc_phy_set_safe_mode(dig_port, true); + dig_port->tc_mode = TC_PORT_TBT_ALT; + break; + case TC_PORT_TBT_ALT: + /* Nothing to do, we stay in TBT-alt mode */ + break; + default: + MISSING_CASE(dig_port->tc_mode); + } +} + +static bool icl_tc_phy_is_connected(struct intel_digital_port *dig_port) +{ + if (!icl_tc_phy_status_complete(dig_port)) { + DRM_DEBUG_KMS("Port %s: PHY status not complete\n", + dig_port->tc_port_name); + return dig_port->tc_mode == TC_PORT_TBT_ALT; + } + + if (icl_tc_phy_is_in_safe_mode(dig_port)) { + DRM_DEBUG_KMS("Port %s: PHY still in safe mode\n", + dig_port->tc_port_name); + + return false; + } + + return dig_port->tc_mode == TC_PORT_DP_ALT || + dig_port->tc_mode == TC_PORT_LEGACY; +} + +static enum tc_port_mode +intel_tc_port_get_current_mode(struct intel_digital_port *dig_port) +{ + u32 live_status_mask = tc_port_live_status_mask(dig_port); + bool in_safe_mode = icl_tc_phy_is_in_safe_mode(dig_port); + enum tc_port_mode mode; + + if (in_safe_mode || WARN_ON(!icl_tc_phy_status_complete(dig_port))) + return TC_PORT_TBT_ALT; + + mode = dig_port->tc_legacy_port ? TC_PORT_LEGACY : TC_PORT_DP_ALT; + if (live_status_mask) { + enum tc_port_mode live_mode = fls(live_status_mask) - 1; + + if (!WARN_ON(live_mode == TC_PORT_TBT_ALT)) + mode = live_mode; + } + + return mode; +} + +static enum tc_port_mode +intel_tc_port_get_target_mode(struct intel_digital_port *dig_port) +{ + u32 live_status_mask = tc_port_live_status_mask(dig_port); + + if (live_status_mask) + return fls(live_status_mask) - 1; + + return icl_tc_phy_status_complete(dig_port) && + dig_port->tc_legacy_port ? 
TC_PORT_LEGACY : + TC_PORT_TBT_ALT; +} + +static void intel_tc_port_reset_mode(struct intel_digital_port *dig_port, + int required_lanes) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + enum tc_port_mode old_tc_mode = dig_port->tc_mode; + + intel_display_power_flush_work(i915); + WARN_ON(intel_display_power_is_enabled(i915, + intel_aux_power_domain(dig_port))); + + icl_tc_phy_disconnect(dig_port); + icl_tc_phy_connect(dig_port, required_lanes); + + DRM_DEBUG_KMS("Port %s: TC port mode reset (%s -> %s)\n", + dig_port->tc_port_name, + tc_port_mode_name(old_tc_mode), + tc_port_mode_name(dig_port->tc_mode)); +} + +static void +intel_tc_port_link_init_refcount(struct intel_digital_port *dig_port, + int refcount) +{ + WARN_ON(dig_port->tc_link_refcount); + dig_port->tc_link_refcount = refcount; +} + +void intel_tc_port_sanitize(struct intel_digital_port *dig_port) +{ + struct intel_encoder *encoder = &dig_port->base; + int active_links = 0; + + mutex_lock(&dig_port->tc_lock); + + dig_port->tc_mode = intel_tc_port_get_current_mode(dig_port); + if (dig_port->dp.is_mst) + active_links = intel_dp_mst_encoder_active_links(dig_port); + else if (encoder->base.crtc) + active_links = to_intel_crtc(encoder->base.crtc)->active; + + if (active_links) { + if (!icl_tc_phy_is_connected(dig_port)) + DRM_DEBUG_KMS("Port %s: PHY disconnected with %d active link(s)\n", + dig_port->tc_port_name, active_links); + intel_tc_port_link_init_refcount(dig_port, active_links); + + goto out; + } + + if (dig_port->tc_legacy_port) + icl_tc_phy_connect(dig_port, 1); + +out: + DRM_DEBUG_KMS("Port %s: sanitize mode (%s)\n", + dig_port->tc_port_name, + tc_port_mode_name(dig_port->tc_mode)); + + mutex_unlock(&dig_port->tc_lock); +} + +static bool intel_tc_port_needs_reset(struct intel_digital_port *dig_port) +{ + return intel_tc_port_get_target_mode(dig_port) != dig_port->tc_mode; +} + +/* + * The type-C ports are different because even when they are connected, they may + * not be available/usable by the graphics driver: see the comment on + * icl_tc_phy_connect(). So in our driver instead of adding the additional + * concept of "usable" and make everything check for "connected and usable" we + * define a port as "connected" when it is not only connected, but also when it + * is usable by the rest of the driver. That maintains the old assumption that + * connected ports are usable, and avoids exposing to the users objects they + * can't really use. 
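
The mode selection above leans on the numeric order of enum tc_port_mode: the live-status word carries one bit per mode, and fls(mask) - 1 picks the highest-numbered mode that is live, so legacy wins over DP-alt, which wins over TBT-alt. A standalone sketch (not part of this patch), assuming the TBT_ALT = 0, DP_ALT = 1, LEGACY = 2 ordering implied by the name table at the top of the file; in practice the driver also warns when more than one bit is set and fixes up the legacy flag, so multi-bit masks should be rare.

#include <stdio.h>

enum tc_port_mode { TC_PORT_TBT_ALT, TC_PORT_DP_ALT, TC_PORT_LEGACY };

static const char * const names[] = { "tbt-alt", "dp-alt", "legacy" };

/* 1-based index of the highest set bit, like the kernel's fls() */
static int fls_u32(unsigned int v)
{
	int r = 0;

	while (v) {
		v >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned int masks[] = {
		1u << TC_PORT_TBT_ALT,
		1u << TC_PORT_DP_ALT,
		(1u << TC_PORT_DP_ALT) | (1u << TC_PORT_LEGACY),
	};
	unsigned int i;

	for (i = 0; i < sizeof(masks) / sizeof(masks[0]); i++) {
		enum tc_port_mode target = fls_u32(masks[i]) - 1;

		printf("live status 0x%x -> %s\n", masks[i], names[target]);
	}
	return 0;
}
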
+ */ +bool intel_tc_port_connected(struct intel_digital_port *dig_port) +{ + bool is_connected; + + intel_tc_port_lock(dig_port); + is_connected = tc_port_live_status_mask(dig_port) & + BIT(dig_port->tc_mode); + intel_tc_port_unlock(dig_port); + + return is_connected; +} + +static void __intel_tc_port_lock(struct intel_digital_port *dig_port, + int required_lanes) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + intel_wakeref_t wakeref; + + wakeref = intel_display_power_get(i915, POWER_DOMAIN_DISPLAY_CORE); + + mutex_lock(&dig_port->tc_lock); + + if (!dig_port->tc_link_refcount && + intel_tc_port_needs_reset(dig_port)) + intel_tc_port_reset_mode(dig_port, required_lanes); + + WARN_ON(dig_port->tc_lock_wakeref); + dig_port->tc_lock_wakeref = wakeref; +} + +void intel_tc_port_lock(struct intel_digital_port *dig_port) +{ + __intel_tc_port_lock(dig_port, 1); +} + +void intel_tc_port_unlock(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + intel_wakeref_t wakeref = fetch_and_zero(&dig_port->tc_lock_wakeref); + + mutex_unlock(&dig_port->tc_lock); + + intel_display_power_put_async(i915, POWER_DOMAIN_DISPLAY_CORE, + wakeref); +} + +bool intel_tc_port_ref_held(struct intel_digital_port *dig_port) +{ + return mutex_is_locked(&dig_port->tc_lock) || + dig_port->tc_link_refcount; +} + +void intel_tc_port_get_link(struct intel_digital_port *dig_port, + int required_lanes) +{ + __intel_tc_port_lock(dig_port, required_lanes); + dig_port->tc_link_refcount++; + intel_tc_port_unlock(dig_port); +} + +void intel_tc_port_put_link(struct intel_digital_port *dig_port) +{ + mutex_lock(&dig_port->tc_lock); + dig_port->tc_link_refcount--; + mutex_unlock(&dig_port->tc_lock); +} + +void intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + enum port port = dig_port->base.port; + enum tc_port tc_port = intel_port_to_tc(i915, port); + + if (WARN_ON(tc_port == PORT_TC_NONE)) + return; + + snprintf(dig_port->tc_port_name, sizeof(dig_port->tc_port_name), + "%c/TC#%d", port_name(port), tc_port + 1); + + mutex_init(&dig_port->tc_lock); + dig_port->tc_legacy_port = is_legacy; + dig_port->tc_link_refcount = 0; + tc_port_load_fia_params(i915, dig_port); +} diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h new file mode 100644 index 000000000000..463f1b3c836f --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_tc.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_TC_H__ +#define __INTEL_TC_H__ + +#include <linux/mutex.h> +#include <linux/types.h> + +struct intel_digital_port; + +bool intel_tc_port_connected(struct intel_digital_port *dig_port); +u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port); +u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port); +int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port); +void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, + int required_lanes); + +void intel_tc_port_sanitize(struct intel_digital_port *dig_port); +void intel_tc_port_lock(struct intel_digital_port *dig_port); +void intel_tc_port_unlock(struct intel_digital_port *dig_port); +void intel_tc_port_get_link(struct intel_digital_port *dig_port, + int required_lanes); +void intel_tc_port_put_link(struct intel_digital_port *dig_port); +bool 
intel_tc_port_ref_held(struct intel_digital_port *dig_port); + +void intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy); + +#endif /* __INTEL_TC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 0a95df6c6a57..c75e0ceecee6 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -37,7 +37,7 @@ #include "i915_drv.h" #include "intel_connector.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_hotplug.h" #include "intel_tv.h" @@ -898,7 +898,7 @@ static struct intel_tv *enc_to_tv(struct intel_encoder *encoder) return container_of(encoder, struct intel_tv, base); } -static struct intel_tv *intel_attached_tv(struct drm_connector *connector) +static struct intel_tv *intel_attached_tv(struct intel_connector *connector) { return enc_to_tv(intel_attached_encoder(connector)); } @@ -924,7 +924,7 @@ intel_enable_tv(struct intel_encoder *encoder, /* Prevents vblank waits from timing out in intel_tv_detect_type() */ intel_wait_for_vblank(dev_priv, - to_intel_crtc(pipe_config->base.crtc)->pipe); + to_intel_crtc(pipe_config->uapi.crtc)->pipe); I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE); } @@ -961,11 +961,10 @@ intel_tv_mode_valid(struct drm_connector *connector, return MODE_CLOCK_HIGH; /* Ensure TV refresh is close to desired refresh */ - if (tv_mode && abs(tv_mode->refresh - drm_mode_vrefresh(mode) * 1000) - < 1000) - return MODE_OK; + if (abs(tv_mode->refresh - drm_mode_vrefresh(mode) * 1000) >= 1000) + return MODE_CLOCK_RANGE; - return MODE_CLOCK_RANGE; + return MODE_OK; } static int @@ -1086,7 +1085,7 @@ intel_tv_get_config(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &pipe_config->hw.adjusted_mode; struct drm_display_mode mode = {}; u32 tv_ctl, hctl1, hctl3, vctl1, vctl2, tmp; struct tv_mode tv_mode = {}; @@ -1189,7 +1188,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, to_intel_tv_connector_state(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &pipe_config->hw.adjusted_mode; int hdisplay = adjusted_mode->crtc_hdisplay; int vdisplay = adjusted_mode->crtc_vdisplay; @@ -1418,7 +1417,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); const struct intel_tv_connector_state *tv_conn_state = to_intel_tv_connector_state(conn_state); @@ -1528,7 +1527,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, ((video_levels->black << TV_BLACK_LEVEL_SHIFT) | (video_levels->blank << TV_BLANK_LEVEL_SHIFT))); - assert_pipe_disabled(dev_priv, intel_crtc->pipe); + assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder); /* Filter ctl must be set before TV_WIN_SIZE */ tv_filter_ctl = TV_AUTO_SCALE; @@ -1663,7 +1662,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, */ static void intel_tv_find_better_format(struct drm_connector *connector) { - struct intel_tv *intel_tv = intel_attached_tv(connector); + struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector)); const 
struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i; @@ -1690,7 +1689,7 @@ intel_tv_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { - struct intel_tv *intel_tv = intel_attached_tv(connector); + struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector)); enum drm_connector_status status; int type; @@ -1702,7 +1701,7 @@ intel_tv_detect(struct drm_connector *connector, struct intel_load_detect_pipe tmp; int ret; - ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); + ret = intel_get_load_detect_pipe(connector, &tmp, ctx); if (ret < 0) return ret; @@ -1948,9 +1947,8 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_TVOUT; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->pipe_mask = ~0; intel_encoder->cloneable = 0; - intel_encoder->base.possible_crtcs = ((1 << 0) | (1 << 1)); intel_tv->type = DRM_MODE_CONNECTOR_Unknown; /* BIOS margin values */ diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 5ddbe71ab423..4d0c23b29248 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -114,6 +114,8 @@ enum bdb_block_id { BDB_LVDS_POWER = 44, BDB_MIPI_CONFIG = 52, BDB_MIPI_SEQUENCE = 53, + BDB_COMPRESSION_PARAMETERS = 56, + BDB_GENERIC_DTD = 58, BDB_SKIP = 254, /* VBIOS private block, ignore */ }; @@ -291,6 +293,8 @@ struct bdb_general_features { #define DVO_PORT_HDMIE 12 /* 193 */ #define DVO_PORT_DPF 13 /* N/A */ #define DVO_PORT_HDMIF 14 /* N/A */ +#define DVO_PORT_DPG 15 +#define DVO_PORT_HDMIG 16 #define DVO_PORT_MIPIA 21 /* 171 */ #define DVO_PORT_MIPIB 22 /* 171 */ #define DVO_PORT_MIPIC 23 /* 171 */ @@ -310,13 +314,13 @@ enum vbt_gmbus_ddi { DDC_BUS_DDI_F, ICL_DDC_BUS_DDI_A = 0x1, ICL_DDC_BUS_DDI_B, + TGL_DDC_BUS_DDI_C, ICL_DDC_BUS_PORT_1 = 0x4, ICL_DDC_BUS_PORT_2, ICL_DDC_BUS_PORT_3, ICL_DDC_BUS_PORT_4, - MCC_DDC_BUS_DDI_A = 0x1, - MCC_DDC_BUS_DDI_B, - MCC_DDC_BUS_DDI_C = 0x4, + TGL_DDC_BUS_PORT_5, + TGL_DDC_BUS_PORT_6, }; #define DP_AUX_A 0x40 @@ -325,6 +329,7 @@ enum vbt_gmbus_ddi { #define DP_AUX_D 0x30 #define DP_AUX_E 0x50 #define DP_AUX_F 0x60 +#define DP_AUX_G 0x70 #define VBT_DP_MAX_LINK_RATE_HBR3 0 #define VBT_DP_MAX_LINK_RATE_HBR2 1 @@ -364,7 +369,7 @@ struct child_device_config { u16 dtd_buf_ptr; /* 161 */ u8 edidless_efp:1; /* 161 */ u8 compression_enable:1; /* 198 */ - u8 compression_method:1; /* 198 */ + u8 compression_method_cps:1; /* 198 */ u8 ganged_edp:1; /* 202 */ u8 reserved0:4; u8 compression_structure_index:4; /* 198 */ @@ -789,6 +794,35 @@ struct bdb_lfp_backlight_data { } __packed; /* + * Block 44 - LFP Power Conservation Features Block + */ + +struct als_data_entry { + u16 backlight_adjust; + u16 lux; +} __packed; + +struct agressiveness_profile_entry { + u8 dpst_agressiveness : 4; + u8 lace_agressiveness : 4; +} __packed; + +struct bdb_lfp_power { + u8 lfp_feature_bits; + struct als_data_entry als[5]; + u8 lace_aggressiveness_profile; + u16 dpst; + u16 psr; + u16 drrs; + u16 lace_support; + u16 adt; + u16 dmrrs; + u16 adb; + u16 lace_enabled_status; + struct agressiveness_profile_entry aggressivenes[16]; +} __packed; + +/* * Block 52 - MIPI Configuration Block */ @@ -808,4 +842,85 @@ struct bdb_mipi_sequence { u8 data[0]; /* up to 6 variable length blocks */ } __packed; +/* + * Block 56 - Compression Parameters + */ + 
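
The entries in this block pack their sizes, so a quick decode helps when reading raw VBT dumps: the RC buffer is 4^rc_buffer_block_size * 1024 * (rc_buffer_size + 1) bytes and the max_bpp field maps through 6 + 2 * value, per the defines that follow. A standalone sketch (not part of this patch) with made-up field values:

#include <stdio.h>

int main(void)
{
	/* example field values, not taken from any real VBT */
	unsigned int rc_buffer_block_size = 1;	/* 0=1K 1=4K 2=16K 3=64K */
	unsigned int rc_buffer_size = 3;	/* number of blocks - 1 */
	unsigned int max_bpp = 1;		/* decodes as 6 + 2 * value */
	unsigned int line_buffer_depth = 5;	/* stored as depth - 8 */

	unsigned long block_bytes = 1024ul << (2 * rc_buffer_block_size);
	unsigned long buffer_bytes = block_bytes * (rc_buffer_size + 1);

	printf("rc buffer: %lu bytes\n", buffer_bytes);		/* 16384 */
	printf("max bpp: %u\n", 6 + 2 * max_bpp);		/* 8 */
	printf("line buffer depth: %u bits\n", line_buffer_depth + 8);
	return 0;
}
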
+#define VBT_RC_BUFFER_BLOCK_SIZE_1KB 0 +#define VBT_RC_BUFFER_BLOCK_SIZE_4KB 1 +#define VBT_RC_BUFFER_BLOCK_SIZE_16KB 2 +#define VBT_RC_BUFFER_BLOCK_SIZE_64KB 3 + +#define VBT_DSC_LINE_BUFFER_DEPTH(vbt_value) ((vbt_value) + 8) /* bits */ +#define VBT_DSC_MAX_BPP(vbt_value) (6 + (vbt_value) * 2) + +struct dsc_compression_parameters_entry { + u8 version_major:4; + u8 version_minor:4; + + u8 rc_buffer_block_size:2; + u8 reserved1:6; + + /* + * Buffer size in bytes: + * + * 4 ^ rc_buffer_block_size * 1024 * (rc_buffer_size + 1) bytes + */ + u8 rc_buffer_size; + u32 slices_per_line; + + u8 line_buffer_depth:4; + u8 reserved2:4; + + /* Flag Bits 1 */ + u8 block_prediction_enable:1; + u8 reserved3:7; + + u8 max_bpp; /* mapping */ + + /* Color depth capabilities */ + u8 reserved4:1; + u8 support_8bpc:1; + u8 support_10bpc:1; + u8 support_12bpc:1; + u8 reserved5:4; + + u16 slice_height; +} __packed; + +struct bdb_compression_parameters { + u16 entry_size; + struct dsc_compression_parameters_entry data[16]; +} __packed; + +/* + * Block 58 - Generic DTD Block + */ + +struct generic_dtd_entry { + u32 pixel_clock; + u16 hactive; + u16 hblank; + u16 hfront_porch; + u16 hsync; + u16 vactive; + u16 vblank; + u16 vfront_porch; + u16 vsync; + u16 width_mm; + u16 height_mm; + + /* Flags */ + u8 rsvd_flags:6; + u8 vsync_positive_polarity:1; + u8 hsync_positive_polarity:1; + + u8 rsvd[3]; +} __packed; + +struct bdb_generic_dtd { + u16 gdtd_size; + struct generic_dtd_entry dtd[]; /* up to 24 DTD's */ +} __packed; + #endif /* _INTEL_VBT_DEFS_H_ */ diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index ffec807b8960..9e6aaa302e40 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -9,7 +9,8 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_display_types.h" +#include "intel_dsi.h" #include "intel_vdsc.h" enum ROW_INDEX_BPP { @@ -30,10 +31,8 @@ enum COLUMN_INDEX_BPC { MAX_COLUMN_INDEX }; -#define DSC_SUPPORTED_VERSION_MIN 1 - /* From DSC_v1.11 spec, rc_parameter_Set syntax element typically constant */ -static u16 rc_buf_thresh[] = { +static const u16 rc_buf_thresh[] = { 896, 1792, 2688, 3584, 4480, 5376, 6272, 6720, 7168, 7616, 7744, 7872, 8000, 8064 }; @@ -53,7 +52,7 @@ struct rc_parameters { * Selected Rate Control Related Parameter Recommended Values * from DSC_v1.11 spec & C Model release: DSC_model_20161212 */ -static struct rc_parameters rc_params[][MAX_COLUMN_INDEX] = { +static const struct rc_parameters rc_parameters[][MAX_COLUMN_INDEX] = { { /* 6BPP/8BPC */ { 768, 15, 6144, 3, 13, 11, 11, { @@ -319,63 +318,84 @@ static int get_column_index_for_rc_params(u8 bits_per_component) } } -int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config) +static const struct rc_parameters *get_rc_params(u16 compressed_bpp, + u8 bits_per_component) +{ + int row_index, column_index; + + row_index = get_row_index_for_rc_params(compressed_bpp); + if (row_index < 0) + return NULL; + + column_index = get_column_index_for_rc_params(bits_per_component); + if (column_index < 0) + return NULL; + + return &rc_parameters[row_index][column_index]; +} + +bool intel_dsc_source_support(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - struct drm_dsc_config *vdsc_cfg = &pipe_config->dp_dsc_cfg; - u16 compressed_bpp = pipe_config->dsc_params.compressed_bpp; + const struct intel_crtc *crtc = 
to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + enum pipe pipe = crtc->pipe; + + if (!INTEL_INFO(i915)->display.has_dsc) + return false; + + /* On TGL, DSC is supported on all Pipes */ + if (INTEL_GEN(i915) >= 12) + return true; + + if (INTEL_GEN(i915) >= 10 && + (pipe != PIPE_A || + (cpu_transcoder == TRANSCODER_EDP || + cpu_transcoder == TRANSCODER_DSI_0 || + cpu_transcoder == TRANSCODER_DSI_1))) + return true; + + return false; +} + +static bool is_pipe_dsc(const struct intel_crtc_state *crtc_state) +{ + const struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_i915_private *i915 = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + + if (INTEL_GEN(i915) >= 12) + return true; + + if (cpu_transcoder == TRANSCODER_EDP || + cpu_transcoder == TRANSCODER_DSI_0 || + cpu_transcoder == TRANSCODER_DSI_1) + return false; + + /* There's no pipe A DSC engine on ICL */ + WARN_ON(crtc->pipe == PIPE_A); + + return true; +} + +int intel_dsc_compute_params(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) +{ + struct drm_dsc_config *vdsc_cfg = &pipe_config->dsc.config; + u16 compressed_bpp = pipe_config->dsc.compressed_bpp; + const struct rc_parameters *rc_params; u8 i = 0; - int row_index = 0; - int column_index = 0; - u8 line_buf_depth = 0; - vdsc_cfg->pic_width = pipe_config->base.adjusted_mode.crtc_hdisplay; - vdsc_cfg->pic_height = pipe_config->base.adjusted_mode.crtc_vdisplay; + vdsc_cfg->pic_width = pipe_config->hw.adjusted_mode.crtc_hdisplay; + vdsc_cfg->pic_height = pipe_config->hw.adjusted_mode.crtc_vdisplay; vdsc_cfg->slice_width = DIV_ROUND_UP(vdsc_cfg->pic_width, - pipe_config->dsc_params.slice_count); - /* - * Slice Height of 8 works for all currently available panels. So start - * with that if pic_height is an integral multiple of 8. - * Eventually add logic to try multiple slice heights. - */ - if (vdsc_cfg->pic_height % 8 == 0) - vdsc_cfg->slice_height = 8; - else if (vdsc_cfg->pic_height % 4 == 0) - vdsc_cfg->slice_height = 4; - else - vdsc_cfg->slice_height = 2; - - /* Values filled from DSC Sink DPCD */ - vdsc_cfg->dsc_version_major = - (intel_dp->dsc_dpcd[DP_DSC_REV - DP_DSC_SUPPORT] & - DP_DSC_MAJOR_MASK) >> DP_DSC_MAJOR_SHIFT; - vdsc_cfg->dsc_version_minor = - min(DSC_SUPPORTED_VERSION_MIN, - (intel_dp->dsc_dpcd[DP_DSC_REV - DP_DSC_SUPPORT] & - DP_DSC_MINOR_MASK) >> DP_DSC_MINOR_SHIFT); - - vdsc_cfg->convert_rgb = intel_dp->dsc_dpcd[DP_DSC_DEC_COLOR_FORMAT_CAP - DP_DSC_SUPPORT] & - DP_DSC_RGB; - - line_buf_depth = drm_dp_dsc_sink_line_buf_depth(intel_dp->dsc_dpcd); - if (!line_buf_depth) { - DRM_DEBUG_KMS("DSC Sink Line Buffer Depth invalid\n"); - return -EINVAL; - } - if (vdsc_cfg->dsc_version_minor == 2) - vdsc_cfg->line_buf_depth = (line_buf_depth == DSC_1_2_MAX_LINEBUF_DEPTH_BITS) ? - DSC_1_2_MAX_LINEBUF_DEPTH_VAL : line_buf_depth; - else - vdsc_cfg->line_buf_depth = (line_buf_depth > DSC_1_1_MAX_LINEBUF_DEPTH_BITS) ? 
- DSC_1_1_MAX_LINEBUF_DEPTH_BITS : line_buf_depth; + pipe_config->dsc.slice_count); /* Gen 11 does not support YCbCr */ vdsc_cfg->simple_422 = false; /* Gen 11 does not support VBR */ vdsc_cfg->vbr_enable = false; - vdsc_cfg->block_pred_enable = - intel_dp->dsc_dpcd[DP_DSC_BLK_PREDICTION_SUPPORT - DP_DSC_SUPPORT] & - DP_DSC_BLK_PREDICTION_IS_SUPPORTED; /* Gen 11 only supports integral values of bpp */ vdsc_cfg->bits_per_pixel = compressed_bpp << 4; @@ -399,39 +419,29 @@ int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, vdsc_cfg->rc_buf_thresh[13] = 0x7D; } - row_index = get_row_index_for_rc_params(compressed_bpp); - column_index = - get_column_index_for_rc_params(vdsc_cfg->bits_per_component); - - if (row_index < 0 || column_index < 0) + rc_params = get_rc_params(compressed_bpp, vdsc_cfg->bits_per_component); + if (!rc_params) return -EINVAL; - vdsc_cfg->first_line_bpg_offset = - rc_params[row_index][column_index].first_line_bpg_offset; - vdsc_cfg->initial_xmit_delay = - rc_params[row_index][column_index].initial_xmit_delay; - vdsc_cfg->initial_offset = - rc_params[row_index][column_index].initial_offset; - vdsc_cfg->flatness_min_qp = - rc_params[row_index][column_index].flatness_min_qp; - vdsc_cfg->flatness_max_qp = - rc_params[row_index][column_index].flatness_max_qp; - vdsc_cfg->rc_quant_incr_limit0 = - rc_params[row_index][column_index].rc_quant_incr_limit0; - vdsc_cfg->rc_quant_incr_limit1 = - rc_params[row_index][column_index].rc_quant_incr_limit1; + vdsc_cfg->first_line_bpg_offset = rc_params->first_line_bpg_offset; + vdsc_cfg->initial_xmit_delay = rc_params->initial_xmit_delay; + vdsc_cfg->initial_offset = rc_params->initial_offset; + vdsc_cfg->flatness_min_qp = rc_params->flatness_min_qp; + vdsc_cfg->flatness_max_qp = rc_params->flatness_max_qp; + vdsc_cfg->rc_quant_incr_limit0 = rc_params->rc_quant_incr_limit0; + vdsc_cfg->rc_quant_incr_limit1 = rc_params->rc_quant_incr_limit1; for (i = 0; i < DSC_NUM_BUF_RANGES; i++) { vdsc_cfg->rc_range_params[i].range_min_qp = - rc_params[row_index][column_index].rc_range_params[i].range_min_qp; + rc_params->rc_range_params[i].range_min_qp; vdsc_cfg->rc_range_params[i].range_max_qp = - rc_params[row_index][column_index].rc_range_params[i].range_max_qp; + rc_params->rc_range_params[i].range_max_qp; /* * Range BPG Offset uses 2's complement and is only a 6 bits. So * mask it to get only 6 bits. */ vdsc_cfg->rc_range_params[i].range_bpg_offset = - rc_params[row_index][column_index].rc_range_params[i].range_bpg_offset & + rc_params->rc_range_params[i].range_bpg_offset & DSC_RANGE_BPG_OFFSET_MASK; } @@ -453,39 +463,46 @@ int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, vdsc_cfg->initial_scale_value = (vdsc_cfg->rc_model_size << 3) / (vdsc_cfg->rc_model_size - vdsc_cfg->initial_offset); - return drm_dsc_compute_rc_parameters(vdsc_cfg); + return 0; } enum intel_display_power_domain intel_dsc_power_domain(const struct intel_crtc_state *crtc_state) { - enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; /* - * On ICL VDSC/joining for eDP transcoder uses a separate power well PW2 - * This requires POWER_DOMAIN_TRANSCODER_EDP_VDSC power domain. - * For any other transcoder, VDSC/joining uses the power well associated - * with the pipe/transcoder in use. Hence another reference on the - * transcoder power domain will suffice. 
+ * VDSC/joining uses a separate power well, PW2, and requires + * POWER_DOMAIN_TRANSCODER_VDSC_PW2 power domain in two cases: + * + * - ICL eDP/DSI transcoder + * - TGL pipe A + * + * For any other pipe, VDSC/joining uses the power well associated with + * the pipe in use. Hence another reference on the pipe power domain + * will suffice. (Except no VDSC/joining on ICL pipe A.) */ - if (cpu_transcoder == TRANSCODER_EDP) - return POWER_DOMAIN_TRANSCODER_EDP_VDSC; + if (INTEL_GEN(i915) >= 12 && pipe == PIPE_A) + return POWER_DOMAIN_TRANSCODER_VDSC_PW2; + else if (is_pipe_dsc(crtc_state)) + return POWER_DOMAIN_PIPE(pipe); else - return POWER_DOMAIN_TRANSCODER(cpu_transcoder); + return POWER_DOMAIN_TRANSCODER_VDSC_PW2; } -static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +static void intel_dsc_pps_configure(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - const struct drm_dsc_config *vdsc_cfg = &crtc_state->dp_dsc_cfg; + const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; enum pipe pipe = crtc->pipe; - enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 pps_val = 0; u32 rc_buf_thresh_dword[4]; u32 rc_range_params_dword[8]; - u8 num_vdsc_instances = (crtc_state->dsc_params.dsc_split) ? 2 : 1; + u8 num_vdsc_instances = (crtc_state->dsc.dsc_split) ? 2 : 1; int i = 0; /* Populate PICTURE_PARAMETER_SET_0 registers */ @@ -502,17 +519,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, if (vdsc_cfg->vbr_enable) pps_val |= DSC_VBR_ENABLE; DRM_INFO("PPS0 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_0, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_0, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_0(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_0(pipe), pps_val); } @@ -521,17 +538,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, pps_val = 0; pps_val |= DSC_BPP(vdsc_cfg->bits_per_pixel); DRM_INFO("PPS1 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_1, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_1, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_1(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_1(pipe), pps_val); } @@ -541,17 +558,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, pps_val |= DSC_PIC_HEIGHT(vdsc_cfg->pic_height) | DSC_PIC_WIDTH(vdsc_cfg->pic_width / num_vdsc_instances); DRM_INFO("PPS2 = 0x%08x\n", pps_val); - if (encoder->type == INTEL_OUTPUT_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_2, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - 
if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_2, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_2(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_2(pipe), pps_val); } @@ -561,17 +578,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, pps_val |= DSC_SLICE_HEIGHT(vdsc_cfg->slice_height) | DSC_SLICE_WIDTH(vdsc_cfg->slice_width); DRM_INFO("PPS3 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_3, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_3, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_3(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_3(pipe), pps_val); } @@ -581,17 +598,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, pps_val |= DSC_INITIAL_XMIT_DELAY(vdsc_cfg->initial_xmit_delay) | DSC_INITIAL_DEC_DELAY(vdsc_cfg->initial_dec_delay); DRM_INFO("PPS4 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_4, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_4, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_4(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_4(pipe), pps_val); } @@ -601,17 +618,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, pps_val |= DSC_SCALE_INC_INT(vdsc_cfg->scale_increment_interval) | DSC_SCALE_DEC_INT(vdsc_cfg->scale_decrement_interval); DRM_INFO("PPS5 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_5, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_5, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_5(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_5(pipe), pps_val); } @@ -623,17 +640,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, DSC_FLATNESS_MIN_QP(vdsc_cfg->flatness_min_qp) | DSC_FLATNESS_MAX_QP(vdsc_cfg->flatness_max_qp); DRM_INFO("PPS6 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_6, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_6, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_6(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_6(pipe), pps_val); } @@ -643,17 +660,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, pps_val |= 
DSC_SLICE_BPG_OFFSET(vdsc_cfg->slice_bpg_offset) | DSC_NFL_BPG_OFFSET(vdsc_cfg->nfl_bpg_offset); DRM_INFO("PPS7 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_7, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_7, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_7(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_7(pipe), pps_val); } @@ -663,17 +680,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, pps_val |= DSC_FINAL_OFFSET(vdsc_cfg->final_offset) | DSC_INITIAL_OFFSET(vdsc_cfg->initial_offset); DRM_INFO("PPS8 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_8, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_8, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_8(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_8(pipe), pps_val); } @@ -683,17 +700,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, pps_val |= DSC_RC_MODEL_SIZE(DSC_RC_MODEL_SIZE_CONST) | DSC_RC_EDGE_FACTOR(DSC_RC_EDGE_FACTOR_CONST); DRM_INFO("PPS9 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_9, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_9, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_9(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_9(pipe), pps_val); } @@ -705,17 +722,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, DSC_RC_TARGET_OFF_HIGH(DSC_RC_TGT_OFFSET_HI_CONST) | DSC_RC_TARGET_OFF_LOW(DSC_RC_TGT_OFFSET_LO_CONST); DRM_INFO("PPS10 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_10, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_10, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_10(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_10(pipe), pps_val); } @@ -728,17 +745,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, DSC_SLICE_ROW_PER_FRAME(vdsc_cfg->pic_height / vdsc_cfg->slice_height); DRM_INFO("PPS16 = 0x%08x\n", pps_val); - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_16, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_16, pps_val); } else { 
I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_16(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_16(pipe), pps_val); } @@ -752,12 +769,12 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, DRM_INFO(" RC_BUF_THRESH%d = 0x%08x\n", i, rc_buf_thresh_dword[i / 4]); } - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_RC_BUF_THRESH_0, rc_buf_thresh_dword[0]); I915_WRITE(DSCA_RC_BUF_THRESH_0_UDW, rc_buf_thresh_dword[1]); I915_WRITE(DSCA_RC_BUF_THRESH_1, rc_buf_thresh_dword[2]); I915_WRITE(DSCA_RC_BUF_THRESH_1_UDW, rc_buf_thresh_dword[3]); - if (crtc_state->dsc_params.dsc_split) { + if (crtc_state->dsc.dsc_split) { I915_WRITE(DSCC_RC_BUF_THRESH_0, rc_buf_thresh_dword[0]); I915_WRITE(DSCC_RC_BUF_THRESH_0_UDW, @@ -776,7 +793,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, rc_buf_thresh_dword[2]); I915_WRITE(ICL_DSC0_RC_BUF_THRESH_1_UDW(pipe), rc_buf_thresh_dword[3]); - if (crtc_state->dsc_params.dsc_split) { + if (crtc_state->dsc.dsc_split) { I915_WRITE(ICL_DSC1_RC_BUF_THRESH_0(pipe), rc_buf_thresh_dword[0]); I915_WRITE(ICL_DSC1_RC_BUF_THRESH_0_UDW(pipe), @@ -801,7 +818,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, DRM_INFO(" RC_RANGE_PARAM_%d = 0x%08x\n", i, rc_range_params_dword[i / 2]); } - if (cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { I915_WRITE(DSCA_RC_RANGE_PARAMETERS_0, rc_range_params_dword[0]); I915_WRITE(DSCA_RC_RANGE_PARAMETERS_0_UDW, @@ -818,7 +835,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, rc_range_params_dword[6]); I915_WRITE(DSCA_RC_RANGE_PARAMETERS_3_UDW, rc_range_params_dword[7]); - if (crtc_state->dsc_params.dsc_split) { + if (crtc_state->dsc.dsc_split) { I915_WRITE(DSCC_RC_RANGE_PARAMETERS_0, rc_range_params_dword[0]); I915_WRITE(DSCC_RC_RANGE_PARAMETERS_0_UDW, @@ -853,7 +870,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, rc_range_params_dword[6]); I915_WRITE(ICL_DSC0_RC_RANGE_PARAMETERS_3_UDW(pipe), rc_range_params_dword[7]); - if (crtc_state->dsc_params.dsc_split) { + if (crtc_state->dsc.dsc_split) { I915_WRITE(ICL_DSC1_RC_RANGE_PARAMETERS_0(pipe), rc_range_params_dword[0]); I915_WRITE(ICL_DSC1_RC_RANGE_PARAMETERS_0_UDW(pipe), @@ -874,12 +891,79 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, } } -static void intel_dp_write_dsc_pps_sdp(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +void intel_dsc_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum pipe pipe = crtc->pipe; + enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; + u32 dss_ctl1, dss_ctl2, val; + + if (!intel_dsc_source_support(encoder, crtc_state)) + return; + + power_domain = intel_dsc_power_domain(crtc_state); + + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) + return; + + if (!is_pipe_dsc(crtc_state)) { + dss_ctl1 = I915_READ(DSS_CTL1); + dss_ctl2 = I915_READ(DSS_CTL2); + } else { + dss_ctl1 = I915_READ(ICL_PIPE_DSS_CTL1(pipe)); + dss_ctl2 = I915_READ(ICL_PIPE_DSS_CTL2(pipe)); + 
} + + crtc_state->dsc.compression_enable = dss_ctl2 & LEFT_BRANCH_VDSC_ENABLE; + if (!crtc_state->dsc.compression_enable) + goto out; + + crtc_state->dsc.dsc_split = (dss_ctl2 & RIGHT_BRANCH_VDSC_ENABLE) && + (dss_ctl1 & JOINER_ENABLE); + + /* FIXME: add more state readout as needed */ + + /* PPS1 */ + if (!is_pipe_dsc(crtc_state)) + val = I915_READ(DSCA_PICTURE_PARAMETER_SET_1); + else + val = I915_READ(ICL_DSC0_PICTURE_PARAMETER_SET_1(pipe)); + vdsc_cfg->bits_per_pixel = val; + crtc_state->dsc.compressed_bpp = vdsc_cfg->bits_per_pixel >> 4; +out: + intel_display_power_put(dev_priv, power_domain, wakeref); +} + +static void intel_dsc_dsi_pps_write(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct mipi_dsi_device *dsi; + struct drm_dsc_picture_parameter_set pps; + enum port port; + + drm_dsc_pps_payload_pack(&pps, vdsc_cfg); + + for_each_dsi_port(port, intel_dsi->ports) { + dsi = intel_dsi->dsi_hosts[port]->device; + + mipi_dsi_picture_parameter_set(dsi, &pps); + mipi_dsi_compression_mode(dsi, true); + } +} + +static void intel_dsc_dp_pps_write(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - const struct drm_dsc_config *vdsc_cfg = &crtc_state->dp_dsc_cfg; + const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; struct drm_dsc_pps_infoframe dp_dsc_pps_sdp; /* Prepare DP SDP PPS header as per DP 1.4 spec, Table 2-123 */ @@ -896,25 +980,28 @@ static void intel_dp_write_dsc_pps_sdp(struct intel_encoder *encoder, void intel_dsc_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t dss_ctl1_reg, dss_ctl2_reg; u32 dss_ctl1_val = 0; u32 dss_ctl2_val = 0; - if (!crtc_state->dsc_params.compression_enable) + if (!crtc_state->dsc.compression_enable) return; /* Enable Power wells for VDSC/joining */ intel_display_power_get(dev_priv, intel_dsc_power_domain(crtc_state)); - intel_configure_pps_for_dsc_encoder(encoder, crtc_state); + intel_dsc_pps_configure(encoder, crtc_state); - intel_dp_write_dsc_pps_sdp(encoder, crtc_state); + if (encoder->type == INTEL_OUTPUT_DSI) + intel_dsc_dsi_pps_write(encoder, crtc_state); + else + intel_dsc_dp_pps_write(encoder, crtc_state); - if (crtc_state->cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(crtc_state)) { dss_ctl1_reg = DSS_CTL1; dss_ctl2_reg = DSS_CTL2; } else { @@ -922,7 +1009,7 @@ void intel_dsc_enable(struct intel_encoder *encoder, dss_ctl2_reg = ICL_PIPE_DSS_CTL2(pipe); } dss_ctl2_val |= LEFT_BRANCH_VDSC_ENABLE; - if (crtc_state->dsc_params.dsc_split) { + if (crtc_state->dsc.dsc_split) { dss_ctl2_val |= RIGHT_BRANCH_VDSC_ENABLE; dss_ctl1_val |= JOINER_ENABLE; } @@ -932,16 +1019,16 @@ void intel_dsc_enable(struct intel_encoder *encoder, void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t dss_ctl1_reg, 
dss_ctl2_reg; u32 dss_ctl1_val = 0, dss_ctl2_val = 0; - if (!old_crtc_state->dsc_params.compression_enable) + if (!old_crtc_state->dsc.compression_enable) return; - if (old_crtc_state->cpu_transcoder == TRANSCODER_EDP) { + if (!is_pipe_dsc(old_crtc_state)) { dss_ctl1_reg = DSS_CTL1; dss_ctl2_reg = DSS_CTL2; } else { diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h index 90d3f6017fcb..e56a3254c214 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc.h @@ -6,15 +6,20 @@ #ifndef __INTEL_VDSC_H__ #define __INTEL_VDSC_H__ +#include <linux/types.h> + struct intel_encoder; struct intel_crtc_state; -struct intel_dp; +bool intel_dsc_source_support(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); void intel_dsc_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_dsc_disable(const struct intel_crtc_state *crtc_state); -int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config); +int intel_dsc_compute_params(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config); +void intel_dsc_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state); enum intel_display_power_domain intel_dsc_power_domain(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c new file mode 100644 index 000000000000..2ff7293986d4 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/pci.h> +#include <linux/vgaarb.h> + +#include <drm/i915_drm.h> + +#include "i915_drv.h" +#include "intel_vga.h" + +static i915_reg_t intel_vga_cntrl_reg(struct drm_i915_private *i915) +{ + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + return VLV_VGACNTRL; + else if (INTEL_GEN(i915) >= 5) + return CPU_VGACNTRL; + else + return VGACNTRL; +} + +/* Disable the VGA plane that we never use */ +void intel_vga_disable(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + i915_reg_t vga_reg = intel_vga_cntrl_reg(dev_priv); + u8 sr1; + + /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ + vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); + outb(SR01, VGA_SR_INDEX); + sr1 = inb(VGA_SR_DATA); + outb(sr1 | 1 << 5, VGA_SR_DATA); + vga_put(pdev, VGA_RSRC_LEGACY_IO); + udelay(300); + + I915_WRITE(vga_reg, VGA_DISP_DISABLE); + POSTING_READ(vga_reg); +} + +void intel_vga_redisable_power_on(struct drm_i915_private *dev_priv) +{ + i915_reg_t vga_reg = intel_vga_cntrl_reg(dev_priv); + + if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) { + DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); + intel_vga_disable(dev_priv); + } +} + +void intel_vga_redisable(struct drm_i915_private *i915) +{ + intel_wakeref_t wakeref; + + /* + * This function can be called both from intel_modeset_setup_hw_state or + * at a very early point in our resume sequence, where the power well + * structures are not yet restored. Since this function is at a very + * paranoid "someone might have enabled VGA while we were not looking" + * level, just check if the power well is enabled instead of trying to + * follow the "don't touch the power well if we don't need it" policy + * the rest of the driver uses. 
+ */ + wakeref = intel_display_power_get_if_enabled(i915, POWER_DOMAIN_VGA); + if (!wakeref) + return; + + intel_vga_redisable_power_on(i915); + + intel_display_power_put(i915, POWER_DOMAIN_VGA, wakeref); +} + +void intel_vga_reset_io_mem(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + /* + * After we re-enable the power well, if we touch VGA register 0x3d5 + * we'll get unclaimed register interrupts. This stops after we write + * anything to the VGA MSR register. The vgacon module uses this + * register all the time, so if we unbind our driver and, as a + * consequence, bind vgacon, we'll get stuck in an infinite loop at + * console_unlock(). So make here we touch the VGA MSR register, making + * sure vgacon can keep working normally without triggering interrupts + * and error messages. + */ + vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); + outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); + vga_put(pdev, VGA_RSRC_LEGACY_IO); +} + +static int +intel_vga_set_state(struct drm_i915_private *i915, bool enable_decode) +{ + unsigned int reg = INTEL_GEN(i915) >= 6 ? SNB_GMCH_CTRL : INTEL_GMCH_CTRL; + u16 gmch_ctrl; + + if (pci_read_config_word(i915->bridge_dev, reg, &gmch_ctrl)) { + DRM_ERROR("failed to read control word\n"); + return -EIO; + } + + if (!!(gmch_ctrl & INTEL_GMCH_VGA_DISABLE) == !enable_decode) + return 0; + + if (enable_decode) + gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE; + else + gmch_ctrl |= INTEL_GMCH_VGA_DISABLE; + + if (pci_write_config_word(i915->bridge_dev, reg, gmch_ctrl)) { + DRM_ERROR("failed to write control word\n"); + return -EIO; + } + + return 0; +} + +static unsigned int +intel_vga_set_decode(void *cookie, bool enable_decode) +{ + struct drm_i915_private *i915 = cookie; + + intel_vga_set_state(i915, enable_decode); + + if (enable_decode) + return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | + VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; + else + return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; +} + +int intel_vga_register(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + int ret; + + /* + * If we have > 1 VGA cards, then we need to arbitrate access to the + * common VGA resources. + * + * If we are a secondary display controller (!PCI_DISPLAY_CLASS_VGA), + * then we do not take part in VGA arbitration and the + * vga_client_register() fails with -ENODEV. 
+ */ + ret = vga_client_register(pdev, i915, NULL, intel_vga_set_decode); + if (ret && ret != -ENODEV) + return ret; + + return 0; +} + +void intel_vga_unregister(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + vga_client_register(pdev, NULL, NULL, NULL); +} diff --git a/drivers/gpu/drm/i915/display/intel_vga.h b/drivers/gpu/drm/i915/display/intel_vga.h new file mode 100644 index 000000000000..ba5b55b917f0 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_vga.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_VGA_H__ +#define __INTEL_VGA_H__ + +struct drm_i915_private; + +void intel_vga_reset_io_mem(struct drm_i915_private *i915); +void intel_vga_disable(struct drm_i915_private *i915); +void intel_vga_redisable(struct drm_i915_private *i915); +void intel_vga_redisable_power_on(struct drm_i915_private *i915); +int intel_vga_register(struct drm_i915_private *i915); +void intel_vga_unregister(struct drm_i915_private *i915); + +#endif /* __INTEL_VGA_H__ */ diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index e272d826210a..daf4fc3dab6f 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -23,7 +23,6 @@ * Author: Jani Nikula <jani.nikula@intel.com> */ -#include <linux/gpio/consumer.h> #include <linux/slab.h> #include <drm/drm_atomic_helper.h> @@ -34,7 +33,7 @@ #include "i915_drv.h" #include "intel_atomic.h" #include "intel_connector.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dsi.h" #include "intel_fifo_underrun.h" #include "intel_panel.h" @@ -84,9 +83,8 @@ void vlv_dsi_wait_for_fifo_empty(struct intel_dsi *intel_dsi, enum port port) mask = LP_CTRL_FIFO_EMPTY | HS_CTRL_FIFO_EMPTY | LP_DATA_FIFO_EMPTY | HS_DATA_FIFO_EMPTY; - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_GEN_FIFO_STAT(port), mask, mask, - 100)) + if (intel_de_wait_for_set(dev_priv, MIPI_GEN_FIFO_STAT(port), + mask, 100)) DRM_ERROR("DPI FIFOs are not empty\n"); } @@ -154,10 +152,8 @@ static ssize_t intel_dsi_host_transfer(struct mipi_dsi_host *host, /* note: this is never true for reads */ if (packet.payload_length) { - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_GEN_FIFO_STAT(port), - data_mask, 0, - 50)) + if (intel_de_wait_for_clear(dev_priv, MIPI_GEN_FIFO_STAT(port), + data_mask, 50)) DRM_ERROR("Timeout waiting for HS/LP DATA FIFO !full\n"); write_data(dev_priv, data_reg, packet.payload, @@ -168,10 +164,8 @@ static ssize_t intel_dsi_host_transfer(struct mipi_dsi_host *host, I915_WRITE(MIPI_INTR_STAT(port), GEN_READ_DATA_AVAIL); } - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_GEN_FIFO_STAT(port), - ctrl_mask, 0, - 50)) { + if (intel_de_wait_for_clear(dev_priv, MIPI_GEN_FIFO_STAT(port), + ctrl_mask, 50)) { DRM_ERROR("Timeout waiting for HS/LP CTRL FIFO !full\n"); } @@ -180,10 +174,8 @@ static ssize_t intel_dsi_host_transfer(struct mipi_dsi_host *host, /* ->rx_len is set only for reads */ if (msg->rx_len) { data_mask = GEN_READ_DATA_AVAIL; - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_INTR_STAT(port), - data_mask, data_mask, - 50)) + if (intel_de_wait_for_set(dev_priv, MIPI_INTR_STAT(port), + data_mask, 50)) DRM_ERROR("Timeout waiting for read data.\n"); read_data(dev_priv, data_reg, msg->rx_buf, msg->rx_len); @@ -240,9 +232,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs, I915_WRITE(MIPI_DPI_CONTROL(port), cmd); mask = 
SPL_PKT_SENT_INTERRUPT; - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_INTR_STAT(port), mask, mask, - 100)) + if (intel_de_wait_for_set(dev_priv, MIPI_INTR_STAT(port), mask, 100)) DRM_ERROR("Video mode command 0x%08x send failed.\n", cmd); return 0; @@ -270,9 +260,9 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi, base); struct intel_connector *intel_connector = intel_dsi->attached_connector; - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); const struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; int ret; DRM_DEBUG_KMS("\n"); @@ -328,7 +318,7 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, static bool glk_dsi_enable_io(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 tmp; bool cold_boot = false; @@ -359,11 +349,8 @@ static bool glk_dsi_enable_io(struct intel_encoder *encoder) /* Wait for Pwr ACK */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_CTRL(port), - GLK_MIPIIO_PORT_POWERED, - GLK_MIPIIO_PORT_POWERED, - 20)) + if (intel_de_wait_for_set(dev_priv, MIPI_CTRL(port), + GLK_MIPIIO_PORT_POWERED, 20)) DRM_ERROR("MIPIO port is powergated\n"); } @@ -379,17 +366,14 @@ static bool glk_dsi_enable_io(struct intel_encoder *encoder) static void glk_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; /* Wait for MIPI PHY status bit to set */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_CTRL(port), - GLK_PHY_STATUS_PORT_READY, - GLK_PHY_STATUS_PORT_READY, - 20)) + if (intel_de_wait_for_set(dev_priv, MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 20)) DRM_ERROR("PHY is not ON\n"); } @@ -413,11 +397,8 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) I915_WRITE(MIPI_DEVICE_READY(port), val); /* Wait for ULPS active */ - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_CTRL(port), - GLK_ULPS_NOT_ACTIVE, - 0, - 20)) + if (intel_de_wait_for_clear(dev_priv, MIPI_CTRL(port), + GLK_ULPS_NOT_ACTIVE, 20)) DRM_ERROR("ULPS not active\n"); /* Exit ULPS */ @@ -440,21 +421,15 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) /* Wait for Stop state */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_CTRL(port), - GLK_DATA_LANE_STOP_STATE, - GLK_DATA_LANE_STOP_STATE, - 20)) + if (intel_de_wait_for_set(dev_priv, MIPI_CTRL(port), + GLK_DATA_LANE_STOP_STATE, 20)) DRM_ERROR("Date lane not in STOP state\n"); } /* Wait for AFE LATCH */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_wait_for_register(&dev_priv->uncore, - BXT_MIPI_PORT_CTRL(port), - AFE_LATCHOUT, - AFE_LATCHOUT, - 20)) + if (intel_de_wait_for_set(dev_priv, BXT_MIPI_PORT_CTRL(port), + AFE_LATCHOUT, 20)) DRM_ERROR("D-PHY not entering LP-11 state\n"); } } @@ -462,7 +437,7 @@ static void 
glk_dsi_device_ready(struct intel_encoder *encoder) static void bxt_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -489,7 +464,7 @@ static void bxt_dsi_device_ready(struct intel_encoder *encoder) static void vlv_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -540,7 +515,7 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -554,17 +529,15 @@ static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) /* Wait for MIPI PHY status bit to unset */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_CTRL(port), - GLK_PHY_STATUS_PORT_READY, 0, 20)) + if (intel_de_wait_for_clear(dev_priv, MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 20)) DRM_ERROR("PHY is not turning OFF\n"); } /* Wait for Pwr ACK bit to unset */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_CTRL(port), - GLK_MIPIIO_PORT_POWERED, 0, 20)) + if (intel_de_wait_for_clear(dev_priv, MIPI_CTRL(port), + GLK_MIPIIO_PORT_POWERED, 20)) DRM_ERROR("MIPI IO Port is not powergated\n"); } } @@ -572,7 +545,7 @@ static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) static void glk_dsi_disable_mipi_io(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 tmp; @@ -583,9 +556,8 @@ static void glk_dsi_disable_mipi_io(struct intel_encoder *encoder) /* Wait for MIPI PHY status bit to unset */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_wait_for_register(&dev_priv->uncore, - MIPI_CTRL(port), - GLK_PHY_STATUS_PORT_READY, 0, 20)) + if (intel_de_wait_for_clear(dev_priv, MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 20)) DRM_ERROR("PHY is not turning OFF\n"); } @@ -606,7 +578,7 @@ static void glk_dsi_clear_device_ready(struct intel_encoder *encoder) static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; DRM_DEBUG_KMS("\n"); @@ -633,9 +605,8 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) * Port A only. MIPI Port C has no similar bit for checking. 
*/ if ((IS_GEN9_LP(dev_priv) || port == PORT_A) && - intel_wait_for_register(&dev_priv->uncore, - port_ctrl, AFE_LATCHOUT, 0, - 30)) + intel_de_wait_for_clear(dev_priv, port_ctrl, + AFE_LATCHOUT, 30)) DRM_ERROR("DSI LP not going Low\n"); /* Disable MIPI PHY transparent latch */ @@ -652,8 +623,8 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) { @@ -709,7 +680,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; for_each_dsi_port(port, intel_dsi->ports) { @@ -773,11 +744,11 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - struct drm_crtc *crtc = pipe_config->base.crtc; + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct drm_crtc *crtc = pipe_config->uapi.crtc; struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; enum port port; u32 val; bool glk_cold_boot = false; @@ -821,9 +792,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, if (!IS_GEMINILAKE(dev_priv)) intel_dsi_prepare(encoder, pipe_config); - /* Power on, try both CRC pmic gpio and VBT */ - if (intel_dsi->gpio_panel) - gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_ON); intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); @@ -878,7 +846,7 @@ static void intel_dsi_disable(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; DRM_DEBUG_KMS("\n"); @@ -910,16 +878,22 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) } static void intel_dsi_post_disable(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, - const struct drm_connector_state *conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; DRM_DEBUG_KMS("\n"); + if (IS_GEN9_LP(dev_priv)) { + intel_crtc_vblank_off(old_crtc_state); + + skl_scaler_disable(old_crtc_state); + } + if (is_vid_mode(intel_dsi)) { for_each_dsi_port(port, intel_dsi->ports) vlv_dsi_wait_for_fifo_empty(intel_dsi, port); @@ -967,11 +941,8 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, /* Assert reset */ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); - /* Power 
off, try both CRC pmic gpio and VBT */ intel_dsi_msleep(intel_dsi, intel_dsi->panel_off_delay); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); - if (intel_dsi->gpio_panel) - gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0); /* * FIXME As we do with eDP, just make a note of the time here @@ -984,7 +955,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); intel_wakeref_t wakeref; enum port port; bool active = false; @@ -1060,10 +1031,10 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_display_mode *adjusted_mode = - &pipe_config->base.adjusted_mode; + &pipe_config->hw.adjusted_mode; struct drm_display_mode *adjusted_mode_sw; - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); unsigned int lane_count = intel_dsi->lane_count; unsigned int bpp, fmt; enum port port; @@ -1073,7 +1044,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, crtc_hblank_start_sw, crtc_hblank_end_sw; /* FIXME: hw readout should not depend on SW state */ - adjusted_mode_sw = &crtc->config->base.adjusted_mode; + adjusted_mode_sw = &crtc->config->hw.adjusted_mode; /* * Atleast one port is active as encoder->get_config called only if @@ -1232,7 +1203,7 @@ static void intel_dsi_get_config(struct intel_encoder *encoder, } if (pclk) { - pipe_config->base.adjusted_mode.crtc_clock = pclk; + pipe_config->hw.adjusted_mode.crtc_clock = pclk; pipe_config->port_clock = pclk; } } @@ -1256,7 +1227,7 @@ static void set_dsi_timings(struct drm_encoder *encoder, { struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder)); enum port port; unsigned int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); unsigned int lane_count = intel_dsi->lane_count; @@ -1343,9 +1314,9 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct drm_encoder *encoder = &intel_encoder->base; struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder)); + const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; enum port port; unsigned int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); u32 val, tmp; @@ -1534,7 +1505,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, static void intel_dsi_unprepare(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -1561,12 
+1532,9 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder) static void intel_dsi_encoder_destroy(struct drm_encoder *encoder) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - - /* dispose of the gpios */ - if (intel_dsi->gpio_panel) - gpiod_put(intel_dsi->gpio_panel); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder)); + intel_dsi_vbt_gpio_cleanup(intel_dsi); intel_encoder_destroy(encoder); } @@ -1644,7 +1612,7 @@ vlv_dsi_get_panel_orientation(struct intel_connector *connector) return intel_dsi_get_panel_orientation(connector); } -static void intel_dsi_add_properties(struct intel_connector *connector) +static void vlv_dsi_add_properties(struct intel_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); @@ -1847,6 +1815,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) struct drm_connector *connector; struct drm_display_mode *current_mode, *fixed_mode; enum port port; + enum pipe pipe; DRM_DEBUG_KMS("\n"); @@ -1898,11 +1867,11 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) * port C. BXT isn't limited like this. */ if (IS_GEN9_LP(dev_priv)) - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_encoder->pipe_mask = ~0; else if (port == PORT_A) - intel_encoder->crtc_mask = BIT(PIPE_A); + intel_encoder->pipe_mask = BIT(PIPE_A); else - intel_encoder->crtc_mask = BIT(PIPE_B); + intel_encoder->pipe_mask = BIT(PIPE_B); if (dev_priv->vbt.dsi.config->dual_link) intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C); @@ -1945,20 +1914,8 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) vlv_dphy_param_init(intel_dsi); - /* - * In case of BYT with CRC PMIC, we need to use GPIO for - * Panel control. - */ - if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && - (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC)) { - intel_dsi->gpio_panel = - gpiod_get(dev->dev, "panel", GPIOD_OUT_HIGH); - - if (IS_ERR(intel_dsi->gpio_panel)) { - DRM_ERROR("Failed to own gpio for panel control\n"); - intel_dsi->gpio_panel = NULL; - } - } + intel_dsi_vbt_gpio_init(intel_dsi, + intel_dsi_get_hw_state(intel_encoder, &pipe)); drm_connector_init(dev, connector, &intel_dsi_connector_funcs, DRM_MODE_CONNECTOR_DSI); @@ -1983,7 +1940,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) intel_panel_init(&intel_connector->panel, fixed_mode, NULL); intel_panel_setup_backlight(connector, INVALID_PIPE); - intel_dsi_add_properties(intel_connector); + vlv_dsi_add_properties(intel_connector); return; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c index f016a776a39e..6b89e67b120f 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c @@ -28,7 +28,7 @@ #include <linux/kernel.h> #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_display_types.h" #include "intel_dsi.h" #include "intel_sideband.h" @@ -117,7 +117,7 @@ int vlv_dsi_pll_compute(struct intel_encoder *encoder, struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); int ret; u32 dsi_clk; @@ -246,11 +246,8 @@ void bxt_dsi_pll_disable(struct intel_encoder *encoder) * PLL lock should deassert within 200us. * Wait up to 1ms before timing out. 
*/ - if (intel_wait_for_register(&dev_priv->uncore, - BXT_DSI_PLL_ENABLE, - BXT_DSI_PLL_LOCKED, - 0, - 1)) + if (intel_de_wait_for_clear(dev_priv, BXT_DSI_PLL_ENABLE, + BXT_DSI_PLL_LOCKED, 1)) DRM_ERROR("Timeout waiting for PLL lock deassertion\n"); } @@ -258,7 +255,7 @@ u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); u32 dsi_clock, pclk; u32 pll_ctl, pll_div; @@ -324,7 +321,7 @@ u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, u32 pclk; u32 dsi_clk; u32 dsi_ratio; - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); @@ -344,7 +341,7 @@ void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) { u32 temp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); temp = I915_READ(MIPI_CTRL(port)); temp &= ~ESCAPE_CLOCK_DIVIDER_MASK; @@ -458,7 +455,7 @@ int bxt_dsi_pll_compute(struct intel_encoder *encoder, struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u8 dsi_ratio, dsi_ratio_min, dsi_ratio_max; u32 dsi_clk; @@ -506,7 +503,7 @@ void bxt_dsi_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -530,11 +527,8 @@ void bxt_dsi_pll_enable(struct intel_encoder *encoder, I915_WRITE(BXT_DSI_PLL_ENABLE, val); /* Timeout and fail if PLL not locked */ - if (intel_wait_for_register(&dev_priv->uncore, - BXT_DSI_PLL_ENABLE, - BXT_DSI_PLL_LOCKED, - BXT_DSI_PLL_LOCKED, - 1)) { + if (intel_de_wait_for_set(dev_priv, BXT_DSI_PLL_ENABLE, + BXT_DSI_PLL_LOCKED, 1)) { DRM_ERROR("Timed out waiting for DSI PLL to lock\n"); return; } diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile deleted file mode 100644 index 07e7b8b840ea..000000000000 --- a/drivers/gpu/drm/i915/gem/Makefile +++ /dev/null @@ -1 +0,0 @@ -include $(src)/Makefile.header-test # Extra header tests diff --git a/drivers/gpu/drm/i915/gem/Makefile.header-test b/drivers/gpu/drm/i915/gem/Makefile.header-test deleted file mode 100644 index 61e06cbb4b32..000000000000 --- a/drivers/gpu/drm/i915/gem/Makefile.header-test +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2019 Intel Corporation - -# Test the headers are compilable as standalone units -header_test := $(notdir $(wildcard $(src)/*.h)) - -quiet_cmd_header_test = HDRTEST $@ - cmd_header_test = echo "\#include \"$(<F)\"" > $@ - -header_test_%.c: %.h - $(call cmd,header_test) - -extra-$(CONFIG_DRM_I915_WERROR) += \ - $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h))) - -clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h))) diff --git 
a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6ad93a09968c..25235ef630c1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -9,16 +9,16 @@ #include "i915_gem_ioctls.h" #include "i915_gem_object.h" -static __always_inline u32 __busy_read_flag(u8 id) +static __always_inline u32 __busy_read_flag(u16 id) { - if (id == (u8)I915_ENGINE_CLASS_INVALID) + if (id == (u16)I915_ENGINE_CLASS_INVALID) return 0xffff0000u; GEM_BUG_ON(id >= 16); return 0x10000u << id; } -static __always_inline u32 __busy_write_id(u8 id) +static __always_inline u32 __busy_write_id(u16 id) { /* * The uABI guarantees an active writer is also amongst the read @@ -29,14 +29,14 @@ static __always_inline u32 __busy_write_id(u8 id) * last_read - hence we always set both read and write busy for * last_write. */ - if (id == (u8)I915_ENGINE_CLASS_INVALID) + if (id == (u16)I915_ENGINE_CLASS_INVALID) return 0xffffffffu; return (id + 1) | __busy_read_flag(id); } static __always_inline unsigned int -__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id)) +__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) { const struct i915_request *rq; @@ -57,7 +57,7 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id)) return 0; /* Beware type-expansion follies! */ - BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class)); + BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class)); return flag(rq->engine->uabi_class); } @@ -82,7 +82,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct reservation_object_list *list; + struct dma_resv_list *list; unsigned int seq; int err; @@ -105,7 +105,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * Alternatively, we can trade that extra information on read/write * activity with * args->busy = - * !reservation_object_test_signaled_rcu(obj->resv, true); + * !dma_resv_test_signaled_rcu(obj->resv, true); * to report the overall busyness. This is what the wait-ioctl does. 
* */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 5295285d5843..34be4c0ee7c5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -8,87 +8,65 @@ #include "i915_drv.h" #include "i915_gem_clflush.h" - -static DEFINE_SPINLOCK(clflush_lock); +#include "i915_sw_fence_work.h" +#include "i915_trace.h" struct clflush { - struct dma_fence dma; /* Must be first for dma_fence_free() */ - struct i915_sw_fence wait; - struct work_struct work; + struct dma_fence_work base; struct drm_i915_gem_object *obj; }; -static const char *i915_clflush_get_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) -{ - return "clflush"; -} - -static void i915_clflush_release(struct dma_fence *fence) -{ - struct clflush *clflush = container_of(fence, typeof(*clflush), dma); - - i915_sw_fence_fini(&clflush->wait); - - BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); - dma_fence_free(&clflush->dma); -} - -static const struct dma_fence_ops i915_clflush_ops = { - .get_driver_name = i915_clflush_get_driver_name, - .get_timeline_name = i915_clflush_get_timeline_name, - .release = i915_clflush_release, -}; - -static void __i915_do_clflush(struct drm_i915_gem_object *obj) +static void __do_clflush(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!i915_gem_object_has_pages(obj)); drm_clflush_sg(obj->mm.pages); - intel_fb_obj_flush(obj, ORIGIN_CPU); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); } -static void i915_clflush_work(struct work_struct *work) +static int clflush_work(struct dma_fence_work *base) { - struct clflush *clflush = container_of(work, typeof(*clflush), work); + struct clflush *clflush = container_of(base, typeof(*clflush), base); struct drm_i915_gem_object *obj = clflush->obj; + int err; - if (i915_gem_object_pin_pages(obj)) { - DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); - goto out; - } - - __i915_do_clflush(obj); + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + __do_clflush(obj); i915_gem_object_unpin_pages(obj); -out: - i915_gem_object_put(obj); + return 0; +} + +static void clflush_release(struct dma_fence_work *base) +{ + struct clflush *clflush = container_of(base, typeof(*clflush), base); - dma_fence_signal(&clflush->dma); - dma_fence_put(&clflush->dma); + i915_gem_object_put(clflush->obj); } -static int __i915_sw_fence_call -i915_clflush_notify(struct i915_sw_fence *fence, - enum i915_sw_fence_notify state) +static const struct dma_fence_work_ops clflush_ops = { + .name = "clflush", + .work = clflush_work, + .release = clflush_release, +}; + +static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) { - struct clflush *clflush = container_of(fence, typeof(*clflush), wait); + struct clflush *clflush; - switch (state) { - case FENCE_COMPLETE: - schedule_work(&clflush->work); - break; + GEM_BUG_ON(!obj->cache_dirty); - case FENCE_FREE: - dma_fence_put(&clflush->dma); - break; - } + clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + if (!clflush) + return NULL; - return NOTIFY_DONE; + dma_fence_work_init(&clflush->base, &clflush_ops); + clflush->obj = i915_gem_object_get(obj); /* obj <-> clflush cycle */ + + return clflush; } bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, @@ -126,33 +104,16 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, clflush = NULL; if (!(flags & I915_CLFLUSH_SYNC)) - clflush = 
kmalloc(sizeof(*clflush), GFP_KERNEL); + clflush = clflush_work_create(obj); if (clflush) { - GEM_BUG_ON(!obj->cache_dirty); - - dma_fence_init(&clflush->dma, - &i915_clflush_ops, - &clflush_lock, - to_i915(obj->base.dev)->mm.unordered_timeline, - 0); - i915_sw_fence_init(&clflush->wait, i915_clflush_notify); - - clflush->obj = i915_gem_object_get(obj); - INIT_WORK(&clflush->work, i915_clflush_work); - - dma_fence_get(&clflush->dma); - - i915_sw_fence_await_reservation(&clflush->wait, - obj->base.resv, NULL, - true, I915_FENCE_TIMEOUT, + i915_sw_fence_await_reservation(&clflush->base.chain, + obj->base.resv, NULL, true, + I915_FENCE_TIMEOUT, I915_FENCE_GFP); - - reservation_object_add_excl_fence(obj->base.resv, - &clflush->dma); - - i915_sw_fence_commit(&clflush->wait); + dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); + dma_fence_work_commit(&clflush->base); } else if (obj->mm.pages) { - __i915_do_clflush(obj); + __do_clflush(obj); } else { GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 1fdab0767a47..81366aa4812b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -2,10 +2,13 @@ /* * Copyright © 2019 Intel Corporation */ -#include "i915_gem_client_blt.h" +#include "i915_drv.h" +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_pool.h" +#include "i915_gem_client_blt.h" #include "i915_gem_object_blt.h" -#include "intel_drv.h" struct i915_sleeve { struct i915_vma *vma; @@ -72,7 +75,6 @@ static struct i915_sleeve *create_sleeve(struct i915_address_space *vm, vma->ops = &proxy_vma_ops; sleeve->vma = vma; - sleeve->obj = i915_gem_object_get(obj); sleeve->pages = pages; sleeve->page_sizes = *page_sizes; @@ -85,7 +87,6 @@ err_free: static void destroy_sleeve(struct i915_sleeve *sleeve) { - i915_gem_object_put(sleeve->obj); kfree(sleeve); } @@ -154,32 +155,37 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence, static void clear_pages_worker(struct work_struct *work) { struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_private *i915 = w->ce->gem_context->i915; - struct drm_i915_gem_object *obj = w->sleeve->obj; + struct drm_i915_gem_object *obj = w->sleeve->vma->obj; struct i915_vma *vma = w->sleeve->vma; struct i915_request *rq; + struct i915_vma *batch; int err = w->dma.error; if (unlikely(err)) goto out_signal; if (obj->cache_dirty) { - obj->write_domain = 0; if (i915_gem_object_has_struct_page(obj)) drm_clflush_sg(w->sleeve->pages); obj->cache_dirty = false; } + obj->read_domains = I915_GEM_GPU_DOMAINS; + obj->write_domain = 0; - /* XXX: we need to kill this */ - mutex_lock(&i915->drm.struct_mutex); err = i915_vma_pin(vma, 0, 0, PIN_USER); if (unlikely(err)) - goto out_unlock; + goto out_signal; + + batch = intel_emit_vma_fill_blt(w->ce, vma, w->value); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unpin; + } - rq = i915_request_create(w->ce); + rq = intel_context_create_request(w->ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unpin; + goto out_batch; } /* There's no way the fence has signalled */ @@ -187,20 +193,28 @@ static void clear_pages_worker(struct work_struct *work) clear_pages_dma_fence_cb)) GEM_BUG_ON(1); + err = intel_emit_vma_mark_active(batch, rq); + if (unlikely(err)) + goto out_request; + if (w->ce->engine->emit_init_breadcrumb) { err = w->ce->engine->emit_init_breadcrumb(rq); 
if (unlikely(err)) goto out_request; } - /* XXX: more feverish nightmares await */ - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + /* + * w->dma is already exported via (vma|obj)->resv we need only + * keep track of the GPU activity within this vma/request, and + * propagate the signal from the request to w->dma. + */ + err = __i915_vma_move_to_active(vma, rq); if (err) goto out_request; - err = intel_emit_vma_fill_blt(rq, vma, w->value); + err = w->ce->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); out_request: if (unlikely(err)) { i915_request_skip(rq, err); @@ -208,10 +222,10 @@ out_request: } i915_request_add(rq); +out_batch: + intel_emit_vma_release(w->ce, batch); out_unpin: i915_vma_unpin(vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); out_signal: if (unlikely(err)) { dma_fence_set_error(&w->dma, err); @@ -248,14 +262,11 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, struct i915_page_sizes *page_sizes, u32 value) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_gem_context *ctx = ce->gem_context; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct clear_pages_work *work; struct i915_sleeve *sleeve; int err; - sleeve = create_sleeve(vm, obj, pages, page_sizes); + sleeve = create_sleeve(ce->vm, obj, pages, page_sizes); if (IS_ERR(sleeve)) return PTR_ERR(sleeve); @@ -273,11 +284,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, init_irq_work(&work->irq_work, clear_pages_signal_irq_worker); - dma_fence_init(&work->dma, - &clear_pages_work_ops, - &fence_lock, - i915->mm.unordered_timeline, - 0); + dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0); i915_sw_fence_init(&work->wait, clear_pages_work_notify); i915_gem_object_lock(obj); @@ -288,7 +295,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, if (err < 0) { dma_fence_set_error(&work->dma, err); } else { - reservation_object_add_excl_fence(obj->base.resv, &work->dma); + dma_resv_add_excl_fence(obj->base.resv, &work->dma); err = 0; } i915_gem_object_unlock(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 0f2c22a3bcb6..a2e57e62af30 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -69,7 +69,13 @@ #include <drm/i915_drm.h> +#include "gt/gen6_ppgtt.h" +#include "gt/intel_context.h" +#include "gt/intel_engine_heartbeat.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_user.h" #include "gt/intel_lrc_reg.h" +#include "gt/intel_ring.h" #include "i915_gem_context.h" #include "i915_globals.h" @@ -158,7 +164,7 @@ lookup_user_engine(struct i915_gem_context *ctx, if (!engine) return ERR_PTR(-EINVAL); - idx = engine->id; + idx = engine->legacy_idx; } else { idx = ci->engine_instance; } @@ -166,93 +172,71 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) +static struct i915_address_space * +context_get_vm_rcu(struct i915_gem_context *ctx) { - unsigned int max; + GEM_BUG_ON(!rcu_access_pointer(ctx->vm)); - lockdep_assert_held(&i915->contexts.mutex); + do { + struct i915_address_space *vm; - if (INTEL_GEN(i915) >= 11) - max = GEN11_MAX_CONTEXT_HW_ID; - else if (USES_GUC_SUBMISSION(i915)) /* - * When using GuC in proxy submission, GuC consumes the - * highest bit in the 
context id to indicate proxy submission. + * We do not allow downgrading from full-ppgtt [to a shared + * global gtt], so ctx->vm cannot become NULL. */ - max = MAX_GUC_CONTEXT_HW_ID; - else - max = MAX_CONTEXT_HW_ID; - - return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp); -} - -static int steal_hw_id(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx, *cn; - LIST_HEAD(pinned); - int id = -ENOSPC; - - lockdep_assert_held(&i915->contexts.mutex); - - list_for_each_entry_safe(ctx, cn, - &i915->contexts.hw_id_list, hw_id_link) { - if (atomic_read(&ctx->hw_id_pin_count)) { - list_move_tail(&ctx->hw_id_link, &pinned); + vm = rcu_dereference(ctx->vm); + if (!kref_get_unless_zero(&vm->ref)) continue; - } - GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */ - list_del_init(&ctx->hw_id_link); - id = ctx->hw_id; - break; - } + /* + * This ppgtt may have be reallocated between + * the read and the kref, and reassigned to a third + * context. In order to avoid inadvertent sharing + * of this ppgtt with that third context (and not + * src), we have to confirm that we have the same + * ppgtt after passing through the strong memory + * barrier implied by a successful + * kref_get_unless_zero(). + * + * Once we have acquired the current ppgtt of ctx, + * we no longer care if it is released from ctx, as + * it cannot be reallocated elsewhere. + */ - /* - * Remember how far we got up on the last repossesion scan, so the - * list is kept in a "least recently scanned" order. - */ - list_splice_tail(&pinned, &i915->contexts.hw_id_list); - return id; + if (vm == rcu_access_pointer(ctx->vm)) + return rcu_pointer_handoff(vm); + + i915_vm_put(vm); + } while (1); } -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out) +static void intel_context_set_gem(struct intel_context *ce, + struct i915_gem_context *ctx) { - int ret; + GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); + RCU_INIT_POINTER(ce->gem_context, ctx); - lockdep_assert_held(&i915->contexts.mutex); + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) + ce->ring = __intel_context_ring_size(SZ_16K); - /* - * We prefer to steal/stall ourselves and our users over that of the - * entire system. That may be a little unfair to our users, and - * even hurt high priority clients. The choice is whether to oomkill - * something else, or steal a context id. 
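context_get_vm_rcu() above combines rcu_dereference() with kref_get_unless_zero() and then re-checks ctx->vm, since the address space could have been freed and its memory recycled between the read and the reference grab. A rough userspace sketch of that "conditional get, then revalidate" idiom using C11 atomics; struct vm, ctx_get_vm() and the refcount layout are illustrative stand-ins, not i915 code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct vm {
	atomic_uint ref;	/* 0 means the object is already being torn down */
};

/* Rough equivalent of kref_get_unless_zero(): take a ref only if one is still held. */
static bool vm_get_unless_zero(struct vm *vm)
{
	unsigned int old = atomic_load(&vm->ref);

	do {
		if (old == 0)
			return false;
	} while (!atomic_compare_exchange_weak(&vm->ref, &old, old + 1));

	return true;
}

/*
 * Read the published pointer, grab a reference, then confirm the pointer has
 * not been swapped underneath us; if it has, the reference we took may belong
 * to a recycled allocation, so drop it and retry.
 */
static struct vm *ctx_get_vm(_Atomic(struct vm *) *slot)
{
	for (;;) {
		struct vm *vm = atomic_load(slot);

		if (!vm)
			return NULL;

		if (!vm_get_unless_zero(vm))
			continue;

		if (vm == atomic_load(slot))
			return vm;

		atomic_fetch_sub(&vm->ref, 1);	/* lost the race, try again */
	}
}

int main(void)
{
	struct vm vm = { .ref = 1 };
	_Atomic(struct vm *) slot = &vm;

	return ctx_get_vm(&slot) ? 0 : 1;
}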
- */ - ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(ret < 0)) { - ret = steal_hw_id(i915); - if (ret < 0) /* once again for the correct errno code */ - ret = new_hw_id(i915, GFP_KERNEL); - if (ret < 0) - return ret; - } + if (rcu_access_pointer(ctx->vm)) { + struct i915_address_space *vm; - *out = ret; - return 0; -} + rcu_read_lock(); + vm = context_get_vm_rcu(ctx); /* hmm */ + rcu_read_unlock(); -static void release_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; + i915_vm_put(ce->vm); + ce->vm = vm; + } - if (list_empty(&ctx->hw_id_link)) - return; + GEM_BUG_ON(ce->timeline); + if (ctx->timeline) + ce->timeline = intel_timeline_get(ctx->timeline); - mutex_lock(&i915->contexts.mutex); - if (!list_empty(&ctx->hw_id_link)) { - ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); - list_del_init(&ctx->hw_id_link); - } - mutex_unlock(&i915->contexts.mutex); + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) + __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -261,6 +245,7 @@ static void __free_engines(struct i915_gem_engines *e, unsigned int count) if (!e->engines[count]) continue; + RCU_INIT_POINTER(e->engines[count]->gem_context, NULL); intel_context_put(e->engines[count]); } kfree(e); @@ -278,6 +263,7 @@ static void free_engines_rcu(struct rcu_head *rcu) static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) { + const struct intel_gt *gt = &ctx->i915->gt; struct intel_engine_cs *engine; struct i915_gem_engines *e; enum intel_engine_id id; @@ -287,104 +273,254 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) return ERR_PTR(-ENOMEM); init_rcu_head(&e->rcu); - for_each_engine(engine, ctx->i915, id) { + for_each_engine(engine, gt, id) { struct intel_context *ce; - ce = intel_context_create(ctx, engine); + if (engine->legacy_idx == INVALID_ENGINE) + continue; + + GEM_BUG_ON(engine->legacy_idx >= I915_NUM_ENGINES); + GEM_BUG_ON(e->engines[engine->legacy_idx]); + + ce = intel_context_create(engine); if (IS_ERR(ce)) { - __free_engines(e, id); + __free_engines(e, e->num_engines + 1); return ERR_CAST(ce); } - e->engines[id] = ce; + intel_context_set_gem(ce, ctx); + + e->engines[engine->legacy_idx] = ce; + e->num_engines = max(e->num_engines, engine->legacy_idx); } - e->num_engines = id; + e->num_engines++; return e; } static void i915_gem_context_free(struct i915_gem_context *ctx) { - lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - release_hw_id(ctx); - if (ctx->vm) - i915_vm_put(ctx->vm); + spin_lock(&ctx->i915->gem.contexts.lock); + list_del(&ctx->link); + spin_unlock(&ctx->i915->gem.contexts.lock); free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); if (ctx->timeline) - i915_timeline_put(ctx->timeline); + intel_timeline_put(ctx->timeline); - kfree(ctx->name); put_pid(ctx->pid); - - list_del(&ctx->link); mutex_destroy(&ctx->mutex); kfree_rcu(ctx, rcu); } -static void contexts_free(struct drm_i915_private *i915) +static void contexts_free_all(struct llist_node *list) { - struct llist_node *freed = llist_del_all(&i915->contexts.free_list); struct i915_gem_context *ctx, *cn; - lockdep_assert_held(&i915->drm.struct_mutex); - - llist_for_each_entry_safe(ctx, cn, freed, free_link) + llist_for_each_entry_safe(ctx, cn, list, free_link) i915_gem_context_free(ctx); } 
-static void contexts_free_first(struct drm_i915_private *i915) +static void contexts_flush_free(struct i915_gem_contexts *gc) { - struct i915_gem_context *ctx; - struct llist_node *freed; - - lockdep_assert_held(&i915->drm.struct_mutex); - - freed = llist_del_first(&i915->contexts.free_list); - if (!freed) - return; - - ctx = container_of(freed, typeof(*ctx), free_link); - i915_gem_context_free(ctx); + contexts_free_all(llist_del_all(&gc->free_list)); } static void contexts_free_worker(struct work_struct *work) { - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), contexts.free_work); + struct i915_gem_contexts *gc = + container_of(work, typeof(*gc), free_work); - mutex_lock(&i915->drm.struct_mutex); - contexts_free(i915); - mutex_unlock(&i915->drm.struct_mutex); + contexts_flush_free(gc); } void i915_gem_context_release(struct kref *ref) { struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); - struct drm_i915_private *i915 = ctx->i915; + struct i915_gem_contexts *gc = &ctx->i915->gem.contexts; trace_i915_context_free(ctx); - if (llist_add(&ctx->free_link, &i915->contexts.free_list)) - queue_work(i915->wq, &i915->contexts.free_work); + if (llist_add(&ctx->free_link, &gc->free_list)) + schedule_work(&gc->free_work); } -static void context_close(struct i915_gem_context *ctx) +static inline struct i915_gem_engines * +__context_engines_static(const struct i915_gem_context *ctx) { - mutex_lock(&ctx->mutex); + return rcu_dereference_protected(ctx->engines, true); +} - i915_gem_context_set_closed(ctx); - ctx->file_priv = ERR_PTR(-EBADF); +static bool __reset_engine(struct intel_engine_cs *engine) +{ + struct intel_gt *gt = engine->gt; + bool success = false; + + if (!intel_has_reset_engine(gt)) + return false; + + if (!test_and_set_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags)) { + success = intel_engine_reset(engine, NULL) == 0; + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags); + } + + return success; +} + +static void __reset_context(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + intel_gt_handle_error(engine->gt, engine->mask, 0, + "context closure in %s", ctx->name); +} + +static bool __cancel_engine(struct intel_engine_cs *engine) +{ + /* + * Send a "high priority pulse" down the engine to cause the + * current request to be momentarily preempted. (If it fails to + * be preempted, it will be reset). As we have marked our context + * as banned, any incomplete request, including any running, will + * be skipped following the preemption. + * + * If there is no hangchecking (one of the reasons why we try to + * cancel the context) and no forced preemption, there may be no + * means by which we reset the GPU and evict the persistent hog. + * Ergo if we are unable to inject a preemptive pulse that can + * kill the banned context, we fallback to doing a local reset + * instead. + */ + if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) && + !intel_engine_pulse(engine)) + return true; + + /* If we are unable to send a pulse, try resetting this engine. */ + return __reset_engine(engine); +} + +static struct intel_engine_cs *__active_engine(struct i915_request *rq) +{ + struct intel_engine_cs *engine, *locked; /* - * This context will never again be assinged to HW, so we can - * reuse its ID for the next context. + * Serialise with __i915_request_submit() so that it sees + * is-banned?, or we know the request is already inflight. 
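i915_gem_context_release() above now defers the actual free to gc->free_work: contexts are pushed onto a lock-free llist and the worker is scheduled only when llist_add() reports that the list was previously empty, so a single work item drains the whole backlog via llist_del_all(). A small userspace analogue of that "schedule only on first insertion" pattern with C11 atomics; the node/free_list names are illustrative.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
	int id;
};

static _Atomic(struct node *) free_list;

/* Push a node; return true if the list was empty beforehand (mirrors llist_add()). */
static bool free_list_add(struct node *n)
{
	struct node *first = atomic_load(&free_list);

	do {
		n->next = first;
	} while (!atomic_compare_exchange_weak(&free_list, &first, n));

	return first == NULL;
}

/* The "worker": grab the whole batch at once (mirrors llist_del_all()). */
static void free_worker(void)
{
	struct node *n = atomic_exchange(&free_list, NULL);

	while (n) {
		struct node *next = n->next;

		printf("freeing node %d\n", n->id);
		n = next;
	}
}

int main(void)
{
	struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };

	/* Only the first push reports an empty list, i.e. only it schedules work. */
	printf("schedule? %d\n", free_list_add(&a));
	printf("schedule? %d\n", free_list_add(&b));
	printf("schedule? %d\n", free_list_add(&c));
	free_worker();
	return 0;
}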
*/ - release_hw_id(ctx); + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->active.lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->active.lock); + spin_lock(&engine->active.lock); + locked = engine; + } + + engine = NULL; + if (i915_request_is_active(rq) && !rq->fence.error) + engine = rq->engine; + + spin_unlock_irq(&locked->active.lock); + + return engine; +} + +static struct intel_engine_cs *active_engine(struct intel_context *ce) +{ + struct intel_engine_cs *engine = NULL; + struct i915_request *rq; + + if (!ce->timeline) + return NULL; + + mutex_lock(&ce->timeline->mutex); + list_for_each_entry_reverse(rq, &ce->timeline->requests, link) { + if (i915_request_completed(rq)) + break; + + /* Check with the backend if the request is inflight */ + engine = __active_engine(rq); + if (engine) + break; + } + mutex_unlock(&ce->timeline->mutex); + + return engine; +} + +static void kill_context(struct i915_gem_context *ctx) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + + /* + * Map the user's engine back to the actual engines; one virtual + * engine will be mapped to multiple engines, and using ctx->engine[] + * the same engine may be have multiple instances in the user's map. + * However, we only care about pending requests, so only include + * engines on which there are incomplete requests. + */ + for_each_gem_engine(ce, __context_engines_static(ctx), it) { + struct intel_engine_cs *engine; + + if (intel_context_set_banned(ce)) + continue; + + /* + * Check the current active state of this context; if we + * are currently executing on the GPU we need to evict + * ourselves. On the other hand, if we haven't yet been + * submitted to the GPU or if everything is complete, + * we have nothing to do. + */ + engine = active_engine(ce); + + /* First attempt to gracefully cancel the context */ + if (engine && !__cancel_engine(engine)) + /* + * If we are unable to send a preemptive pulse to bump + * the context from the GPU, we have to resort to a full + * reset. We hope the collateral damage is worth it. + */ + __reset_context(ctx, engine); + } +} + +static void set_closed_name(struct i915_gem_context *ctx) +{ + char *s; + + /* Replace '[]' with '<>' to indicate closed in debug prints */ + + s = strrchr(ctx->name, '['); + if (!s) + return; + + *s = '<'; + + s = strchr(s + 1, ']'); + if (s) + *s = '>'; +} + +static void context_close(struct i915_gem_context *ctx) +{ + struct i915_address_space *vm; + + i915_gem_context_set_closed(ctx); + set_closed_name(ctx); + + mutex_lock(&ctx->mutex); + + vm = i915_gem_context_vm(ctx); + if (vm) + i915_vm_close(vm); + + ctx->file_priv = ERR_PTR(-EBADF); /* * The LUT uses the VMA as a backpointer to unref the object, @@ -394,31 +530,45 @@ static void context_close(struct i915_gem_context *ctx) lut_close(ctx); mutex_unlock(&ctx->mutex); + + /* + * If the user has disabled hangchecking, we can not be sure that + * the batches will ever complete after the context is closed, + * keeping the context and all resources pinned forever. So in this + * case we opt to forcibly kill off all remaining requests on + * context close. 
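__active_engine() above cannot simply lock rq->engine: the request may be moved to another engine by whoever holds the old engine's lock, so it locks the engine it saw, re-reads the pointer, and chases until the two agree. A generic pthread sketch of that lock-chasing idiom; the task/queue structures are invented for illustration and stand in for request/engine.

#include <pthread.h>
#include <stdatomic.h>

struct queue {
	pthread_mutex_t lock;
};

struct task {
	_Atomic(struct queue *) queue;	/* only changed while queue->lock is held */
};

/*
 * Lock the queue the task currently belongs to. Acquiring the lock we first
 * saw is not enough, because the task may have migrated in the meantime:
 * re-read the pointer and chase it until it is stable under the held lock.
 */
static struct queue *lock_task_queue(struct task *t)
{
	struct queue *locked, *q;

	locked = atomic_load(&t->queue);
	pthread_mutex_lock(&locked->lock);
	while ((q = atomic_load(&t->queue)) != locked) {
		pthread_mutex_unlock(&locked->lock);
		pthread_mutex_lock(&q->lock);
		locked = q;
	}

	return locked;	/* caller drops locked->lock when done */
}

int main(void)
{
	struct queue q = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct task t = { .queue = &q };

	pthread_mutex_unlock(&lock_task_queue(&t)->lock);
	return 0;
}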
+ */ + if (!i915_gem_context_is_persistent(ctx) || + !i915_modparams.enable_hangcheck) + kill_context(ctx); + i915_gem_context_put(ctx); } -static u32 default_desc_template(const struct drm_i915_private *i915, - const struct i915_address_space *vm) +static int __context_set_persistence(struct i915_gem_context *ctx, bool state) { - u32 address_mode; - u32 desc; - - desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + if (i915_gem_context_is_persistent(ctx) == state) + return 0; - address_mode = INTEL_LEGACY_32B_CONTEXT; - if (vm && i915_vm_is_4lvl(vm)) - address_mode = INTEL_LEGACY_64B_CONTEXT; - desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; + if (state) { + /* + * Only contexts that are short-lived [that will expire or be + * reset] are allowed to survive past termination. We require + * hangcheck to ensure that the persistent requests are healthy. + */ + if (!i915_modparams.enable_hangcheck) + return -EINVAL; - if (IS_GEN(i915, 8)) - desc |= GEN8_CTX_L3LLC_COHERENT; + i915_gem_context_set_persistence(ctx); + } else { + /* To cancel a context we use "preempt-to-idle" */ + if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + return -ENODEV; - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers - * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; - */ + i915_gem_context_clear_persistence(ctx); + } - return desc; + return 0; } static struct i915_gem_context * @@ -434,7 +584,6 @@ __create_context(struct drm_i915_private *i915) return ERR_PTR(-ENOMEM); kref_init(&ctx->ref); - list_add_tail(&ctx->link, &i915->contexts.list); ctx->i915 = i915; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); mutex_init(&ctx->mutex); @@ -448,7 +597,6 @@ __create_context(struct drm_i915_private *i915) RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); /* NB: Mark all slices as needing a remap so that when the context first * loads it will restore whatever remap state already exists. If there @@ -457,14 +605,15 @@ __create_context(struct drm_i915_private *i915) i915_gem_context_set_bannable(ctx); i915_gem_context_set_recoverable(ctx); - - ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = - default_desc_template(i915, &i915->mm.aliasing_ppgtt->vm); + __context_set_persistence(ctx, true /* cgroup hook? 
*/); for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + spin_lock(&i915->gem.contexts.lock); + list_add_tail(&ctx->link, &i915->gem.contexts.list); + spin_unlock(&i915->gem.contexts.lock); + return ctx; err_free: @@ -472,13 +621,34 @@ err_free: return ERR_PTR(err); } +static void +context_apply_all(struct i915_gem_context *ctx, + void (*fn)(struct intel_context *ce, void *data), + void *data) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) + fn(ce, data); + i915_gem_context_unlock_engines(ctx); +} + +static void __apply_ppgtt(struct intel_context *ce, void *vm) +{ + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(vm); +} + static struct i915_address_space * __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - struct i915_address_space *old = ctx->vm; + struct i915_address_space *old = i915_gem_context_vm(ctx); + + GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - ctx->vm = i915_vm_get(vm); - ctx->desc_template = default_desc_template(ctx->i915, vm); + rcu_assign_pointer(ctx->vm, i915_vm_open(vm)); + context_apply_all(ctx, __apply_ppgtt, vm); return old; } @@ -486,36 +656,57 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) static void __assign_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - if (vm == ctx->vm) + if (vm == rcu_access_pointer(ctx->vm)) return; vm = __set_ppgtt(ctx, vm); if (vm) - i915_vm_put(vm); + i915_vm_close(vm); +} + +static void __set_timeline(struct intel_timeline **dst, + struct intel_timeline *src) +{ + struct intel_timeline *old = *dst; + + *dst = src ? intel_timeline_get(src) : NULL; + + if (old) + intel_timeline_put(old); +} + +static void __apply_timeline(struct intel_context *ce, void *timeline) +{ + __set_timeline(&ce->timeline, timeline); +} + +static void __assign_timeline(struct i915_gem_context *ctx, + struct intel_timeline *timeline) +{ + __set_timeline(&ctx->timeline, timeline); + context_apply_all(ctx, __apply_timeline, timeline); } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { struct i915_gem_context *ctx; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(dev_priv)) + !HAS_EXECLISTS(i915)) return ERR_PTR(-EINVAL); - /* Reap the most stale context */ - contexts_free_first(dev_priv); + /* Reap the stale contexts */ + contexts_flush_free(&i915->gem.contexts); - ctx = __create_context(dev_priv); + ctx = __create_context(i915); if (IS_ERR(ctx)) return ctx; - if (HAS_FULL_PPGTT(dev_priv)) { + if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); @@ -523,20 +714,24 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) return ERR_CAST(ppgtt); } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct i915_timeline *timeline; + struct intel_timeline *timeline; - timeline = i915_timeline_create(dev_priv, NULL); + timeline = intel_timeline_create(&i915->gt, NULL); if (IS_ERR(timeline)) { 
context_close(ctx); return ERR_CAST(timeline); } - ctx->timeline = timeline; + __assign_timeline(ctx, timeline); + intel_timeline_put(timeline); } trace_i915_context_create(ctx); @@ -544,171 +739,26 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) return ctx; } -/** - * i915_gem_context_create_gvt - create a GVT GEM context - * @dev: drm device * - * - * This function is used to create a GVT specific GEM context. - * - * Returns: - * pointer to i915_gem_context on success, error pointer if failed - * - */ -struct i915_gem_context * -i915_gem_context_create_gvt(struct drm_device *dev) -{ - struct i915_gem_context *ctx; - int ret; - - if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) - return ERR_PTR(-ENODEV); - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ERR_PTR(ret); - - ctx = i915_gem_create_context(to_i915(dev), 0); - if (IS_ERR(ctx)) - goto out; - - ret = i915_gem_context_pin_hw_id(ctx); - if (ret) { - context_close(ctx); - ctx = ERR_PTR(ret); - goto out; - } - - ctx->file_priv = ERR_PTR(-EBADF); - i915_gem_context_set_closed(ctx); /* not user accessible */ - i915_gem_context_clear_bannable(ctx); - i915_gem_context_set_force_single_submission(ctx); - if (!USES_GUC_SUBMISSION(to_i915(dev))) - ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ - - GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); -out: - mutex_unlock(&dev->struct_mutex); - return ctx; -} - -static void -destroy_kernel_context(struct i915_gem_context **ctxp) -{ - struct i915_gem_context *ctx; - - /* Keep the context ref so that we can free it immediately ourselves */ - ctx = i915_gem_context_get(fetch_and_zero(ctxp)); - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - - context_close(ctx); - i915_gem_context_free(ctx); -} - -struct i915_gem_context * -i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) -{ - struct i915_gem_context *ctx; - int err; - - ctx = i915_gem_create_context(i915, 0); - if (IS_ERR(ctx)) - return ctx; - - err = i915_gem_context_pin_hw_id(ctx); - if (err) { - destroy_kernel_context(&ctx); - return ERR_PTR(err); - } - - i915_gem_context_clear_bannable(ctx); - ctx->sched.priority = I915_USER_PRIORITY(prio); - ctx->ring_size = PAGE_SIZE; - - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - - return ctx; -} - -static void init_contexts(struct drm_i915_private *i915) +static void init_contexts(struct i915_gem_contexts *gc) { - mutex_init(&i915->contexts.mutex); - INIT_LIST_HEAD(&i915->contexts.list); + spin_lock_init(&gc->lock); + INIT_LIST_HEAD(&gc->list); - /* Using the simple ida interface, the max is limited by sizeof(int) */ - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); - ida_init(&i915->contexts.hw_ida); - INIT_LIST_HEAD(&i915->contexts.hw_id_list); - - INIT_WORK(&i915->contexts.free_work, contexts_free_worker); - init_llist_head(&i915->contexts.free_list); + INIT_WORK(&gc->free_work, contexts_free_worker); + init_llist_head(&gc->free_list); } -static bool needs_preempt_context(struct drm_i915_private *i915) +void i915_gem_init__contexts(struct drm_i915_private *i915) { - return HAS_EXECLISTS(i915); -} - -int i915_gem_contexts_init(struct drm_i915_private *dev_priv) -{ - struct i915_gem_context *ctx; - - /* Reassure ourselves we are only called once */ - GEM_BUG_ON(dev_priv->kernel_context); - GEM_BUG_ON(dev_priv->preempt_context); - - intel_engine_init_ctx_wa(dev_priv->engine[RCS0]); - init_contexts(dev_priv); - - /* lowest priority; idle task */ - ctx = 
i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); - if (IS_ERR(ctx)) { - DRM_ERROR("Failed to create default global context\n"); - return PTR_ERR(ctx); - } - /* - * For easy recognisablity, we want the kernel context to be 0 and then - * all user contexts will have non-zero hw_id. Kernel contexts are - * permanently pinned, so that we never suffer a stall and can - * use them from any allocation context (e.g. for evicting other - * contexts and from inside the shrinker). - */ - GEM_BUG_ON(ctx->hw_id); - GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); - dev_priv->kernel_context = ctx; - - /* highest priority; preempting task */ - if (needs_preempt_context(dev_priv)) { - ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); - if (!IS_ERR(ctx)) - dev_priv->preempt_context = ctx; - else - DRM_ERROR("Failed to create preempt context; disabling preemption\n"); - } - + init_contexts(&i915->gem.contexts); DRM_DEBUG_DRIVER("%s context support initialized\n", - DRIVER_CAPS(dev_priv)->has_logical_contexts ? + DRIVER_CAPS(i915)->has_logical_contexts ? "logical" : "fake"); - return 0; } -void i915_gem_contexts_fini(struct drm_i915_private *i915) +void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->drm.struct_mutex); - - if (i915->preempt_context) - destroy_kernel_context(&i915->preempt_context); - destroy_kernel_context(&i915->kernel_context); - - /* Must free all deferred contexts (via flush_workqueue) first */ - GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list)); - ida_destroy(&i915->contexts.hw_ida); -} - -static int context_idr_cleanup(int id, void *p, void *data) -{ - context_close(p); - return 0; + flush_work(&i915->gem.contexts.free_work); } static int vm_idr_cleanup(int id, void *p, void *data) @@ -718,33 +768,29 @@ static int vm_idr_cleanup(int id, void *p, void *data) } static int gem_context_register(struct i915_gem_context *ctx, - struct drm_i915_file_private *fpriv) + struct drm_i915_file_private *fpriv, + u32 *id) { + struct i915_address_space *vm; int ret; ctx->file_priv = fpriv; - if (ctx->vm) - ctx->vm->file = fpriv; + + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->file, fpriv); /* XXX */ + mutex_unlock(&ctx->mutex); ctx->pid = get_task_pid(current, PIDTYPE_PID); - ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", - current->comm, pid_nr(ctx->pid)); - if (!ctx->name) { - ret = -ENOMEM; - goto err_pid; - } + snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", + current->comm, pid_nr(ctx->pid)); /* And finally expose ourselves to userspace via the idr */ - mutex_lock(&fpriv->context_idr_lock); - ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL); - mutex_unlock(&fpriv->context_idr_lock); - if (ret >= 0) - goto out; + ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL); + if (ret) + put_pid(fetch_and_zero(&ctx->pid)); - kfree(fetch_and_zero(&ctx->name)); -err_pid: - put_pid(fetch_and_zero(&ctx->pid)); -out: return ret; } @@ -754,51 +800,51 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_gem_context *ctx; int err; + u32 id; - mutex_init(&file_priv->context_idr_lock); - mutex_init(&file_priv->vm_idr_lock); + xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC); - idr_init(&file_priv->context_idr); + mutex_init(&file_priv->vm_idr_lock); idr_init_base(&file_priv->vm_idr, 1); - mutex_lock(&i915->drm.struct_mutex); ctx = i915_gem_create_context(i915, 0); - 
mutex_unlock(&i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; } - err = gem_context_register(ctx, file_priv); + err = gem_context_register(ctx, file_priv, &id); if (err < 0) goto err_ctx; - GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); - GEM_BUG_ON(err > 0); - + GEM_BUG_ON(id); return 0; err_ctx: context_close(ctx); err: idr_destroy(&file_priv->vm_idr); - idr_destroy(&file_priv->context_idr); + xa_destroy(&file_priv->context_xa); mutex_destroy(&file_priv->vm_idr_lock); - mutex_destroy(&file_priv->context_idr_lock); return err; } void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_private *i915 = file_priv->dev_priv; + struct i915_gem_context *ctx; + unsigned long idx; - idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); - idr_destroy(&file_priv->context_idr); - mutex_destroy(&file_priv->context_idr_lock); + xa_for_each(&file_priv->context_xa, idx, ctx) + context_close(ctx); + xa_destroy(&file_priv->context_xa); idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL); idr_destroy(&file_priv->vm_idr); mutex_destroy(&file_priv->vm_idr_lock); + + contexts_flush_free(&i915->gem.contexts); } int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, @@ -816,7 +862,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags) return -EINVAL; - ppgtt = i915_ppgtt_create(i915); + ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -891,6 +937,7 @@ struct context_barrier_task { void *data; }; +__i915_active_call static void cb_retire(struct i915_active *base) { struct context_barrier_task *cb = container_of(base, typeof(*cb), base); @@ -910,21 +957,23 @@ static int context_barrier_task(struct i915_gem_context *ctx, void (*task)(void *data), void *data) { - struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; struct i915_gem_engines_iter it; struct intel_context *ce; int err = 0; - lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!task); cb = kmalloc(sizeof(*cb), GFP_KERNEL); if (!cb) return -ENOMEM; - i915_active_init(i915, &cb->base, cb_retire); - i915_active_acquire(&cb->base); + i915_active_init(&cb->base, NULL, cb_retire); + err = i915_active_acquire(&cb->base); + if (err) { + kfree(cb); + return err; + } for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct i915_request *rq; @@ -951,7 +1000,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (emit) err = emit(rq, data); if (err == 0) - err = i915_active_ref(&cb->base, rq->fence.context, rq); + err = i915_active_add_request(&cb->base, rq); i915_request_add(rq); if (err) @@ -974,16 +1023,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_address_space *vm; int ret; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; - /* XXX rcu acquire? 
*/ - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (ret) - return ret; - - vm = i915_vm_get(ctx->vm); - mutex_unlock(&ctx->i915->drm.struct_mutex); + rcu_read_lock(); + vm = context_get_vm_rcu(ctx); + rcu_read_unlock(); ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); if (ret) @@ -994,7 +1039,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (ret < 0) goto err_unlock; - i915_vm_get(vm); + i915_vm_open(vm); args->size = 0; args->value = ret; @@ -1014,12 +1059,12 @@ static void set_ppgtt_barrier(void *data) if (INTEL_GEN(old->i915) < 8) gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); - i915_vm_put(old); + i915_vm_close(old); } static int emit_ppgtt_update(struct i915_request *rq, void *data) { - struct i915_address_space *vm = rq->gem_context->vm; + struct i915_address_space *vm = rq->context->vm; struct intel_engine_cs *engine = rq->engine; u32 base = engine->mmio_base; u32 *cs; @@ -1044,12 +1089,18 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) intel_ring_advance(rq, cs); } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + int err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES); + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; for (i = GEN8_3LVL_PDPES; i--; ) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); @@ -1060,9 +1111,6 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) } *cs++ = MI_NOOP; intel_ring_advance(rq, cs); - } else { - /* ppGTT is not part of the legacy context image */ - gen6_ppgtt_pin(i915_vm_to_ppgtt(vm)); } return 0; @@ -1070,10 +1118,20 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) static bool skip_ppgtt_update(struct intel_context *ce, void *data) { + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) + return true; + if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915)) - return !ce->state; - else - return !atomic_read(&ce->pin_count); + return false; + + if (!atomic_read(&ce->pin_count)) + return true; + + /* ppGTT is not part of the legacy context image */ + if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm))) + return true; + + return false; } static int set_ppgtt(struct drm_i915_file_private *file_priv, @@ -1086,34 +1144,34 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (args->size) return -EINVAL; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; if (upper_32_bits(args->value)) return -ENOENT; - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - return err; - + rcu_read_lock(); vm = idr_find(&file_priv->vm_idr, args->value); - if (vm) - i915_vm_get(vm); - mutex_unlock(&file_priv->vm_idr_lock); + if (vm && !kref_get_unless_zero(&vm->ref)) + vm = NULL; + rcu_read_unlock(); if (!vm) return -ENOENT; - err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + err = mutex_lock_interruptible(&ctx->mutex); if (err) goto out; - if (vm == ctx->vm) + if (i915_gem_context_is_closed(ctx)) { + err = -ENOENT; + goto unlock; + } + + if (vm == rcu_access_pointer(ctx->vm)) goto unlock; /* Teardown the existing obj:vma cache, it will have to be rebuilt. 
*/ - mutex_lock(&ctx->mutex); lut_close(ctx); - mutex_unlock(&ctx->mutex); old = __set_ppgtt(ctx, vm); @@ -1128,14 +1186,12 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, set_ppgtt_barrier, old); if (err) { - ctx->vm = old; - ctx->desc_template = default_desc_template(ctx->i915, old); - i915_vm_put(vm); + i915_vm_close(__set_ppgtt(ctx, old)); + i915_vm_close(old); } unlock: - mutex_unlock(&ctx->i915->drm.struct_mutex); - + mutex_unlock(&ctx->mutex); out: i915_vm_put(vm); return err; @@ -1154,7 +1210,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE + - (CTX_R_PWR_CLK_STATE + 1) * 4; + CTX_R_PWR_CLK_STATE * 4; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); @@ -1180,44 +1236,32 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) * image, or into the registers directory, does not stick). Pristine * and idle contexts will be configured on pinning. */ - if (!intel_context_is_pinned(ce)) + if (!intel_context_pin_if_active(ce)) return 0; - rq = i915_request_create(ce->engine->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - /* Queue this switch after all other activity by this context. */ - ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); - if (ret) - goto out_add; - - /* - * Guarantee context image and the timeline remains pinned until the - * modifying request is retired by setting the ce activity tracker. - * - * But we only need to take one pin on the account of it. Or in other - * words transfer the pinned ce object to tracked active request. - */ - GEM_BUG_ON(i915_active_is_idle(&ce->active)); - ret = i915_active_ref(&ce->active, rq->fence.context, rq); - if (ret) - goto out_add; + rq = intel_engine_create_kernel_request(ce->engine); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto out_unpin; + } - ret = gen8_emit_rpcs_config(rq, ce, sseu); + /* Serialise with the remote context */ + ret = intel_context_prepare_remote_request(ce, rq); + if (ret == 0) + ret = gen8_emit_rpcs_config(rq, ce, sseu); -out_add: i915_request_add(rq); +out_unpin: + intel_context_unpin(ce); return ret; } static int -__intel_context_reconfigure_sseu(struct intel_context *ce, - struct intel_sseu sseu) +intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) { int ret; - GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8); + GEM_BUG_ON(INTEL_GEN(ce->engine->i915) < 8); ret = intel_context_lock_pinned(ce); if (ret) @@ -1237,23 +1281,6 @@ unlock: } static int -intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) -{ - struct drm_i915_private *i915 = ce->gem_context->i915; - int ret; - - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - - ret = __intel_context_reconfigure_sseu(ce, sseu); - - mutex_unlock(&i915->drm.struct_mutex); - - return ret; -} - -static int user_to_context_sseu(struct drm_i915_private *i915, const struct drm_i915_gem_context_param_sseu *user, struct intel_sseu *context) @@ -1484,12 +1511,14 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data) } } - ce = intel_execlists_create_virtual(set->ctx, siblings, n); + ce = intel_execlists_create_virtual(siblings, n); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out_siblings; } + intel_context_set_gem(ce, set->ctx); + if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { intel_context_put(ce); err = -EEXIST; @@ -1636,6 +1665,7 @@ set_engines(struct i915_gem_context *ctx, for (n = 
0; n < num_engines; n++) { struct i915_engine_class_instance ci; struct intel_engine_cs *engine; + struct intel_context *ce; if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { __free_engines(set.engines, n); @@ -1658,11 +1688,15 @@ set_engines(struct i915_gem_context *ctx, return -ENOENT; } - set.engines->engines[n] = intel_context_create(ctx, engine); - if (!set.engines->engines[n]) { + ce = intel_context_create(engine); + if (IS_ERR(ce)) { __free_engines(set.engines, n); - return -ENOMEM; + return PTR_ERR(ce); } + + intel_context_set_gem(ce, ctx); + + set.engines->engines[n] = ce; } set.engines->num_engines = num_engines; @@ -1683,7 +1717,7 @@ replace: i915_gem_context_set_user_engines(ctx); else i915_gem_context_clear_user_engines(ctx); - rcu_swap_protected(ctx->engines, set.engines, 1); + set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1); mutex_unlock(&ctx->engines_mutex); call_rcu(&set.engines->rcu, free_engines_rcu); @@ -1776,7 +1810,7 @@ get_engines(struct i915_gem_context *ctx, if (e->engines[n]) { ci.engine_class = e->engines[n]->engine->uabi_class; - ci.engine_instance = e->engines[n]->engine->instance; + ci.engine_instance = e->engines[n]->engine->uabi_instance; } if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { @@ -1792,6 +1826,54 @@ err_free: return err; } +static int +set_persistence(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + if (args->size) + return -EINVAL; + + return __context_set_persistence(ctx, args->value); +} + +static void __apply_priority(struct intel_context *ce, void *arg) +{ + struct i915_gem_context *ctx = arg; + + if (!intel_engine_has_semaphores(ce->engine)) + return; + + if (ctx->sched.priority >= I915_PRIORITY_NORMAL) + intel_context_set_use_semaphores(ce); + else + intel_context_clear_use_semaphores(ce); +} + +static int set_priority(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + s64 priority = args->value; + + if (args->size) + return -EINVAL; + + if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + return -ENODEV; + + if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + return -EINVAL; + + if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + return -EPERM; + + ctx->sched.priority = I915_USER_PRIORITY(priority); + context_apply_all(ctx, __apply_priority, ctx); + + return 0; +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1838,23 +1920,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, break; case I915_CONTEXT_PARAM_PRIORITY: - { - s64 priority = args->value; - - if (args->size) - ret = -EINVAL; - else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - ret = -ENODEV; - else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || - priority < I915_CONTEXT_MIN_USER_PRIORITY) - ret = -EINVAL; - else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && - !capable(CAP_SYS_NICE)) - ret = -EPERM; - else - ctx->sched.priority = - I915_USER_PRIORITY(priority); - } + ret = set_priority(ctx, args); break; case I915_CONTEXT_PARAM_SSEU: @@ -1869,6 +1935,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_engines(ctx, args); break; + case I915_CONTEXT_PARAM_PERSISTENCE: + ret = set_persistence(ctx, args); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -1930,20 +2000,23 @@ static int clone_engines(struct i915_gem_context *dst, */ 
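The new I915_CONTEXT_PARAM_PERSISTENCE handling above (set_persistence()/__context_set_persistence()) is driven from userspace through the context setparam ioctl. A hedged sketch of turning persistence off for a context, assuming a uapi header recent enough to define I915_CONTEXT_PARAM_PERSISTENCE and an already open DRM fd plus context id; a real program would typically go through libdrm's drmIoctl() to restart on EINTR.

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/ioctl.h>

#include <drm/i915_drm.h>	/* or <libdrm/i915_drm.h>, depending on the install */

/*
 * Enable/disable persistence for a context. Mirrors the error cases above:
 * expect -ENODEV when clearing persistence without preemption support, and
 * -EINVAL when re-enabling it while hangcheck is disabled.
 */
static int set_context_persistence(int drm_fd, uint32_t ctx_id, bool enable)
{
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_PERSISTENCE,
		.value = enable,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p) < 0)
		return -errno;

	return 0;
}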
if (intel_engine_is_virtual(engine)) clone->engines[n] = - intel_execlists_clone_virtual(dst, engine); + intel_execlists_clone_virtual(engine); else - clone->engines[n] = intel_context_create(dst, engine); + clone->engines[n] = intel_context_create(engine); if (IS_ERR_OR_NULL(clone->engines[n])) { __free_engines(clone, n); goto err_unlock; } + + intel_context_set_gem(clone->engines[n], dst); } clone->num_engines = n; user_engines = i915_gem_context_user_engines(src); i915_gem_context_unlock_engines(src); - free_engines(dst->engines); + /* Serialised by constructor */ + free_engines(__context_engines_static(dst)); RCU_INIT_POINTER(dst->engines, clone); if (user_engines) i915_gem_context_set_user_engines(dst); @@ -1978,7 +2051,8 @@ static int clone_sseu(struct i915_gem_context *dst, unsigned long n; int err; - clone = dst->engines; /* no locking required; sole access */ + /* no locking required; sole access under constructor*/ + clone = __context_engines_static(dst); if (e->num_engines != clone->num_engines) { err = -EINVAL; goto unlock; @@ -2011,13 +2085,8 @@ unlock: static int clone_timeline(struct i915_gem_context *dst, struct i915_gem_context *src) { - if (src->timeline) { - GEM_BUG_ON(src->timeline == dst->timeline); - - if (dst->timeline) - i915_timeline_put(dst->timeline); - dst->timeline = i915_timeline_get(src->timeline); - } + if (src->timeline) + __assign_timeline(dst, src->timeline); return 0; } @@ -2026,44 +2095,24 @@ static int clone_vm(struct i915_gem_context *dst, struct i915_gem_context *src) { struct i915_address_space *vm; + int err = 0; - rcu_read_lock(); - do { - vm = READ_ONCE(src->vm); - if (!vm) - break; - - if (!kref_get_unless_zero(&vm->ref)) - continue; - - /* - * This ppgtt may have be reallocated between - * the read and the kref, and reassigned to a third - * context. In order to avoid inadvertent sharing - * of this ppgtt with that third context (and not - * src), we have to confirm that we have the same - * ppgtt after passing through the strong memory - * barrier implied by a successful - * kref_get_unless_zero(). - * - * Once we have acquired the current ppgtt of src, - * we no longer care if it is released from src, as - * it cannot be reallocated elsewhere. 
- */ - - if (vm == READ_ONCE(src->vm)) - break; + if (!rcu_access_pointer(src->vm)) + return 0; - i915_vm_put(vm); - } while (1); + rcu_read_lock(); + vm = context_get_vm_rcu(src); rcu_read_unlock(); - if (vm) { + if (!mutex_lock_interruptible(&dst->mutex)) { __assign_ppgtt(dst, vm); - i915_vm_put(vm); + mutex_unlock(&dst->mutex); + } else { + err = -EINTR; } - return 0; + i915_vm_put(vm); + return err; } static int create_clone(struct i915_user_extension __user *ext, void *data) @@ -2134,6 +2183,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_context_create_ext *args = data; struct create_ext ext_data; int ret; + u32 id; if (!DRIVER_CAPS(i915)->has_logical_contexts) return -ENODEV; @@ -2141,24 +2191,18 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN) return -EINVAL; - ret = i915_terminally_wedged(i915); + ret = intel_gt_terminally_wedged(&i915->gt); if (ret) return ret; ext_data.fpriv = file->driver_priv; if (client_is_banned(ext_data.fpriv)) { DRM_DEBUG("client %s[%d] banned from creating ctx\n", - current->comm, - pid_nr(get_task_pid(current, PIDTYPE_PID))); + current->comm, task_pid_nr(current)); return -EIO; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - ext_data.ctx = i915_gem_create_context(i915, args->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(ext_data.ctx)) return PTR_ERR(ext_data.ctx); @@ -2171,11 +2215,11 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, goto err_ctx; } - ret = gem_context_register(ext_data.ctx, ext_data.fpriv); + ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id); if (ret < 0) goto err_ctx; - args->ctx_id = ret; + args->ctx_id = id; DRM_DEBUG("HW context %d created\n", args->ctx_id); return 0; @@ -2198,11 +2242,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, if (!args->ctx_id) return -ENOENT; - if (mutex_lock_interruptible(&file_priv->context_idr_lock)) - return -EINTR; - - ctx = idr_remove(&file_priv->context_idr, args->ctx_id); - mutex_unlock(&file_priv->context_idr_lock); + ctx = xa_erase(&file_priv->context_xa, args->ctx_id); if (!ctx) return -ENOENT; @@ -2285,12 +2325,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; - if (ctx->vm) - args->value = ctx->vm->total; - else if (to_i915(dev)->mm.aliasing_ppgtt) - args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total; + rcu_read_lock(); + if (rcu_access_pointer(ctx->vm)) + args->value = rcu_dereference(ctx->vm)->total; else args->value = to_i915(dev)->ggtt.vm.total; + rcu_read_unlock(); break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: @@ -2325,6 +2365,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_engines(ctx, args); break; + case I915_CONTEXT_PARAM_PERSISTENCE: + args->size = 0; + args->value = i915_gem_context_is_persistent(ctx); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -2356,7 +2401,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; int ret; @@ -2378,7 +2423,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, */ if 
(capable(CAP_SYS_ADMIN)) - args->reset_count = i915_reset_count(&dev_priv->gpu_error); + args->reset_count = i915_reset_count(&i915->gpu_error); else args->reset_count = 0; @@ -2391,33 +2436,6 @@ out: return ret; } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int err = 0; - - mutex_lock(&i915->contexts.mutex); - - GEM_BUG_ON(i915_gem_context_is_closed(ctx)); - - if (list_empty(&ctx->hw_id_link)) { - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count)); - - err = assign_hw_id(i915, &ctx->hw_id); - if (err) - goto out_unlock; - - list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list); - } - - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u); - atomic_inc(&ctx->hw_id_pin_count); - -out_unlock: - mutex_unlock(&i915->contexts.mutex); - return err; -} - /* GEM context-engines iterator: for_each_gem_engine() */ struct intel_context * i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 9691dd062f72..3ae61a355d87 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -11,6 +11,7 @@ #include "gt/intel_context.h" +#include "i915_drv.h" #include "i915_gem.h" #include "i915_scheduler.h" #include "intel_device_info.h" @@ -74,24 +75,19 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); } -static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) +static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx) { - return test_bit(CONTEXT_BANNED, &ctx->flags); + return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); } -static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx) +static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx) { - set_bit(CONTEXT_BANNED, &ctx->flags); + set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); } -static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx) +static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx) { - return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); -} - -static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx) -{ - __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); + clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); } static inline bool @@ -112,37 +108,15 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - if (atomic_inc_not_zero(&ctx->hw_id_pin_count)) - return 0; - - return __i915_gem_context_pin_hw_id(ctx); -} - -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u); - atomic_dec(&ctx->hw_id_pin_count); -} - -static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) -{ - return !ctx->file_priv; -} - /* i915_gem_context.c */ -int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); -void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); +void i915_gem_init__contexts(struct drm_i915_private *i915); +void i915_gem_driver_release__contexts(struct drm_i915_private *i915); int 
i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); void i915_gem_context_close(struct drm_file *file); void i915_gem_context_release(struct kref *ctx_ref); -struct i915_gem_context * -i915_gem_context_create_gvt(struct drm_device *dev); int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); @@ -160,9 +134,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -struct i915_gem_context * -i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); - static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) { @@ -175,6 +146,27 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline struct i915_address_space * +i915_gem_context_vm(struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); +} + +static inline struct i915_address_space * +i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx) +{ + struct i915_address_space *vm; + + rcu_read_lock(); + vm = rcu_dereference(ctx->vm); + if (!vm) + vm = &ctx->i915->ggtt.vm; + vm = i915_vm_get(vm); + rcu_read_unlock(); + + return vm; +} + static inline struct i915_gem_engines * i915_gem_context_engines(struct i915_gem_context *ctx) { @@ -198,12 +190,6 @@ i915_gem_context_unlock_engines(struct i915_gem_context *ctx) } static inline struct intel_context * -i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx) -{ - return i915_gem_context_engines(ctx)->engines[idx]; -} - -static inline struct intel_context * i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx) { struct intel_context *ce = ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index cc513410eeef..017ca803ab47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -26,7 +26,7 @@ struct pid; struct drm_i915_private; struct drm_i915_file_private; struct i915_address_space; -struct i915_timeline; +struct intel_timeline; struct intel_ring; struct i915_gem_engines { @@ -77,7 +77,7 @@ struct i915_gem_context { struct i915_gem_engines __rcu *engines; struct mutex engines_mutex; /* guards writes to engines */ - struct i915_timeline *timeline; + struct intel_timeline *timeline; /** * @vm: unique address space (GTT) @@ -88,7 +88,7 @@ struct i915_gem_context { * In other modes, this is a NULL pointer with the expectation that * the caller uses the shared global GTT. */ - struct i915_address_space *vm; + struct i915_address_space __rcu *vm; /** * @pid: process id of creator @@ -100,15 +100,6 @@ struct i915_gem_context { */ struct pid *pid; - /** - * @name: arbitrary name - * - * A name is constructed for the context from the creator's process - * name, pid and user handle in order to uniquely identify the - * context in messages. 
- */ - const char *name; - /** link: place with &drm_i915_private.context_list */ struct list_head link; struct llist_node free_link; @@ -137,43 +128,19 @@ struct i915_gem_context { #define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 +#define UCONTEXT_PERSISTENCE 4 /** * @flags: small set of booleans */ unsigned long flags; -#define CONTEXT_BANNED 0 -#define CONTEXT_CLOSED 1 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 2 -#define CONTEXT_USER_ENGINES 3 - - /** - * @hw_id: - unique identifier for the context - * - * The hardware needs to uniquely identify the context for a few - * functions like fault reporting, PASID, scheduling. The - * &drm_i915_private.context_hw_ida is used to assign a unqiue - * id for the lifetime of the context. - * - * @hw_id_pin_count: - number of times this context had been pinned - * for use (should be, at most, once per engine). - * - * @hw_id_link: - all contexts with an assigned id are tracked - * for possible repossession. - */ - unsigned int hw_id; - atomic_t hw_id_pin_count; - struct list_head hw_id_link; +#define CONTEXT_CLOSED 0 +#define CONTEXT_USER_ENGINES 1 struct mutex mutex; struct i915_sched_attr sched; - /** ring_size: size for allocating the per-engine ring buffer */ - u32 ring_size; - /** desc_template: invariant fields for the HW context descriptor */ - u32 desc_template; - /** guilty_count: How many times this context has caused a GPU hang. */ atomic_t guilty_count; /** @@ -197,6 +164,15 @@ struct i915_gem_context { * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** + * @name: arbitrary name, used for user debug + * + * A name is constructed for the context from the creator's process + * name, pid and user handle in order to uniquely identify the + * context in messages. 
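The persistence helpers added in this series follow the driver's usual pattern: a single unsigned long of per-context flag bits (the UCONTEXT_* indices above) toggled with test_bit()/set_bit()/clear_bit(). A minimal, non-atomic userspace sketch of that pattern, using plain bit arithmetic instead of the kernel bitops (struct and helper names here are illustrative, not the driver's):

	#include <stdbool.h>
	#include <stdio.h>

	#define UCONTEXT_PERSISTENCE 4	/* bit index, as in the hunk above */

	struct gem_ctx {
		unsigned long user_flags;
	};

	static void set_persistence(struct gem_ctx *ctx)
	{
		ctx->user_flags |= 1UL << UCONTEXT_PERSISTENCE;
	}

	static void clear_persistence(struct gem_ctx *ctx)
	{
		ctx->user_flags &= ~(1UL << UCONTEXT_PERSISTENCE);
	}

	static bool is_persistent(const struct gem_ctx *ctx)
	{
		return ctx->user_flags & (1UL << UCONTEXT_PERSISTENCE);
	}

	int main(void)
	{
		/* new contexts default to persistent in this scheme */
		struct gem_ctx ctx = { .user_flags = 1UL << UCONTEXT_PERSISTENCE };

		clear_persistence(&ctx);
		printf("persistent: %d\n", is_persistent(&ctx));
		set_persistence(&ctx);
		printf("persistent: %d\n", is_persistent(&ctx));
		return 0;
	}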
+ */ + char name[TASK_COMM_LEN + 8]; }; #endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index cbf1701d3acc..372b57ca0efc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -6,7 +6,7 @@ #include <linux/dma-buf.h> #include <linux/highmem.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include "i915_drv.h" #include "i915_gem_object.h" @@ -93,40 +93,6 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) i915_gem_object_unpin_map(obj); } -static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct page *page; - - if (page_num >= obj->base.size >> PAGE_SHIFT) - return NULL; - - if (!i915_gem_object_has_struct_page(obj)) - return NULL; - - if (i915_gem_object_pin_pages(obj)) - return NULL; - - /* Synchronisation is left to the caller (via .begin_cpu_access()) */ - page = i915_gem_object_get_page(obj, page_num); - if (IS_ERR(page)) - goto err_unpin; - - return kmap(page); - -err_unpin: - i915_gem_object_unpin_pages(obj); - return NULL; -} - -static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - - kunmap(virt_to_page(addr)); - i915_gem_object_unpin_pages(obj); -} - static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); @@ -195,8 +161,6 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .map_dma_buf = i915_gem_map_dma_buf, .unmap_dma_buf = i915_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, - .map = i915_gem_dmabuf_kmap, - .unmap = i915_gem_dmabuf_kunmap, .mmap = i915_gem_dmabuf_mmap, .vmap = i915_gem_dmabuf_vmap, .vunmap = i915_gem_dmabuf_vunmap, @@ -204,8 +168,7 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .end_cpu_access = i915_gem_end_cpu_access, }; -struct dma_buf *i915_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gem_obj, int flags) +struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags) { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); DEFINE_DMA_BUF_EXPORT_INFO(exp_info); @@ -222,7 +185,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, return ERR_PTR(ret); } - return drm_gem_dmabuf_export(dev, &exp_info); + return drm_gem_dmabuf_export(gem_obj->dev, &exp_info); } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) @@ -257,6 +220,7 @@ static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = { struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { + static struct lock_class_key lock_class; struct dma_buf_attachment *attach; struct drm_i915_gem_object *obj; int ret; @@ -288,7 +252,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, } drm_gem_private_object_init(dev, &obj->base, dma_buf->size); - i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); + i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class); obj->base.import_attach = attach; obj->base.resv = dma_buf->resv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 2e3ce2a69653..0cc40e77bbd2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ 
-12,6 +12,8 @@ #include "i915_gem_ioctls.h" #include "i915_gem_object.h" #include "i915_vma.h" +#include "i915_gem_lmem.h" +#include "i915_gem_mman.h" static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) { @@ -27,7 +29,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) { - if (!READ_ONCE(obj->pin_global)) + if (!i915_gem_object_is_framebuffer(obj)) return; i915_gem_object_lock(obj); @@ -148,9 +150,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); obj->read_domains |= I915_GEM_DOMAIN_GTT; if (write) { + struct i915_vma *vma; + obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = I915_GEM_DOMAIN_GTT; obj->mm.dirty = true; + + spin_lock(&obj->vma.lock); + for_each_ggtt_vma(vma, obj) + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + i915_vma_set_ggtt_write(vma); + spin_unlock(&obj->vma.lock); } i915_gem_object_unpin_pages(obj); @@ -175,112 +185,34 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { - struct i915_vma *vma; int ret; - assert_object_held(obj); - if (obj->cache_level == cache_level) return 0; - /* Inspect the list of currently bound VMA and unbind any that would - * be invalid given the new cache-level. This is principally to - * catch the issue of the CS prefetch crossing page boundaries and - * reading an invalid PTE on older architectures. - */ -restart: - list_for_each_entry(vma, &obj->vma.list, obj_link) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - if (i915_vma_is_pinned(vma)) { - DRM_DEBUG("can not change the cache level of pinned objects\n"); - return -EBUSY; - } - - if (!i915_vma_is_closed(vma) && - i915_gem_valid_gtt_space(vma, cache_level)) - continue; - - ret = i915_vma_unbind(vma); - if (ret) - return ret; + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (ret) + return ret; - /* As unbinding may affect other elements in the - * obj->vma_list (due to side-effects from retiring - * an active vma), play safe and restart the iterator. - */ - goto restart; - } + ret = i915_gem_object_lock_interruptible(obj); + if (ret) + return ret; - /* We can reuse the existing drm_mm nodes but need to change the - * cache-level on the PTE. We could simply unbind them all and - * rebind with the correct cache-level on next use. However since - * we already have a valid slot, dma mapping, pages etc, we may as - * rewrite the PTE in the belief that doing so tramples upon less - * state and so involves less work. - */ - if (atomic_read(&obj->bind_count)) { - /* Before we change the PTE, the GPU must not be accessing it. - * If we wait upon the object, we know that all the bound - * VMA are no longer active. - */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - - if (!HAS_LLC(to_i915(obj->base.dev)) && - cache_level != I915_CACHE_NONE) { - /* Access to snoopable pages through the GTT is - * incoherent and on some machines causes a hard - * lockup. Relinquish the CPU mmaping to force - * userspace to refault in the pages and we can - * then double check if the GTT mapping is still - * valid for that pointer access. 
- */ - i915_gem_object_release_mmap(obj); - - /* As we no longer need a fence for GTT access, - * we can relinquish it now (and so prevent having - * to steal a fence from someone else on the next - * fence request). Note GPU activity would have - * dropped the fence as all snoopable access is - * supposed to be linear. - */ - for_each_ggtt_vma(vma, obj) { - ret = i915_vma_put_fence(vma); - if (ret) - return ret; - } - } else { - /* We either have incoherent backing store and - * so no GTT access or the architecture is fully - * coherent. In such cases, existing GTT mmaps - * ignore the cache bit in the PTE and we can - * rewrite it without confusing the GPU or having - * to force userspace to fault back in its mmaps. - */ - } - - list_for_each_entry(vma, &obj->vma.list, obj_link) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); - if (ret) - return ret; - } + /* Always invalidate stale cachelines */ + if (obj->cache_level != cache_level) { + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->cache_dirty = true; } - list_for_each_entry(vma, &obj->vma.list, obj_link) - vma->node.color = cache_level; - i915_gem_object_set_cache_coherency(obj, cache_level); - obj->cache_dirty = true; /* Always invalidate stale cachelines */ + i915_gem_object_unlock(obj); - return 0; + /* The cache-level will be applied when each vma is rebound. */ + return i915_gem_object_unbind(obj, + I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_BARRIER); } int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, @@ -361,25 +293,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, goto out; } - if (obj->cache_level == level) - goto out; - - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (ret) - goto out; - - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - goto out; - - ret = i915_gem_object_lock_interruptible(obj); - if (ret == 0) { - ret = i915_gem_object_set_cache_level(obj, level); - i915_gem_object_unlock(obj); - } - mutex_unlock(&i915->drm.struct_mutex); + ret = i915_gem_object_set_cache_level(obj, level); out: i915_gem_object_put(obj); @@ -398,17 +312,16 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, unsigned int flags) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_vma *vma; int ret; - assert_object_held(obj); - - /* Mark the global pin early so that we account for the - * display coherency whilst setting up the cache domains. - */ - obj->pin_global++; + /* Frame buffer must be in LMEM (no migration yet) */ + if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) + return ERR_PTR(-EINVAL); - /* The display engine is not coherent with the LLC cache on gen6. As + /* + * The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is * done with uncached PTEs. This is lowest common denominator for all * chipsets. @@ -418,14 +331,13 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * with that bit in the PTE to main memory with just one PIPE_CONTROL. */ ret = i915_gem_object_set_cache_level(obj, - HAS_WT(to_i915(obj->base.dev)) ? + HAS_WT(i915) ? 
I915_CACHE_WT : I915_CACHE_NONE); - if (ret) { - vma = ERR_PTR(ret); - goto err_unpin_global; - } + if (ret) + return ERR_PTR(ret); - /* As the user may map the buffer once pinned in the display plane + /* + * As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. However, * it may simply be too big to fit into mappable, in which case @@ -442,21 +354,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); if (IS_ERR(vma)) - goto err_unpin_global; + return vma; vma->display_alignment = max_t(u64, vma->display_alignment, alignment); - __i915_gem_object_flush_for_display(obj); - - /* It should now be out of any other write domains, and we can update - * the domain values for our changes. - */ - obj->read_domains |= I915_GEM_DOMAIN_GTT; - - return vma; + i915_gem_object_flush_if_display(obj); -err_unpin_global: - obj->pin_global--; return vma; } @@ -466,14 +369,19 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) struct i915_vma *vma; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + if (!atomic_read(&obj->bind_count)) + return; mutex_lock(&i915->ggtt.vm.mutex); + spin_lock(&obj->vma.lock); for_each_ggtt_vma(vma, obj) { if (!drm_mm_node_allocated(&vma->node)) continue; + GEM_BUG_ON(vma->vm != &i915->ggtt.vm); list_move_tail(&vma->vm_link, &vma->vm->bound_list); } + spin_unlock(&obj->vma.lock); mutex_unlock(&i915->ggtt.vm.mutex); if (i915_gem_object_is_shrinkable(obj)) { @@ -481,7 +389,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) spin_lock_irqsave(&i915->mm.obj_lock, flags); - if (obj->mm.madv == I915_MADV_WILLNEED) + if (obj->mm.madv == I915_MADV_WILLNEED && + !atomic_read(&obj->mm.shrink_pin)) list_move_tail(&obj->mm.link, &i915->mm.shrink_list); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); @@ -495,12 +404,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) assert_object_held(obj); - if (WARN_ON(obj->pin_global == 0)) - return; - - if (--obj->pin_global == 0) - vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - /* Bump the LRU to try and avoid premature eviction whilst flipping */ i915_gem_object_bump_inactive_ggtt(obj); @@ -551,13 +454,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) return 0; } -static inline enum fb_op_origin -fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? - obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - /** * Called when user space prepares to use an object with the CPU, either * through the mmap ioctl's mapping or a GTT mapping. 
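Read through the diff noise, the reworked i915_gem_object_set_cache_level() above no longer unbinds/rebinds VMAs one by one or rewrites PTEs in place: it waits for the object, records the new coherency settings, and unbinds everything so the cache level is applied on the next bind. A paraphrase assembled from the added lines (not a verbatim copy of the final source):

	int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
					    enum i915_cache_level cache_level)
	{
		int ret;

		if (obj->cache_level == cache_level)
			return 0;

		ret = i915_gem_object_wait(obj,
					   I915_WAIT_INTERRUPTIBLE |
					   I915_WAIT_ALL,
					   MAX_SCHEDULE_TIMEOUT);
		if (ret)
			return ret;

		ret = i915_gem_object_lock_interruptible(obj);
		if (ret)
			return ret;

		/* Always invalidate stale cachelines */
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;

		i915_gem_object_unlock(obj);

		/* The cache-level will be applied when each vma is rebound. */
		return i915_gem_object_unbind(obj,
					      I915_GEM_OBJECT_UNBIND_ACTIVE |
					      I915_GEM_OBJECT_UNBIND_BARRIER);
	}

This is also why i915_gem_set_caching_ioctl() above loses its struct_mutex/wait dance: the heavy lifting moved into set_cache_level() itself.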
@@ -661,9 +557,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, i915_gem_object_unlock(obj); - if (write_domain != 0) - intel_fb_obj_invalidate(obj, - fb_write_origin(obj, write_domain)); + if (write_domain) + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); out_unpin: i915_gem_object_unpin_pages(obj); @@ -783,7 +678,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, } out: - intel_fb_obj_invalidate(obj, ORIGIN_CPU); + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); obj->mm.dirty = true; /* return with the pages pinned */ return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 41dab9ea33cd..60c984e10c4a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -5,7 +5,7 @@ */ #include <linux/intel-iommu.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include <linux/sync_file.h> #include <linux/uaccess.h> @@ -16,13 +16,17 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_context.h" +#include "gt/intel_engine_pool.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_ring.h" -#include "i915_gem_ioctls.h" +#include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" +#include "i915_gem_ioctls.h" +#include "i915_sw_fence_work.h" #include "i915_trace.h" -#include "intel_drv.h" enum { FORCE_CPU_RELOC = 1, @@ -222,10 +226,10 @@ struct i915_execbuffer { struct intel_engine_cs *engine; /** engine to queue the request to */ struct intel_context *context; /* logical state for the request */ struct i915_gem_context *gem_context; /** caller's context */ - struct i915_address_space *vm; /** GTT and vma for the request */ struct i915_request *request; /** our request to build */ struct i915_vma *batch; /** identity of the batch obj/vma */ + struct i915_vma *trampoline; /** trampoline used for chaining */ /** actual size of execobj[] as we may extend it for the cmdparser */ unsigned int buffer_count; @@ -274,28 +278,11 @@ struct i915_execbuffer { #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) -/* - * Used to convert any address to canonical form. - * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, - * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the - * addresses to be in a canonical form: - * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct - * canonical form [63:48] == [47]." 
- */ -#define GEN8_HIGH_ADDRESS_BIT 47 -static inline u64 gen8_canonical_addr(u64 address) -{ - return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); -} - -static inline u64 gen8_noncanonical_addr(u64 address) -{ - return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); -} - static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; + return intel_engine_requires_cmd_parser(eb->engine) || + (intel_engine_using_cmd_parser(eb->engine) && + eb->args->batch_len); } static int eb_create(struct i915_execbuffer *eb) @@ -696,7 +683,9 @@ static int eb_reserve(struct i915_execbuffer *eb) case 1: /* Too fragmented, unbind everything and retry */ - err = i915_gem_evict_vm(eb->vm); + mutex_lock(&eb->context->vm->mutex); + err = i915_gem_evict_vm(eb->context->vm); + mutex_unlock(&eb->context->vm->mutex); if (err) return err; break; @@ -724,12 +713,8 @@ static int eb_select_context(struct i915_execbuffer *eb) return -ENOENT; eb->gem_context = ctx; - if (ctx->vm) { - eb->vm = ctx->vm; + if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; - } else { - eb->vm = &eb->i915->ggtt.vm; - } eb->context_flags = 0; if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags)) @@ -738,63 +723,6 @@ static int eb_select_context(struct i915_execbuffer *eb) return 0; } -static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) -{ - struct i915_request *rq; - - /* - * Completely unscientific finger-in-the-air estimates for suitable - * maximum user request size (to avoid blocking) and then backoff. - */ - if (intel_ring_update_space(ring) >= PAGE_SIZE) - return NULL; - - /* - * Find a request that after waiting upon, there will be at least half - * the ring available. The hysteresis allows us to compete for the - * shared ring and should mean that we sleep less often prior to - * claiming our resources, but not so long that the ring completely - * drains before we can submit our next request. - */ - list_for_each_entry(rq, &ring->request_list, ring_link) { - if (__intel_ring_space(rq->postfix, - ring->emit, ring->size) > ring->size / 2) - break; - } - if (&rq->ring_link == &ring->request_list) - return NULL; /* weird, we will check again later for real */ - - return i915_request_get(rq); -} - -static int eb_wait_for_ring(const struct i915_execbuffer *eb) -{ - struct i915_request *rq; - int ret = 0; - - /* - * Apply a light amount of backpressure to prevent excessive hogs - * from blocking waiting for space whilst holding struct_mutex and - * keeping all of their resources pinned. 
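The gen8_canonical_addr()/gen8_noncanonical_addr() helpers removed from this file boil down to a sign extension from bit 47, per the quoted PRM language (GraphicsAddress[63:48] must replicate bit 47). A standalone illustration of the same arithmetic, mirroring the kernel's sign_extend64():

	#include <inttypes.h>
	#include <stdio.h>

	#define GEN8_HIGH_ADDRESS_BIT 47

	static uint64_t gen8_canonical_addr(uint64_t address)
	{
		/* equivalent of sign_extend64(address, 47) */
		return (uint64_t)((int64_t)(address << (63 - GEN8_HIGH_ADDRESS_BIT)) >>
				  (63 - GEN8_HIGH_ADDRESS_BIT));
	}

	static uint64_t gen8_noncanonical_addr(uint64_t address)
	{
		return address & ((1ull << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
	}

	int main(void)
	{
		uint64_t addr = 0x0000900000001000ull;	/* bit 47 set */

		printf("canonical:     %#" PRIx64 "\n", gen8_canonical_addr(addr));
		printf("non-canonical: %#" PRIx64 "\n",
		       gen8_noncanonical_addr(gen8_canonical_addr(addr)));
		return 0;
	}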
- */ - - rq = __eb_wait_for_ring(eb->context->ring); - if (rq) { - mutex_unlock(&eb->i915->drm.struct_mutex); - - if (i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT) < 0) - ret = -EINTR; - - i915_request_put(rq); - - mutex_lock(&eb->i915->drm.struct_mutex); - } - - return ret; -} - static int eb_lookup_vmas(struct i915_execbuffer *eb) { struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; @@ -802,9 +730,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) unsigned int i, batch; int err; - if (unlikely(i915_gem_context_is_banned(eb->gem_context))) - return -EIO; - INIT_LIST_HEAD(&eb->relocs); INIT_LIST_HEAD(&eb->unbound); @@ -831,7 +756,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) goto err_vma; } - vma = i915_vma_instance(obj, eb->vm, NULL); + vma = i915_vma_instance(obj, eb->context->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -962,7 +887,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; - cache->node.allocated = false; + cache->node.flags = 0; cache->rq = NULL; cache->rq_size = 0; } @@ -994,7 +919,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache) __i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size); i915_gem_object_unpin_map(cache->rq->batch->obj); - i915_gem_chipset_flush(cache->rq->i915); + intel_gt_chipset_flush(cache->rq->engine->gt); i915_request_add(cache->rq); cache->rq = NULL; @@ -1018,15 +943,18 @@ static void reloc_cache_reset(struct reloc_cache *cache) kunmap_atomic(vaddr); i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); } else { - wmb(); + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + + intel_gt_flush_ggtt_writes(ggtt->vm.gt); io_mapping_unmap_atomic((void __iomem *)vaddr); - if (cache->node.allocated) { - struct i915_ggtt *ggtt = cache_to_ggtt(cache); + if (drm_mm_node_allocated(&cache->node)) { ggtt->vm.clear_range(&ggtt->vm, cache->node.start, cache->node.size); + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(&cache->node); + mutex_unlock(&ggtt->vm.mutex); } else { i915_vma_unpin((struct i915_vma *)cache->node.mm); } @@ -1077,11 +1005,15 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, void *vaddr; if (cache->vaddr) { + intel_gt_flush_ggtt_writes(ggtt->vm.gt); io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); } else { struct i915_vma *vma; int err; + if (i915_gem_object_is_tiled(obj)) + return ERR_PTR(-EINVAL); + if (use_cpu_reloc(cache, obj)) return NULL; @@ -1093,32 +1025,27 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | - PIN_NONBLOCK | - PIN_NONFAULT); + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); + mutex_lock(&ggtt->vm.mutex); err = drm_mm_insert_node_in_range (&ggtt->vm.mm, &cache->node, PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); + mutex_unlock(&ggtt->vm.mutex); if (err) /* no inactive aperture space, use cpu reloc */ return NULL; } else { - err = i915_vma_put_fence(vma); - if (err) { - i915_vma_unpin(vma); - return ERR_PTR(err); - } - cache->node.start = vma->node.start; cache->node.mm = (void *)vma; } } offset = cache->node.start; - if (cache->node.allocated) { - wmb(); + if (drm_mm_node_allocated(&cache->node)) { ggtt->vm.insert_page(&ggtt->vm, 
i915_gem_object_get_dma_address(obj, page), offset, I915_CACHE_NONE, 0); @@ -1201,25 +1128,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; - struct drm_i915_gem_object *obj; + struct intel_engine_pool_node *pool; struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); + pool = intel_engine_get_pool(eb->engine, PAGE_SIZE); + if (IS_ERR(pool)) + return PTR_ERR(pool); - cmd = i915_gem_object_pin_map(obj, + cmd = i915_gem_object_pin_map(pool->obj, cache->has_llc ? I915_MAP_FORCE_WB : I915_MAP_FORCE_WC); - i915_gem_object_unpin_pages(obj); - if (IS_ERR(cmd)) - return PTR_ERR(cmd); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_pool; + } - batch = i915_vma_instance(obj, vma->vm, NULL); + batch = i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto err_unmap; @@ -1235,6 +1163,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_unpin; } + err = intel_engine_pool_mark_active(pool, rq); + if (err) + goto err_request; + err = reloc_move_to_gpu(rq, vma); if (err) goto err_request; @@ -1246,8 +1178,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto skip_request; i915_vma_lock(batch); - GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); i915_vma_unlock(batch); if (err) goto skip_request; @@ -1260,7 +1193,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, cache->rq_size = 0; /* Return with batch mapping (cmd) still pinned */ - return 0; + goto out_pool; skip_request: i915_request_skip(rq, err); @@ -1269,7 +1202,9 @@ err_request: err_unpin: i915_vma_unpin(batch); err_unmap: - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(pool->obj); +out_pool: + intel_engine_pool_put(pool); return err; } @@ -1286,10 +1221,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (unlikely(!cache->rq)) { int err; - /* If we need to copy for the cmdparser, we will stall anyway */ - if (eb_use_cmdparser(eb)) - return ERR_PTR(-EWOULDBLOCK); - if (!intel_engine_can_store_dword(eb->engine)) return ERR_PTR(-ENODEV); @@ -1317,7 +1248,7 @@ relocate_entry(struct i915_vma *vma, if (!eb->reloc_cache.vaddr && (DBG_FORCE_RELOC == FORCE_GPU_RELOC || - !reservation_object_test_signaled_rcu(vma->resv, true))) { + !dma_resv_test_signaled_rcu(vma->resv, true))) { const unsigned int gen = eb->reloc_cache.gen; unsigned int len; u32 *batch; @@ -1442,7 +1373,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && IS_GEN(eb->i915, 6)) { err = i915_vma_bind(target, target->obj->cache_level, - PIN_GLOBAL); + PIN_GLOBAL, NULL); if (WARN_ONCE(err, "Unexpected failure to bind target VMA!")) return err; @@ -1952,7 +1883,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) eb->exec = NULL; /* Unconditionally flush any chipset caches (for streaming writes). 
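__reloc_gpu_alloc() above now draws its scratch batch from the per-engine pool (intel_engine_get_pool()/intel_engine_pool_put()) instead of the old batch pool, and marks the node active against the request so it is not recycled while the GPU may still be reading it. A much-reduced userspace model of the get/recycle contract, ignoring locking, size buckets, and the request-retirement step that really gates reuse in the driver:

	#include <stdio.h>
	#include <stdlib.h>

	struct pool_node {
		struct pool_node *next;
		size_t size;
	};

	static struct pool_node *free_list;

	/* Reuse an idle node that is big enough, else allocate a fresh one. */
	static struct pool_node *pool_get(size_t size)
	{
		struct pool_node **link = &free_list;
		struct pool_node *node;

		for (node = free_list; node; link = &node->next, node = node->next) {
			if (node->size >= size) {
				*link = node->next;
				return node;
			}
		}

		node = malloc(sizeof(*node));
		if (!node)
			return NULL;
		node->next = NULL;
		node->size = size;
		return node;
	}

	/* In the driver a node marked active is only returned to the pool once
	 * the request using it has retired; here we recycle immediately. */
	static void pool_put(struct pool_node *node)
	{
		node->next = free_list;
		free_list = node;
	}

	int main(void)
	{
		struct pool_node *a = pool_get(4096);

		if (!a)
			return 1;
		pool_put(a);
		printf("node reused: %s\n", pool_get(4096) == a ? "yes" : "no");
		return 0;
	}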
*/ - i915_gem_chipset_flush(eb->i915); + intel_gt_chipset_flush(eb->engine->gt); return 0; err_skip: @@ -1960,15 +1891,15 @@ err_skip: return err; } -static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) +static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) { if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) - return false; + return -EINVAL; /* Kernel clipping was a DRI1 misfeature */ if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { if (exec->num_cliprects || exec->cliprects_ptr) - return false; + return -EINVAL; } if (exec->DR4 == 0xffffffff) { @@ -1976,12 +1907,12 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) exec->DR4 = 0; } if (exec->DR1 || exec->DR4) - return false; + return -EINVAL; if ((exec->batch_start_offset | exec->batch_len) & 0x7) - return false; + return -EINVAL; - return true; + return 0; } static int i915_reset_gen7_sol_offsets(struct i915_request *rq) @@ -2009,51 +1940,227 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) return 0; } -static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) +static struct i915_vma * +shadow_batch_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + unsigned int flags) { - struct drm_i915_gem_object *shadow_batch_obj; struct i915_vma *vma; int err; - shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, - PAGE_ALIGN(eb->batch_len)); - if (IS_ERR(shadow_batch_obj)) - return ERR_CAST(shadow_batch_obj); - - err = intel_engine_cmd_parser(eb->engine, - eb->batch->obj, - shadow_batch_obj, - eb->batch_start_offset, - eb->batch_len, - is_master); - if (err) { - if (err == -EACCES) /* unhandled chained batch */ - vma = NULL; - else - vma = ERR_PTR(err); - goto out; + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return vma; + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + return ERR_PTR(err); + + return vma; +} + +struct eb_parse_work { + struct dma_fence_work base; + struct intel_engine_cs *engine; + struct i915_vma *batch; + struct i915_vma *shadow; + struct i915_vma *trampoline; + unsigned int batch_offset; + unsigned int batch_length; +}; + +static int __eb_parse(struct dma_fence_work *work) +{ + struct eb_parse_work *pw = container_of(work, typeof(*pw), base); + + return intel_engine_cmd_parser(pw->engine, + pw->batch, + pw->batch_offset, + pw->batch_length, + pw->shadow, + pw->trampoline); +} + +static void __eb_parse_release(struct dma_fence_work *work) +{ + struct eb_parse_work *pw = container_of(work, typeof(*pw), base); + + if (pw->trampoline) + i915_active_release(&pw->trampoline->active); + i915_active_release(&pw->shadow->active); + i915_active_release(&pw->batch->active); +} + +static const struct dma_fence_work_ops eb_parse_ops = { + .name = "eb_parse", + .work = __eb_parse, + .release = __eb_parse_release, +}; + +static int eb_parse_pipeline(struct i915_execbuffer *eb, + struct i915_vma *shadow, + struct i915_vma *trampoline) +{ + struct eb_parse_work *pw; + int err; + + pw = kzalloc(sizeof(*pw), GFP_KERNEL); + if (!pw) + return -ENOMEM; + + err = i915_active_acquire(&eb->batch->active); + if (err) + goto err_free; + + err = i915_active_acquire(&shadow->active); + if (err) + goto err_batch; + + if (trampoline) { + err = i915_active_acquire(&trampoline->active); + if (err) + goto err_shadow; } - vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - goto out; + dma_fence_work_init(&pw->base, &eb_parse_ops); + + pw->engine = eb->engine; + 
pw->batch = eb->batch; + pw->batch_offset = eb->batch_start_offset; + pw->batch_length = eb->batch_len; + pw->shadow = shadow; + pw->trampoline = trampoline; + + err = dma_resv_lock_interruptible(pw->batch->resv, NULL); + if (err) + goto err_trampoline; + + err = dma_resv_reserve_shared(pw->batch->resv, 1); + if (err) + goto err_batch_unlock; + + /* Wait for all writes (and relocs) into the batch to complete */ + err = i915_sw_fence_await_reservation(&pw->base.chain, + pw->batch->resv, NULL, false, + 0, I915_FENCE_GFP); + if (err < 0) + goto err_batch_unlock; + + /* Keep the batch alive and unwritten as we parse */ + dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); + + dma_resv_unlock(pw->batch->resv); + + /* Force execution to wait for completion of the parser */ + dma_resv_lock(shadow->resv, NULL); + dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); + dma_resv_unlock(shadow->resv); + + dma_fence_work_commit(&pw->base); + return 0; + +err_batch_unlock: + dma_resv_unlock(pw->batch->resv); +err_trampoline: + if (trampoline) + i915_active_release(&trampoline->active); +err_shadow: + i915_active_release(&shadow->active); +err_batch: + i915_active_release(&eb->batch->active); +err_free: + kfree(pw); + return err; +} + +static int eb_parse(struct i915_execbuffer *eb) +{ + struct intel_engine_pool_node *pool; + struct i915_vma *shadow, *trampoline; + unsigned int len; + int err; + + if (!eb_use_cmdparser(eb)) + return 0; + + len = eb->batch_len; + if (!CMDPARSER_USES_GGTT(eb->i915)) { + /* + * ppGTT backed shadow buffers must be mapped RO, to prevent + * post-scan tampering + */ + if (!eb->context->vm->has_read_only) { + DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + return -EINVAL; + } + } else { + len += I915_CMD_PARSER_TRAMPOLINE_SIZE; + } + + pool = intel_engine_get_pool(eb->engine, len); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER); + if (IS_ERR(shadow)) { + err = PTR_ERR(shadow); + goto err; + } + i915_gem_object_set_readonly(shadow->obj); + + trampoline = NULL; + if (CMDPARSER_USES_GGTT(eb->i915)) { + trampoline = shadow; + + shadow = shadow_batch_pin(pool->obj, + &eb->engine->gt->ggtt->vm, + PIN_GLOBAL); + if (IS_ERR(shadow)) { + err = PTR_ERR(shadow); + shadow = trampoline; + goto err_shadow; + } + + eb->batch_flags |= I915_DISPATCH_SECURE; + } + + err = eb_parse_pipeline(eb, shadow, trampoline); + if (err) + goto err_trampoline; - eb->vma[eb->buffer_count] = i915_vma_get(vma); + eb->vma[eb->buffer_count] = i915_vma_get(shadow); eb->flags[eb->buffer_count] = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; - vma->exec_flags = &eb->flags[eb->buffer_count]; + shadow->exec_flags = &eb->flags[eb->buffer_count]; eb->buffer_count++; -out: - i915_gem_object_unpin_pages(shadow_batch_obj); - return vma; + eb->trampoline = trampoline; + eb->batch_start_offset = 0; + eb->batch = shadow; + + shadow->private = pool; + return 0; + +err_trampoline: + if (trampoline) + i915_vma_unpin(trampoline); +err_shadow: + i915_vma_unpin(shadow); +err: + intel_engine_pool_put(pool); + return err; } static void add_to_client(struct i915_request *rq, struct drm_file *file) { - rq->file_priv = file->driver_priv; - list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); + struct drm_i915_file_private *file_priv = file->driver_priv; + + rq->file_priv = file_priv; + + spin_lock(&file_priv->mm.lock); + list_add_tail(&rq->client_link, &file_priv->mm.request_list); + spin_unlock(&file_priv->mm.lock); } 
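eb_parse_pipeline() above expresses its ordering purely through fences: the parse work first waits for all outstanding writes to the user batch, then publishes itself as a shared (read) fence on the batch so the batch stays unwritten while it is scanned, and as the exclusive fence on the shadow so execution of the shadow waits for the scan to finish. A toy model of why shared versus exclusive matters, with fences reduced to plain counters and flags:

	#include <stdbool.h>
	#include <stdio.h>

	struct buf {
		bool write_busy;	/* exclusive fence outstanding */
		int  readers;		/* shared fences outstanding */
	};

	static bool can_write(const struct buf *b) { return !b->write_busy && !b->readers; }
	static bool can_read(const struct buf *b)  { return !b->write_busy; }

	int main(void)
	{
		struct buf batch = { 0 }, shadow = { 0 };

		/* Parse job starts: batch gains a read fence, shadow a write fence. */
		batch.readers++;
		shadow.write_busy = true;

		printf("batch writable during parse?   %s\n", can_write(&batch) ? "yes" : "no");
		printf("shadow executable during parse? %s\n", can_read(&shadow) ? "yes" : "no");

		/* Parse job signals its fence. */
		batch.readers--;
		shadow.write_busy = false;

		printf("shadow executable after parse?  %s\n", can_read(&shadow) ? "yes" : "no");
		return 0;
	}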
static int eb_submit(struct i915_execbuffer *eb) @@ -2090,9 +2197,28 @@ static int eb_submit(struct i915_execbuffer *eb) if (err) return err; + if (eb->trampoline) { + GEM_BUG_ON(eb->batch_start_offset); + err = eb->engine->emit_bb_start(eb->request, + eb->trampoline->node.start + + eb->batch_len, + 0, 0); + if (err) + return err; + } + + if (intel_context_nopreempt(eb->context)) + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags); + return 0; } +static int num_vcs_engines(const struct drm_i915_private *i915) +{ + return hweight64(INTEL_INFO(i915)->engine_mask & + GENMASK_ULL(VCS0 + I915_MAX_VCS - 1, VCS0)); +} + /* * Find one BSD ring to dispatch the corresponding BSD command. * The engine index is returned. @@ -2105,8 +2231,8 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, /* Check whether the file_priv has already selected one ring. */ if ((int)file_priv->bsd_engine < 0) - file_priv->bsd_engine = atomic_fetch_xor(1, - &dev_priv->mm.bsd_engine_dispatch_index); + file_priv->bsd_engine = + get_random_int() % num_vcs_engines(dev_priv); return file_priv->bsd_engine; } @@ -2119,18 +2245,57 @@ static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_VEBOX] = VECS0 }; -static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) +static struct i915_request *eb_throttle(struct intel_context *ce) +{ + struct intel_ring *ring = ce->ring; + struct intel_timeline *tl = ce->timeline; + struct i915_request *rq; + + /* + * Completely unscientific finger-in-the-air estimates for suitable + * maximum user request size (to avoid blocking) and then backoff. + */ + if (intel_ring_update_space(ring) >= PAGE_SIZE) + return NULL; + + /* + * Find a request that after waiting upon, there will be at least half + * the ring available. The hysteresis allows us to compete for the + * shared ring and should mean that we sleep less often prior to + * claiming our resources, but not so long that the ring completely + * drains before we can submit our next request. + */ + list_for_each_entry(rq, &tl->requests, link) { + if (rq->ring != ring) + continue; + + if (__intel_ring_space(rq->postfix, + ring->emit, ring->size) > ring->size / 2) + break; + } + if (&rq->link == &tl->requests) + return NULL; /* weird, we will check again later for real */ + + return i915_request_get(rq); +} + +static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) { + struct intel_timeline *tl; + struct i915_request *rq; int err; /* * ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. */ - err = i915_terminally_wedged(eb->i915); + err = intel_gt_terminally_wedged(ce->engine->gt); if (err) return err; + if (unlikely(intel_context_is_banned(ce))) + return -EIO; + /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for @@ -2140,14 +2305,60 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) if (err) return err; + /* + * Take a local wakeref for preparing to dispatch the execbuf as + * we expect to access the hardware fairly frequently in the + * process, and require the engine to be kept awake between accesses. + * Upon dispatch, we acquire another prolonged wakeref that we hold + * until the timeline is idle, which in turn releases the wakeref + * taken on the engine, and the parent device. 
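eb_throttle() above replaces the old __eb_wait_for_ring(): rather than waiting while holding struct_mutex, it scans the context timeline for the first request whose completion would leave at least half the ring free and has the caller wait on that request outside the timeline lock. A toy calculation of that half-ring hysteresis (the guard band and wrap handling of the driver's __intel_ring_space() are simplified away):

	#include <stdio.h>

	#define RING_SIZE 4096u

	/* Space that would be free once the request whose tail sits at 'head'
	 * has been consumed, while new commands are still emitted at 'tail'. */
	static unsigned int ring_space(unsigned int head, unsigned int tail)
	{
		return (head - tail - 1) & (RING_SIZE - 1);
	}

	int main(void)
	{
		/* postfix offsets of in-flight requests, oldest first */
		const unsigned int postfix[] = { 512, 1024, 2048, 3584 };
		const unsigned int emit = 3840;	/* current emit (tail) offset */
		unsigned int i;

		for (i = 0; i < sizeof(postfix) / sizeof(postfix[0]); i++) {
			if (ring_space(postfix[i], emit) > RING_SIZE / 2) {
				printf("throttle on request %u (postfix=%u)\n",
				       i, postfix[i]);
				return 0;
			}
		}
		printf("no suitable request; re-check later\n");
		return 0;
	}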
+ */ + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto err_unpin; + } + + intel_context_enter(ce); + rq = eb_throttle(ce); + + intel_context_timeline_unlock(tl); + + if (rq) { + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) { + i915_request_put(rq); + err = -EINTR; + goto err_exit; + } + + i915_request_put(rq); + } + eb->engine = ce->engine; eb->context = ce; return 0; + +err_exit: + mutex_lock(&tl->mutex); + intel_context_exit(ce); + intel_context_timeline_unlock(tl); +err_unpin: + intel_context_unpin(ce); + return err; } -static void eb_unpin_context(struct i915_execbuffer *eb) +static void eb_unpin_engine(struct i915_execbuffer *eb) { - intel_context_unpin(eb->context); + struct intel_context *ce = eb->context; + struct intel_timeline *tl = ce->timeline; + + mutex_lock(&tl->mutex); + intel_context_exit(ce); + mutex_unlock(&tl->mutex); + + intel_context_unpin(ce); } static unsigned int @@ -2165,7 +2376,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, return -1; } - if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { + if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { @@ -2192,9 +2403,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, } static int -eb_select_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_pin_engine(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { struct intel_context *ce; unsigned int idx; @@ -2209,7 +2420,7 @@ eb_select_engine(struct i915_execbuffer *eb, if (IS_ERR(ce)) return PTR_ERR(ce); - err = eb_pin_context(eb, ce); + err = __eb_pin_engine(eb, ce); intel_context_put(ce); return err; @@ -2351,6 +2562,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct drm_i915_gem_exec_object2 *exec, struct drm_syncobj **fences) { + struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct dma_fence *exec_fence = NULL; @@ -2362,7 +2574,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); - eb.i915 = to_i915(dev); + eb.i915 = i915; eb.file = file; eb.args = args; if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) @@ -2379,11 +2591,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.buffer_count = args->buffer_count; eb.batch_start_offset = args->batch_start_offset; eb.batch_len = args->batch_len; + eb.trampoline = NULL; eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { + if (INTEL_GEN(i915) >= 11) + return -ENODEV; + + /* Return -EPERM to trigger fallback code on old binaries. */ + if (!HAS_SECURE_BATCHES(i915)) + return -EPERM; + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) - return -EPERM; + return -EPERM; eb.batch_flags |= I915_DISPATCH_SECURE; } @@ -2427,25 +2647,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - /* - * Take a local wakeref for preparing to dispatch the execbuf as - * we expect to access the hardware fairly frequently in the - * process. Upon first dispatch, we acquire another prolonged - * wakeref that we hold until the GPU has been idle for at least - * 100ms. 
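gen8_dispatch_bsd_engine() above now picks a video engine at random (replacing the alternating atomic_fetch_xor() scheme), and eb_select_legacy_ring() gates the round-robin path on num_vcs_engines(), which is just a population count over the device's engine mask. A standalone sketch of both ideas; the bit positions below are made up for illustration and do not match the driver's engine enum:

	#include <stdio.h>
	#include <stdlib.h>
	#include <time.h>

	#define VCS0          2		/* illustrative bit position only */
	#define I915_MAX_VCS  4

	static int num_vcs_engines(unsigned long long engine_mask)
	{
		unsigned long long vcs_mask = ((1ULL << I915_MAX_VCS) - 1) << VCS0;

		return __builtin_popcountll(engine_mask & vcs_mask);
	}

	int main(void)
	{
		/* pretend the device exposes two VCS engines */
		unsigned long long engine_mask = (1ULL << VCS0) | (1ULL << (VCS0 + 1));
		int n = num_vcs_engines(engine_mask);

		srand((unsigned int)time(NULL));
		printf("VCS engines: %d\n", n);
		printf("dispatch BSD work to VCS%d\n", rand() % n);
		return 0;
	}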
- */ - intel_gt_pm_get(eb.i915); + err = eb_pin_engine(&eb, file, args); + if (unlikely(err)) + goto err_context; err = i915_mutex_lock_interruptible(dev); if (err) - goto err_rpm; - - err = eb_select_engine(&eb, file, args); - if (unlikely(err)) - goto err_unlock; - - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ - if (unlikely(err)) goto err_engine; err = eb_relocate(&eb); @@ -2473,34 +2680,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } - if (eb_use_cmdparser(&eb)) { - struct i915_vma *vma; - - vma = eb_parse(&eb, drm_is_current_master(file)); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_vma; - } - - if (vma) { - /* - * Batch parsed and accepted: - * - * Set the DISPATCH_SECURE bit to remove the NON_SECURE - * bit from MI_BATCH_BUFFER_START commands issued in - * the dispatch_execbuffer implementations. We - * specifically don't want that set on batches the - * command parser has accepted. - */ - eb.batch_flags |= I915_DISPATCH_SECURE; - eb.batch_start_offset = 0; - eb.batch = vma; - } - } - if (eb.batch_len == 0) eb.batch_len = eb.batch->size - eb.batch_start_offset; + err = eb_parse(&eb); + if (err) + goto err_vma; + /* * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. @@ -2572,11 +2758,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, * to explicitly hold another reference here. */ eb.request->batch = eb.batch; + if (eb.batch->private) + intel_engine_pool_mark_active(eb.batch->private, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); err_request: add_to_client(eb.request, file); + i915_request_get(eb.request); i915_request_add(eb.request); if (fences) @@ -2592,19 +2781,22 @@ err_request: fput(out_fence->file); } } + i915_request_put(eb.request); err_batch_unpin: if (eb.batch_flags & I915_DISPATCH_SECURE) i915_vma_unpin(eb.batch); + if (eb.batch->private) + intel_engine_pool_put(eb.batch->private); err_vma: if (eb.exec) eb_release_vmas(&eb); -err_engine: - eb_unpin_context(&eb); -err_unlock: + if (eb.trampoline) + i915_vma_unpin(eb.trampoline); mutex_unlock(&dev->struct_mutex); -err_rpm: - intel_gt_pm_put(eb.i915); +err_engine: + eb_unpin_engine(&eb); +err_context: i915_gem_context_put(eb.gem_context); err_destroy: eb_destroy(&eb); @@ -2670,8 +2862,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, exec2.flags = I915_EXEC_RENDER; i915_execbuffer2_set_context_id(exec2, 0); - if (!i915_gem_check_execbuffer(&exec2)) - return -EINVAL; + err = i915_gem_check_execbuffer(&exec2); + if (err) + return err; /* Copy in the exec list from userland */ exec_list = kvmalloc_array(count, sizeof(*exec_list), @@ -2748,8 +2941,9 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - if (!i915_gem_check_execbuffer(args)) - return -EINVAL; + err = i915_gem_check_execbuffer(args); + if (err) + return err; /* Allocate an extra slot for use by the command parser */ exec2_list = kvmalloc_array(count + 1, eb_element_size(), diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c index cf0439e6be83..2f6100ec2608 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c @@ -69,8 +69,7 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) i915_sw_fence_init(&stub->chain, stub_notify); dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock, - 
to_i915(obj->base.dev)->mm.unordered_timeline, - 0); + 0, 0); if (i915_sw_fence_await_reservation(&stub->chain, obj->base.resv, NULL, @@ -78,7 +77,7 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) I915_FENCE_GFP) < 0) goto err; - reservation_object_add_excl_fence(obj->base.resv, &stub->dma); + dma_resv_add_excl_fence(obj->base.resv, &stub->dma); return &stub->dma; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 0c41e04ab8fa..9cfb0e41ff06 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -117,13 +117,6 @@ create_st: goto err; } - /* Mark the pages as dontneed whilst they are still pinned. As soon - * as they are unpinned they are allowed to be reaped by the shrinker, - * and the caller is expected to repopulate - the contents of this - * object are only valid whilst active and pinned. - */ - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -143,7 +136,6 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, internal_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { @@ -172,6 +164,7 @@ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915, phys_addr_t size) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; @@ -186,7 +179,16 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops); + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class); + + /* + * Mark the object as volatile, such that the pages are marked as + * dontneed whilst they are still pinned. As soon as they are unpinned + * they are allowed to be reaped by the shrinker, and the caller is + * expected to repopulate - the contents of this object are only valid + * whilst active and pinned. 
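The new i915_gem_object_set_volatile() call below replaces the old trick of setting obj->mm.madv to I915_MADV_DONTNEED at creation: the backing pages of these internal objects may be reaped whenever they are unpinned, so their contents are only meaningful while pinned. As a loose userspace analogy (not driver code), madvise(MADV_DONTNEED) gives anonymous memory a similar "contents may vanish, repopulate before reuse" contract:

	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>

	int main(void)
	{
		const size_t len = 4096;
		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (buf == MAP_FAILED)
			return 1;

		strcpy(buf, "scratch contents");	/* valid only while "pinned" */

		/* "Unpin": allow the kernel to reclaim the pages at any time. */
		madvise(buf, len, MADV_DONTNEED);

		/* Contents must now be assumed lost; repopulate before reuse. */
		printf("after MADV_DONTNEED: '%s'\n", buf);	/* typically empty */

		munmap(buf, len);
		return 0;
	}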
+ */ + i915_gem_object_set_volatile(obj); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h index ddc7f2a52b3e..87d8b27f426d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h @@ -28,8 +28,8 @@ int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); +int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); int i915_gem_pread_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c new file mode 100644 index 000000000000..70543c83df06 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_lmem.h" +#include "i915_drv.h" + +const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = { + .flags = I915_GEM_OBJECT_HAS_IOMEM, + + .get_pages = i915_gem_object_get_pages_buddy, + .put_pages = i915_gem_object_put_pages_buddy, + .release = i915_gem_object_release_memory_region, +}; + +bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) +{ + return obj->ops == &i915_gem_lmem_obj_ops; +} + +struct drm_i915_gem_object * +i915_gem_object_create_lmem(struct drm_i915_private *i915, + resource_size_t size, + unsigned int flags) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], + size, flags); +} + +struct drm_i915_gem_object * +__i915_gem_lmem_object_create(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + static struct lock_class_key lock_class; + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class); + + obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + + i915_gem_object_init_memory_region(obj, mem, flags); + + return obj; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h new file mode 100644 index 000000000000..fc3f15580fe3 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_LMEM_H +#define __I915_GEM_LMEM_H + +#include <linux/types.h> + +struct drm_i915_private; +struct drm_i915_gem_object; +struct intel_memory_region; + +extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops; + +bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +i915_gem_object_create_lmem(struct drm_i915_private *i915, + resource_size_t size, + unsigned int flags); + +struct drm_i915_gem_object * +__i915_gem_lmem_object_create(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); + +#endif /* !__I915_GEM_LMEM_H */ diff --git 
a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 39a661927d8e..0b6a442108de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -4,15 +4,22 @@ * Copyright © 2014-2016 Intel Corporation */ +#include <linux/anon_inodes.h> #include <linux/mman.h> +#include <linux/pfn_t.h> #include <linux/sizes.h> +#include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" + #include "i915_drv.h" #include "i915_gem_gtt.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" +#include "i915_gem_mman.h" +#include "i915_trace.h" +#include "i915_user_extensions.h" #include "i915_vma.h" -#include "intel_drv.h" static inline bool __vma_matches(struct vm_area_struct *vma, struct file *filp, @@ -99,9 +106,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, up_write(&mm->mmap_sem); if (IS_ERR_VALUE(addr)) goto err; - - /* This may race, but that's ok, it only gets set */ - WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); } i915_gem_object_put(obj); @@ -144,6 +148,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) * 3 - Remove implicit set-domain(GTT) and synchronisation on initial * pagefault; swapin remains transparent. * + * 4 - Support multiple fault handlers per object depending on object's + * backing storage (a.k.a. MMAP_OFFSET). + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. It can cause machine @@ -171,7 +178,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 3; + return 4; } static inline struct i915_ggtt_view @@ -197,29 +204,80 @@ compute_partial_view(const struct drm_i915_gem_object *obj, return view; } -/** - * i915_gem_fault - fault a page into the GTT - * @vmf: fault info - * - * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped - * from userspace. The fault handler takes care of binding the object to - * the GTT (if needed), allocating and programming a fence register (again, - * only if needed based on whether the old reg is still valid or the object - * is tiled) and inserting a new PTE into the faulting process. - * - * Note that the faulting process may involve evicting existing objects - * from the GTT and/or fence registers to make room. So performance may - * suffer if the GTT working set is large or there are few fence registers - * left. - * - * The current feature set supported by i915_gem_fault() and thus GTT mmaps - * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). - */ -vm_fault_t i915_gem_fault(struct vm_fault *vmf) +static vm_fault_t i915_error_to_vmf_fault(int err) +{ + switch (err) { + default: + WARN_ONCE(err, "unhandled error in %s: %i\n", __func__, err); + /* fallthrough */ + case -EIO: /* shmemfs failure from swap device */ + case -EFAULT: /* purged object */ + case -ENODEV: /* bad object, how did you get here! */ + case -ENXIO: /* unable to access backing store (on device) */ + return VM_FAULT_SIGBUS; + + case -ENOSPC: /* shmemfs allocation failure */ + case -ENOMEM: /* our allocation failure */ + return VM_FAULT_OOM; + + case 0: + case -EAGAIN: + case -ERESTARTSYS: + case -EINTR: + case -EBUSY: + /* + * EBUSY is ok: this just means that another thread + * already did the job. 
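i915_gem_mmap_gtt_version() is bumped to 4 below to advertise the MMAP_OFFSET-style, per-backing-store fault handlers added in this file. Userspace can probe for this through the existing GETPARAM ioctl; a minimal sketch, with the device path and error handling kept deliberately simple (the uapi header location may differ between kernel and libdrm installs):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	#include <drm/i915_drm.h>

	int main(void)
	{
		int fd = open("/dev/dri/renderD128", O_RDWR);
		int version = 0;
		struct drm_i915_getparam gp = {
			.param = I915_PARAM_MMAP_GTT_VERSION,
			.value = &version,
		};

		if (fd < 0)
			return 1;

		if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
			printf("mmap_gtt version: %d%s\n", version,
			       version >= 4 ? " (MMAP_OFFSET supported)" : "");

		close(fd);
		return 0;
	}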
+ */ + return VM_FAULT_NOPAGE; + } +} + +static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) +{ + struct vm_area_struct *area = vmf->vma; + struct i915_mmap_offset *mmo = area->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + resource_size_t iomap; + int err; + + /* Sanity check that we allow writing into this object */ + if (unlikely(i915_gem_object_is_readonly(obj) && + area->vm_flags & VM_WRITE)) + return VM_FAULT_SIGBUS; + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out; + + iomap = -1; + if (!i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE)) { + iomap = obj->mm.region->iomap.base; + iomap -= obj->mm.region->region.start; + } + + /* PTEs are revoked in obj->ops->put_pages() */ + err = remap_io_sg(area, + area->vm_start, area->vm_end - area->vm_start, + obj->mm.pages->sgl, iomap); + + if (area->vm_flags & VM_WRITE) { + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + obj->mm.dirty = true; + } + + i915_gem_object_unpin_pages(obj); + +out: + return i915_error_to_vmf_fault(err); +} + +static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) struct vm_area_struct *area = vmf->vma; - struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); + struct i915_mmap_offset *mmo = area->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; struct drm_device *dev = obj->base.dev; struct drm_i915_private *i915 = to_i915(dev); struct intel_runtime_pm *rpm = &i915->runtime_pm; @@ -246,34 +304,22 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) wakeref = intel_runtime_pm_get(rpm); - srcu = i915_reset_trylock(i915); - if (srcu < 0) { - ret = srcu; - goto err_rpm; - } - - ret = i915_mutex_lock_interruptible(dev); + ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); if (ret) - goto err_reset; - - /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { - ret = -EFAULT; - goto err_unlock; - } + goto err_rpm; /* Now pin it into the GTT as needed */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | - PIN_NONBLOCK | - PIN_NONFAULT); + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); if (IS_ERR(vma)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); unsigned int flags; - flags = PIN_MAPPABLE; + flags = PIN_MAPPABLE | PIN_NOSEARCH; if (view.type == I915_GGTT_VIEW_NORMAL) flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ @@ -281,18 +327,26 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) * Userspace is now writing through an untracked VMA, abandon * all hope that the hardware is able to track future writes. */ - obj->frontbuffer_ggtt_origin = ORIGIN_CPU; vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); - if (IS_ERR(vma) && !view.type) { + if (IS_ERR(vma)) { flags = PIN_MAPPABLE; view.type = I915_GGTT_VIEW_PARTIAL; vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); } + + /* The entire mappable GGTT is pinned? Unexpected! */ + GEM_BUG_ON(vma == ERR_PTR(-ENOSPC)); } if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto err_unlock; + goto err_reset; + } + + /* Access to snoopable pages through the GTT is incoherent. 
*/ + if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { + ret = -EFAULT; + goto err_unpin; } ret = i915_vma_pin_fence(vma); @@ -308,101 +362,67 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_fence; - /* Mark as being mmapped into userspace for later revocation */ assert_rpm_wakelock_held(rpm); + + /* Mark as being mmapped into userspace for later revocation */ + mutex_lock(&i915->ggtt.vm.mutex); if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) list_add(&obj->userfault_link, &i915->ggtt.userfault_list); - if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) + mutex_unlock(&i915->ggtt.vm.mutex); + + /* Track the mmo associated with the fenced vma */ + vma->mmo = mmo; + + if (IS_ACTIVE(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); - GEM_BUG_ON(!obj->userfault_count); - i915_vma_set_ggtt_write(vma); + if (write) { + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + i915_vma_set_ggtt_write(vma); + obj->mm.dirty = true; + } err_fence: i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); -err_unlock: - mutex_unlock(&dev->struct_mutex); err_reset: - i915_reset_unlock(i915, srcu); + intel_gt_reset_unlock(ggtt->vm.gt, srcu); err_rpm: intel_runtime_pm_put(rpm, wakeref); i915_gem_object_unpin_pages(obj); err: - switch (ret) { - case -EIO: - /* - * We eat errors when the gpu is terminally wedged to avoid - * userspace unduly crashing (gl has no provisions for mmaps to - * fail). But any other -EIO isn't ours (e.g. swap in failure) - * and so needs to be reported. - */ - if (!i915_terminally_wedged(i915)) - return VM_FAULT_SIGBUS; - /* else, fall through */ - case -EAGAIN: - /* - * EAGAIN means the gpu is hung and we'll wait for the error - * handler to reset everything when re-faulting in - * i915_mutex_lock_interruptible. - */ - case 0: - case -ERESTARTSYS: - case -EINTR: - case -EBUSY: - /* - * EBUSY is ok: this just means that another thread - * already did the job. - */ - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - case -ENOSPC: - case -EFAULT: - return VM_FAULT_SIGBUS; - default: - WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); - return VM_FAULT_SIGBUS; - } + return i915_error_to_vmf_fault(ret); } -void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj) { struct i915_vma *vma; GEM_BUG_ON(!obj->userfault_count); - obj->userfault_count = 0; - list_del(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); - for_each_ggtt_vma(vma, obj) - i915_vma_unset_userfault(vma); + i915_vma_revoke_mmap(vma); + + GEM_BUG_ON(obj->userfault_count); } -/** - * i915_gem_object_release_mmap - remove physical page mappings - * @obj: obj in question - * - * Preserve the reservation of the mmapping with the DRM core code, but - * relinquish ownership of the pages back to the system. - * +/* * It is vital that we remove the page mapping if we have mapped a tiled * object through the GTT and then lose the fence register due to * resource pressure. Similarly if the object has been moved out of the * aperture, than pages mapped into userspace must be revoked. Removing the * mapping will then trigger a page fault on the next user access, allowing - * fixup by i915_gem_fault(). + * fixup by vm_fault_gtt(). 
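When the whole object no longer fits in the mappable aperture, vm_fault_gtt() above falls back to a partial GGTT view around the faulting page, at least MIN_CHUNK_PAGES (1MiB worth of pages) at a time, via compute_partial_view(). A toy version of that sizing; tiling adjustments and the driver's exact rounding helpers are omitted, and the chunk is assumed to be a power of two:

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define MIN_CHUNK_PAGES ((1u << 20) >> PAGE_SHIFT)	/* 1MiB of pages */

	struct partial_view {
		unsigned int first_page;
		unsigned int nr_pages;
	};

	static struct partial_view
	compute_partial(unsigned int obj_pages, unsigned int fault_page)
	{
		struct partial_view v;
		unsigned int chunk = MIN_CHUNK_PAGES;

		v.first_page = fault_page & ~(chunk - 1);	/* align down */
		v.nr_pages = chunk;
		if (v.first_page + v.nr_pages > obj_pages)
			v.nr_pages = obj_pages - v.first_page;	/* clamp to object */
		return v;
	}

	int main(void)
	{
		struct partial_view v = compute_partial(1000, 700);

		printf("map pages [%u, %u)\n", v.first_page,
		       v.first_page + v.nr_pages);
		return 0;
	}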
*/ -void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +static void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); intel_wakeref_t wakeref; - /* Serialisation between user GTT access and our code depends upon + /* + * Serialisation between user GTT access and our code depends upon * revoking the CPU's PTE whilst the mutex is held. The next user * pagefault then has to wait until we release the mutex. * @@ -410,15 +430,16 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) * requirement that operations to the GGTT be made holding the RPM * wakeref. */ - lockdep_assert_held(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); + mutex_lock(&i915->ggtt.vm.mutex); if (!obj->userfault_count) goto out; - __i915_gem_object_release_mmap(obj); + __i915_gem_object_release_mmap_gtt(obj); - /* Ensure that the CPU's PTE are revoked and there are not outstanding + /* + * Ensure that the CPU's PTE are revoked and there are not outstanding * memory transactions from userspace before we return. The TLB * flushing implied above by changing the PTE above *should* be * sufficient, an extra barrier here just provides us with a bit @@ -428,59 +449,217 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) wmb(); out: + mutex_unlock(&i915->ggtt.vm.mutex); intel_runtime_pm_put(&i915->runtime_pm, wakeref); } -static int create_mmap_offset(struct drm_i915_gem_object *obj) +void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj) +{ + struct i915_mmap_offset *mmo, *mn; + + spin_lock(&obj->mmo.lock); + rbtree_postorder_for_each_entry_safe(mmo, mn, + &obj->mmo.offsets, offset) { + /* + * vma_node_unmap for GTT mmaps handled already in + * __i915_gem_object_release_mmap_gtt + */ + if (mmo->mmap_type == I915_MMAP_TYPE_GTT) + continue; + + spin_unlock(&obj->mmo.lock); + drm_vma_node_unmap(&mmo->vma_node, + obj->base.dev->anon_inode->i_mapping); + spin_lock(&obj->mmo.lock); + } + spin_unlock(&obj->mmo.lock); +} + +/** + * i915_gem_object_release_mmap - remove physical page mappings + * @obj: obj in question + * + * Preserve the reservation of the mmapping with the DRM core code, but + * relinquish ownership of the pages back to the system. 
+ */ +void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +{ + i915_gem_object_release_mmap_gtt(obj); + i915_gem_object_release_mmap_offset(obj); +} + +static struct i915_mmap_offset * +lookup_mmo(struct drm_i915_gem_object *obj, + enum i915_mmap_type mmap_type) +{ + struct rb_node *rb; + + spin_lock(&obj->mmo.lock); + rb = obj->mmo.offsets.rb_node; + while (rb) { + struct i915_mmap_offset *mmo = + rb_entry(rb, typeof(*mmo), offset); + + if (mmo->mmap_type == mmap_type) { + spin_unlock(&obj->mmo.lock); + return mmo; + } + + if (mmo->mmap_type < mmap_type) + rb = rb->rb_right; + else + rb = rb->rb_left; + } + spin_unlock(&obj->mmo.lock); + + return NULL; +} + +static struct i915_mmap_offset * +insert_mmo(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo) +{ + struct rb_node *rb, **p; + + spin_lock(&obj->mmo.lock); + rb = NULL; + p = &obj->mmo.offsets.rb_node; + while (*p) { + struct i915_mmap_offset *pos; + + rb = *p; + pos = rb_entry(rb, typeof(*pos), offset); + + if (pos->mmap_type == mmo->mmap_type) { + spin_unlock(&obj->mmo.lock); + drm_vma_offset_remove(obj->base.dev->vma_offset_manager, + &mmo->vma_node); + kfree(mmo); + return pos; + } + + if (pos->mmap_type < mmo->mmap_type) + p = &rb->rb_right; + else + p = &rb->rb_left; + } + rb_link_node(&mmo->offset, rb, p); + rb_insert_color(&mmo->offset, &obj->mmo.offsets); + spin_unlock(&obj->mmo.lock); + + return mmo; +} + +static struct i915_mmap_offset * +mmap_offset_attach(struct drm_i915_gem_object *obj, + enum i915_mmap_type mmap_type, + struct drm_file *file) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_mmap_offset *mmo; int err; - err = drm_gem_create_mmap_offset(&obj->base); + mmo = lookup_mmo(obj, mmap_type); + if (mmo) + goto out; + + mmo = kmalloc(sizeof(*mmo), GFP_KERNEL); + if (!mmo) + return ERR_PTR(-ENOMEM); + + mmo->obj = obj; + mmo->mmap_type = mmap_type; + drm_vma_node_reset(&mmo->vma_node); + + err = drm_vma_offset_add(obj->base.dev->vma_offset_manager, + &mmo->vma_node, obj->base.size / PAGE_SIZE); if (likely(!err)) - return 0; + goto insert; /* Attempt to reap some mmap space from dead objects */ - do { - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); + if (err) + goto err; - i915_gem_drain_freed_objects(i915); - err = drm_gem_create_mmap_offset(&obj->base); - if (!err) - break; + i915_gem_drain_freed_objects(i915); + err = drm_vma_offset_add(obj->base.dev->vma_offset_manager, + &mmo->vma_node, obj->base.size / PAGE_SIZE); + if (err) + goto err; - } while (flush_delayed_work(&i915->gem.retire_work)); +insert: + mmo = insert_mmo(obj, mmo); + GEM_BUG_ON(lookup_mmo(obj, mmap_type) != mmo); +out: + if (file) + drm_vma_node_allow(&mmo->vma_node, file); + return mmo; - return err; +err: + kfree(mmo); + return ERR_PTR(err); } -int -i915_gem_mmap_gtt(struct drm_file *file, - struct drm_device *dev, - u32 handle, - u64 *offset) +static int +__assign_mmap_offset(struct drm_file *file, + u32 handle, + enum i915_mmap_type mmap_type, + u64 *offset) { struct drm_i915_gem_object *obj; - int ret; + struct i915_mmap_offset *mmo; + int err; obj = i915_gem_object_lookup(file, handle); if (!obj) return -ENOENT; - ret = create_mmap_offset(obj); - if (ret == 0) - *offset = drm_vma_node_offset_addr(&obj->base.vma_node); + if (mmap_type == I915_MMAP_TYPE_GTT && + i915_gem_object_never_bind_ggtt(obj)) { + err = -ENODEV; + goto out; + } + + if (mmap_type != 
I915_MMAP_TYPE_GTT && + !i915_gem_object_type_has(obj, + I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_HAS_IOMEM)) { + err = -ENODEV; + goto out; + } + + mmo = mmap_offset_attach(obj, mmap_type, file); + if (IS_ERR(mmo)) { + err = PTR_ERR(mmo); + goto out; + } + *offset = drm_vma_node_offset_addr(&mmo->vma_node); + err = 0; +out: i915_gem_object_put(obj); - return ret; + return err; +} + +int +i915_gem_dumb_mmap_offset(struct drm_file *file, + struct drm_device *dev, + u32 handle, + u64 *offset) +{ + enum i915_mmap_type mmap_type; + + if (boot_cpu_has(X86_FEATURE_PAT)) + mmap_type = I915_MMAP_TYPE_WC; + else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt)) + return -ENODEV; + else + mmap_type = I915_MMAP_TYPE_GTT; + + return __assign_mmap_offset(file, handle, mmap_type, offset); } /** - * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing + * i915_gem_mmap_offset_ioctl - prepare an object for GTT mmap'ing * @dev: DRM device * @data: GTT mapping ioctl data * @file: GEM object info @@ -495,12 +674,220 @@ i915_gem_mmap_gtt(struct drm_file *file, * userspace. */ int -i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) +i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct drm_i915_gem_mmap_offset *args = data; + enum i915_mmap_type type; + int err; + + /* + * Historically we failed to check args.pad and args.offset + * and so we cannot use those fields for user input and we cannot + * add -EINVAL for them as the ABI is fixed, i.e. old userspace + * may be feeding in garbage in those fields. + * + * if (args->pad) return -EINVAL; is verbotten! + */ + + err = i915_user_extensions(u64_to_user_ptr(args->extensions), + NULL, 0, NULL); + if (err) + return err; + + switch (args->flags) { + case I915_MMAP_OFFSET_GTT: + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return -ENODEV; + type = I915_MMAP_TYPE_GTT; + break; + + case I915_MMAP_OFFSET_WC: + if (!boot_cpu_has(X86_FEATURE_PAT)) + return -ENODEV; + type = I915_MMAP_TYPE_WC; + break; + + case I915_MMAP_OFFSET_WB: + type = I915_MMAP_TYPE_WB; + break; + + case I915_MMAP_OFFSET_UC: + if (!boot_cpu_has(X86_FEATURE_PAT)) + return -ENODEV; + type = I915_MMAP_TYPE_UC; + break; + + default: + return -EINVAL; + } + + return __assign_mmap_offset(file, args->handle, type, &args->offset); +} + +static void vm_open(struct vm_area_struct *vma) +{ + struct i915_mmap_offset *mmo = vma->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + + GEM_BUG_ON(!obj); + i915_gem_object_get(obj); +} + +static void vm_close(struct vm_area_struct *vma) +{ + struct i915_mmap_offset *mmo = vma->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + + GEM_BUG_ON(!obj); + i915_gem_object_put(obj); +} + +static const struct vm_operations_struct vm_ops_gtt = { + .fault = vm_fault_gtt, + .open = vm_open, + .close = vm_close, +}; + +static const struct vm_operations_struct vm_ops_cpu = { + .fault = vm_fault_cpu, + .open = vm_open, + .close = vm_close, +}; + +static int singleton_release(struct inode *inode, struct file *file) +{ + struct drm_i915_private *i915 = file->private_data; + + cmpxchg(&i915->gem.mmap_singleton, file, NULL); + drm_dev_put(&i915->drm); + + return 0; +} + +static const struct file_operations singleton_fops = { + .owner = THIS_MODULE, + .release = singleton_release, +}; + +static struct file *mmap_singleton(struct drm_i915_private *i915) +{ + struct file *file; + + rcu_read_lock(); + file = 
i915->gem.mmap_singleton; + if (file && !get_file_rcu(file)) + file = NULL; + rcu_read_unlock(); + if (file) + return file; + + file = anon_inode_getfile("i915.gem", &singleton_fops, i915, O_RDWR); + if (IS_ERR(file)) + return file; + + /* Everyone shares a single global address space */ + file->f_mapping = i915->drm.anon_inode->i_mapping; + + smp_store_mb(i915->gem.mmap_singleton, file); + drm_dev_get(&i915->drm); + + return file; +} + +/* + * This overcomes the limitation in drm_gem_mmap's assignment of a + * drm_gem_object as the vma->vm_private_data. Since we need to + * be able to resolve multiple mmap offsets which could be tied + * to a single gem object. + */ +int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) { - struct drm_i915_gem_mmap_gtt *args = data; + struct drm_vma_offset_node *node; + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + struct drm_i915_gem_object *obj = NULL; + struct i915_mmap_offset *mmo = NULL; + struct file *anon; + + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + + rcu_read_lock(); + drm_vma_offset_lock_lookup(dev->vma_offset_manager); + node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, + vma->vm_pgoff, + vma_pages(vma)); + if (node && drm_vma_node_is_allowed(node, priv)) { + /* + * Skip 0-refcnted objects as it is in the process of being + * destroyed and will be invalid when the vma manager lock + * is released. + */ + mmo = container_of(node, struct i915_mmap_offset, vma_node); + obj = i915_gem_object_get_rcu(mmo->obj); + } + drm_vma_offset_unlock_lookup(dev->vma_offset_manager); + rcu_read_unlock(); + if (!obj) + return node ? -EACCES : -EINVAL; + + if (i915_gem_object_is_readonly(obj)) { + if (vma->vm_flags & VM_WRITE) { + i915_gem_object_put(obj); + return -EINVAL; + } + vma->vm_flags &= ~VM_MAYWRITE; + } - return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); + anon = mmap_singleton(to_i915(dev)); + if (IS_ERR(anon)) { + i915_gem_object_put(obj); + return PTR_ERR(anon); + } + + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_private_data = mmo; + + /* + * We keep the ref on mmo->obj, not vm_file, but we require + * vma->vm_file->f_mapping, see vma_link(), for later revocation. + * Our userspace is accustomed to having per-file resource cleanup + * (i.e. contexts, objects and requests) on their close(fd), which + * requires avoiding extraneous references to their filp, hence why + * we prefer to use an anonymous file for their mmaps. 
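+	 * Because every such mmap shares the one anonymous address_space,
+	 * a single unmap_mapping_range() (via drm_vma_node_unmap()) is
+	 * enough to revoke the PTEs of every process mapping the object.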
+ */ + fput(vma->vm_file); + vma->vm_file = anon; + + switch (mmo->mmap_type) { + case I915_MMAP_TYPE_WC: + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = &vm_ops_cpu; + break; + + case I915_MMAP_TYPE_WB: + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + vma->vm_ops = &vm_ops_cpu; + break; + + case I915_MMAP_TYPE_UC: + vma->vm_page_prot = + pgprot_noncached(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = &vm_ops_cpu; + break; + + case I915_MMAP_TYPE_GTT: + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = &vm_ops_gtt; + break; + } + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); + + return 0; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h new file mode 100644 index 000000000000..862e01b7cb69 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_MMAN_H__ +#define __I915_GEM_MMAN_H__ + +#include <linux/mm_types.h> +#include <linux/types.h> + +struct drm_device; +struct drm_file; +struct drm_i915_gem_object; +struct file; +struct i915_mmap_offset; +struct mutex; + +int i915_gem_mmap_gtt_version(void); +int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma); + +int i915_gem_dumb_mmap_offset(struct drm_file *file_priv, + struct drm_device *dev, + u32 handle, u64 *offset); + +void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj); +void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); +void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj); + +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index be6caccce0c5..35985218bd85 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -22,13 +22,17 @@ * */ -#include "display/intel_frontbuffer.h" +#include <linux/sched/mm.h> +#include "display/intel_frontbuffer.h" +#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" +#include "i915_gem_mman.h" #include "i915_gem_object.h" #include "i915_globals.h" +#include "i915_trace.h" static struct i915_global_object { struct i915_global base; @@ -45,35 +49,26 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) return kmem_cache_free(global.slab_objects, obj); } -static void -frontbuffer_retire(struct i915_active_request *active, - struct i915_request *request) -{ - struct drm_i915_gem_object *obj = - container_of(active, typeof(*obj), frontbuffer_write); - - intel_fb_obj_flush(obj, ORIGIN_CS); -} - void i915_gem_object_init(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_object_ops *ops) + const struct drm_i915_gem_object_ops *ops, + struct lock_class_key *key) { - mutex_init(&obj->mm.lock); + __mutex_init(&obj->mm.lock, "obj->mm.lock", key); spin_lock_init(&obj->vma.lock); INIT_LIST_HEAD(&obj->vma.list); + INIT_LIST_HEAD(&obj->mm.link); + INIT_LIST_HEAD(&obj->lut_list); - INIT_LIST_HEAD(&obj->batch_pool_link); + + spin_lock_init(&obj->mmo.lock); + obj->mmo.offsets = RB_ROOT; init_rcu_head(&obj->rcu); obj->ops = ops; - obj->frontbuffer_ggtt_origin = ORIGIN_GTT; - i915_active_request_init(&obj->frontbuffer_write, - NULL, frontbuffer_retire); - obj->mm.madv = I915_MADV_WILLNEED; INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 
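For reference, a minimal userspace sketch of the mmap-offset flow introduced above: i915_gem_mmap_offset_ioctl() hands out a fake offset for the requested mapping type, and i915_gem_mmap() later resolves that offset to the matching vm_ops. This is only a sketch, assuming the uapi names added by this series (struct drm_i915_gem_mmap_offset, I915_MMAP_OFFSET_WC, DRM_IOCTL_I915_GEM_MMAP_OFFSET) and a render node at /dev/dri/renderD128, with error handling kept to a bare minimum.

/*
 * Hypothetical userspace usage; the uapi names and the render node path
 * are assumptions taken from this series, not a guaranteed interface.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <drm/i915_drm.h>

int main(void)
{
	struct drm_i915_gem_create create = { .size = 4096 };
	struct drm_i915_gem_mmap_offset arg = { .flags = I915_MMAP_OFFSET_WC };
	void *ptr;
	int fd;

	fd = open("/dev/dri/renderD128", O_RDWR);
	if (fd < 0)
		return 1;

	/* Create a 4KiB GEM object to map. */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
		return 1;

	/* Ask for a fake offset describing a write-combined CPU mapping. */
	arg.handle = create.handle;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg))
		return 1;

	/* mmap() against the DRM fd; faults are served by vm_fault_cpu(). */
	ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, arg.offset);
	if (ptr == MAP_FAILED)
		return 1;

	memset(ptr, 0xa5, 4096);
	munmap(ptr, 4096);
	close(fd);
	return 0;
}

The same fake-offset machinery backs I915_MMAP_OFFSET_WB, I915_MMAP_OFFSET_UC and the legacy GTT mapping; only the vm_page_prot and vm_ops selected in i915_gem_mmap() differ.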
mutex_init(&obj->mm.get_page.lock); @@ -105,6 +100,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { struct drm_i915_gem_object *obj = to_intel_bo(gem); struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_mmap_offset *mmo, *mn; struct i915_lut_handle *lut, *ln; LIST_HEAD(close); @@ -120,6 +116,11 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) } i915_gem_object_unlock(obj); + spin_lock(&obj->mmo.lock); + rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) + drm_vma_node_revoke(&mmo->vma_node, file); + spin_unlock(&obj->mmo.lock); + list_for_each_entry_safe(lut, ln, &close, obj_link) { struct i915_gem_context *ctx = lut->ctx; struct i915_vma *vma; @@ -146,6 +147,19 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) } } +static void __i915_gem_free_object_rcu(struct rcu_head *head) +{ + struct drm_i915_gem_object *obj = + container_of(head, typeof(*obj), rcu); + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + dma_resv_fini(&obj->base._resv); + i915_gem_object_free(obj); + + GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); + atomic_dec(&i915->mm.free_count); +} + static void __i915_gem_free_objects(struct drm_i915_private *i915, struct llist_node *freed) { @@ -154,125 +168,106 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, wakeref = intel_runtime_pm_get(&i915->runtime_pm); llist_for_each_entry_safe(obj, on, freed, freed) { - struct i915_vma *vma, *vn; + struct i915_mmap_offset *mmo, *mn; trace_i915_gem_object_destroy(obj); - mutex_lock(&i915->drm.struct_mutex); - - GEM_BUG_ON(i915_gem_object_is_active(obj)); - list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { - GEM_BUG_ON(i915_vma_is_active(vma)); - vma->flags &= ~I915_VMA_PIN_MASK; - i915_vma_destroy(vma); + if (!list_empty(&obj->vma.list)) { + struct i915_vma *vma; + + /* + * Note that the vma keeps an object reference while + * it is active, so it *should* not sleep while we + * destroy it. Our debug code errs insits it *might*. + * For the moment, play along. + */ + spin_lock(&obj->vma.lock); + while ((vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { + GEM_BUG_ON(vma->obj != obj); + spin_unlock(&obj->vma.lock); + + __i915_vma_put(vma); + + spin_lock(&obj->vma.lock); + } + spin_unlock(&obj->vma.lock); } - GEM_BUG_ON(!list_empty(&obj->vma.list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); - /* - * This serializes freeing with the shrinker. Since the free - * is delayed, first by RCU then by the workqueue, we want the - * shrinker to be able to free pages of unreferenced objects, - * or else we may oom whilst there are plenty of deferred - * freed objects. 
- */ - if (i915_gem_object_has_pages(obj) && - i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; + i915_gem_object_release_mmap(obj); - spin_lock_irqsave(&i915->mm.obj_lock, flags); - list_del_init(&obj->mm.link); - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + rbtree_postorder_for_each_entry_safe(mmo, mn, + &obj->mmo.offsets, + offset) { + drm_vma_offset_remove(obj->base.dev->vma_offset_manager, + &mmo->vma_node); + kfree(mmo); } - - mutex_unlock(&i915->drm.struct_mutex); + obj->mmo.offsets = RB_ROOT; GEM_BUG_ON(atomic_read(&obj->bind_count)); GEM_BUG_ON(obj->userfault_count); - GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); GEM_BUG_ON(!list_empty(&obj->lut_list)); - if (obj->ops->release) - obj->ops->release(obj); - atomic_set(&obj->mm.pages_pin_count, 0); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); GEM_BUG_ON(i915_gem_object_has_pages(obj)); + bitmap_free(obj->bit_17); if (obj->base.import_attach) drm_prime_gem_destroy(&obj->base, NULL); - drm_gem_object_release(&obj->base); - - bitmap_free(obj->bit_17); - i915_gem_object_free(obj); + drm_gem_free_mmap_offset(&obj->base); - GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); - atomic_dec(&i915->mm.free_count); + if (obj->ops->release) + obj->ops->release(obj); - cond_resched(); + /* But keep the pointer alive for RCU-protected lookups */ + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); } intel_runtime_pm_put(&i915->runtime_pm, wakeref); } void i915_gem_flush_free_objects(struct drm_i915_private *i915) { - struct llist_node *freed; - - /* Free the oldest, most stale object to keep the free_list short */ - freed = NULL; - if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ - /* Only one consumer of llist_del_first() allowed */ - spin_lock(&i915->mm.free_lock); - freed = llist_del_first(&i915->mm.free_list); - spin_unlock(&i915->mm.free_lock); - } - if (unlikely(freed)) { - freed->next = NULL; + struct llist_node *freed = llist_del_all(&i915->mm.free_list); + + if (unlikely(freed)) __i915_gem_free_objects(i915, freed); - } } static void __i915_gem_free_work(struct work_struct *work) { struct drm_i915_private *i915 = container_of(work, struct drm_i915_private, mm.free_work); - struct llist_node *freed; - - /* - * All file-owned VMA should have been released by this point through - * i915_gem_close_object(), or earlier by i915_gem_context_close(). - * However, the object may also be bound into the global GTT (e.g. - * older GPUs without per-process support, or for direct access through - * the GTT either for the user or for scanout). Those VMA still need to - * unbound now. - */ - - spin_lock(&i915->mm.free_lock); - while ((freed = llist_del_all(&i915->mm.free_list))) { - spin_unlock(&i915->mm.free_lock); - __i915_gem_free_objects(i915, freed); - if (need_resched()) - return; - - spin_lock(&i915->mm.free_lock); - } - spin_unlock(&i915->mm.free_lock); + i915_gem_flush_free_objects(i915); } -static void __i915_gem_free_object_rcu(struct rcu_head *head) +void i915_gem_free_object(struct drm_gem_object *gem_obj) { - struct drm_i915_gem_object *obj = - container_of(head, typeof(*obj), rcu); + struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); struct drm_i915_private *i915 = to_i915(obj->base.dev); + GEM_BUG_ON(i915_gem_object_is_framebuffer(obj)); + /* - * We reuse obj->rcu for the freed list, so we had better not treat - * it like a rcu_head from this point forwards. And we expect all - * objects to be freed via this path. 
+ * Before we free the object, make sure any pure RCU-only + * read-side critical sections are complete, e.g. + * i915_gem_busy_ioctl(). For the corresponding synchronized + * lookup see i915_gem_object_lookup_rcu(). */ - destroy_rcu_head(&obj->rcu); + atomic_inc(&i915->mm.free_count); + + /* + * This serializes freeing with the shrinker. Since the free + * is delayed, first by RCU then by the workqueue, we want the + * shrinker to be able to free pages of unreferenced objects, + * or else we may oom whilst there are plenty of deferred + * freed objects. + */ + i915_gem_object_make_unshrinkable(obj); /* * Since we require blocking on struct_mutex to unbind the freed @@ -288,27 +283,6 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head) queue_work(i915->wq, &i915->mm.free_work); } -void i915_gem_free_object(struct drm_gem_object *gem_obj) -{ - struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - - /* - * Before we free the object, make sure any pure RCU-only - * read-side critical sections are complete, e.g. - * i915_gem_busy_ioctl(). For the corresponding synchronized - * lookup see i915_gem_object_lookup_rcu(). - */ - atomic_inc(&to_i915(obj->base.dev)->mm.free_count); - call_rcu(&obj->rcu, __i915_gem_free_object_rcu); -} - -static inline enum fb_op_origin -fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? - obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) { return !(obj->cache_level == I915_CACHE_NONE || @@ -319,7 +293,6 @@ void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; assert_object_held(obj); @@ -329,17 +302,14 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, switch (obj->write_domain) { case I915_GEM_DOMAIN_GTT: - i915_gem_flush_ggtt_writes(dev_priv); - - intel_fb_obj_flush(obj, - fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); - + spin_lock(&obj->vma.lock); for_each_ggtt_vma(vma, obj) { - if (vma->iomap) - continue; - - i915_vma_unset_ggtt_write(vma); + if (i915_vma_unset_ggtt_write(vma)) + intel_gt_flush_ggtt_writes(vma->vm->gt); } + spin_unlock(&obj->vma.lock); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); break; case I915_GEM_DOMAIN_WC: @@ -359,6 +329,30 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, obj->write_domain = 0; } +void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + struct intel_frontbuffer *front; + + front = __intel_frontbuffer_get(obj); + if (front) { + intel_frontbuffer_flush(front, origin); + intel_frontbuffer_put(front); + } +} + +void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + struct intel_frontbuffer *front; + + front = __intel_frontbuffer_get(obj); + if (front) { + intel_frontbuffer_invalidate(front, origin); + intel_frontbuffer_put(front); + } +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index dfebd5706f16..9c86f2dea947 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -13,9 +13,10 @@ #include <drm/i915_drm.h> +#include "display/intel_frontbuffer.h" #include 
"i915_gem_object_types.h" - #include "i915_gem_gtt.h" +#include "i915_vma_types.h" void i915_gem_init__objects(struct drm_i915_private *i915); @@ -23,12 +24,14 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void); void i915_gem_object_free(struct drm_i915_gem_object *obj); void i915_gem_object_init(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_object_ops *ops); + const struct drm_i915_gem_object_ops *ops, + struct lock_class_key *key); struct drm_i915_gem_object * -i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size); +i915_gem_object_create_shmem(struct drm_i915_private *i915, + resource_size_t size); struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, - const void *data, size_t size); + const void *data, resource_size_t size); extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops; void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, @@ -67,21 +70,29 @@ i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) } static inline struct drm_i915_gem_object * +i915_gem_object_get_rcu(struct drm_i915_gem_object *obj) +{ + if (obj && !kref_get_unless_zero(&obj->base.refcount)) + obj = NULL; + + return obj; +} + +static inline struct drm_i915_gem_object * i915_gem_object_lookup(struct drm_file *file, u32 handle) { struct drm_i915_gem_object *obj; rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, handle); - if (obj && !kref_get_unless_zero(&obj->base.refcount)) - obj = NULL; + obj = i915_gem_object_get_rcu(obj); rcu_read_unlock(); return obj; } __deprecated -extern struct drm_gem_object * +struct drm_gem_object * drm_gem_object_lookup(struct drm_file *file, u32 handle); __attribute__((nonnull)) @@ -99,22 +110,27 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) __drm_gem_object_put(&obj->base); } -#define assert_object_held(obj) reservation_object_assert_held((obj)->base.resv) +#define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv) static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) { - reservation_object_lock(obj->base.resv, NULL); + dma_resv_lock(obj->base.resv, NULL); +} + +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +{ + return dma_resv_trylock(obj->base.resv); } static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) { - return reservation_object_lock_interruptible(obj->base.resv, NULL); + return dma_resv_lock_interruptible(obj->base.resv, NULL); } static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) { - reservation_object_unlock(obj->base.resv); + dma_resv_unlock(obj->base.resv); } struct dma_fence * @@ -125,49 +141,74 @@ void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj, static inline void i915_gem_object_set_readonly(struct drm_i915_gem_object *obj) { - obj->base.vma_node.readonly = true; + obj->flags |= I915_BO_READONLY; } static inline bool i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj) { - return obj->base.vma_node.readonly; + return obj->flags & I915_BO_READONLY; +} + +static inline bool +i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_ALLOC_CONTIGUOUS; +} + +static inline bool +i915_gem_object_is_volatile(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_ALLOC_VOLATILE; +} + +static inline void +i915_gem_object_set_volatile(struct drm_i915_gem_object *obj) +{ + obj->flags |= I915_BO_ALLOC_VOLATILE; +} + +static inline bool 
+i915_gem_object_type_has(const struct drm_i915_gem_object *obj, + unsigned long flags) +{ + return obj->ops->flags & flags; } static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE); } static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE); } static inline bool i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY); } static inline bool -i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) +i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_GGTT); } static inline bool -i915_gem_object_is_active(const struct drm_i915_gem_object *obj) +i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) { - return READ_ONCE(obj->active_count); + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL); } static inline bool i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) { - return READ_ONCE(obj->framebuffer_references); + return READ_ONCE(obj->frontbuffer); } static inline unsigned int @@ -239,10 +280,27 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ + I915_MM_NORMAL = 0, + /* + * Only used by struct_mutex, when called "recursively" from + * direct-reclaim-esque. Safe because there is only every one + * struct_mutex in the entire system. + */ + I915_MM_SHRINKER = 1, + /* + * Used for obj->mm.lock when allocating pages. Safe because the object + * isn't yet on any LRU, and therefore the shrinker can't deadlock on + * it. As soon as the object has pages, obj->mm.lock nests within + * fs_reclaim. 
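+	 * Sharing the numeric value 1 with I915_MM_SHRINKER is fine:
+	 * lockdep subclasses are scoped per lock class, and the two
+	 * annotations are used on different mutexes (struct_mutex vs
+	 * obj->mm.lock), so the numbering never collides.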
+ */ + I915_MM_GET_PAGES = 1, +}; + static inline int __must_check i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - might_lock(&obj->mm.lock); + might_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (atomic_inc_not_zero(&obj->mm.pages_pin_count)) return 0; @@ -285,13 +343,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) __i915_gem_object_unpin_pages(obj); } -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ - I915_MM_NORMAL = 0, - I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */ -}; - -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass); +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_object_truncate(struct drm_i915_gem_object *obj); void i915_gem_object_writeback(struct drm_i915_gem_object *obj); @@ -344,9 +396,6 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } -void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); -void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); - void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains); @@ -373,7 +422,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) struct dma_fence *fence; rcu_read_lock(); - fence = reservation_object_get_excl_rcu(obj->base.resv); + fence = dma_resv_get_excl_rcu(obj->base.resv); rcu_read_unlock(); if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) @@ -400,6 +449,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, unsigned int flags); void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma); +void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); +void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); +void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj); + static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { if (obj->cache_dirty) @@ -408,7 +461,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) return true; - return obj->pin_global; /* currently in use by HW, keep flushed */ + /* Currently in use by HW (display engine)? Keep flushed. 
*/ + return i915_gem_object_is_framebuffer(obj); } static inline void __start_cpu_write(struct drm_i915_gem_object *obj) @@ -425,6 +479,26 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, const struct i915_sched_attr *attr); -#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX) + +void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin); +void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin); + +static inline void +i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + if (unlikely(rcu_access_pointer(obj->frontbuffer))) + __i915_gem_object_flush_frontbuffer(obj, origin); +} + +static inline void +i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + if (unlikely(rcu_access_pointer(obj->frontbuffer))) + __i915_gem_object_invalidate_frontbuffer(obj, origin); +} #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index cb42e3a312e2..70809d8897cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -3,59 +3,137 @@ * Copyright © 2019 Intel Corporation */ -#include "i915_gem_object_blt.h" - +#include "i915_drv.h" +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_pool.h" +#include "gt/intel_gt.h" +#include "gt/intel_ring.h" #include "i915_gem_clflush.h" -#include "intel_drv.h" +#include "i915_gem_object_blt.h" -int intel_emit_vma_fill_blt(struct i915_request *rq, - struct i915_vma *vma, - u32 value) +struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, + struct i915_vma *vma, + u32 value) { - u32 *cs; - - cs = intel_ring_begin(rq, 8); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - if (INTEL_GEN(rq->i915) >= 8) { - *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); - *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cs++ = 0; - *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = lower_32_bits(vma->node.start); - *cs++ = upper_32_bits(vma->node.start); - *cs++ = value; - *cs++ = MI_NOOP; - } else { - *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cs++ = 0; - *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = vma->node.start; - *cs++ = value; - *cs++ = MI_NOOP; - *cs++ = MI_NOOP; + struct drm_i915_private *i915 = ce->vm->i915; + const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ + struct intel_engine_pool_node *pool; + struct i915_vma *batch; + u64 offset; + u64 count; + u64 rem; + u32 size; + u32 *cmd; + int err; + + GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); + intel_engine_pm_get(ce->engine); + + count = div_u64(round_up(vma->size, block_size), block_size); + size = (1 + 8 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + pool = intel_engine_get_pool(ce->engine, size); + if (IS_ERR(pool)) { + err = PTR_ERR(pool); + goto out_pm; + } + + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_put; + } + + rem = vma->size; + offset = vma->node.start; + + do { + u32 size = min_t(u64, rem, block_size); + + GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + + if (INTEL_GEN(i915) >= 8) { + *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); + 
*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = value; + } else { + *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = offset; + *cmd++ = value; + } + + /* Allow ourselves to be preempted in between blocks. */ + *cmd++ = MI_ARB_CHECK; + + offset += size; + rem -= size; + } while (rem); + + *cmd = MI_BATCH_BUFFER_END; + intel_gt_chipset_flush(ce->vm->gt); + + i915_gem_object_unpin_map(pool->obj); + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; } - intel_ring_advance(rq, cs); + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + + batch->private = pool; + return batch; - return 0; +out_put: + intel_engine_pool_put(pool); +out_pm: + intel_engine_pm_put(ce->engine); + return ERR_PTR(err); +} + +int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) +{ + int err; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + if (unlikely(err)) + return err; + + return intel_engine_pool_mark_active(vma->private, rq); +} + +void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma) +{ + i915_vma_unpin(vma); + intel_engine_pool_put(vma->private); + intel_engine_pm_put(ce->engine); } int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, struct intel_context *ce, u32 value) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_gem_context *ctx = ce->gem_context; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct i915_request *rq; + struct i915_vma *batch; struct i915_vma *vma; int err; - /* XXX: ce->vm please */ - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -69,12 +147,22 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, i915_gem_object_unlock(obj); } - rq = i915_request_create(ce); + batch = intel_emit_vma_fill_blt(ce, vma, value); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unpin; + } + + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unpin; + goto out_batch; } + err = intel_emit_vma_mark_active(batch, rq); + if (unlikely(err)) + goto out_request; + err = i915_request_await_object(rq, obj, true); if (unlikely(err)) goto out_request; @@ -86,22 +174,229 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, } i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (unlikely(err)) goto out_request; - err = intel_emit_vma_fill_blt(rq, vma, value); + err = ce->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); out_request: if (unlikely(err)) i915_request_skip(rq, err); i915_request_add(rq); +out_batch: + intel_emit_vma_release(ce, batch); out_unpin: i915_vma_unpin(vma); return err; } +struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_vma *src, + struct i915_vma *dst) +{ + struct drm_i915_private *i915 = ce->vm->i915; + const u32 block_size = 
SZ_8M; /* ~1ms at 8GiB/s preemption delay */ + struct intel_engine_pool_node *pool; + struct i915_vma *batch; + u64 src_offset, dst_offset; + u64 count, rem; + u32 size, *cmd; + int err; + + GEM_BUG_ON(src->size != dst->size); + + GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); + intel_engine_pm_get(ce->engine); + + count = div_u64(round_up(dst->size, block_size), block_size); + size = (1 + 11 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + pool = intel_engine_get_pool(ce->engine, size); + if (IS_ERR(pool)) { + err = PTR_ERR(pool); + goto out_pm; + } + + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_put; + } + + rem = src->size; + src_offset = src->node.start; + dst_offset = dst->node.start; + + do { + size = min_t(u64, rem, block_size); + GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + + if (INTEL_GEN(i915) >= 9) { + *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); + *cmd++ = BLT_DEPTH_32 | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = lower_32_bits(dst_offset); + *cmd++ = upper_32_bits(dst_offset); + *cmd++ = 0; + *cmd++ = PAGE_SIZE; + *cmd++ = lower_32_bits(src_offset); + *cmd++ = upper_32_bits(src_offset); + } else if (INTEL_GEN(i915) >= 8) { + *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); + *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = lower_32_bits(dst_offset); + *cmd++ = upper_32_bits(dst_offset); + *cmd++ = 0; + *cmd++ = PAGE_SIZE; + *cmd++ = lower_32_bits(src_offset); + *cmd++ = upper_32_bits(src_offset); + } else { + *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; + *cmd++ = dst_offset; + *cmd++ = PAGE_SIZE; + *cmd++ = src_offset; + } + + /* Allow ourselves to be preempted in between blocks. 
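+	 * With block_size = SZ_8M and the blitter moving roughly 8GiB/s,
+	 * this inserts an arbitration point about every
+	 * 8MiB / 8GiB/s ~= 1ms, matching the preemption delay noted for
+	 * block_size above.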
*/ + *cmd++ = MI_ARB_CHECK; + + src_offset += size; + dst_offset += size; + rem -= size; + } while (rem); + + *cmd = MI_BATCH_BUFFER_END; + intel_gt_chipset_flush(ce->vm->gt); + + i915_gem_object_unpin_map(pool->obj); + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + + batch->private = pool; + return batch; + +out_put: + intel_engine_pool_put(pool); +out_pm: + intel_engine_pm_put(ce->engine); + return ERR_PTR(err); +} + +static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write) +{ + struct drm_i915_gem_object *obj = vma->obj; + + if (obj->cache_dirty & ~obj->cache_coherent) + i915_gem_clflush_object(obj, 0); + + return i915_request_await_object(rq, obj, write); +} + +int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct intel_context *ce) +{ + struct drm_gem_object *objs[] = { &src->base, &dst->base }; + struct i915_address_space *vm = ce->vm; + struct i915_vma *vma[2], *batch; + struct ww_acquire_ctx acquire; + struct i915_request *rq; + int err, i; + + vma[0] = i915_vma_instance(src, vm, NULL); + if (IS_ERR(vma[0])) + return PTR_ERR(vma[0]); + + err = i915_vma_pin(vma[0], 0, 0, PIN_USER); + if (unlikely(err)) + return err; + + vma[1] = i915_vma_instance(dst, vm, NULL); + if (IS_ERR(vma[1])) + goto out_unpin_src; + + err = i915_vma_pin(vma[1], 0, 0, PIN_USER); + if (unlikely(err)) + goto out_unpin_src; + + batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unpin_dst; + } + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_batch; + } + + err = intel_emit_vma_mark_active(batch, rq); + if (unlikely(err)) + goto out_request; + + err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire); + if (unlikely(err)) + goto out_request; + + for (i = 0; i < ARRAY_SIZE(vma); i++) { + err = move_to_gpu(vma[i], rq, i); + if (unlikely(err)) + goto out_unlock; + } + + for (i = 0; i < ARRAY_SIZE(vma); i++) { + unsigned int flags = i ? 
EXEC_OBJECT_WRITE : 0; + + err = i915_vma_move_to_active(vma[i], rq, flags); + if (unlikely(err)) + goto out_unlock; + } + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (unlikely(err)) + goto out_unlock; + } + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); +out_unlock: + drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire); +out_request: + if (unlikely(err)) + i915_request_skip(rq, err); + + i915_request_add(rq); +out_batch: + intel_emit_vma_release(ce, batch); +out_unpin_dst: + i915_vma_unpin(vma[1]); +out_unpin_src: + i915_vma_unpin(vma[0]); + return err; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_gem_object_blt.c" #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h index 7ec7de6ac0c0..243a43a87824 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h @@ -8,17 +8,30 @@ #include <linux/types.h> +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_pool.h" +#include "i915_vma.h" + struct drm_i915_gem_object; -struct intel_context; -struct i915_request; -struct i915_vma; -int intel_emit_vma_fill_blt(struct i915_request *rq, - struct i915_vma *vma, - u32 value); +struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, + struct i915_vma *vma, + u32 value); + +struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_vma *src, + struct i915_vma *dst); + +int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq); +void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma); int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, struct intel_context *ce, u32 value); +int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct intel_context *ce); + #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 18bf4f8d6d80..f64ad77e6b1e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -8,11 +8,13 @@ #define __I915_GEM_OBJECT_TYPES_H__ #include <drm/drm_gem.h> +#include <uapi/drm/i915_drm.h> #include "i915_active.h" #include "i915_selftest.h" struct drm_i915_gem_object; +struct intel_fronbuffer; /* * struct i915_lut_handle tracks the fast lookups from handle to vma used @@ -29,9 +31,11 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) -#define I915_GEM_OBJECT_IS_PROXY BIT(2) -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3) +#define I915_GEM_OBJECT_HAS_IOMEM BIT(1) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) +#define I915_GEM_OBJECT_IS_PROXY BIT(3) +#define I915_GEM_OBJECT_NO_GGTT BIT(4) +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5) /* Interface between the GEM object and its backing storage. 
* get_pages() is called once prior to the use of the associated set @@ -59,6 +63,21 @@ struct drm_i915_gem_object_ops { void (*release)(struct drm_i915_gem_object *obj); }; +enum i915_mmap_type { + I915_MMAP_TYPE_GTT = 0, + I915_MMAP_TYPE_WC, + I915_MMAP_TYPE_WB, + I915_MMAP_TYPE_UC, +}; + +struct i915_mmap_offset { + struct drm_vma_offset_node vma_node; + struct drm_i915_gem_object *obj; + enum i915_mmap_type mmap_type; + + struct rb_node offset; +}; + struct drm_i915_gem_object { struct drm_gem_object base; @@ -114,9 +133,19 @@ struct drm_i915_gem_object { unsigned int userfault_count; struct list_head userfault_link; - struct list_head batch_pool_link; + struct { + spinlock_t lock; /* Protects access to mmo offsets */ + struct rb_root offsets; + } mmo; + I915_SELFTEST_DECLARE(struct list_head st_link); + unsigned long flags; +#define I915_BO_ALLOC_CONTIGUOUS BIT(0) +#define I915_BO_ALLOC_VOLATILE BIT(1) +#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) +#define I915_BO_READONLY BIT(2) + /* * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit @@ -142,9 +171,7 @@ struct drm_i915_gem_object { */ u16 write_domain; - atomic_t frontbuffer_bits; - unsigned int frontbuffer_ggtt_origin; /* write once */ - struct i915_active_request frontbuffer_write; + struct intel_frontbuffer __rcu *frontbuffer; /** Current tiling stride for the object, if it's tiled. */ unsigned int tiling_and_stride; @@ -154,18 +181,34 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ atomic_t bind_count; - unsigned int active_count; - /** Count of how many global VMA are currently pinned for use by HW */ - unsigned int pin_global; struct { - struct mutex lock; /* protects the pages and their use */ + /* + * Protects the pages and their use. Do not use directly, but + * instead go through the pin/unpin interfaces. + */ + struct mutex lock; atomic_t pages_pin_count; + atomic_t shrink_pin; + + /** + * Memory region for this object. + */ + struct intel_memory_region *region; + /** + * List of memory region blocks allocated for this object. + */ + struct list_head blocks; + /** + * Element within memory_region->objects or region->purgeable + * if the object is marked as DONTNEED. Access is protected by + * region->obj_lock. + */ + struct list_head region_link; struct sg_table *pages; void *mapping; - /* TODO: whack some of this into the error state */ struct i915_page_sizes { /** * The sg mask of the pages sg_table. i.e the mask of @@ -226,9 +269,6 @@ struct drm_i915_gem_object { bool quirked:1; } mm; - /** References from framebuffers, locks out tiling changes. */ - unsigned int framebuffer_references; - /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 65eb430cedba..54aca5c9101e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -7,6 +7,8 @@ #include "i915_drv.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" +#include "i915_gem_lmem.h" +#include "i915_gem_mman.h" void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -18,6 +20,9 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->mm.lock); + if (i915_gem_object_is_volatile(obj)) + obj->mm.madv = I915_MADV_DONTNEED; + /* Make the pages coherent with the GPU (flushing any swapin). 
*/ if (obj->cache_dirty) { obj->write_domain = 0; @@ -71,6 +76,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, list = &i915->mm.shrink_list; list_add_tail(&obj->mm.link, list); + atomic_set(&obj->mm.shrink_pin, 0); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } } @@ -101,7 +107,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { int err; - err = mutex_lock_interruptible(&obj->mm.lock); + err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (err) return err; @@ -150,37 +156,30 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) rcu_read_unlock(); } +static void unmap_object(struct drm_i915_gem_object *obj, void *ptr) +{ + if (is_vmalloc_addr(ptr)) + vunmap(ptr); + else + kunmap(kmap_to_page(ptr)); +} + struct sg_table * __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *pages; pages = fetch_and_zero(&obj->mm.pages); if (IS_ERR_OR_NULL(pages)) return pages; - if (i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; + if (i915_gem_object_is_volatile(obj)) + obj->mm.madv = I915_MADV_WILLNEED; - spin_lock_irqsave(&i915->mm.obj_lock, flags); - - list_del(&obj->mm.link); - i915->mm.shrink_count--; - i915->mm.shrink_memory -= obj->base.size; - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - } + i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { - void *ptr; - - ptr = page_mask_bits(obj->mm.mapping); - if (is_vmalloc_addr(ptr)) - vunmap(ptr); - else - kunmap(kmap_to_page(ptr)); - + unmap_object(obj, page_mask_bits(obj->mm.mapping)); obj->mm.mapping = NULL; } @@ -190,8 +189,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) return pages; } -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass) +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; int err; @@ -202,12 +200,14 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, GEM_BUG_ON(atomic_read(&obj->bind_count)); /* May be called by shrinker from within get_pages() (on another bo) */ - mutex_lock_nested(&obj->mm.lock, subclass); + mutex_lock(&obj->mm.lock); if (unlikely(atomic_read(&obj->mm.pages_pin_count))) { err = -EBUSY; goto unlock; } + i915_gem_object_release_mmap_offset(obj); + /* * ->put_pages might need to allocate memory for the bit17 swizzle * array, hence protect them from being reaped by removing them from gtt @@ -234,36 +234,44 @@ unlock: return err; } +static inline pte_t iomap_pte(resource_size_t base, + dma_addr_t offset, + pgprot_t prot) +{ + return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot)); +} + /* The 'mapping' part of i915_gem_object_pin_map() below */ -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, +static void *i915_gem_object_map(struct drm_i915_gem_object *obj, enum i915_map_type type) { - unsigned long n_pages = obj->base.size >> PAGE_SHIFT; + unsigned long n_pte = obj->base.size >> PAGE_SHIFT; struct sg_table *sgt = obj->mm.pages; - struct sgt_iter sgt_iter; - struct page *page; - struct page *stack_pages[32]; - struct page **pages = stack_pages; - unsigned long i = 0; + pte_t *stack[32], **mem; + struct vm_struct *area; pgprot_t pgprot; - void *addr; + + if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC) + return NULL; /* A single page can always be kmapped */ - if (n_pages == 1 && type == I915_MAP_WB) + if (n_pte == 1 && type == 
I915_MAP_WB) return kmap(sg_page(sgt->sgl)); - if (n_pages > ARRAY_SIZE(stack_pages)) { + mem = stack; + if (n_pte > ARRAY_SIZE(stack)) { /* Too big for stack -- allocate temporary array instead */ - pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); - if (!pages) + mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL); + if (!mem) return NULL; } - for_each_sgt_page(page, sgt_iter, sgt) - pages[i++] = page; - - /* Check that we have the expected number of pages */ - GEM_BUG_ON(i != n_pages); + area = alloc_vm_area(obj->base.size, mem); + if (!area) { + if (mem != stack) + kvfree(mem); + return NULL; + } switch (type) { default: @@ -276,12 +284,31 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, pgprot = pgprot_writecombine(PAGE_KERNEL_IO); break; } - addr = vmap(pages, n_pages, 0, pgprot); - if (pages != stack_pages) - kvfree(pages); + if (i915_gem_object_has_struct_page(obj)) { + struct sgt_iter iter; + struct page *page; + pte_t **ptes = mem; + + for_each_sgt_page(page, iter, sgt) + **ptes++ = mk_pte(page, pgprot); + } else { + resource_size_t iomap; + struct sgt_iter iter; + pte_t **ptes = mem; + dma_addr_t addr; - return addr; + iomap = obj->mm.region->iomap.base; + iomap -= obj->mm.region->region.start; + + for_each_sgt_daddr(addr, iter, sgt) + **ptes++ = iomap_pte(iomap, addr, pgprot); + } + + if (mem != stack) + kvfree(mem); + + return area->addr; } /* get, pin, and map the pages of the object into kernel space */ @@ -289,14 +316,16 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, enum i915_map_type type) { enum i915_map_type has_type; + unsigned int flags; bool pinned; void *ptr; int err; - if (unlikely(!i915_gem_object_has_struct_page(obj))) + flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM; + if (!i915_gem_object_type_has(obj, flags)) return ERR_PTR(-ENXIO); - err = mutex_lock_interruptible(&obj->mm.lock); + err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (err) return ERR_PTR(err); @@ -325,10 +354,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, goto err_unpin; } - if (is_vmalloc_addr(ptr)) - vunmap(ptr); - else - kunmap(kmap_to_page(ptr)); + unmap_object(obj, ptr); ptr = obj->mm.mapping = NULL; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 2deac933cf59..b1b7c1b3038a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -13,8 +13,10 @@ #include <drm/drm_legacy.h> /* for drm_pci.h! 
*/ #include <drm/drm_pci.h> +#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_object.h" +#include "i915_gem_region.h" #include "i915_scatterlist.h" static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) @@ -60,7 +62,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) vaddr += PAGE_SIZE; } - i915_gem_chipset_flush(to_i915(obj->base.dev)); + intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) { @@ -132,16 +134,16 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, drm_pci_free(obj->base.dev, obj->phys_handle); } -static void -i915_gem_object_release_phys(struct drm_i915_gem_object *obj) +static void phys_release(struct drm_i915_gem_object *obj) { - i915_gem_object_unpin_pages(obj); + fput(obj->base.filp); } static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .get_pages = i915_gem_object_get_pages_phys, .put_pages = i915_gem_object_put_pages_phys, - .release = i915_gem_object_release_phys, + + .release = phys_release, }; int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) @@ -158,11 +160,11 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) if (obj->ops != &i915_gem_shmem_ops) return -EINVAL; - err = i915_gem_object_unbind(obj); + err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); if (err) return err; - mutex_lock(&obj->mm.lock); + mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (obj->mm.madv != I915_MADV_WILLNEED) { err = -EFAULT; @@ -190,8 +192,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) /* Perma-pin (until release) the physical set of pages */ __i915_gem_object_pin_pages(obj); - if (!IS_ERR_OR_NULL(pages)) + if (!IS_ERR_OR_NULL(pages)) { i915_gem_shmem_ops.put_pages(obj, pages); + i915_gem_object_release_memory_region(obj); + } mutex_unlock(&obj->mm.lock); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 914b5d4112bb..c8264eb036bf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -5,149 +5,19 @@ */ #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" -#include "i915_globals.h" - -static void call_idle_barriers(struct intel_engine_cs *engine) -{ - struct llist_node *node, *next; - - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - struct i915_active_request *active = - container_of((struct list_head *)node, - typeof(*active), link); - - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, NULL); - } -} - -static void i915_gem_park(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) { - call_idle_barriers(engine); /* cleanup after wedging */ - i915_gem_batch_pool_fini(&engine->batch_pool); - } - - i915_timelines_park(i915); - i915_vma_parked(i915); - - i915_globals_park(); -} - -static void idle_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.idle_work); - bool park; - - cancel_delayed_work_sync(&i915->gem.retire_work); - mutex_lock(&i915->drm.struct_mutex); - - intel_wakeref_lock(&i915->gt.wakeref); - park = !intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work); - 
intel_wakeref_unlock(&i915->gt.wakeref); - if (park) - i915_gem_park(i915); - else - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); - - mutex_unlock(&i915->drm.struct_mutex); -} - -static void retire_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.retire_work.work); - - /* Come back later if the device is busy... */ - if (mutex_trylock(&i915->drm.struct_mutex)) { - i915_retire_requests(i915); - mutex_unlock(&i915->drm.struct_mutex); - } - - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); -} - -static int pm_notifier(struct notifier_block *nb, - unsigned long action, - void *data) -{ - struct drm_i915_private *i915 = - container_of(nb, typeof(*i915), gem.pm_notifier); - - switch (action) { - case INTEL_GT_UNPARK: - i915_globals_unpark(); - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); - break; - - case INTEL_GT_PARK: - queue_work(i915->wq, &i915->gem.idle_work); - break; - } - - return NOTIFY_OK; -} - -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) -{ - bool result = !i915_terminally_wedged(i915); - - do { - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { - /* XXX hide warning from gem_eio */ - if (i915_modparams.reset) { - dev_err(i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* - * Forcibly cancel outstanding work and leave - * the gpu quiet. - */ - i915_gem_set_wedged(i915); - result = false; - } - } while (i915_retire_requests(i915) && result); - - GEM_BUG_ON(i915->gt.awake); - return result; -} - -bool i915_gem_load_power_context(struct drm_i915_private *i915) -{ - return switch_to_kernel_context_sync(i915); -} void i915_gem_suspend(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); - mutex_lock(&i915->drm.struct_mutex); - /* * We have to flush all the executing contexts to main memory so * that they can saved in the hibernation image. To ensure the last @@ -157,22 +27,9 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - switch_to_kernel_context_sync(i915); - - mutex_unlock(&i915->drm.struct_mutex); - - /* - * Assert that we successfully flushed all the work and - * reset the GPU back to its idle, low power state. - */ - GEM_BUG_ON(i915->gt.awake); - flush_work(&i915->gem.idle_work); - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + intel_gt_suspend_prepare(&i915->gt); i915_gem_drain_freed_objects(i915); - - intel_uc_suspend(i915); } static struct drm_i915_gem_object *first_mm_object(struct list_head *list) @@ -212,6 +69,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) * machine in an unusable condition. 
*/ + intel_gt_suspend_late(&i915->gt); + spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { LIST_HEAD(keep); @@ -236,60 +95,16 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) list_splice_tail(&keep, *phase); } spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - - intel_uc_sanitize(i915); - i915_gem_sanitize(i915); } void i915_gem_resume(struct drm_i915_private *i915) { - GEM_TRACE("\n"); - - WARN_ON(i915->gt.awake); - - mutex_lock(&i915->drm.struct_mutex); - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - - i915_gem_restore_gtt_mappings(i915); - i915_gem_restore_fences(i915); - - if (i915_gem_init_hw(i915)) - goto err_wedged; + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); /* * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ - if (intel_gt_resume(i915)) - goto err_wedged; - - intel_uc_resume(i915); - - /* Always reload a context for powersaving. */ - if (!i915_gem_load_power_context(i915)) - goto err_wedged; - -out_unlock: - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); - return; - -err_wedged: - if (!i915_reset_failed(i915)) { - dev_err(i915->drm.dev, - "Failed to re-initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(i915); - } - goto out_unlock; -} - -void i915_gem_init__pm(struct drm_i915_private *i915) -{ - INIT_WORK(&i915->gem.idle_work, idle_work_handler); - INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); - - i915->gem.pm_notifier.notifier_call = pm_notifier; - blocking_notifier_chain_register(&i915->gt.pm_notifications, - &i915->gem.pm_notifier); + intel_gt_resume(&i915->gt); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h index 6f7d5d11ac3b..26b78dbdc225 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h @@ -12,9 +12,6 @@ struct drm_i915_private; struct work_struct; -void i915_gem_init__pm(struct drm_i915_private *i915); - -bool i915_gem_load_power_context(struct drm_i915_private *i915); void i915_gem_resume(struct drm_i915_private *i915); void i915_gem_idle_work_handler(struct work_struct *work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c new file mode 100644 index 000000000000..1515384d7e0e --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "i915_gem_region.h" +#include "i915_drv.h" +#include "i915_trace.h" + +void +i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + __intel_memory_region_put_pages_buddy(obj->mm.region, &obj->mm.blocks); + + obj->mm.dirty = false; + sg_free_table(pages); + kfree(pages); +} + +int +i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mem = obj->mm.region; + struct list_head *blocks = &obj->mm.blocks; + resource_size_t size = obj->base.size; + resource_size_t prev_end; + struct i915_buddy_block *block; + unsigned int flags; + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + int ret; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, size >> ilog2(mem->mm.chunk_size), GFP_KERNEL)) { + kfree(st); + return -ENOMEM; + } + + flags = 
I915_ALLOC_MIN_PAGE_SIZE; + if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) + flags |= I915_ALLOC_CONTIGUOUS; + + ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks); + if (ret) + goto err_free_sg; + + GEM_BUG_ON(list_empty(blocks)); + + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + prev_end = (resource_size_t)-1; + + list_for_each_entry(block, blocks, link) { + u64 block_size, offset; + + block_size = min_t(u64, size, + i915_buddy_block_size(&mem->mm, block)); + offset = i915_buddy_block_offset(block); + + GEM_BUG_ON(overflows_type(block_size, sg->length)); + + if (offset != prev_end || + add_overflows_t(typeof(sg->length), sg->length, block_size)) { + if (st->nents) { + sg_page_sizes |= sg->length; + sg = __sg_next(sg); + } + + sg_dma_address(sg) = mem->region.start + offset; + sg_dma_len(sg) = block_size; + + sg->length = block_size; + + st->nents++; + } else { + sg->length += block_size; + sg_dma_len(sg) += block_size; + } + + prev_end = offset + block_size; + } + + sg_page_sizes |= sg->length; + sg_mark_end(sg); + i915_sg_trim(st); + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err_free_sg: + sg_free_table(st); + kfree(st); + return ret; +} + +void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, + struct intel_memory_region *mem, + unsigned long flags) +{ + INIT_LIST_HEAD(&obj->mm.blocks); + obj->mm.region = intel_memory_region_get(mem); + + obj->flags |= flags; + if (obj->base.size <= mem->min_page_size) + obj->flags |= I915_BO_ALLOC_CONTIGUOUS; + + mutex_lock(&mem->objects.lock); + + if (obj->flags & I915_BO_ALLOC_VOLATILE) + list_add(&obj->mm.region_link, &mem->objects.purgeable); + else + list_add(&obj->mm.region_link, &mem->objects.list); + + mutex_unlock(&mem->objects.lock); +} + +void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mem = obj->mm.region; + + mutex_lock(&mem->objects.lock); + list_del(&obj->mm.region_link); + mutex_unlock(&mem->objects.lock); + + intel_memory_region_put(mem); +} + +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct drm_i915_gem_object *obj; + + /* + * NB: Our use of resource_size_t for the size stems from using struct + * resource for the mem->region. We might need to revisit this in the + * future. + */ + + GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS); + + if (!mem) + return ERR_PTR(-ENODEV); + + size = round_up(size, mem->min_page_size); + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); + + /* + * XXX: There is a prevalence of the assumption that we fit the + * object's page count inside a 32bit _signed_ variable. Let's document + * this and catch if we ever need to fix it. In the meantime, if you do + * spot such a local variable, please consider fixing! 
+ */ + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = mem->ops->create_object(mem, size, flags); + if (!IS_ERR(obj)) + trace_i915_gem_object_create(obj); + + return obj; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h new file mode 100644 index 000000000000..f2ff6f8bff74 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_REGION_H__ +#define __I915_GEM_REGION_H__ + +#include <linux/types.h> + +struct intel_memory_region; +struct drm_i915_gem_object; +struct sg_table; + +int i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj); +void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, + struct sg_table *pages); + +void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, + struct intel_memory_region *mem, + unsigned long flags); +void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); + +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 19d9ecdb2894..a2a980d9d241 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -7,9 +7,12 @@ #include <linux/pagevec.h> #include <linux/swap.h> +#include "gem/i915_gem_region.h" #include "i915_drv.h" +#include "i915_gemfs.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" +#include "i915_trace.h" /* * Move pages to appropriate lru and release the pagevec, decrementing the @@ -25,6 +28,7 @@ static void check_release_pagevec(struct pagevec *pvec) static int shmem_get_pages(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mem = obj->mm.region; const unsigned long page_count = obj->base.size / PAGE_SIZE; unsigned long i; struct address_space *mapping; @@ -51,7 +55,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) * If there's no chance of allocating enough pages for the whole * object, bail early. 
*/ - if (page_count > totalram_pages()) + if (obj->base.size > resource_size(&mem->region)) return -ENOMEM; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -414,6 +418,13 @@ shmem_pwrite(struct drm_i915_gem_object *obj, return 0; } +static void shmem_release(struct drm_i915_gem_object *obj) +{ + i915_gem_object_release_memory_region(obj); + + fput(obj->base.filp); +} + const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE, @@ -424,11 +435,13 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .writeback = shmem_writeback, .pwrite = shmem_pwrite, + + .release = shmem_release, }; -static int create_shmem(struct drm_i915_private *i915, - struct drm_gem_object *obj, - size_t size) +static int __create_shmem(struct drm_i915_private *i915, + struct drm_gem_object *obj, + resource_size_t size) { unsigned long flags = VM_NORESERVE; struct file *filp; @@ -447,31 +460,24 @@ static int create_shmem(struct drm_i915_private *i915, return 0; } -struct drm_i915_gem_object * -i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) +static struct drm_i915_gem_object * +create_shmem(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { + static struct lock_class_key lock_class; + struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; struct address_space *mapping; unsigned int cache_level; gfp_t mask; int ret; - /* There is a prevalence of the assumption that we fit the object's - * page count inside a 32bit _signed_ variable. Let's document this and - * catch if we ever need to fix it. In the meantime, if you do spot - * such a local variable, please consider fixing! - */ - if (size >> PAGE_SHIFT > INT_MAX) - return ERR_PTR(-E2BIG); - - if (overflows_type(size, obj->base.size)) - return ERR_PTR(-E2BIG); - obj = i915_gem_object_alloc(); if (!obj) return ERR_PTR(-ENOMEM); - ret = create_shmem(i915, &obj->base, size); + ret = __create_shmem(i915, &obj->base, size); if (ret) goto fail; @@ -486,7 +492,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops); + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -510,7 +516,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) i915_gem_object_set_cache_coherency(obj, cache_level); - trace_i915_gem_object_create(obj); + i915_gem_object_init_memory_region(obj, mem, 0); return obj; @@ -519,14 +525,22 @@ fail: return ERR_PTR(ret); } +struct drm_i915_gem_object * +i915_gem_object_create_shmem(struct drm_i915_private *i915, + resource_size_t size) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], + size, 0); +} + /* Allocate a new GEM object and fill it with the supplied data */ struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, - const void *data, size_t size) + const void *data, resource_size_t size) { struct drm_i915_gem_object *obj; struct file *file; - size_t offset; + resource_size_t offset; int err; obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE)); @@ -569,3 +583,37 @@ fail: i915_gem_object_put(obj); return ERR_PTR(err); } + +static int init_shmem(struct intel_memory_region *mem) +{ + int err; + + err = i915_gemfs_init(mem->i915); + if (err) { 
+ DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", + err); + } + + intel_memory_region_set_name(mem, "system"); + + return 0; /* Don't error, we can simply fallback to the kernel mnt */ +} + +static void release_shmem(struct intel_memory_region *mem) +{ + i915_gemfs_fini(mem->i915); +} + +static const struct intel_memory_region_ops shmem_region_ops = { + .init = init_shmem, + .release = release_shmem, + .create_object = create_shmem, +}; + +struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915) +{ + return intel_memory_region_create(i915, 0, + totalram_pages() << PAGE_SHIFT, + PAGE_SIZE, 0, + &shmem_region_ops); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 3a926a8755c6..f7e4b39c734f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -16,40 +16,6 @@ #include "i915_trace.h" -static bool shrinker_lock(struct drm_i915_private *i915, - unsigned int flags, - bool *unlock) -{ - struct mutex *m = &i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(m)) { - case MUTEX_TRYLOCK_RECURSIVE: - *unlock = false; - return true; - - case MUTEX_TRYLOCK_FAILED: - *unlock = false; - if (flags & I915_SHRINK_ACTIVE && - mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0) - *unlock = true; - return *unlock; - - case MUTEX_TRYLOCK_SUCCESS: - *unlock = true; - return true; - } - - BUG(); -} - -static void shrinker_unlock(struct drm_i915_private *i915, bool unlock) -{ - if (!unlock) - return; - - mutex_unlock(&i915->drm.struct_mutex); -} - static bool swap_available(void) { return get_nr_swap_pages() > 0; @@ -61,7 +27,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (!i915_gem_object_is_shrinkable(obj)) return false; - /* Only report true if by unbinding the object and putting its pages + /* + * Only report true if by unbinding the object and putting its pages * we can actually make forward progress towards freeing physical * pages. * @@ -72,26 +39,26 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count)) return false; - /* If any vma are "permanently" pinned, it will prevent us from - * reclaiming the obj->mm.pages. We only allow scanout objects to claim - * a permanent pin, along with a few others like the context objects. - * To simplify the scan, and to avoid walking the list of vma under the - * object, we just check the count of its permanently pinned. - */ - if (READ_ONCE(obj->pin_global)) - return false; - - /* We can only return physical pages to the system if we can either + /* + * We can only return physical pages to the system if we can either * discard the contents (because the user has marked them as being * purgeable) or if we can move their contents out to swap. 
*/ return swap_available() || obj->mm.madv == I915_MADV_DONTNEED; } -static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) +static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, + unsigned long shrink) { - if (i915_gem_object_unbind(obj) == 0) - __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); + unsigned long flags; + + flags = 0; + if (shrink & I915_SHRINK_ACTIVE) + flags = I915_GEM_OBJECT_UNBIND_ACTIVE; + + if (i915_gem_object_unbind(obj, flags) == 0) + __i915_gem_object_put_pages(obj); + return !i915_gem_object_has_pages(obj); } @@ -154,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; - bool unlock; - - if (!shrinker_lock(i915, shrink, &unlock)) - return 0; /* * When shrinking the active list, we should also consider active @@ -169,7 +132,6 @@ i915_gem_shrink(struct drm_i915_private *i915, */ trace_i915_gem_shrink(i915, target, shrink); - i915_retire_requests(i915); /* * Unbinding of objects will require HW access; Let us not wake the @@ -230,8 +192,7 @@ i915_gem_shrink(struct drm_i915_private *i915, continue; if (!(shrink & I915_SHRINK_ACTIVE) && - (i915_gem_object_is_active(obj) || - i915_gem_object_is_framebuffer(obj))) + i915_gem_object_is_framebuffer(obj)) continue; if (!(shrink & I915_SHRINK_BOUND) && @@ -246,10 +207,9 @@ i915_gem_shrink(struct drm_i915_private *i915, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - if (unsafe_drop_pages(obj)) { + if (unsafe_drop_pages(obj, shrink)) { /* May arrive from get_pages on another bo */ - mutex_lock_nested(&obj->mm.lock, - I915_MM_SHRINKER); + mutex_lock(&obj->mm.lock); if (!i915_gem_object_has_pages(obj)) { try_to_writeback(obj, shrink); count += obj->base.size >> PAGE_SHIFT; @@ -269,10 +229,6 @@ i915_gem_shrink(struct drm_i915_private *i915, if (shrink & I915_SHRINK_BOUND) intel_runtime_pm_put(&i915->runtime_pm, wakeref); - i915_retire_requests(i915); - - shrinker_unlock(i915, unlock); - if (nr_scanned) *nr_scanned += scanned; return count; @@ -342,19 +298,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); unsigned long freed; - bool unlock; sc->nr_scanned = 0; - if (!shrinker_lock(i915, 0, &unlock)) - return SHRINK_STOP; - freed = i915_gem_shrink(i915, sc->nr_to_scan, &sc->nr_scanned, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); + I915_SHRINK_UNBOUND); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; @@ -369,8 +320,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) } } - shrinker_unlock(i915, unlock); - return sc->nr_scanned ? 
freed : SHRINK_STOP; } @@ -387,6 +336,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); @@ -422,16 +372,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr struct i915_vma *vma, *next; unsigned long freed_pages = 0; intel_wakeref_t wakeref; - bool unlock; - - if (!shrinker_lock(i915, 0, &unlock)) - return NOTIFY_DONE; - - /* Force everything onto the inactive lists */ - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT)) - goto out; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, @@ -448,27 +388,16 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr if (!vma->iomap || i915_vma_is_active(vma)) continue; - mutex_unlock(&i915->ggtt.vm.mutex); - if (i915_vma_unbind(vma) == 0) + if (__i915_vma_unbind(vma) == 0) freed_pages += count; - mutex_lock(&i915->ggtt.vm.mutex); } mutex_unlock(&i915->ggtt.vm.mutex); -out: - shrinker_unlock(i915, unlock); - *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; } -/** - * i915_gem_shrinker_register - Register the i915 shrinker - * @i915: i915 device - * - * This function registers and sets up the i915 shrinker and OOM handler. - */ -void i915_gem_shrinker_register(struct drm_i915_private *i915) +void i915_gem_driver_register__shrinker(struct drm_i915_private *i915) { i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; i915->mm.shrinker.count_objects = i915_gem_shrinker_count; @@ -483,13 +412,7 @@ void i915_gem_shrinker_register(struct drm_i915_private *i915) WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier)); } -/** - * i915_gem_shrinker_unregister - Unregisters the i915 shrinker - * @i915: i915 device - * - * This function unregisters the i915 shrinker and OOM handler. - */ -void i915_gem_shrinker_unregister(struct drm_i915_private *i915) +void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915) { WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier)); WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier)); @@ -512,24 +435,75 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_acquire(GFP_KERNEL); + mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); + mutex_release(&mutex->dep_map, _RET_IP_); + + fs_reclaim_release(GFP_KERNEL); + + if (unlock) + mutex_release(&i915->drm.struct_mutex.dep_map, _RET_IP_); +} + +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + +void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; + /* - * As we invariably rely on the struct_mutex within the shrinker, - * but have a complicated recursion dance, taint all the mutexes used - * within the shrinker with the struct_mutex. For completeness, we - * taint with all subclass of struct_mutex, even though we should - * only need tainting by I915_MM_NORMAL to catch possible ABBA - * deadlocks from using struct_mutex inside @mutex. + * We can only be called while the pages are pinned or when + * the pages are released. If pinned, we should only be called + * from a single caller under controlled conditions; and on release + * only one caller may release us. Neither the two may cross. 
*/ - mutex_acquire(&i915->drm.struct_mutex.dep_map, - I915_MM_SHRINKER, 0, _RET_IP_); + if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0)) + return; - mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); - mutex_release(&mutex->dep_map, 0, _RET_IP_); + spin_lock_irqsave(&i915->mm.obj_lock, flags); + if (!atomic_fetch_inc(&obj->mm.shrink_pin) && + !list_empty(&obj->mm.link)) { + list_del_init(&obj->mm.link); + i915->mm.shrink_count--; + i915->mm.shrink_memory -= obj->base.size; + } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); +} - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); +static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, + struct list_head *head) +{ + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; - fs_reclaim_release(GFP_KERNEL); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + if (!i915_gem_object_is_shrinkable(obj)) + return; - if (unlock) - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); + if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1)) + return; + + spin_lock_irqsave(&i915->mm.obj_lock, flags); + GEM_BUG_ON(!kref_read(&obj->base.refcount)); + if (atomic_dec_and_test(&obj->mm.shrink_pin)) { + GEM_BUG_ON(!list_empty(&obj->mm.link)); + + list_add_tail(&obj->mm.link, head); + i915->mm.shrink_count++; + i915->mm.shrink_memory += obj->base.size; + + } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); +} + +void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) +{ + __i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.shrink_list); +} + +void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj) +{ + __i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.purge_list); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h new file mode 100644 index 000000000000..b397d7785789 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_SHRINKER_H__ +#define __I915_GEM_SHRINKER_H__ + +#include <linux/bits.h> + +struct drm_i915_private; +struct mutex; + +/* i915_gem_shrinker.c */ +unsigned long i915_gem_shrink(struct drm_i915_private *i915, + unsigned long target, + unsigned long *nr_scanned, + unsigned flags); +#define I915_SHRINK_UNBOUND BIT(0) +#define I915_SHRINK_BOUND BIT(1) +#define I915_SHRINK_ACTIVE BIT(2) +#define I915_SHRINK_VMAPS BIT(3) +#define I915_SHRINK_WRITEBACK BIT(4) + +unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); +void i915_gem_driver_register__shrinker(struct drm_i915_private *i915); +void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915); +void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, + struct mutex *mutex); + +#endif /* __I915_GEM_SHRINKER_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index de1fab2058ec..451f3078d60d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -10,7 +10,9 @@ #include <drm/drm_mm.h> #include <drm/i915_drm.h> +#include "gem/i915_gem_region.h" #include "i915_drv.h" +#include "i915_gem_stolen.h" /* * The BIOS typically reserves some of the system's memory for the exclusive @@ -24,48 +26,49 @@ * for is a boon. 
*/ -int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, +int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, struct drm_mm_node *node, u64 size, unsigned alignment, u64 start, u64 end) { int ret; - if (!drm_mm_initialized(&dev_priv->mm.stolen)) + if (!drm_mm_initialized(&i915->mm.stolen)) return -ENODEV; /* WaSkipStolenMemoryFirstPage:bdw+ */ - if (INTEL_GEN(dev_priv) >= 8 && start < 4096) + if (INTEL_GEN(i915) >= 8 && start < 4096) start = 4096; - mutex_lock(&dev_priv->mm.stolen_lock); - ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, + mutex_lock(&i915->mm.stolen_lock); + ret = drm_mm_insert_node_in_range(&i915->mm.stolen, node, size, alignment, 0, start, end, DRM_MM_INSERT_BEST); - mutex_unlock(&dev_priv->mm.stolen_lock); + mutex_unlock(&i915->mm.stolen_lock); return ret; } -int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, +int i915_gem_stolen_insert_node(struct drm_i915_private *i915, struct drm_mm_node *node, u64 size, unsigned alignment) { - return i915_gem_stolen_insert_node_in_range(dev_priv, node, size, + return i915_gem_stolen_insert_node_in_range(i915, node, size, alignment, 0, U64_MAX); } -void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, +void i915_gem_stolen_remove_node(struct drm_i915_private *i915, struct drm_mm_node *node) { - mutex_lock(&dev_priv->mm.stolen_lock); + mutex_lock(&i915->mm.stolen_lock); drm_mm_remove_node(node); - mutex_unlock(&dev_priv->mm.stolen_lock); + mutex_unlock(&i915->mm.stolen_lock); } -static int i915_adjust_stolen(struct drm_i915_private *dev_priv, +static int i915_adjust_stolen(struct drm_i915_private *i915, struct resource *dsm) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_uncore *uncore = ggtt->vm.gt->uncore; struct resource *r; if (dsm->start == 0 || dsm->end <= dsm->start) @@ -77,14 +80,14 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, */ /* Make sure we don't clobber the GTT if it's within stolen memory */ - if (INTEL_GEN(dev_priv) <= 4 && - !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { + if (INTEL_GEN(i915) <= 4 && + !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) { struct resource stolen[2] = {*dsm, *dsm}; struct resource ggtt_res; resource_size_t ggtt_start; - ggtt_start = I915_READ(PGTBL_CTL); - if (IS_GEN(dev_priv, 4)) + ggtt_start = intel_uncore_read(uncore, PGTBL_CTL); + if (IS_GEN(i915, 4)) ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) | (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28; else @@ -118,7 +121,7 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, * kernel. So if the region is already marked as busy, something * is seriously wrong. */ - r = devm_request_mem_region(dev_priv->drm.dev, dsm->start, + r = devm_request_mem_region(i915->drm.dev, dsm->start, resource_size(dsm), "Graphics Stolen Memory"); if (r == NULL) { @@ -131,14 +134,14 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, * reservation starting from 1 instead of 0. * There's also BIOS with off-by-one on the other end. */ - r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1, + r = devm_request_mem_region(i915->drm.dev, dsm->start + 1, resource_size(dsm) - 2, "Graphics Stolen Memory"); /* * GEN3 firmware likes to smash pci bridges into the stolen * range. Apparently this works. 
*/ - if (r == NULL && !IS_GEN(dev_priv, 3)) { + if (!r && !IS_GEN(i915, 3)) { DRM_ERROR("conflict detected with stolen region: %pR\n", dsm); @@ -149,25 +152,27 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, return 0; } -void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) +static void i915_gem_cleanup_stolen(struct drm_i915_private *i915) { - if (!drm_mm_initialized(&dev_priv->mm.stolen)) + if (!drm_mm_initialized(&i915->mm.stolen)) return; - drm_mm_takedown(&dev_priv->mm.stolen); + drm_mm_takedown(&i915->mm.stolen); } -static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void g4x_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(IS_GM45(dev_priv) ? - CTG_STOLEN_RESERVED : - ELK_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; + u32 reg_val = intel_uncore_read(uncore, + IS_GM45(i915) ? + CTG_STOLEN_RESERVED : + ELK_STOLEN_RESERVED); + resource_size_t stolen_top = i915->dsm.end + 1; DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n", - IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val); + IS_GM45(i915) ? "CTG" : "ELK", reg_val); if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) return; @@ -176,7 +181,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, * Whether ILK really reuses the ELK register for this is unclear. * Let's see if we catch anyone with this supposedly enabled on ILK. */ - WARN(IS_GEN(dev_priv, 5), "ILK stolen reserved found? 0x%08x\n", + WARN(IS_GEN(i915, 5), "ILK stolen reserved found? 0x%08x\n", reg_val); if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK)) @@ -188,11 +193,12 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, *size = stolen_top - *base; } -static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void gen6_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -220,12 +226,13 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void vlv_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = i915->dsm.end + 1; DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -248,11 +255,12 @@ static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, *base = stolen_top - *size; } -static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void gen7_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -274,11 +282,12 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void chv_get_stolen_reserved(struct 
drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -306,12 +315,13 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void bdw_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = i915->dsm.end + 1; DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -326,10 +336,11 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void icl_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u64 reg_val = intel_uncore_read64(&i915->uncore, GEN6_STOLEN_RESERVED); + u64 reg_val = intel_uncore_read64(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); @@ -354,75 +365,84 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, } } -int i915_gem_init_stolen(struct drm_i915_private *dev_priv) +static int i915_gem_init_stolen(struct drm_i915_private *i915) { + struct intel_uncore *uncore = &i915->uncore; resource_size_t reserved_base, stolen_top; resource_size_t reserved_total, reserved_size; - mutex_init(&dev_priv->mm.stolen_lock); + mutex_init(&i915->mm.stolen_lock); - if (intel_vgpu_active(dev_priv)) { - DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); + if (intel_vgpu_active(i915)) { + dev_notice(i915->drm.dev, + "%s, disabling use of stolen memory\n", + "iGVT-g active"); return 0; } - if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) { - DRM_INFO("DMAR active, disabling use of stolen memory\n"); + if (intel_vtd_active() && INTEL_GEN(i915) < 8) { + dev_notice(i915->drm.dev, + "%s, disabling use of stolen memory\n", + "DMAR active"); return 0; } if (resource_size(&intel_graphics_stolen_res) == 0) return 0; - dev_priv->dsm = intel_graphics_stolen_res; + i915->dsm = intel_graphics_stolen_res; - if (i915_adjust_stolen(dev_priv, &dev_priv->dsm)) + if (i915_adjust_stolen(i915, &i915->dsm)) return 0; - GEM_BUG_ON(dev_priv->dsm.start == 0); - GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); + GEM_BUG_ON(i915->dsm.start == 0); + GEM_BUG_ON(i915->dsm.end <= i915->dsm.start); - stolen_top = dev_priv->dsm.end + 1; + stolen_top = i915->dsm.end + 1; reserved_base = stolen_top; reserved_size = 0; - switch (INTEL_GEN(dev_priv)) { + switch (INTEL_GEN(i915)) { case 2: case 3: break; case 4: - if (!IS_G4X(dev_priv)) + if (!IS_G4X(i915)) break; /* fall through */ case 5: - g4x_get_stolen_reserved(dev_priv, + g4x_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; case 6: - gen6_get_stolen_reserved(dev_priv, + gen6_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; case 7: - if (IS_VALLEYVIEW(dev_priv)) - vlv_get_stolen_reserved(dev_priv, + if (IS_VALLEYVIEW(i915)) + vlv_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); else - gen7_get_stolen_reserved(dev_priv, + gen7_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; case 8: case 9: case 10: - if 
(IS_LP(dev_priv)) - chv_get_stolen_reserved(dev_priv, + if (IS_LP(i915)) + chv_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); else - bdw_get_stolen_reserved(dev_priv, + bdw_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; - case 11: default: - icl_get_stolen_reserved(dev_priv, &reserved_base, + MISSING_CASE(INTEL_GEN(i915)); + /* fall-through */ + case 11: + case 12: + icl_get_stolen_reserved(i915, uncore, + &reserved_base, &reserved_size); break; } @@ -439,12 +459,12 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_size = 0; } - dev_priv->dsm_reserved = - (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size); + i915->dsm_reserved = + (struct resource)DEFINE_RES_MEM(reserved_base, reserved_size); - if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) { + if (!resource_contains(&i915->dsm, &i915->dsm_reserved)) { DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", - &dev_priv->dsm_reserved, &dev_priv->dsm); + &i915->dsm_reserved, &i915->dsm); return 0; } @@ -453,14 +473,14 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_total = stolen_top - reserved_base; DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n", - (u64)resource_size(&dev_priv->dsm) >> 10, - ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); + (u64)resource_size(&i915->dsm) >> 10, + ((u64)resource_size(&i915->dsm) - reserved_total) >> 10); - dev_priv->stolen_usable_size = - resource_size(&dev_priv->dsm) - reserved_total; + i915->stolen_usable_size = + resource_size(&i915->dsm) - reserved_total; /* Basic memrange allocator for stolen space. */ - drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); + drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size); return 0; } @@ -469,11 +489,11 @@ static struct sg_table * i915_pages_create_for_stolen(struct drm_device *dev, resource_size_t offset, resource_size_t size) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct sg_table *st; struct scatterlist *sg; - GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm))); + GEM_BUG_ON(range_overflows(offset, size, resource_size(&i915->dsm))); /* We hide that we have no struct page backing our stolen object * by wrapping the contiguous physical allocation with a fake @@ -493,7 +513,7 @@ i915_pages_create_for_stolen(struct drm_device *dev, sg->offset = 0; sg->length = size; - sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset; + sg_dma_address(sg) = (dma_addr_t)i915->dsm.start + offset; sg_dma_len(sg) = size; return st; @@ -524,14 +544,14 @@ static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, static void i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen); GEM_BUG_ON(!stolen); - __i915_gem_object_unpin_pages(obj); + i915_gem_object_release_memory_region(obj); - i915_gem_stolen_remove_node(dev_priv, stolen); + i915_gem_stolen_remove_node(i915, stolen); kfree(stolen); } @@ -542,83 +562,133 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { }; static struct drm_i915_gem_object * -_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - struct drm_mm_node *stolen) +__i915_gem_object_create_stolen(struct intel_memory_region *mem, + struct 
drm_mm_node *stolen) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; + int err = -ENOMEM; obj = i915_gem_object_alloc(); - if (obj == NULL) - return NULL; + if (!obj) + goto err; - drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); - i915_gem_object_init(obj, &i915_gem_object_stolen_ops); + drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size); + i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class); obj->stolen = stolen; obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; - cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; + cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); - if (i915_gem_object_pin_pages(obj)) + err = i915_gem_object_pin_pages(obj); + if (err) goto cleanup; + i915_gem_object_init_memory_region(obj, mem, 0); + return obj; cleanup: i915_gem_object_free(obj); - return NULL; +err: + return ERR_PTR(err); } -struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - resource_size_t size) +static struct drm_i915_gem_object * +_i915_gem_object_create_stolen(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { + struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; int ret; - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; + if (!drm_mm_initialized(&i915->mm.stolen)) + return ERR_PTR(-ENODEV); if (size == 0) - return NULL; + return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return NULL; + return ERR_PTR(-ENOMEM); - ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096); + ret = i915_gem_stolen_insert_node(i915, stolen, size, 4096); if (ret) { - kfree(stolen); - return NULL; + obj = ERR_PTR(ret); + goto err_free; } - obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj) - return obj; + obj = __i915_gem_object_create_stolen(mem, stolen); + if (IS_ERR(obj)) + goto err_remove; + + return obj; - i915_gem_stolen_remove_node(dev_priv, stolen); +err_remove: + i915_gem_stolen_remove_node(i915, stolen); +err_free: kfree(stolen); - return NULL; + return obj; } struct drm_i915_gem_object * -i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, +i915_gem_object_create_stolen(struct drm_i915_private *i915, + resource_size_t size) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN], + size, I915_BO_ALLOC_CONTIGUOUS); +} + +static int init_stolen(struct intel_memory_region *mem) +{ + intel_memory_region_set_name(mem, "stolen"); + + /* + * Initialise stolen early so that we may reserve preallocated + * objects for the BIOS to KMS transition. 
+ */ + return i915_gem_init_stolen(mem->i915); +} + +static void release_stolen(struct intel_memory_region *mem) +{ + i915_gem_cleanup_stolen(mem->i915); +} + +static const struct intel_memory_region_ops i915_region_stolen_ops = { + .init = init_stolen, + .release = release_stolen, + .create_object = _i915_gem_object_create_stolen, +}; + +struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915) +{ + return intel_memory_region_create(i915, + intel_graphics_stolen_res.start, + resource_size(&intel_graphics_stolen_res), + PAGE_SIZE, 0, + &i915_region_stolen_ops); +} + +struct drm_i915_gem_object * +i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, resource_size_t stolen_offset, resource_size_t gtt_offset, resource_size_t size) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN]; + struct i915_ggtt *ggtt = &i915->ggtt; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; struct i915_vma *vma; int ret; - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); + if (!drm_mm_initialized(&i915->mm.stolen)) + return ERR_PTR(-ENODEV); DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", &stolen_offset, >t_offset, &size); @@ -627,29 +697,29 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (WARN_ON(size == 0) || WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) - return NULL; + return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return NULL; + return ERR_PTR(-ENOMEM); stolen->start = stolen_offset; stolen->size = size; - mutex_lock(&dev_priv->mm.stolen_lock); - ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen); - mutex_unlock(&dev_priv->mm.stolen_lock); + mutex_lock(&i915->mm.stolen_lock); + ret = drm_mm_reserve_node(&i915->mm.stolen, stolen); + mutex_unlock(&i915->mm.stolen_lock); if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); kfree(stolen); - return NULL; + return ERR_PTR(ret); } - obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj == NULL) { + obj = __i915_gem_object_create_stolen(mem, stolen); + if (IS_ERR(obj)) { DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); - i915_gem_stolen_remove_node(dev_priv, stolen); + i915_gem_stolen_remove_node(i915, stolen); kfree(stolen); - return NULL; + return obj; } /* Some objects just need physical mem from stolen space */ @@ -671,22 +741,26 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv * setting up the GTT space. The actual reservation will occur * later. 
*/ + mutex_lock(&ggtt->vm.mutex); ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, size, gtt_offset, obj->cache_level, 0); if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); + mutex_unlock(&ggtt->vm.mutex); goto err_pages; } GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->pages); vma->pages = obj->mm.pages; - vma->flags |= I915_VMA_GLOBAL_BIND; + atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); + + set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); __i915_vma_set_map_and_fenceable(vma); - mutex_lock(&ggtt->vm.mutex); - list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); + list_add_tail(&vma->vm_link, &ggtt->vm.bound_list); mutex_unlock(&ggtt->vm.mutex); GEM_BUG_ON(i915_gem_object_is_shrinkable(obj)); @@ -698,5 +772,5 @@ err_pages: i915_gem_object_unpin_pages(obj); err: i915_gem_object_put(obj); - return NULL; + return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h new file mode 100644 index 000000000000..c1040627fbf3 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_STOLEN_H__ +#define __I915_GEM_STOLEN_H__ + +#include <linux/types.h> + +struct drm_i915_private; +struct drm_mm_node; +struct drm_i915_gem_object; + +int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, + struct drm_mm_node *node, u64 size, + unsigned alignment); +int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, + struct drm_mm_node *node, u64 size, + unsigned alignment, u64 start, + u64 end); +void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, + struct drm_mm_node *node); +struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915); +struct drm_i915_gem_object * +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size); +struct drm_i915_gem_object * +i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, + resource_size_t stolen_offset, + resource_size_t gtt_offset, + resource_size_t size); + +#endif /* __I915_GEM_STOLEN_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index adb3074d9ce2..540ef0551789 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -41,7 +41,7 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, long ret; /* ABI: return -EIO if already wedged */ - ret = i915_terminally_wedged(to_i915(dev)); + ret = intel_gt_terminally_wedged(&to_i915(dev)->gt); if (ret) return ret; @@ -50,10 +50,8 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, if (time_after_eq(request->emitted_jiffies, recent_enough)) break; - if (target) { + if (target && xchg(&target->file_priv, NULL)) list_del(&target->client_link); - target->file_priv = NULL; - } target = request; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ca0c2f451742..6c7825a2dc2a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -11,6 +11,7 @@ #include "i915_drv.h" #include "i915_gem.h" #include "i915_gem_ioctls.h" +#include "i915_gem_mman.h" #include "i915_gem_object.h" /** @@ -181,22 +182,25 @@ static int i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode, unsigned int stride) { + struct 
i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; struct i915_vma *vma; - int ret; + int ret = 0; if (tiling_mode == I915_TILING_NONE) return 0; + mutex_lock(&ggtt->vm.mutex); for_each_ggtt_vma(vma, obj) { if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); if (ret) - return ret; + break; } + mutex_unlock(&ggtt->vm.mutex); - return 0; + return ret; } int @@ -212,7 +216,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); - lockdep_assert_held(&i915->drm.struct_mutex); if ((tiling | stride) == obj->tiling_and_stride) return 0; @@ -233,16 +236,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * whilst executing a fenced command for an untiled object. */ - err = i915_gem_object_fence_prepare(obj, tiling, stride); - if (err) - return err; - i915_gem_object_lock(obj); if (i915_gem_object_is_framebuffer(obj)) { i915_gem_object_unlock(obj); return -EBUSY; } + err = i915_gem_object_fence_prepare(obj, tiling, stride); + if (err) { + i915_gem_object_unlock(obj); + return err; + } + /* If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. @@ -313,10 +318,14 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_set_tiling *args = data; struct drm_i915_gem_object *obj; int err; + if (!dev_priv->ggtt.num_fences) + return -EOPNOTSUPP; + obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; @@ -340,9 +349,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x; + args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x; else - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y; + args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, @@ -364,12 +373,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, } } - err = mutex_lock_interruptible(&dev->struct_mutex); - if (err) - goto err; - err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); - mutex_unlock(&dev->struct_mutex); /* We have to maintain this existing ABI... 
*/ args->stride = i915_gem_object_get_stride(obj); @@ -402,6 +406,9 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err = -ENOENT; + if (!dev_priv->ggtt.num_fences) + return -EOPNOTSUPP; + rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, args->handle); if (obj) { @@ -415,10 +422,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, switch (args->tiling_mode) { case I915_TILING_X: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x; break; case I915_TILING_Y: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y; break; default: case I915_TILING_NONE: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 2caa594322bc..580319b7bf1a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -12,11 +12,10 @@ #include <drm/i915_drm.h> +#include "i915_drv.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" -#include "i915_trace.h" -#include "intel_drv.h" struct i915_mm_struct { struct mm_struct *mm; @@ -93,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); struct interval_tree_node *it; - struct mutex *unlock = NULL; unsigned long end; int ret = 0; @@ -130,32 +128,14 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); - if (!unlock) { - unlock = &mn->mm->i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(unlock)) { - default: - case MUTEX_TRYLOCK_FAILED: - if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { - i915_gem_object_put(obj); - return -EINTR; - } - /* fall through */ - case MUTEX_TRYLOCK_SUCCESS: - break; - - case MUTEX_TRYLOCK_RECURSIVE: - unlock = ERR_PTR(-EEXIST); - break; - } - } - - ret = i915_gem_object_unbind(obj); + ret = i915_gem_object_unbind(obj, + I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_BARRIER); if (ret == 0) - ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); + ret = __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); if (ret) - goto unlock; + return ret; spin_lock(&mn->lock); @@ -168,10 +148,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); -unlock: - if (!IS_ERR_OR_NULL(unlock)) - mutex_unlock(unlock); - return ret; } @@ -427,7 +403,7 @@ struct get_pages_work { static struct sg_table * __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, - struct page **pvec, int num_pages) + struct page **pvec, unsigned long num_pages) { unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; @@ -473,9 +449,10 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) { struct get_pages_work *work = container_of(_work, typeof(*work), work); struct drm_i915_gem_object *obj = work->obj; - const int npages = obj->base.size >> PAGE_SHIFT; + const unsigned long npages = obj->base.size >> PAGE_SHIFT; + unsigned long pinned; struct page **pvec; - int pinned, ret; + int ret; ret = -ENOMEM; pinned = 0; @@ -484,31 +461,36 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; unsigned int flags = 0; + int locked = 0; if (!i915_gem_object_is_readonly(obj)) flags |= FOLL_WRITE; ret = -EFAULT; if (mmget_not_zero(mm)) { - down_read(&mm->mmap_sem); 
while (pinned < npages) { + if (!locked) { + down_read(&mm->mmap_sem); + locked = 1; + } ret = get_user_pages_remote (work->task, mm, obj->userptr.ptr + pinned * PAGE_SIZE, npages - pinned, flags, - pvec + pinned, NULL, NULL); + pvec + pinned, NULL, &locked); if (ret < 0) break; pinned += ret; } - up_read(&mm->mmap_sem); + if (locked) + up_read(&mm->mmap_sem); mmput(mm); } } - mutex_lock(&obj->mm.lock); + mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (obj->userptr.work == &work->work) { struct sg_table *pages = ERR_PTR(ret); @@ -578,7 +560,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { - const int num_pages = obj->base.size >> PAGE_SHIFT; + const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; struct mm_struct *mm = obj->userptr.mm->mm; struct page **pvec; struct sg_table *pages; @@ -662,8 +644,16 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); + /* + * We always mark objects as dirty when they are used by the GPU, + * just in case. However, if we set the vma as being read-only we know + * that the object will never have been written to. + */ + if (i915_gem_object_is_readonly(obj)) + obj->mm.dirty = false; + for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) + if (obj->mm.dirty && trylock_page(page)) { /* * As this may not be anonymous memory (e.g. shmem) * but exist on a real mapping, we have to lock @@ -671,8 +661,20 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, * the page reference is not sufficient to * prevent the inode from being truncated. * Play safe and take the lock. + * + * However...! + * + * The mmu-notifier can be invalidated for a + * migrate_page, that is alreadying holding the lock + * on the page. Such a try_to_unmap() will result + * in us calling put_pages() and so recursively try + * to lock the page. We avoid that deadlock with + * a trylock_page() and in exchange we risk missing + * some page dirtying. */ - set_page_dirty_lock(page); + set_page_dirty(page); + unlock_page(page); + } mark_page_accessed(page); put_page(page); @@ -702,6 +704,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_NO_GGTT | I915_GEM_OBJECT_ASYNC_CANCEL, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, @@ -749,6 +752,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + static struct lock_class_key lock_class; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_userptr *args = data; struct drm_i915_gem_object *obj; @@ -776,14 +780,11 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -EFAULT; if (args->flags & I915_USERPTR_READ_ONLY) { - struct i915_address_space *vm; - /* * On almost all of the older hw, we cannot tell the GPU that * a page is readonly. 
*/ - vm = dev_priv->kernel_context->vm; - if (!vm || !vm->has_read_only) + if (!dev_priv->gt.vm->has_read_only) return -ENODEV; } @@ -792,7 +793,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 26ec6579b7cd..8af55cd3e690 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -31,11 +31,10 @@ i915_gem_object_wait_fence(struct dma_fence *fence, } static long -i915_gem_object_wait_reservation(struct reservation_object *resv, +i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, long timeout) { - unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; bool prune_fences = false; @@ -44,7 +43,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, unsigned int count, i; int ret; - ret = reservation_object_get_fences_rcu(resv, + ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared); if (ret) return ret; @@ -73,7 +72,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, */ prune_fences = count && timeout >= 0; } else { - excl = reservation_object_get_excl_rcu(resv); + excl = dma_resv_get_excl_rcu(resv); } if (excl && timeout >= 0) @@ -83,15 +82,12 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, /* * Opportunistically prune the fences iff we know they have *all* been - * signaled and that the reservation object has not been changed (i.e. - * no new fences have been added). + * signaled. */ - if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { - if (reservation_object_trylock(resv)) { - if (!__read_seqcount_retry(&resv->seq, seq)) - reservation_object_add_excl_fence(resv, NULL); - reservation_object_unlock(resv); - } + if (prune_fences && dma_resv_trylock(resv)) { + if (dma_resv_test_signaled_rcu(resv, true)) + dma_resv_add_excl_fence(resv, NULL); + dma_resv_unlock(resv); } return timeout; @@ -144,7 +140,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int count, i; int ret; - ret = reservation_object_get_fences_rcu(obj->base.resv, + ret = dma_resv_get_fences_rcu(obj->base.resv, &excl, &count, &shared); if (ret) return ret; @@ -156,7 +152,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, kfree(shared); } else { - excl = reservation_object_get_excl_rcu(obj->base.resv); + excl = dma_resv_get_excl_rcu(obj->base.resv); } if (excl) { diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 099f3397aada..5e6e8c91ab38 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -20,31 +20,18 @@ int i915_gemfs_init(struct drm_i915_private *i915) if (!type) return -ENODEV; - gemfs = kern_mount(type); - if (IS_ERR(gemfs)) - return PTR_ERR(gemfs); - /* - * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most - * likely 2M. Note that within_size may overallocate huge-pages, if say - * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under - * memory pressure shmem should split any huge-pages which can be - * shrunk. 
+ * By creating our own shmemfs mountpoint, we can pass in + * mount flags that better match our usecase. + * + * One example, although it is probably better with a per-file + * control, is selecting huge page allocations ("huge=within_size"). + * Currently unused due to bandwidth issues (slow reads) on Broadwell+. */ - if (has_transparent_hugepage()) { - struct super_block *sb = gemfs->mnt_sb; - /* FIXME: Disabled until we get W/A for read BW issue. */ - char options[] = "huge=never"; - int flags = 0; - int err; - - err = sb->s_op->remount_fs(sb, &flags, options); - if (err) { - kern_unmount(gemfs); - return err; - } - } + gemfs = kern_mount(type); + if (IS_ERR(gemfs)) + return PTR_ERR(gemfs); i915->mm.gemfs = gemfs; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 3c5d17b2b670..fa16f2c3f3ac 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -12,10 +12,14 @@ static void huge_free_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { unsigned long nreal = obj->scratch / PAGE_SIZE; - struct scatterlist *sg; + struct sgt_iter sgt_iter; + struct page *page; - for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) - __free_page(sg_page(sg)); + for_each_sgt_page(page, sgt_iter, pages) { + __free_page(page); + if (!--nreal) + break; + } sg_free_table(pages); kfree(pages); @@ -70,7 +74,6 @@ static int huge_get_pages(struct drm_i915_gem_object *obj) err: huge_free_pages(obj, pages); - return -ENOMEM; #undef GFP } @@ -96,6 +99,7 @@ huge_gem_object(struct drm_i915_private *i915, phys_addr_t phys_size, dma_addr_t dma_size) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; @@ -111,7 +115,7 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops); + i915_gem_object_init(obj, &huge_ops, &lock_class); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h index 549c1394bcdc..b8cf31b7bf14 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h @@ -7,6 +7,12 @@ #ifndef __HUGE_GEM_OBJECT_H #define __HUGE_GEM_OBJECT_H +#include <linux/types.h> + +#include "gem/i915_gem_object_types.h" + +struct drm_i915_private; + struct drm_i915_gem_object * huge_gem_object(struct drm_i915_private *i915, phys_addr_t phys_size, diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index b74729b6f353..9311250d7d6f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -8,13 +8,18 @@ #include "i915_selftest.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" + #include "igt_gem_utils.h" #include "mock_context.h" #include "selftests/mock_drm.h" #include "selftests/mock_gem_device.h" +#include "selftests/mock_region.h" #include "selftests/i915_random.h" static const unsigned int page_sizes[] = { @@ -111,8 +116,6 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, st)) goto err; - obj->mm.madv = I915_MADV_DONTNEED; - GEM_BUG_ON(sg_page_sizes != 
obj->mm.page_mask); __i915_gem_object_set_pages(obj, st, sg_page_sizes); @@ -133,7 +136,6 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, huge_pages_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops huge_page_ops = { @@ -148,6 +150,7 @@ huge_pages_object(struct drm_i915_private *i915, u64 size, unsigned int page_mask) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -164,7 +167,9 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops); + i915_gem_object_init(obj, &huge_page_ops, &lock_class); + + i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -225,8 +230,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) i915_sg_trim(st); - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -259,8 +262,6 @@ static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; sg_dma_address(sg) = page_size; - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg->length); return 0; @@ -279,7 +280,6 @@ static void fake_put_huge_pages(struct drm_i915_gem_object *obj, { fake_free_huge_pages(obj, pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops fake_ops = { @@ -297,6 +297,7 @@ static const struct drm_i915_gem_object_ops fake_ops_single = { static struct drm_i915_gem_object * fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -315,9 +316,11 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) drm_gem_private_object_init(&i915->drm, &obj->base, size); if (single) - i915_gem_object_init(obj, &fake_ops_single); + i915_gem_object_init(obj, &fake_ops_single, &lock_class); else - i915_gem_object_init(obj, &fake_ops); + i915_gem_object_init(obj, &fake_ops, &lock_class); + + i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -331,7 +334,12 @@ static int igt_check_page_sizes(struct i915_vma *vma) struct drm_i915_private *i915 = vma->vm->i915; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj = vma->obj; - int err = 0; + int err; + + /* We have to wait for the async bind to complete before our asserts */ + err = i915_vma_sync(vma); + if (err) + return err; if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { pr_err("unsupported page_sizes.sg=%u, supported=%u\n", @@ -445,6 +453,88 @@ out_device: return err; } +static int igt_mock_memory_region_huge_pages(void *arg) +{ + const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS }; + struct i915_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct intel_memory_region *mem; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int bit; + int err = 0; + + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("%s failed to create memory region\n", __func__); + return PTR_ERR(mem); + } + + for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + unsigned int 
page_size = BIT(bit); + resource_size_t phys; + int i; + + for (i = 0; i < ARRAY_SIZE(flags); ++i) { + obj = i915_gem_object_create_region(mem, page_size, + flags[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_region; + } + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + phys = i915_gem_object_get_dma_address(obj, 0); + if (!IS_ALIGNED(phys, page_size)) { + pr_err("%s addr misaligned(%pa) page_size=%u\n", + __func__, &phys, page_size); + err = -EINVAL; + goto out_unpin; + } + + if (vma->page_sizes.gtt != page_size) { + pr_err("%s page_sizes.gtt=%u, expected=%u\n", + __func__, vma->page_sizes.gtt, + page_size); + err = -EINVAL; + goto out_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + __i915_gem_object_put_pages(obj); + i915_gem_object_put(obj); + } + } + + goto out_region; + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_region: + intel_memory_region_put(mem); + return err; +} + static int igt_mock_ppgtt_misaligned_dma(void *arg) { struct i915_ppgtt *ppgtt = arg; @@ -560,7 +650,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) i915_vma_close(vma); i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } @@ -588,7 +678,7 @@ static void close_object_list(struct list_head *objects, list_del(&obj->st_link); i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } } @@ -858,7 +948,7 @@ static int igt_mock_ppgtt_64K(void *arg) i915_vma_close(vma); i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } } @@ -877,129 +967,25 @@ out_object_put: return err; } -static struct i915_vma * -gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) -{ - struct drm_i915_private *i915 = vma->vm->i915; - const int gen = INTEL_GEN(i915); - unsigned int count = vma->size >> PAGE_SHIFT; - struct drm_i915_gem_object *obj; - struct i915_vma *batch; - unsigned int size; - u32 *cmd; - int n; - int err; - - size = (1 + 4 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - offset += vma->node.start; - - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = val; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? 
MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = val; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = val; - } - - offset += PAGE_SIZE; - } - - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(obj); - - batch = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (err) - goto err; - - return batch; - -err: - i915_gem_object_put(obj); - - return ERR_PTR(err); -} - -static int gpu_write(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u32 dword, - u32 value) +static int gpu_write(struct intel_context *ce, + struct i915_vma *vma, + u32 dw, + u32 val) { - struct i915_request *rq; - struct i915_vma *batch; int err; - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); - - batch = gpu_write_dw(vma, dword * sizeof(u32), value); - if (IS_ERR(batch)) - return PTR_ERR(batch); - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); - if (err) - goto err_request; - - i915_vma_lock(vma); - err = i915_gem_object_set_to_gtt_domain(vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - if (err) - goto err_request; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); -err_request: + i915_gem_object_lock(vma->obj); + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + i915_gem_object_unlock(vma->obj); if (err) - i915_request_skip(rq, err); - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); + return err; - return err; + return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), + vma->size >> PAGE_SHIFT, val); } -static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +static int +__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) { unsigned int needs_flush; unsigned long n; @@ -1031,19 +1017,54 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } -static int __igt_write_huge(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned long n = obj->base.size >> PAGE_SHIFT; + u32 *ptr; + int err; + + err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + ptr += dword; + while (n--) { + if (*ptr != val) { + pr_err("base[%u]=%08x, val=%08x\n", + dword, *ptr, val); + err = -EINVAL; + break; + } + + ptr += PAGE_SIZE / sizeof(*ptr); + } + + i915_gem_object_unpin_map(obj); + return err; +} + +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + if (i915_gem_object_has_struct_page(obj)) + return __cpu_check_shmem(obj, dword, val); + else + return __cpu_check_vmap(obj, dword, val); +} + +static int __igt_write_huge(struct intel_context *ce, struct drm_i915_gem_object *obj, u64 size, u64 offset, u32 dword, u32 val) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; struct i915_vma *vma; int err; - vma = i915_vma_instance(obj, vm, NULL); + vma = 
i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -1057,7 +1078,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, * The ggtt may have some pages reserved so * refrain from erroring out. */ - if (err == -ENOSPC && i915_is_ggtt(vm)) + if (err == -ENOSPC && i915_is_ggtt(ce->vm)) err = 0; goto out_vma_close; @@ -1067,7 +1088,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, if (err) goto out_vma_unpin; - err = gpu_write(vma, ctx, engine, dword, val); + err = gpu_write(ce, vma, dword, val); if (err) { pr_err("gpu-write failed at offset=%llx\n", offset); goto out_vma_unpin; @@ -1082,22 +1103,20 @@ static int __igt_write_huge(struct i915_gem_context *ctx, out_vma_unpin: i915_vma_unpin(vma); out_vma_close: - i915_vma_destroy(vma); - + __i915_vma_put(vma); return err; } static int igt_write_huge(struct i915_gem_context *ctx, struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; - static struct intel_engine_cs *engines[I915_NUM_ENGINES]; - struct intel_engine_cs *engine; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct intel_context *ce; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); unsigned int max_page_size; - unsigned int id; + unsigned int count; u64 max; u64 num; u64 size; @@ -1111,19 +1130,18 @@ static int igt_write_huge(struct i915_gem_context *ctx, if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) size = round_up(size, I915_GTT_PAGE_SIZE_2M); - max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); - max = div_u64((vm->total - size), max_page_size); - n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) { - pr_info("store-dword-imm not supported on engine=%u\n", - id); + count = 0; + max = U64_MAX; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) continue; - } - engines[n++] = engine; - } + max = min(max, ce->vm->total); + n++; + } + i915_gem_context_unlock_engines(ctx); if (!n) return 0; @@ -1132,23 +1150,30 @@ static int igt_write_huge(struct i915_gem_context *ctx, * randomized order, lets also make feeding to the same engine a few * times in succession a possibility by enlarging the permutation array. */ - order = i915_random_order(n * I915_NUM_ENGINES, &prng); + order = i915_random_order(count * count, &prng); if (!order) return -ENOMEM; + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); + max = div_u64(max - size, max_page_size); + /* * Try various offsets in an ascending/descending fashion until we * timeout -- we want to avoid issues hidden by effectively always using * offset = 0. 
*/ i = 0; + engines = i915_gem_context_lock_engines(ctx); for_each_prime_number_from(num, 0, max) { u64 offset_low = num * max_page_size; u64 offset_high = (max - num) * max_page_size; u32 dword = offset_in_page(num) / 4; + struct intel_context *ce; - engine = engines[order[i] % n]; - i = (i + 1) % (n * I915_NUM_ENGINES); + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; /* * In order to utilize 64K pages we need to both pad the vma @@ -1160,22 +1185,23 @@ static int igt_write_huge(struct i915_gem_context *ctx, offset_low = round_down(offset_low, I915_GTT_PAGE_SIZE_2M); - err = __igt_write_huge(ctx, engine, obj, size, offset_low, + err = __igt_write_huge(ce, obj, size, offset_low, dword, num + 1); if (err) break; - err = __igt_write_huge(ctx, engine, obj, size, offset_high, + err = __igt_write_huge(ce, obj, size, offset_high, dword, num + 1); if (err) break; if (igt_timeout(end_time, - "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n", - __func__, engine->id, offset_low, offset_high, + "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n", + __func__, ce->engine->name, offset_low, offset_high, max_page_size)) break; } + i915_gem_context_unlock_engines(ctx); kfree(order); @@ -1267,7 +1293,7 @@ static int igt_ppgtt_exhaust_huge(void *arg) } i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } } @@ -1283,131 +1309,235 @@ out_device: return err; } -static int igt_ppgtt_internal_huge(void *arg) +typedef struct drm_i915_gem_object * +(*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags); + +static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) +{ + return i915->mm.gemfs && has_transparent_hugepage(); +} + +static struct drm_i915_gem_object * +igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags) +{ + if (!igt_can_allocate_thp(i915)) { + pr_info("%s missing THP support, skipping\n", __func__); + return ERR_PTR(-ENODEV); + } + + return i915_gem_object_create_shmem(i915, size); +} + +static struct drm_i915_gem_object * +igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return i915_gem_object_create_internal(i915, size); +} + +static struct drm_i915_gem_object * +igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return huge_pages_object(i915, size, size); +} + +static struct drm_i915_gem_object * +igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return i915_gem_object_create_lmem(i915, size, flags); +} + +static u32 igt_random_size(struct rnd_state *prng, + u32 min_page_size, + u32 max_page_size) +{ + u64 mask; + u32 size; + + GEM_BUG_ON(!is_power_of_2(min_page_size)); + GEM_BUG_ON(!is_power_of_2(max_page_size)); + GEM_BUG_ON(min_page_size < PAGE_SIZE); + GEM_BUG_ON(min_page_size > max_page_size); + + mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK; + size = prandom_u32_state(prng) & mask; + if (size < min_page_size) + size |= min_page_size; + + return size; +} + +static int igt_ppgtt_smoke_huge(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_64K, - SZ_128K, - SZ_256K, - SZ_512K, - SZ_1M, - SZ_2M, + I915_RND_STATE(prng); + struct { + igt_create_fn fn; + u32 min; + u32 max; + } backends[] = { + { 
igt_create_internal, SZ_64K, SZ_2M, }, + { igt_create_shmem, SZ_64K, SZ_32M, }, + { igt_create_local, SZ_64K, SZ_1G, }, }; - int i; int err; + int i; /* - * Sanity check that the HW uses huge pages correctly through internal - * -- ensure that our writes land in the right place. + * Sanity check that the HW uses huge pages correctly through our + * various backends -- ensure that our writes land in the right place. */ - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; + for (i = 0; i < ARRAY_SIZE(backends); ++i) { + u32 min = backends[i].min; + u32 max = backends[i].max; + u32 size = max; +try_again: + size = igt_random_size(&prng, min, rounddown_pow_of_two(size)); - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); + obj = backends[i].fn(i915, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -E2BIG) { + size >>= 1; + goto try_again; + } else if (err == -ENODEV) { + err = 0; + continue; + } + + return err; + } err = i915_gem_object_pin_pages(obj); - if (err) + if (err) { + if (err == -ENXIO || err == -E2BIG) { + i915_gem_object_put(obj); + size >>= 1; + goto try_again; + } goto out_put; + } - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { - pr_info("internal unable to allocate huge-page(s) with size=%u\n", - size); + if (obj->mm.page_sizes.phys < min) { + pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n", + __func__, size, i); + err = -ENOMEM; goto out_unpin; } err = igt_write_huge(ctx, obj); if (err) { - pr_err("internal write-huge failed with size=%u\n", - size); - goto out_unpin; + pr_err("%s write-huge failed with size=%u, i=%d\n", + __func__, size, i); } - +out_unpin: i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); +out_put: i915_gem_object_put(obj); - } - return 0; + if (err == -ENOMEM || err == -ENXIO) + err = 0; -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); + if (err) + break; - return err; -} + cond_resched(); + } -static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) -{ - return i915->mm.gemfs && has_transparent_hugepage(); + return err; } -static int igt_ppgtt_gemfs_huge(void *arg) +static int igt_ppgtt_sanity_check(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_2M, - SZ_4M, - SZ_8M, - SZ_16M, - SZ_32M, + unsigned int supported = INTEL_INFO(i915)->page_sizes; + struct { + igt_create_fn fn; + unsigned int flags; + } backends[] = { + { igt_create_system, 0, }, + { igt_create_local, I915_BO_ALLOC_CONTIGUOUS, }, }; - int i; + struct { + u32 size; + u32 pages; + } combos[] = { + { SZ_64K, SZ_64K }, + { SZ_2M, SZ_2M }, + { SZ_2M, SZ_64K }, + { SZ_2M - SZ_64K, SZ_64K }, + { SZ_2M - SZ_4K, SZ_64K | SZ_4K }, + { SZ_2M + SZ_4K, SZ_64K | SZ_4K }, + { SZ_2M + SZ_4K, SZ_2M | SZ_4K }, + { SZ_2M + SZ_64K, SZ_2M | SZ_64K }, + }; + int i, j; int err; + if (supported == I915_GTT_PAGE_SIZE_4K) + return 0; + /* - * Sanity check that the HW uses huge pages correctly through gemfs -- - * ensure that our writes land in the right place. + * Sanity check that the HW behaves with a limited set of combinations. 
+ * We already have a bunch of randomised testing, which should give us + * a decent amount of variation between runs, however we should keep + * this to limit the chances of introducing a temporary regression, by + * testing the most obvious cases that might make something blow up. */ - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - return 0; - } + for (i = 0; i < ARRAY_SIZE(backends); ++i) { + for (j = 0; j < ARRAY_SIZE(combos); ++j) { + struct drm_i915_gem_object *obj; + u32 size = combos[j].size; + u32 pages = combos[j].pages; + + obj = backends[i].fn(i915, size, backends[i].flags); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -ENODEV) { + pr_info("Device lacks local memory, skipping\n"); + err = 0; + break; + } - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; + return err; + } - obj = i915_gem_object_create_shmem(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; + GEM_BUG_ON(pages > obj->base.size); + pages = pages & supported; - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { - pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", - size); - goto out_unpin; - } + if (pages) + obj->mm.page_sizes.sg = pages; - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("gemfs write-huge failed with size=%u\n", - size); - goto out_unpin; + err = igt_write_huge(ctx, obj); + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj); + i915_gem_object_put(obj); + + if (err) { + pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n", + __func__, size, pages, i, j); + goto out; + } } - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); + cond_resched(); } - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); +out: + if (err == -ENOMEM) + err = 0; return err; } @@ -1417,12 +1547,15 @@ static int igt_ppgtt_pin_update(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *dev_priv = ctx->i915; unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct i915_address_space *vm = ctx->vm; struct drm_i915_gem_object *obj; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int n; int first, last; - int err; + int err = 0; /* * Make sure there's no funny business when doing a PIN_UPDATE -- in the @@ -1432,9 +1565,10 @@ static int igt_ppgtt_pin_update(void *arg) * huge-gtt-pages. */ - if (!vm || !i915_vm_is_4lvl(vm)) { + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_vm_is_4lvl(vm)) { pr_info("48b PPGTT not supported, skipping\n"); - return 0; + goto out_vm; } first = ilog2(I915_GTT_PAGE_SIZE_64K); @@ -1487,7 +1621,7 @@ static int igt_ppgtt_pin_update(void *arg) goto out_unpin; } - err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL); if (err) goto out_unpin; @@ -1518,11 +1652,24 @@ static int igt_ppgtt_pin_update(void *arg) * land in the now stale 2M page. 
*/ - err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf); + n = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + err = gpu_write(ce, vma, n++, 0xdeadbeaf); + if (err) + break; + } + i915_gem_context_unlock_engines(ctx); if (err) goto out_unpin; - err = cpu_check(obj, 0, 0xdeadbeaf); + while (n--) { + err = cpu_check(obj, n, 0xdeadbeaf); + if (err) + goto out_unpin; + } out_unpin: i915_vma_unpin(vma); @@ -1530,6 +1677,8 @@ out_close: i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1539,7 +1688,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1589,6 +1738,7 @@ out_put: out_restore: i915->mm.gemfs = gemfs; + i915_vm_put(vm); return err; } @@ -1596,11 +1746,14 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; + struct i915_gem_engines_iter it; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER; - int err; + unsigned int n; + int err = 0; /* * Sanity check shrinking huge-paged object -- make sure nothing blows @@ -1609,12 +1762,14 @@ static int igt_shrink_thp(void *arg) if (!igt_can_allocate_thp(i915)) { pr_info("missing THP support, skipping\n"); - return 0; + goto out_vm; } obj = i915_gem_object_create_shmem(i915, SZ_2M); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { @@ -1635,11 +1790,20 @@ static int igt_shrink_thp(void *arg) if (err) goto out_unpin; - err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf); - if (err) - goto out_unpin; + n = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + err = gpu_write(ce, vma, n++, 0xdeadbeaf); + if (err) + break; + } + i915_gem_context_unlock_engines(ctx); i915_vma_unpin(vma); + if (err) + goto out_close; /* * Now that the pages are *unpinned* shrink-all should invoke @@ -1662,7 +1826,11 @@ static int igt_shrink_thp(void *arg) if (err) goto out_close; - err = cpu_check(obj, 0, 0xdeadbeaf); + while (n--) { + err = cpu_check(obj, n, 0xdeadbeaf); + if (err) + break; + } out_unpin: i915_vma_unpin(vma); @@ -1670,6 +1838,8 @@ out_close: i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1678,6 +1848,7 @@ int i915_gem_huge_page_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_memory_region_huge_pages), SUBTEST(igt_mock_ppgtt_misaligned_dma), SUBTEST(igt_mock_ppgtt_huge_fill), SUBTEST(igt_mock_ppgtt_64K), @@ -1694,8 +1865,7 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - mutex_lock(&dev_priv->drm.struct_mutex); - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = 
i915_ppgtt_create(&dev_priv->gt); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_unlock; @@ -1720,58 +1890,52 @@ out_close: i915_vm_put(&ppgtt->vm); out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); drm_dev_put(&dev_priv->drm); - return err; } -int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) +int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_shrink_thp), SUBTEST(igt_ppgtt_pin_update), SUBTEST(igt_tmpfs_fallback), SUBTEST(igt_ppgtt_exhaust_huge), - SUBTEST(igt_ppgtt_gemfs_huge), - SUBTEST(igt_ppgtt_internal_huge), + SUBTEST(igt_ppgtt_smoke_huge), + SUBTEST(igt_ppgtt_sanity_check), }; - struct drm_file *file; struct i915_gem_context *ctx; - intel_wakeref_t wakeref; + struct i915_address_space *vm; + struct file *file; int err; - if (!HAS_PPGTT(dev_priv)) { + if (!HAS_PPGTT(i915)) { pr_info("PPGTT not supported, skipping live-selftests\n"); return 0; } - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - file = mock_file(dev_priv); + file = mock_file(i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - - ctx = live_context(dev_priv, file); + ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - if (ctx->vm) - ctx->vm->scrub_64K = true; + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->scrub_64K, true); + mutex_unlock(&ctx->mutex); err = i915_subtests(tests, ctx); -out_unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); - - mock_file_free(dev_priv, file); - +out_file: + fput(file); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index f3a5eb807c1c..b972be165e85 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,14 +5,17 @@ #include "i915_selftest.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" + #include "selftests/igt_flush_test.h" #include "selftests/mock_drm.h" +#include "huge_gem_object.h" #include "mock_context.h" -static int igt_client_fill(void *arg) +static int __igt_client_fill(struct intel_engine_cs *engine) { - struct intel_context *ce = arg; - struct drm_i915_private *i915 = ce->gem_context->i915; + struct intel_context *ce = engine->kernel_context; struct drm_i915_gem_object *obj; struct rnd_state prng; IGT_TIMEOUT(end); @@ -21,16 +24,21 @@ static int igt_client_fill(void *arg) prandom_seed_state(&prng, i915_selftest.random_seed); + intel_engine_pm_get(engine); do { - u32 sz = prandom_u32_state(&prng) % SZ_32M; + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 phys_sz = sz % (max_block_size + 1); u32 val = prandom_u32_state(&prng); u32 i; sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); - pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val); + pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, + phys_sz, sz, val); - obj = i915_gem_object_create_internal(i915, sz); + obj = huge_gem_object(engine->i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush; @@ -52,7 +60,8 @@ static int igt_client_fill(void *arg) * values after we do the set_to_cpu_domain and pick it up as a * 
test failure. */ - memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32)); + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(obj) / sizeof(u32)); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; @@ -63,24 +72,13 @@ static int igt_client_fill(void *arg) if (err) goto err_unpin; - /* - * XXX: For now do the wait without the object resv lock to - * ensure we don't deadlock. - */ - err = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - i915_gem_object_lock(obj); err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) goto err_unpin; - for (i = 0; i < obj->base.size / sizeof(u32); ++i) { + for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val); @@ -100,28 +98,46 @@ err_unpin: err_put: i915_gem_object_put(obj); err_flush: - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - if (err == -ENOMEM) err = 0; + intel_engine_pm_put(engine); return err; } +static int igt_client_fill(void *arg) +{ + int inst = 0; + + do { + struct intel_engine_cs *engine; + int err; + + engine = intel_engine_lookup_user(arg, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + err = __igt_client_fill(engine); + if (err == -ENOMEM) + err = 0; + if (err) + return err; + } while (1); +} + int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_client_fill), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; if (!HAS_ENGINE(i915, BCS0)) return 0; - return i915_subtests(tests, i915->engine[BCS0]->kernel_context); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 8f22d3f18422..3f6079e1dfb6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -6,12 +6,20 @@ #include <linux/prime_numbers.h> +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" +#include "gt/intel_ring.h" + #include "i915_selftest.h" #include "selftests/i915_random.h" -static int cpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +struct context { + struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; +}; + +static int cpu_set(struct context *ctx, unsigned long offset, u32 v) { unsigned int needs_clflush; struct page *page; @@ -19,11 +27,11 @@ static int cpu_set(struct drm_i915_gem_object *obj, u32 *cpu; int err; - err = i915_gem_object_prepare_write(obj, &needs_clflush); + err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush); if (err) return err; - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); cpu = map + offset_in_page(offset); @@ -36,14 +44,12 @@ static int cpu_set(struct drm_i915_gem_object *obj, drm_clflush_virt_range(cpu, sizeof(*cpu)); kunmap_atomic(map); - i915_gem_object_finish_access(obj); + i915_gem_object_finish_access(ctx->obj); return 0; } -static int cpu_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int cpu_get(struct context *ctx, unsigned long offset, u32 
*v) { unsigned int needs_clflush; struct page *page; @@ -51,11 +57,11 @@ static int cpu_get(struct drm_i915_gem_object *obj, u32 *cpu; int err; - err = i915_gem_object_prepare_read(obj, &needs_clflush); + err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush); if (err) return err; - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); cpu = map + offset_in_page(offset); @@ -65,136 +71,137 @@ static int cpu_get(struct drm_i915_gem_object *obj, *v = *cpu; kunmap_atomic(map); - i915_gem_object_finish_access(obj); + i915_gem_object_finish_access(ctx->obj); return 0; } -static int gtt_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int gtt_set(struct context *ctx, unsigned long offset, u32 v) { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } iowrite32(v, &map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } -static int gtt_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } *v = ioread32(&map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } -static int wc_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int wc_set(struct context *ctx, unsigned long offset, u32 v) { u32 *map; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_wc_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - map = i915_gem_object_pin_map(obj, I915_MAP_WC); + map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); map[offset / sizeof(*map)] = v; - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(ctx->obj); return 0; } -static int wc_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int wc_get(struct context *ctx, unsigned long offset, u32 *v) { u32 *map; int err; - 
i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, false); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_wc_domain(ctx->obj, false); + i915_gem_object_unlock(ctx->obj); if (err) return err; - map = i915_gem_object_pin_map(obj, I915_MAP_WC); + map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); *v = map[offset / sizeof(*map)]; - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(ctx->obj); return 0; } -static int gpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int gpu_set(struct context *ctx, unsigned long offset, u32 v) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_request *rq; struct i915_vma *vma; u32 *cs; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_create(i915->engine[RCS0]->kernel_context); + rq = intel_engine_create_kernel_request(ctx->engine); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -207,12 +214,12 @@ static int gpu_set(struct drm_i915_gem_object *obj, return PTR_ERR(cs); } - if (INTEL_GEN(i915) >= 8) { + if (INTEL_GEN(ctx->engine->i915) >= 8) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = v; - } else if (INTEL_GEN(i915) >= 4) { + } else if (INTEL_GEN(ctx->engine->i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = i915_ggtt_offset(vma) + offset; @@ -226,7 +233,9 @@ static int gpu_set(struct drm_i915_gem_object *obj, intel_ring_advance(rq, cs); i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); i915_vma_unpin(vma); @@ -235,29 +244,34 @@ static int gpu_set(struct drm_i915_gem_object *obj, return err; } -static bool always_valid(struct drm_i915_private *i915) +static bool always_valid(struct context *ctx) { return true; } -static bool needs_fence_registers(struct drm_i915_private *i915) +static bool needs_fence_registers(struct context *ctx) { - return !i915_terminally_wedged(i915); + struct intel_gt *gt = ctx->engine->gt; + + if (intel_gt_is_wedged(gt)) + return false; + + return gt->ggtt->num_fences; } -static bool needs_mi_store_dword(struct drm_i915_private *i915) +static bool needs_mi_store_dword(struct context *ctx) { - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(ctx->engine->gt)) return false; - return intel_engine_can_store_dword(i915->engine[RCS0]); + return intel_engine_can_store_dword(ctx->engine); } static const struct igt_coherency_mode { const char *name; - int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); - int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); - bool (*valid)(struct drm_i915_private *i915); + int (*set)(struct context *ctx, unsigned long offset, u32 v); + int (*get)(struct context *ctx, unsigned long offset, u32 *v); + bool (*valid)(struct context *ctx); } igt_coherency_mode[] = { { 
"cpu", cpu_set, cpu_get, always_valid }, { "gtt", gtt_set, gtt_get, needs_fence_registers }, @@ -266,19 +280,37 @@ static const struct igt_coherency_mode { { }, }; +static struct intel_engine_cs * +random_engine(struct drm_i915_private *i915, struct rnd_state *prng) +{ + struct intel_engine_cs *engine; + unsigned int count; + + count = 0; + for_each_uabi_engine(engine, i915) + count++; + + count = i915_prandom_u32_max_state(count, prng); + for_each_uabi_engine(engine, i915) + if (count-- == 0) + return engine; + + return NULL; +} + static int igt_gem_coherency(void *arg) { const unsigned int ncachelines = PAGE_SIZE/64; - I915_RND_STATE(prng); struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; - struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; unsigned long count, n; u32 *offsets, *values; + I915_RND_STATE(prng); + struct context ctx; int err = 0; - /* We repeatedly write, overwrite and read from a sequence of + /* + * We repeatedly write, overwrite and read from a sequence of * cachelines in order to try and detect incoherency (unflushed writes * from either the CPU or GPU). Each setter/getter uses our cache * domain API which should prevent incoherency. @@ -292,34 +324,40 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + ctx.engine = random_engine(i915, &prng); + if (!ctx.engine) { + err = -ENODEV; + goto out_free; + } + pr_info("%s: using %s\n", __func__, ctx.engine->name); + intel_engine_pm_get(ctx.engine); + for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; - if (!over->valid(i915)) + if (!over->valid(&ctx)) continue; for (write = igt_coherency_mode; write->name; write++) { if (!write->set) continue; - if (!write->valid(i915)) + if (!write->valid(&ctx)) continue; for (read = igt_coherency_mode; read->name; read++) { if (!read->get) continue; - if (!read->valid(i915)) + if (!read->valid(&ctx)) continue; for_each_prime_number_from(count, 1, ncachelines) { - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto unlock; + ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(ctx.obj)) { + err = PTR_ERR(ctx.obj); + goto out_pm; } i915_random_reorder(offsets, ncachelines, &prng); @@ -327,7 +365,7 @@ static int igt_gem_coherency(void *arg) values[n] = prandom_u32_state(&prng); for (n = 0; n < count; n++) { - err = over->set(obj, offsets[n], ~values[n]); + err = over->set(&ctx, offsets[n], ~values[n]); if (err) { pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", n, count, over->name, err); @@ -336,7 +374,7 @@ static int igt_gem_coherency(void *arg) } for (n = 0; n < count; n++) { - err = write->set(obj, offsets[n], values[n]); + err = write->set(&ctx, offsets[n], values[n]); if (err) { pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", n, count, write->name, err); @@ -347,7 +385,7 @@ static int igt_gem_coherency(void *arg) for (n = 0; n < count; n++) { u32 found; - err = read->get(obj, offsets[n], &found); + err = read->get(&ctx, offsets[n], &found); if (err) { pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", n, count, read->name, err); @@ -365,20 +403,20 @@ static int igt_gem_coherency(void *arg) } } - i915_gem_object_put(obj); + i915_gem_object_put(ctx.obj); } } } } -unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); 
+out_pm: + intel_engine_pm_put(ctx.engine); +out_free: kfree(offsets); return err; put_object: - i915_gem_object_put(obj); - goto unlock; + i915_gem_object_put(ctx.obj); + goto out_pm; } int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index eaa2b16574c7..7fc46861a54d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -7,6 +7,9 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -24,16 +27,20 @@ #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) +static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx) +{ + /* single threaded, private ctx */ + return rcu_dereference_protected(ctx->vm, true); +} + static int live_nop_switch(void *arg) { const unsigned int nctx = 1024; struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; struct i915_gem_context **ctx; - enum intel_engine_id id; - intel_wakeref_t wakeref; struct igt_live_test t; - struct drm_file *file; + struct file *file; unsigned long n; int err = -ENODEV; @@ -52,43 +59,49 @@ static int live_nop_switch(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) { err = -ENOMEM; - goto out_unlock; + goto out_file; } for (n = 0; n < nctx; n++) { ctx[n] = live_context(i915, file); if (IS_ERR(ctx[n])) { err = PTR_ERR(ctx[n]); - goto out_unlock; + goto out_file; } } - for_each_engine(engine, i915, id) { - struct i915_request *rq; + for_each_uabi_engine(engine, i915) { + struct i915_request *rq = NULL; unsigned long end_time, prime; ktime_t times[2] = {}; times[0] = ktime_get_raw(); for (n = 0; n < nctx; n++) { - rq = igt_request_alloc(ctx[n], engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_unlock; + struct i915_request *this; + + this = igt_request_alloc(ctx[n], engine); + if (IS_ERR(this)) { + err = PTR_ERR(this); + goto out_file; } - i915_request_add(rq); + if (rq) { + i915_request_await_dma_fence(this, &rq->fence); + i915_request_put(rq); + } + rq = i915_request_get(this); + i915_request_add(this); } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Failed to populated %d contexts\n", nctx); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); + i915_request_put(rq); err = -EIO; - goto out_unlock; + goto out_file; } + i915_request_put(rq); times[1] = ktime_get_raw(); @@ -97,17 +110,25 @@ static int live_nop_switch(void *arg) err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; end_time = jiffies + i915_selftest.timeout_jiffies; for_each_prime_number_from(prime, 2, 8192) { times[1] = ktime_get_raw(); + rq = NULL; for (n = 0; n < prime; n++) { - rq = igt_request_alloc(ctx[n % nctx], engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_unlock; + struct i915_request *this; + + this = igt_request_alloc(ctx[n % nctx], engine); + if (IS_ERR(this)) { + err = PTR_ERR(this); + goto out_file; + } + + if (rq) { /* Force submission order */ + i915_request_await_dma_fence(this, &rq->fence); + i915_request_put(rq); } /* @@ -124,14 +145,18 @@ static int live_nop_switch(void *arg) * for latency. 
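 *
 * Roughly, each pass of the loop below does:
 *
 *	this = igt_request_alloc(ctx[n % nctx], engine);
 *	if (rq) {			/* chain onto the previous request */
 *		i915_request_await_dma_fence(this, &rq->fence);
 *		i915_request_put(rq);
 *	}
 *	rq = i915_request_get(this);	/* keep only the newest reference */
 *	i915_request_add(this);
 *
 * so the single i915_request_wait(rq, 0, HZ / 5) afterwards covers the
 * whole chain of requests in submission order.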
*/ - i915_request_add(rq); + rq = i915_request_get(this); + i915_request_add(this); } + GEM_BUG_ON(!rq); if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Switching between %ld contexts timed out\n", prime); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); + i915_request_put(rq); break; } + i915_request_put(rq); times[1] = ktime_sub(ktime_get_raw(), times[1]); if (prime == 2) @@ -143,7 +168,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_end(&t); if (err) - goto out_unlock; + goto out_file; pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -151,75 +176,237 @@ static int live_nop_switch(void *arg) prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); } -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); +out_file: + fput(file); return err; } -static struct i915_vma * -gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) +struct parallel_switch { + struct task_struct *tsk; + struct intel_context *ce[2]; +}; + +static int __live_parallel_switch1(void *data) { - struct drm_i915_gem_object *obj; - const int gen = INTEL_GEN(vma->vm->i915); - unsigned long n, size; - u32 *cmd; - int err; + struct parallel_switch *arg = data; + IGT_TIMEOUT(end_time); + unsigned long count; - size = (4 * count + 1) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(vma->vm->i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); + count = 0; + do { + struct i915_request *rq = NULL; + int err, n; - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; + err = 0; + for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) { + struct i915_request *prev = rq; + + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) { + i915_request_put(prev); + return PTR_ERR(rq); + } + + i915_request_get(rq); + if (prev) { + err = i915_request_await_dma_fence(rq, &prev->fence); + i915_request_put(prev); + } + + i915_request_add(rq); + } + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + return err; + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int __live_parallel_switchN(void *data) +{ + struct parallel_switch *arg = data; + struct i915_request *rq = NULL; + IGT_TIMEOUT(end_time); + unsigned long count; + int n; + + count = 0; + do { + for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + struct i915_request *prev = rq; + int err = 0; + + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) { + i915_request_put(prev); + return PTR_ERR(rq); + } + + i915_request_get(rq); + if (prev) { + err = i915_request_await_dma_fence(rq, &prev->fence); + i915_request_put(prev); + } + + i915_request_add(rq); + if (err) { + i915_request_put(rq); + return err; + } + } + + count++; + } while (!__igt_timeout(end_time, NULL)); + i915_request_put(rq); + + pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int live_parallel_switch(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + __live_parallel_switch1, + __live_parallel_switchN, + NULL, + }; + struct parallel_switch *data = NULL; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + int (* const *fn)(void *arg); + struct i915_gem_context *ctx; + struct intel_context *ce; + struct file *file; 
+ int n, m, count; + int err = 0; + + /* + * Check we can process switches on all engines simultaneously. + */ + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; } - GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); - offset += vma->node.start; + engines = i915_gem_context_lock_engines(ctx); + count = engines->num_engines; - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = value; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = value; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = value; + data = kcalloc(count, sizeof(*data), GFP_KERNEL); + if (!data) { + i915_gem_context_unlock_engines(ctx); + err = -ENOMEM; + goto out_file; + } + + m = 0; /* Use the first context as our template for the engines */ + for_each_gem_engine(ce, engines, it) { + err = intel_context_pin(ce); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out; } - offset += PAGE_SIZE; + data[m++].ce[0] = intel_context_get(ce); } - *cmd = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); + i915_gem_context_unlock_engines(ctx); - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; + /* Clone the same set of engines into the other contexts */ + for (n = 1; n < ARRAY_SIZE(data->ce); n++) { + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + + for (m = 0; m < count; m++) { + if (!data[m].ce[0]) + continue; + + ce = intel_context_create(data[m].ce[0]->engine); + if (IS_ERR(ce)) + goto out; + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + goto out; + } + + data[m].ce[n] = ce; + } } - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; + for (fn = func; !err && *fn; fn++) { + struct igt_live_test t; + int n; - return vma; + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + break; -err: - i915_gem_object_put(obj); - return ERR_PTR(err); + for (n = 0; n < count; n++) { + if (!data[n].ce[0]) + continue; + + data[n].tsk = kthread_run(*fn, &data[n], + "igt/parallel:%s", + data[n].ce[0]->engine->name); + if (IS_ERR(data[n].tsk)) { + err = PTR_ERR(data[n].tsk); + break; + } + get_task_struct(data[n].tsk); + } + + yield(); /* start all threads before we kthread_stop() */ + + for (n = 0; n < count; n++) { + int status; + + if (IS_ERR_OR_NULL(data[n].tsk)) + continue; + + status = kthread_stop(data[n].tsk); + if (status && !err) + err = status; + + put_task_struct(data[n].tsk); + data[n].tsk = NULL; + } + + if (igt_live_test_end(&t)) + err = -EIO; + } + +out: + for (n = 0; n < count; n++) { + for (m = 0; m < ARRAY_SIZE(data->ce); m++) { + if (!data[n].ce[m]) + continue; + + intel_context_unpin(data[n].ce[m]); + intel_context_put(data[n].ce[m]); + } + } + kfree(data); +out_file: + fput(file); + return err; } static unsigned long real_page_count(struct drm_i915_gem_object *obj) @@ -232,100 +419,39 @@ static unsigned long fake_page_count(struct drm_i915_gem_object *obj) return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; } -static int gpu_fill(struct drm_i915_gem_object *obj, - struct i915_gem_context *ctx, - struct intel_engine_cs 
*engine, +static int gpu_fill(struct intel_context *ce, + struct drm_i915_gem_object *obj, unsigned int dw) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; - struct i915_request *rq; struct i915_vma *vma; - struct i915_vma *batch; - unsigned int flags; int err; - GEM_BUG_ON(obj->base.size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(obj->base.size > ce->vm->total); + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - return err; - err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); if (err) return err; - /* Within the GTT the huge objects maps every page onto + /* + * Within the GTT the huge objects maps every page onto * its 1024 real pages (using phys_pfn = dma_pfn % 1024). * We set the nth dword within the page using the nth * mapping via the GTT - this should exercise the GTT mapping * whilst checking that each context provides a unique view * into the object. */ - batch = gpu_fill_dw(vma, - (dw * real_page_count(obj)) << PAGE_SHIFT | - (dw * sizeof(u32)), - real_page_count(obj), - dw); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_vma; - } - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - flags = 0; - if (INTEL_GEN(vm->i915) <= 5) - flags |= I915_DISPATCH_SECURE; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); - if (err) - goto err_request; - - i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); - if (err) - goto skip_request; - - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - if (err) - goto skip_request; - - i915_request_add(rq); - - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + err = igt_gpu_fill_dw(ce, vma, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); i915_vma_unpin(vma); - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); -err_vma: - i915_vma_unpin(vma); return err; } @@ -404,17 +530,17 @@ out_unmap: return err; } -static int file_add_object(struct drm_file *file, - struct drm_i915_gem_object *obj) +static int file_add_object(struct file *file, struct drm_i915_gem_object *obj) { int err; GEM_BUG_ON(obj->base.handle_count); /* tie the object to the drm_file for easy reaping */ - err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL); + err = idr_alloc(&to_drm_file(file)->object_idr, + &obj->base, 1, 0, GFP_KERNEL); if (err < 0) - return err; + return err; i915_gem_object_get(obj); obj->base.handle_count++; @@ -422,19 +548,21 @@ static int file_add_object(struct drm_file *file, } static struct drm_i915_gem_object * -create_test_object(struct i915_gem_context *ctx, - struct drm_file *file, +create_test_object(struct i915_address_space *vm, + struct file *file, struct list_head *objects) { struct drm_i915_gem_object *obj; - struct i915_address_space *vm = ctx->vm ?: &ctx->i915->ggtt.vm; u64 size; int err; + /* Keep in GEM's good graces */ + intel_gt_retire_requests(vm->gt); + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * 
PAGE_SIZE); size = round_down(size, DW_PER_PAGE * PAGE_SIZE); - obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); if (IS_ERR(obj)) return obj; @@ -462,11 +590,49 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) return npages / DW_PER_PAGE; } +static void throttle_release(struct i915_request **q, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (IS_ERR_OR_NULL(q[i])) + continue; + + i915_request_put(fetch_and_zero(&q[i])); + } +} + +static int throttle(struct intel_context *ce, + struct i915_request **q, int count) +{ + int i; + + if (!IS_ERR_OR_NULL(q[0])) { + if (i915_request_wait(q[0], + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) + return -EINTR; + + i915_request_put(q[0]); + } + + for (i = 0; i < count - 1; i++) + q[i] = q[i + 1]; + + q[i] = intel_context_create_request(ce); + if (IS_ERR(q[i])) + return PTR_ERR(q[i]); + + i915_request_get(q[i]); + i915_request_add(q[i]); + + return 0; +} + static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - enum intel_engine_id id; int err = -ENODEV; /* @@ -478,13 +644,14 @@ static int igt_ctx_exec(void *arg) if (!DRIVER_CAPS(i915)->has_logical_contexts) return 0; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { struct drm_i915_gem_object *obj = NULL; unsigned long ncontexts, ndwords, dw; + struct i915_request *tq[5] = {}; struct igt_live_test t; - struct drm_file *file; IGT_TIMEOUT(end_time); LIST_HEAD(objects); + struct file *file; if (!intel_engine_can_store_dword(engine)) continue; @@ -496,41 +663,53 @@ static int igt_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; ncontexts = 0; ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; - intel_wakeref_t wakeref; + struct intel_context *ce; - ctx = live_context(i915, file); + ctx = kernel_context(i915); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); + if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto out_unlock; + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; } } - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); - goto out_unlock; + engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; } if (++dw == max_dwords(obj)) { @@ -540,6 +719,9 @@ static int igt_ctx_exec(void *arg) ndwords++; ncontexts++; + + intel_context_put(ce); + kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -557,14 +739,16 @@ static int igt_ctx_exec(void *arg) dw += rem; } -out_unlock: +out_file: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); + fput(file); if (err) return err; + + i915_gem_drain_freed_objects(i915); } return 0; @@ -573,11 +757,11 @@ out_unlock: static int igt_shared_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; + struct i915_request *tq[5] = {}; struct i915_gem_context *parent; struct intel_engine_cs *engine; - enum intel_engine_id id; struct igt_live_test t; - struct drm_file *file; + struct file *file; int err = 0; /* @@ -592,24 +776,22 @@ static int igt_shared_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - parent = live_context(i915, file); if (IS_ERR(parent)) { err = PTR_ERR(parent); - goto out_unlock; + goto out_file; } if (!parent->vm) { /* not full-ppgtt; nothing to share */ err = 0; - goto out_unlock; + goto out_file; } err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { unsigned long ncontexts, ndwords, dw; struct drm_i915_gem_object *obj = NULL; IGT_TIMEOUT(end_time); @@ -623,7 +805,7 @@ static int igt_shared_ctx_exec(void *arg) ncontexts = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; - intel_wakeref_t wakeref; + struct intel_context *ce; ctx = kernel_context(i915); if (IS_ERR(ctx)) { @@ -631,25 +813,39 @@ static int igt_shared_ctx_exec(void *arg) goto out_test; } - __assign_ppgtt(ctx, parent->vm); + mutex_lock(&ctx->mutex); + __assign_ppgtt(ctx, ctx_vm(parent)); + mutex_unlock(&ctx->mutex); + + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); if (!obj) { - obj = create_test_object(parent, file, &objects); + obj = create_test_object(ctx_vm(parent), + file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } } - err = 0; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); + engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_test; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } @@ -662,6 +858,7 @@ static int igt_shared_ctx_exec(void *arg) ndwords++; ncontexts++; + intel_context_put(ce); kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -678,14 +875,15 @@ static int igt_shared_ctx_exec(void *arg) dw += rem; } + + i915_gem_drain_freed_objects(i915); } out_test: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); +out_file: + fput(file); return err; } @@ -717,6 +915,8 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(vma->vm->gt); + vma = i915_vma_instance(obj, vma->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -746,7 +946,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - vma = i915_vma_instance(obj, ce->gem_context->vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -779,21 +979,22 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, goto err_request; i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); i915_vma_unlock(batch); if (err) goto skip_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto skip_request; - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + i915_vma_unpin_and_release(&batch, 0); i915_vma_unpin(vma); *rq_out = i915_request_get(rq); @@ -807,8 +1008,7 @@ skip_request: err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); err_vma: i915_vma_unpin(vma); @@ -820,8 +1020,7 @@ err_vma: #define TEST_RESET BIT(2) static int -__sseu_prepare(struct drm_i915_private *i915, - const char *name, +__sseu_prepare(const char *name, unsigned int flags, struct intel_context *ce, struct igt_spinner **spin) @@ -837,14 +1036,11 @@ __sseu_prepare(struct drm_i915_private *i915, if (!*spin) return -ENOMEM; - ret = igt_spinner_init(*spin, i915); + ret = igt_spinner_init(*spin, ce->engine->gt); if (ret) goto err_free; - rq = igt_spinner_create_request(*spin, - ce->gem_context, - ce->engine, - MI_NOOP); + rq = igt_spinner_create_request(*spin, ce, MI_NOOP); if (IS_ERR(rq)) { ret = PTR_ERR(rq); goto err_fini; @@ -870,8 +1066,7 @@ err_free: } static int -__read_slice_count(struct drm_i915_private *i915, - struct intel_context *ce, +__read_slice_count(struct intel_context *ce, struct drm_i915_gem_object *obj, struct igt_spinner *spin, u32 *rpcs) @@ -900,7 +1095,7 @@ __read_slice_count(struct drm_i915_private *i915, return ret; } - if (INTEL_GEN(i915) >= 11) { + if (INTEL_GEN(ce->engine->i915) >= 11) { s_mask = GEN11_RPCS_S_CNT_MASK; s_shift = GEN11_RPCS_S_CNT_SHIFT; } else { @@ -943,8 +1138,7 @@ 
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, } static int -__sseu_finish(struct drm_i915_private *i915, - const char *name, +__sseu_finish(const char *name, unsigned int flags, struct intel_context *ce, struct drm_i915_gem_object *obj, @@ -956,19 +1150,18 @@ __sseu_finish(struct drm_i915_private *i915, int ret = 0; if (flags & TEST_RESET) { - ret = i915_reset_engine(ce->engine, "sseu"); + ret = intel_engine_reset(ce->engine, "sseu"); if (ret) goto out; } - ret = __read_slice_count(i915, ce, obj, + ret = __read_slice_count(ce, obj, flags & TEST_RESET ? NULL : spin, &rpcs); ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); if (ret) goto out; - ret = __read_slice_count(i915, ce->engine->kernel_context, obj, - NULL, &rpcs); + ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); out: @@ -976,11 +1169,11 @@ out: igt_spinner_end(spin); if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(i915, 0, MAX_SCHEDULE_TIMEOUT); + ret = igt_flush_test(ce->engine->i915); if (ret) return ret; - ret = __read_slice_count(i915, ce, obj, NULL, &rpcs); + ret = __read_slice_count(ce, obj, NULL, &rpcs); ret = __check_rpcs(name, rpcs, ret, expected, "Context", " after idle!"); } @@ -989,8 +1182,7 @@ out: } static int -__sseu_test(struct drm_i915_private *i915, - const char *name, +__sseu_test(const char *name, unsigned int flags, struct intel_context *ce, struct drm_i915_gem_object *obj, @@ -999,15 +1191,17 @@ __sseu_test(struct drm_i915_private *i915, struct igt_spinner *spin = NULL; int ret; - ret = __sseu_prepare(i915, name, flags, ce, &spin); + intel_engine_pm_get(ce->engine); + + ret = __sseu_prepare(name, flags, ce, &spin); if (ret) - return ret; + goto out_pm; - ret = __intel_context_reconfigure_sseu(ce, sseu); + ret = intel_context_reconfigure_sseu(ce, sseu); if (ret) goto out_spin; - ret = __sseu_finish(i915, name, flags, ce, obj, + ret = __sseu_finish(name, flags, ce, obj, hweight32(sseu.slice_mask), spin); out_spin: @@ -1016,6 +1210,8 @@ out_spin: igt_spinner_fini(spin); kfree(spin); } +out_pm: + intel_engine_pm_put(ce->engine); return ret; } @@ -1024,53 +1220,15 @@ __igt_ctx_sseu(struct drm_i915_private *i915, const char *name, unsigned int flags) { - struct intel_engine_cs *engine = i915->engine[RCS0]; - struct intel_sseu default_sseu = engine->sseu; struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - struct intel_sseu pg_sseu; - intel_wakeref_t wakeref; - struct drm_file *file; - int ret; - - if (INTEL_GEN(i915) < 9) - return 0; - - if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) - return 0; + int inst = 0; + int ret = 0; - if (hweight32(default_sseu.slice_mask) < 2) + if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg) return 0; - /* - * Gen11 VME friendly power-gated configuration with half enabled - * sub-slices. 
- */ - pg_sseu = default_sseu; - pg_sseu.slice_mask = 1; - pg_sseu.subslice_mask = - ~(~0 << (hweight32(default_sseu.subslice_mask) / 2)); - - pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", - name, flags, hweight32(default_sseu.slice_mask), - hweight32(pg_sseu.slice_mask)); - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - if (flags & TEST_RESET) - igt_global_reset_lock(i915); - - mutex_lock(&i915->drm.struct_mutex); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto out_unlock; - } - i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */ + igt_global_reset_lock(&i915->gt); obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { @@ -1078,56 +1236,79 @@ __igt_ctx_sseu(struct drm_i915_private *i915, goto out_unlock; } - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + do { + struct intel_engine_cs *engine; + struct intel_context *ce; + struct intel_sseu pg_sseu; - ce = i915_gem_context_get_engine(ctx, RCS0); - if (IS_ERR(ce)) { - ret = PTR_ERR(ce); - goto out_rpm; - } + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_RENDER, + inst++); + if (!engine) + break; - ret = intel_context_pin(ce); - if (ret) - goto out_context; + if (hweight32(engine->sseu.slice_mask) < 2) + continue; - /* First set the default mask. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); - if (ret) - goto out_fail; + /* + * Gen11 VME friendly power-gated configuration with + * half enabled sub-slices. + */ + pg_sseu = engine->sseu; + pg_sseu.slice_mask = 1; + pg_sseu.subslice_mask = + ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); + + pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", + engine->name, name, flags, + hweight32(engine->sseu.slice_mask), + hweight32(pg_sseu.slice_mask)); + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + goto out_put; + } - /* Then set a power-gated configuration. */ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; + ret = intel_context_pin(ce); + if (ret) + goto out_ce; - /* Back to defaults. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); - if (ret) - goto out_fail; + /* First set the default mask. */ + ret = __sseu_test(name, flags, ce, obj, engine->sseu); + if (ret) + goto out_unpin; - /* One last power-gated configuration for the road. */ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; + /* Then set a power-gated configuration. */ + ret = __sseu_test(name, flags, ce, obj, pg_sseu); + if (ret) + goto out_unpin; + + /* Back to defaults. */ + ret = __sseu_test(name, flags, ce, obj, engine->sseu); + if (ret) + goto out_unpin; + + /* One last power-gated configuration for the road. 
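 *
 * (The power-gated mask built above keeps only the low half of the
 *  sub-slice bits: pg_sseu.subslice_mask = ~(~0 << (n / 2)) with
 *  n = hweight32(engine->sseu.subslice_mask). For example, with n == 8:
 *  ~(~0 << 4) == ~0xfffffff0 == 0x0000000f, i.e. four of the eight
 *  sub-slices stay enabled alongside the single slice.)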
*/ + ret = __sseu_test(name, flags, ce, obj, pg_sseu); + if (ret) + goto out_unpin; -out_fail: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) +out_unpin: + intel_context_unpin(ce); +out_ce: + intel_context_put(ce); + } while (!ret); + + if (igt_flush_test(i915)) ret = -EIO; - intel_context_unpin(ce); -out_context: - intel_context_put(ce); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); +out_put: i915_gem_object_put(obj); out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - if (flags & TEST_RESET) - igt_global_reset_unlock(i915); - - mock_file_free(i915, file); + igt_global_reset_unlock(&i915->gt); if (ret) pr_err("%s: Failed with %d!\n", name, ret); @@ -1161,15 +1342,18 @@ static int igt_ctx_sseu(void *arg) static int igt_ctx_readonly(void *arg) { struct drm_i915_private *i915 = arg; + unsigned long idx, ndwords, dw, num_engines; struct drm_i915_gem_object *obj = NULL; + struct i915_request *tq[5] = {}; + struct i915_gem_engines_iter it; struct i915_address_space *vm; struct i915_gem_context *ctx; - unsigned long idx, ndwords, dw; + struct intel_context *ce; struct igt_live_test t; - struct drm_file *file; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); LIST_HEAD(objects); + struct file *file; int err = -ENODEV; /* @@ -1182,56 +1366,63 @@ static int igt_ctx_readonly(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - vm = ctx->vm ?: &i915->mm.aliasing_ppgtt->vm; + vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { err = 0; - goto out_unlock; + goto out_file; } + num_engines = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) + if (intel_engine_can_store_dword(ce->engine)) + num_engines++; + i915_gem_context_unlock_engines(ctx); + ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { - struct intel_engine_cs *engine; - unsigned int id; - - for_each_engine(engine, i915, id) { - intel_wakeref_t wakeref; - - if (!intel_engine_can_store_dword(engine)) + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto out_unlock; + i915_gem_context_unlock_engines(ctx); + goto out_file; } if (prandom_u32_state(&prng) & 1) i915_gem_object_set_readonly(obj); } - err = 0; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); - goto out_unlock; + ce->engine->name, + yesno(!!ctx_vm(ctx)), + err); + i915_gem_context_unlock_engines(ctx); + goto out_file; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out_file; } if (++dw == max_dwords(obj)) { @@ -1240,9 +1431,10 @@ static int igt_ctx_readonly(void *arg) } ndwords++; } + i915_gem_context_unlock_engines(ctx); } - pr_info("Submitted %lu dwords (across %u engines)\n", - ndwords, RUNTIME_INFO(i915)->num_engines); + pr_info("Submitted %lu dwords (across %lu engines)\n", + ndwords, num_engines); dw = 0; idx = 0; @@ -1262,19 +1454,19 @@ static int igt_ctx_readonly(void *arg) dw += rem; } -out_unlock: +out_file: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); + fput(file); return err; } -static int check_scratch(struct i915_gem_context *ctx, u64 offset) +static int check_scratch(struct i915_address_space *vm, u64 offset) { struct drm_mm_node *node = - __drm_mm_interval_first(&ctx->vm->mm, + __drm_mm_interval_first(&vm->mm, offset, offset + sizeof(u32) - 1); if (!node || node->start > offset) return 0; @@ -1292,6 +1484,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *vma; u32 *cmd; @@ -1306,7 +1499,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto out; } *cmd++ = MI_STORE_DWORD_IMM_GEN4; @@ -1322,17 +1515,20 @@ static int write_to_scratch(struct i915_gem_context *ctx, __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + intel_gt_chipset_flush(engine->gt); + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto out_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto out_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1347,26 +1543,27 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto err_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, 0); i915_vma_unlock(vma); if (err) goto skip_request; i915_vma_unpin(vma); - i915_vma_close(vma); - i915_vma_put(vma); i915_request_add(rq); - return 0; - + goto out_vm; skip_request: i915_request_skip(rq, err); err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); -err: +out_vm: + i915_vm_put(vm); +out: i915_gem_object_put(obj); return err; } @@ -1377,6 +1574,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! 
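 *
 * (For reference, igt_vm_isolation() below pairs this helper with
 *  write_to_scratch(): a value is written at a random, dword-aligned
 *  offset inside ctx_a's ppgtt,
 *
 *	div64_u64_rem(i915_prandom_u64_state(&prng), vm_total, &offset);
 *	offset = round_down(offset, alignof_dword);
 *	offset += I915_GTT_PAGE_SIZE;	/* skip the first page */
 *
 *  and then read back at the same offset through ctx_b; any non-zero
 *  readback fails the test with -EINVAL, since the two ppgtts would
 *  not be isolated.)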
*/ const u32 result = 0x100; struct i915_request *rq; @@ -1393,7 +1591,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto out; } memset(cmd, POISON_INUSE, PAGE_SIZE); @@ -1419,17 +1617,20 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + intel_gt_chipset_flush(engine->gt); + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto out_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto out_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1444,7 +1645,9 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto err_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto skip_request; @@ -1458,27 +1661,27 @@ static int read_from_scratch(struct i915_gem_context *ctx, err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) - goto err; + goto out_vm; cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto out_vm; } *value = cmd[result / sizeof(*cmd)]; i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - - return 0; + goto out_vm; skip_request: i915_request_skip(rq, err); err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); -err: +out_vm: + i915_vm_put(vm); +out: i915_gem_object_put(obj); return err; } @@ -1487,13 +1690,11 @@ static int igt_vm_isolation(void *arg) { struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx_a, *ctx_b; + unsigned long num_engines, count; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; - struct drm_file *file; I915_RND_STATE(prng); - unsigned long count; - unsigned int id; + struct file *file; u64 vm_total; int err; @@ -1509,36 +1710,33 @@ static int igt_vm_isolation(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx_a = live_context(i915, file); if (IS_ERR(ctx_a)) { err = PTR_ERR(ctx_a); - goto out_unlock; + goto out_file; } ctx_b = live_context(i915, file); if (IS_ERR(ctx_b)) { err = PTR_ERR(ctx_b); - goto out_unlock; + goto out_file; } /* We can only test vm isolation, if the vm are distinct */ - if (ctx_a->vm == ctx_b->vm) - goto out_unlock; + if (ctx_vm(ctx_a) == ctx_vm(ctx_b)) + goto out_file; - vm_total = ctx_a->vm->total; - GEM_BUG_ON(ctx_b->vm->total != vm_total); + vm_total = ctx_vm(ctx_a)->total; + GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total); vm_total -= I915_GTT_PAGE_SIZE; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; - for_each_engine(engine, i915, id) { + num_engines = 0; + for_each_uabi_engine(engine, i915) { IGT_TIMEOUT(end_time); unsigned long this = 0; @@ -1551,7 +1749,7 @@ static int igt_vm_isolation(void *arg) div64_u64_rem(i915_prandom_u64_state(&prng), vm_total, &offset); - offset &= -sizeof(u32); + offset = round_down(offset, alignof_dword); offset += I915_GTT_PAGE_SIZE; err = write_to_scratch(ctx_a, engine, @@ 
-1560,7 +1758,7 @@ static int igt_vm_isolation(void *arg) err = read_from_scratch(ctx_b, engine, offset, &value); if (err) - goto out_rpm; + goto out_file; if (value) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1569,42 +1767,24 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_rpm; + goto out_file; } this++; } count += this; + num_engines++; } - pr_info("Checked %lu scratch offsets across %d engines\n", - count, RUNTIME_INFO(i915)->num_engines); + pr_info("Checked %lu scratch offsets across %lu engines\n", + count, num_engines); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); -out_unlock: +out_file: if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); + fput(file); return err; } -static __maybe_unused const char * -__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (engines == ALL_ENGINES) - return "all"; - - for_each_engine_masked(engine, i915, engines, tmp) - return engine->name; - - return "none"; -} - static bool skip_unused_engines(struct intel_context *ce, void *data) { return !ce->state; @@ -1632,13 +1812,9 @@ static int mock_context_barrier(void *arg) * a request; useful for retiring old state after loading new. */ - mutex_lock(&i915->drm.struct_mutex); - ctx = mock_context(i915, "mock"); - if (!ctx) { - err = -ENOMEM; - goto unlock; - } + if (!ctx) + return -ENOMEM; counter = 0; err = context_barrier_task(ctx, 0, @@ -1711,8 +1887,6 @@ static int mock_context_barrier(void *arg) out: mock_context_close(ctx); -unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; #undef pr_fmt #define pr_fmt(x) x @@ -1736,10 +1910,11 @@ int i915_gem_context_mock_selftests(void) return err; } -int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) +int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_switch), + SUBTEST(live_parallel_switch), SUBTEST(igt_ctx_exec), SUBTEST(igt_ctx_readonly), SUBTEST(igt_ctx_sseu), @@ -1747,8 +1922,8 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) SUBTEST(igt_vm_isolation), }; - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_subtests(tests, dev_priv); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index e3a64edef918..2a52b92586b9 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -20,7 +20,7 @@ static int igt_dmabuf_export(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); i915_gem_object_put(obj); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", @@ -44,7 +44,7 @@ static int igt_dmabuf_import_self(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", (int)PTR_ERR(dmabuf)); @@ -219,7 +219,7 @@ static int igt_dmabuf_export_vmap(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = 
i915_gem_prime_export(&obj->base, 0); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", (int)PTR_ERR(dmabuf)); @@ -254,106 +254,6 @@ err_obj: return err; } -static int igt_dmabuf_export_kmap(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - void *ptr; - int err; - - obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); - i915_gem_object_put(obj); - if (IS_ERR(dmabuf)) { - err = PTR_ERR(dmabuf); - pr_err("i915_gem_prime_export failed with err=%d\n", err); - return err; - } - - ptr = dma_buf_kmap(dmabuf, 0); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - - if (memchr_inv(ptr, 0, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 0, ptr); - pr_err("Exported page[0] not initialiased to zero!\n"); - err = -EINVAL; - goto err; - } - - memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_kunmap(dmabuf, 0, ptr); - - ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(ptr)) { - err = PTR_ERR(ptr); - pr_err("i915_gem_object_pin_map failed with err=%d\n", err); - goto err; - } - memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE); - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - ptr = dma_buf_kmap(dmabuf, 1); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - - if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 1, ptr); - pr_err("Exported page[1] not set to 0xaa!\n"); - err = -EINVAL; - goto err; - } - - memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_kunmap(dmabuf, 1, ptr); - - ptr = dma_buf_kmap(dmabuf, 0); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 0, ptr); - pr_err("Exported page[0] did not retain 0xc5!\n"); - err = -EINVAL; - goto err; - } - dma_buf_kunmap(dmabuf, 0, ptr); - - ptr = dma_buf_kmap(dmabuf, 2); - if (ptr) { - pr_err("Erroneously kmapped beyond the end of the object!\n"); - dma_buf_kunmap(dmabuf, 2, ptr); - err = -EINVAL; - goto err; - } - - ptr = dma_buf_kmap(dmabuf, -1); - if (ptr) { - pr_err("Erroneously kmapped before the start of the object!\n"); - dma_buf_kunmap(dmabuf, -1, ptr); - err = -EINVAL; - goto err; - } - - err = 0; -err: - dma_buf_put(dmabuf); - return err; -} - int i915_gem_dmabuf_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -362,7 +262,6 @@ int i915_gem_dmabuf_mock_selftests(void) SUBTEST(igt_dmabuf_import), SUBTEST(igt_dmabuf_import_ownership), SUBTEST(igt_dmabuf_export_vmap), - SUBTEST(igt_dmabuf_export_kmap), }; struct drm_i915_private *i915; int err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5c81f4b4813a..ef7c74cff28a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -6,10 +6,15 @@ #include <linux/prime_numbers.h> +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gem/i915_gem_region.h" #include "huge_gem_object.h" #include "i915_selftest.h" +#include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" +#include "selftests/igt_mmap.h" struct tile { unsigned int width; @@ -75,18 +80,103 @@ static u64 tiled_offset(const struct tile *tile, u64 v) static int check_partial_mapping(struct drm_i915_gem_object *obj, const struct tile *tile, - unsigned long 
end_time) + struct rnd_state *prng) { - const unsigned int nreal = obj->scratch / PAGE_SIZE; const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_ggtt_view view; struct i915_vma *vma; unsigned long page; + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; int err; - if (igt_timeout(end_time, - "%s: timed out before tiling=%d stride=%d\n", - __func__, tile->tiling, tile->stride)) - return -EINTR; + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) { + pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n", + tile->tiling, tile->stride, err); + return err; + } + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); + if (err) { + pr_err("Failed to flush to GTT write domain; err=%d\n", err); + return err; + } + + page = i915_prandom_u32_max_state(npages, prng); + view = compute_partial_view(obj, page, MIN_CHUNK_PAGES); + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(vma)); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(io)); + err = PTR_ERR(io); + goto out; + } + + iowrite32(page, io + n * PAGE_SIZE / sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + goto out; + + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile->tiling ? tile_row_pages(obj) : 0, + vma->fence ? 
vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + +out: + __i915_vma_put(vma); + return err; +} + +static int check_partial_mappings(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); if (err) { @@ -143,7 +233,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; - i915_gem_flush_ggtt_writes(to_i915(obj->base.dev)); + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); @@ -168,12 +258,43 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (err) return err; - i915_vma_destroy(vma); + __i915_vma_put(vma); + + if (igt_timeout(end_time, + "%s: timed out after tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; } return 0; } +static unsigned int +setup_tile_size(struct tile *tile, struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) <= 2) { + tile->height = 16; + tile->width = 128; + tile->size = 11; + } else if (tile->tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile->height = 32; + tile->width = 128; + tile->size = 12; + } else { + tile->height = 8; + tile->width = 512; + tile->size = 12; + } + + if (INTEL_GEN(i915) < 4) + return 8192 / tile->width; + else if (INTEL_GEN(i915) < 7) + return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width; + else + return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width; +} + static int igt_partial_tiling(void *arg) { const unsigned int nreal = 1 << 12; /* largest tile row x2 */ @@ -183,6 +304,9 @@ static int igt_partial_tiling(void *arg) int tiling; int err; + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return 0; + /* We want to check the page mapping and fencing of a large object * mmapped through the GTT. The object we create is larger than can * possibly be mmaped as a whole, and so we must use partial GGTT vma. 
@@ -204,7 +328,6 @@ static int igt_partial_tiling(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); if (1) { @@ -218,7 +341,7 @@ static int igt_partial_tiling(void *arg) tile.swizzle = I915_BIT_6_SWIZZLE_NONE; tile.tiling = I915_TILING_NONE; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err && err != -EINTR) goto out_unlock; } @@ -240,10 +363,10 @@ static int igt_partial_tiling(void *arg) tile.tiling = tiling; switch (tiling) { case I915_TILING_X: - tile.swizzle = i915->mm.bit_6_swizzle_x; + tile.swizzle = i915->ggtt.bit_6_swizzle_x; break; case I915_TILING_Y: - tile.swizzle = i915->mm.bit_6_swizzle_y; + tile.swizzle = i915->ggtt.bit_6_swizzle_y; break; } @@ -252,31 +375,11 @@ static int igt_partial_tiling(void *arg) tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) continue; - if (INTEL_GEN(i915) <= 2) { - tile.height = 16; - tile.width = 128; - tile.size = 11; - } else if (tile.tiling == I915_TILING_Y && - HAS_128_BYTE_Y_TILING(i915)) { - tile.height = 32; - tile.width = 128; - tile.size = 12; - } else { - tile.height = 8; - tile.width = 512; - tile.size = 12; - } - - if (INTEL_GEN(i915) < 4) - max_pitch = 8192 / tile.width; - else if (INTEL_GEN(i915) < 7) - max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; - else - max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + max_pitch = setup_tile_size(&tile, i915); for (pitch = max_pitch; pitch; pitch >>= 1) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -284,7 +387,7 @@ static int igt_partial_tiling(void *arg) if (pitch > 2 && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch - 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -293,7 +396,7 @@ static int igt_partial_tiling(void *arg) if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch + 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -304,7 +407,7 @@ static int igt_partial_tiling(void *arg) if (INTEL_GEN(i915) >= 4) { for_each_prime_number(pitch, max_pitch) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -317,82 +420,188 @@ next_tiling: ; out_unlock: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); return err; } -static int make_obj_busy(struct drm_i915_gem_object *obj) +static int igt_smoke_tiling(void *arg) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_request *rq; - struct i915_vma *vma; + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; + I915_RND_STATE(prng); + unsigned long count; + IGT_TIMEOUT(end); int err; - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return 0; - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; + /* + * igt_partial_tiling() does an exhastive check of partial tiling + * chunking, but will 
undoubtably run out of time. Here, we do a + * randomised search and hope over many runs of 1s with different + * seeds we will do a thorough check. + * + * Remember to look at the st_seed if we see a flip-flop in BAT! + */ - rq = i915_request_create(i915->engine[RCS0]->kernel_context); - if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); + if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) + return 0; + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; } - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); - i915_request_add(rq); + count = 0; + do { + struct tile tile; - i915_vma_unpin(vma); - i915_gem_object_put(obj); /* leave it only alive via its active ref */ + tile.tiling = + i915_prandom_u32_max_state(I915_TILING_Y + 1, &prng); + switch (tile.tiling) { + case I915_TILING_NONE: + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + break; + + case I915_TILING_X: + tile.swizzle = i915->ggtt.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->ggtt.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (tile.tiling != I915_TILING_NONE) { + unsigned int max_pitch = setup_tile_size(&tile, i915); + + tile.stride = + i915_prandom_u32_max_state(max_pitch, &prng); + tile.stride = (1 + tile.stride) * tile.width; + if (INTEL_GEN(i915) < 4) + tile.stride = rounddown_pow_of_two(tile.stride); + } + + err = check_partial_mapping(obj, &tile, &prng); + if (err) + break; + + count++; + } while (!__igt_timeout(end, NULL)); + pr_info("%s: Completed %lu trials\n", __func__, count); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); return err; } +static int make_obj_busy(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_engine_cs *engine; + + for_each_uabi_engine(engine, i915) { + struct i915_request *rq; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, + EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + + i915_request_add(rq); + i915_vma_unpin(vma); + if (err) + return err; + } + + i915_gem_object_put(obj); /* leave it only alive via its active ref */ + return 0; +} + static bool assert_mmap_offset(struct drm_i915_private *i915, unsigned long size, int expected) { struct drm_i915_gem_object *obj; - int err; + struct i915_mmap_offset *mmo; obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) return PTR_ERR(obj); - err = create_mmap_offset(obj); + mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); i915_gem_object_put(obj); - return err == expected; + return PTR_ERR_OR_ZERO(mmo) 
== expected; } static void disable_retire_worker(struct drm_i915_private *i915) { - i915_gem_shrinker_unregister(i915); - - intel_gt_pm_get(i915); - - cancel_delayed_work_sync(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); + i915_gem_driver_unregister__shrinker(i915); + intel_gt_pm_get(&i915->gt); + cancel_delayed_work_sync(&i915->gt.requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) { - intel_gt_pm_put(i915); + igt_flush_test(i915); + intel_gt_pm_put(&i915->gt); + i915_gem_driver_register__shrinker(i915); +} - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); +static void mmap_offset_lock(struct drm_i915_private *i915) + __acquires(&i915->drm.vma_offset_manager->vm_lock) +{ + write_lock(&i915->drm.vma_offset_manager->vm_lock); +} - i915_gem_shrinker_register(i915); +static void mmap_offset_unlock(struct drm_i915_private *i915) + __releases(&i915->drm.vma_offset_manager->vm_lock) +{ + write_unlock(&i915->drm.vma_offset_manager->vm_lock); } static int igt_mmap_offset_exhaustion(void *arg) @@ -400,26 +609,50 @@ static int igt_mmap_offset_exhaustion(void *arg) struct drm_i915_private *i915 = arg; struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; struct drm_i915_gem_object *obj; - struct drm_mm_node resv, *hole; - u64 hole_start, hole_end; - int loop, err; + struct drm_mm_node *hole, *next; + struct i915_mmap_offset *mmo; + int loop, err = 0; /* Disable background reaper */ disable_retire_worker(i915); GEM_BUG_ON(!i915->gt.awake); + intel_gt_retire_requests(&i915->gt); + i915_gem_drain_freed_objects(i915); /* Trim the device mmap space to only a page */ - memset(&resv, 0, sizeof(resv)); - drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { - resv.start = hole_start; - resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ - err = drm_mm_reserve_node(mm, &resv); + mmap_offset_lock(i915); + loop = 1; /* PAGE_SIZE units */ + list_for_each_entry_safe(hole, next, &mm->hole_stack, hole_stack) { + struct drm_mm_node *resv; + + resv = kzalloc(sizeof(*resv), GFP_NOWAIT); + if (!resv) { + err = -ENOMEM; + goto out_park; + } + + resv->start = drm_mm_hole_node_start(hole) + loop; + resv->size = hole->hole_size - loop; + resv->color = -1ul; + loop = 0; + + if (!resv->size) { + kfree(resv); + continue; + } + + pr_debug("Reserving hole [%llx + %llx]\n", + resv->start, resv->size); + + err = drm_mm_reserve_node(mm, resv); if (err) { pr_err("Failed to trim VMA manager, err=%d\n", err); + kfree(resv); goto out_park; } - break; } + GEM_BUG_ON(!list_is_singular(&mm->hole_stack)); + mmap_offset_unlock(i915); /* Just fits! 
*/ if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { @@ -442,9 +675,10 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } - err = create_mmap_offset(obj); - if (err) { + mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); + if (IS_ERR(mmo)) { pr_err("Unable to insert object into reclaimed hole\n"); + err = PTR_ERR(mmo); goto err_obj; } @@ -458,7 +692,7 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) break; obj = i915_gem_object_create_internal(i915, PAGE_SIZE); @@ -467,27 +701,24 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = make_obj_busy(obj); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); goto err_obj; } - - /* NB we rely on the _active_ reference to access obj now */ - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - err = create_mmap_offset(obj); - if (err) { - pr_err("[loop %d] create_mmap_offset failed with err=%d\n", - loop, err); - goto out; - } } out: - drm_mm_remove_node(&resv); + mmap_offset_lock(i915); out_park: + drm_mm_for_each_node_safe(hole, next, mm) { + if (hole->color != -1ul) + continue; + + drm_mm_remove_node(hole); + kfree(hole); + } + mmap_offset_unlock(i915); restore_retire_worker(i915); return err; err_obj: @@ -495,11 +726,515 @@ err_obj: goto out; } +static int gtt_set(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + void __iomem *map; + int err = 0; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out; + } + + memset_io(map, POISON_INUSE, obj->base.size); + i915_vma_unpin_iomap(vma); + +out: + intel_gt_pm_put(vma->vm->gt); + return err; +} + +static int gtt_check(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + void __iomem *map; + int err = 0; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out; + } + + if (memchr_inv((void __force *)map, POISON_FREE, obj->base.size)) { + pr_err("%s: Write via mmap did not land in backing store (GTT)\n", + obj->mm.region->name); + err = -EINVAL; + } + i915_vma_unpin_iomap(vma); + +out: + intel_gt_pm_put(vma->vm->gt); + return err; +} + +static int wc_set(struct drm_i915_gem_object *obj) +{ + void *vaddr; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + memset(vaddr, POISON_INUSE, obj->base.size); + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int wc_check(struct drm_i915_gem_object *obj) +{ + void *vaddr; + int err = 0; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + if (memchr_inv(vaddr, POISON_FREE, obj->base.size)) { + pr_err("%s: Write via mmap did not land in backing store (WC)\n", + obj->mm.region->name); + err = -EINVAL; + } + i915_gem_object_unpin_map(obj); + + return err; +} + +static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) +{ + if (type == I915_MMAP_TYPE_GTT && + 
!i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) + return false; + + if (type != I915_MMAP_TYPE_GTT && + !i915_gem_object_type_has(obj, + I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_HAS_IOMEM)) + return false; + + return true; +} + +#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24)) +static int __igt_mmap(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) +{ + struct i915_mmap_offset *mmo; + struct vm_area_struct *area; + unsigned long addr; + int err, i; + + if (!can_mmap(obj, type)) + return 0; + + err = wc_set(obj); + if (err == -ENXIO) + err = gtt_set(obj); + if (err) + return err; + + mmo = mmap_offset_attach(obj, type, NULL); + if (IS_ERR(mmo)) + return PTR_ERR(mmo); + + addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + if (IS_ERR_VALUE(addr)) + return addr; + + pr_debug("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, type, addr); + + area = find_vma(current->mm, addr); + if (!area) { + pr_err("%s: Did not create a vm_area_struct for the mmap\n", + obj->mm.region->name); + err = -EINVAL; + goto out_unmap; + } + + if (area->vm_private_data != mmo) { + pr_err("%s: vm_area_struct did not point back to our mmap_offset object!\n", + obj->mm.region->name); + err = -EINVAL; + goto out_unmap; + } + + for (i = 0; i < obj->base.size / sizeof(u32); i++) { + u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); + u32 x; + + if (get_user(x, ux)) { + pr_err("%s: Unable to read from mmap, offset:%zd\n", + obj->mm.region->name, i * sizeof(x)); + err = -EFAULT; + goto out_unmap; + } + + if (x != expand32(POISON_INUSE)) { + pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n", + obj->mm.region->name, + i * sizeof(x), x, expand32(POISON_INUSE)); + err = -EINVAL; + goto out_unmap; + } + + x = expand32(POISON_FREE); + if (put_user(x, ux)) { + pr_err("%s: Unable to write to mmap, offset:%zd\n", + obj->mm.region->name, i * sizeof(x)); + err = -EFAULT; + goto out_unmap; + } + } + + if (type == I915_MMAP_TYPE_GTT) + intel_gt_flush_ggtt_writes(&i915->gt); + + err = wc_check(obj); + if (err == -ENXIO) + err = gtt_check(obj); +out_unmap: + vm_munmap(addr, obj->base.size); + return err; +} + +static int igt_mmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + unsigned long sizes[] = { + PAGE_SIZE, + mr->min_page_size, + SZ_4M, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, sizes[i], 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC); + + i915_gem_object_put(obj); + if (err) + return err; + } + } + + return 0; +} + +static int __igt_mmap_gpu(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) +{ + struct intel_engine_cs *engine; + struct i915_mmap_offset *mmo; + unsigned long addr; + u32 __user *ux; + u32 bbe; + int err; + + /* + * Verify that the mmap access into the backing store aligns with + * that of the GPU, i.e. that mmap is indeed writing into the same + * page as being read by the GPU. 
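As a side note on the pattern check above: expand32() just replicates one byte into every byte of a u32, so a POISON_INUSE (0x5a) fill must read back as 0x5a5a5a5a through the mapping before POISON_FREE (0x6b) is written back. A minimal userspace sketch of that check, not part of the patch, with a plain array standing in for the GEM mmap:

#include <assert.h>
#include <stdint.h>

#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24))

int main(void)
{
	uint32_t page[1024];	/* stands in for the mmapped object */
	unsigned int i;

	for (i = 0; i < 1024; i++)
		page[i] = expand32(0x5au);		/* POISON_INUSE fill */

	for (i = 0; i < 1024; i++) {
		assert(page[i] == 0x5a5a5a5au);		/* read-back check */
		page[i] = expand32(0x6bu);		/* POISON_FREE write-back */
	}
	return 0;
}
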
+ */ + + if (!can_mmap(obj, type)) + return 0; + + err = wc_set(obj); + if (err == -ENXIO) + err = gtt_set(obj); + if (err) + return err; + + mmo = mmap_offset_attach(obj, type, NULL); + if (IS_ERR(mmo)) + return PTR_ERR(mmo); + + addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + if (IS_ERR_VALUE(addr)) + return addr; + + ux = u64_to_user_ptr((u64)addr); + bbe = MI_BATCH_BUFFER_END; + if (put_user(bbe, ux)) { + pr_err("%s: Unable to write to mmap\n", obj->mm.region->name); + err = -EFAULT; + goto out_unmap; + } + + if (type == I915_MMAP_TYPE_GTT) + intel_gt_flush_ggtt_writes(&i915->gt); + + for_each_uabi_engine(engine, i915) { + struct i915_request *rq; + struct i915_vma *vma; + + vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unmap; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_unmap; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unpin; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + + err = engine->emit_bb_start(rq, vma->node.start, 0, 0); + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(engine->i915->drm.dev); + + pr_err("%s(%s, %s): Failed to execute batch\n", + __func__, engine->name, obj->mm.region->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + intel_gt_set_wedged(engine->gt); + err = -EIO; + } + i915_request_put(rq); + +out_unpin: + i915_vma_unpin(vma); + if (err) + goto out_unmap; + } + +out_unmap: + vm_munmap(addr, obj->base.size); + return err; +} + +static int igt_mmap_gpu(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC); + + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + +static int check_present_pte(pte_t *pte, unsigned long addr, void *data) +{ + if (!pte_present(*pte) || pte_none(*pte)) { + pr_err("missing PTE:%lx\n", + (addr - (unsigned long)data) >> PAGE_SHIFT); + return -EINVAL; + } + + return 0; +} + +static int check_absent_pte(pte_t *pte, unsigned long addr, void *data) +{ + if (pte_present(*pte) && !pte_none(*pte)) { + pr_err("present PTE:%lx; expected to be revoked\n", + (addr - (unsigned long)data) >> PAGE_SHIFT); + return -EINVAL; + } + + return 0; +} + +static int check_present(unsigned long addr, unsigned long len) +{ + return apply_to_page_range(current->mm, addr, len, + check_present_pte, (void *)addr); +} + +static int check_absent(unsigned long addr, unsigned long len) +{ + return apply_to_page_range(current->mm, addr, len, + check_absent_pte, (void *)addr); +} + +static int prefault_range(u64 start, u64 len) +{ + const char __user *addr, *end; + char __maybe_unused c; + int err; + + addr = u64_to_user_ptr(start); + end = addr + len; + + for (; addr < end; addr += PAGE_SIZE) { + err = __get_user(c, addr); + if (err) + return err; + } + + return __get_user(c, end - 1); +} + +static int 
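check_present() and check_absent() above walk the CPU page tables of the test process to confirm that prefaulting populated the user PTEs and that revoking the mmap later clears them. As a rough userspace analogue only (an illustration, not how the selftest is implemented), mincore() before and after MADV_DONTNEED shows the same populate-then-revoke check on an anonymous mapping, assuming 4 KiB pages:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

static int all_match(void *addr, size_t len, unsigned char expect)
{
	unsigned char vec[16];		/* one byte per page, 16 pages below */
	size_t i, pages = len / 4096;

	if (mincore(addr, len, vec))
		return 0;
	for (i = 0; i < pages; i++)
		if ((vec[i] & 1) != expect)
			return 0;
	return 1;
}

int main(void)
{
	size_t len = 16 * 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;

	memset(p, 0x5a, len);			/* prefault, cf. prefault_range() */
	printf("present: %d\n", all_match(p, len, 1));

	madvise(p, len, MADV_DONTNEED);		/* stand-in for the PTE revoke */
	printf("absent:  %d\n", all_match(p, len, 0));

	munmap(p, len);
	return 0;
}
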
__igt_mmap_revoke(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) +{ + struct i915_mmap_offset *mmo; + unsigned long addr; + int err; + + if (!can_mmap(obj, type)) + return 0; + + mmo = mmap_offset_attach(obj, type, NULL); + if (IS_ERR(mmo)) + return PTR_ERR(mmo); + + addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + if (IS_ERR_VALUE(addr)) + return addr; + + err = prefault_range(addr, obj->base.size); + if (err) + goto out_unmap; + + GEM_BUG_ON(mmo->mmap_type == I915_MMAP_TYPE_GTT && + !atomic_read(&obj->bind_count)); + + err = check_present(addr, obj->base.size); + if (err) { + pr_err("%s: was not present\n", obj->mm.region->name); + goto out_unmap; + } + + /* + * After unbinding the object from the GGTT, its address may be reused + * for other objects. Ergo we have to revoke the previous mmap PTE + * access as it no longer points to the same object. + */ + err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (err) { + pr_err("Failed to unbind object!\n"); + goto out_unmap; + } + GEM_BUG_ON(atomic_read(&obj->bind_count)); + + if (type != I915_MMAP_TYPE_GTT) { + __i915_gem_object_put_pages(obj); + if (i915_gem_object_has_pages(obj)) { + pr_err("Failed to put-pages object!\n"); + err = -EINVAL; + goto out_unmap; + } + } + + err = check_absent(addr, obj->base.size); + if (err) { + pr_err("%s: was not absent\n", obj->mm.region->name); + goto out_unmap; + } + +out_unmap: + vm_munmap(addr, obj->base.size); + return err; +} + +static int igt_mmap_revoke(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC); + + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + int i915_gem_mman_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_partial_tiling), + SUBTEST(igt_smoke_tiling), SUBTEST(igt_mmap_offset_exhaustion), + SUBTEST(igt_mmap), + SUBTEST(igt_mmap_revoke), + SUBTEST(igt_mmap_gpu), }; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index e23d8c9e9298..62077fe46715 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -3,34 +3,254 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/sort.h> + +#include "gt/intel_gt.h" +#include "gt/intel_engine_user.h" + #include "i915_selftest.h" +#include "gem/i915_gem_context.h" #include "selftests/igt_flush_test.h" +#include "selftests/i915_random.h" #include "selftests/mock_drm.h" +#include "huge_gem_object.h" #include "mock_context.h" -static int igt_fill_blt(void *arg) +static int wrap_ktime_compare(const void *A, const void *B) { - struct intel_context *ce = arg; - struct drm_i915_private *i915 = ce->gem_context->i915; - struct drm_i915_gem_object *obj; + const ktime_t *a = A, *b = B; + + return ktime_compare(*a, *b); +} + +static int __perf_fill_blt(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + int inst = 0; + + do { + 
struct intel_engine_cs *engine; + ktime_t t[5]; + int pass; + int err; + + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + intel_engine_pm_get(engine); + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct intel_context *ce = engine->kernel_context; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = i915_gem_object_fill_blt(obj, ce, 0); + if (err) + break; + + err = i915_gem_object_wait(obj, + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + break; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + intel_engine_pm_put(engine); + if (err) + return err; + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: blt %zd KiB fill: %lld MiB/s\n", + engine->name, + obj->base.size >> 10, + div64_u64(mul_u32_u32(4 * obj->base.size, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + } while (1); +} + +static int perf_fill_blt(void *arg) +{ + struct drm_i915_private *i915 = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __perf_fill_blt(obj); + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + +static int __perf_copy_blt(struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst) +{ + struct drm_i915_private *i915 = to_i915(src->base.dev); + int inst = 0; + + do { + struct intel_engine_cs *engine; + ktime_t t[5]; + int pass; + int err = 0; + + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + intel_engine_pm_get(engine); + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct intel_context *ce = engine->kernel_context; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = i915_gem_object_copy_blt(src, dst, ce); + if (err) + break; + + err = i915_gem_object_wait(dst, + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + break; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + intel_engine_pm_put(engine); + if (err) + return err; + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: blt %zd KiB copy: %lld MiB/s\n", + engine->name, + src->base.size >> 10, + div64_u64(mul_u32_u32(4 * src->base.size, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + } while (1); +} + +static int perf_copy_blt(void *arg) +{ + struct drm_i915_private *i915 = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *src, *dst; + int err; + + src = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(src)) + return PTR_ERR(src); + + dst = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err_src; + } + + err = __perf_copy_blt(src, dst); + + i915_gem_object_put(dst); +err_src: + i915_gem_object_put(src); + if (err) + return err; + } + + return 0; +} + +struct igt_thread_arg { + struct drm_i915_private *i915; + struct i915_gem_context *ctx; + struct file *file; struct rnd_state prng; + unsigned int n_cpus; +}; + +static int igt_fill_blt_thread(void *arg) +{ + struct igt_thread_arg *thread = arg; + struct drm_i915_private *i915 = thread->i915; + struct rnd_state *prng = &thread->prng; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + struct intel_context 
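For reference, the MiB/s figure printed by __perf_fill_blt() and __perf_copy_blt() above is a weighted average over the middle three of the five sorted passes, so the fastest and slowest samples never contribute. A standalone sketch of just that arithmetic, with made-up timings:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int cmp_u64(const void *a, const void *b)
{
	uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;

	return x < y ? -1 : x > y;
}

static uint64_t mib_per_sec(uint64_t bytes, uint64_t t_ns[5])
{
	qsort(t_ns, 5, sizeof(*t_ns), cmp_u64);
	/* four passes worth of data over t[1] + 2*t[2] + t[3] nanoseconds */
	return (4 * bytes * 1000000000ull /
		(t_ns[1] + 2 * t_ns[2] + t_ns[3])) >> 20;
}

int main(void)
{
	/* five timed fills of a 64 MiB object, in nanoseconds */
	uint64_t t[5] = { 13000000, 11000000, 10000000, 10500000, 50000000 };

	printf("%llu MiB/s\n",
	       (unsigned long long)mib_per_sec(64ull << 20, t));
	return 0;
}
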
*ce; + unsigned int prio; IGT_TIMEOUT(end); - u32 *vaddr; - int err = 0; + int err; + + ctx = thread->ctx; + if (!ctx) { + ctx = live_context(i915, thread->file); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); + ctx->sched.priority = I915_USER_PRIORITY(prio); + } - prandom_seed_state(&prng, i915_selftest.random_seed); + ce = i915_gem_context_get_engine(ctx, BCS0); + GEM_BUG_ON(IS_ERR(ce)); do { - u32 sz = prandom_u32_state(&prng) % SZ_32M; - u32 val = prandom_u32_state(&prng); + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 val = prandom_u32_state(prng); + u64 total = ce->vm->total; + u32 phys_sz; + u32 sz; + u32 *vaddr; u32 i; + /* + * If we have a tiny shared address space, like for the GGTT + * then we can't be too greedy. + */ + if (i915_is_ggtt(ce->vm)) + total = div64_u64(total, thread->n_cpus); + + sz = min_t(u64, total >> 4, prandom_u32_state(prng)); + phys_sz = sz % (max_block_size + 1); + sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); - pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val); + pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, + phys_sz, sz, val); - obj = i915_gem_object_create_internal(i915, sz); + obj = huge_gem_object(i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush; @@ -46,14 +266,13 @@ static int igt_fill_blt(void *arg) * Make sure the potentially async clflush does its job, if * required. */ - memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32)); + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(obj) / sizeof(u32)); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_fill_blt(obj, ce, val); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; @@ -63,7 +282,7 @@ static int igt_fill_blt(void *arg) if (err) goto err_unpin; - for (i = 0; i < obj->base.size / sizeof(u32); ++i) { + for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val); @@ -83,28 +302,261 @@ err_unpin: err_put: i915_gem_object_put(obj); err_flush: - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); + if (err == -ENOMEM) + err = 0; + + intel_context_put(ce); + return err; +} + +static int igt_copy_blt_thread(void *arg) +{ + struct igt_thread_arg *thread = arg; + struct drm_i915_private *i915 = thread->i915; + struct rnd_state *prng = &thread->prng; + struct drm_i915_gem_object *src, *dst; + struct i915_gem_context *ctx; + struct intel_context *ce; + unsigned int prio; + IGT_TIMEOUT(end); + int err; + + ctx = thread->ctx; + if (!ctx) { + ctx = live_context(i915, thread->file); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); + ctx->sched.priority = I915_USER_PRIORITY(prio); + } + + ce = i915_gem_context_get_engine(ctx, BCS0); + GEM_BUG_ON(IS_ERR(ce)); + + do { + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 val = prandom_u32_state(prng); + u64 total = ce->vm->total; + u32 phys_sz; + u32 sz; + u32 *vaddr; + u32 i; + + if (i915_is_ggtt(ce->vm)) + total = div64_u64(total, thread->n_cpus); + + sz = min_t(u64, total >> 4, prandom_u32_state(prng)); + phys_sz = sz % (max_block_size + 1); + + sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); + + pr_debug("%s with phys_sz= %x, sz=%x, 
val=%x\n", __func__, + phys_sz, sz, val); + + src = huge_gem_object(i915, phys_sz, sz); + if (IS_ERR(src)) { + err = PTR_ERR(src); + goto err_flush; + } + + vaddr = i915_gem_object_pin_map(src, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_put_src; + } + + memset32(vaddr, val, + huge_gem_object_phys_size(src) / sizeof(u32)); + + i915_gem_object_unpin_map(src); + + if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + src->cache_dirty = true; + + dst = huge_gem_object(i915, phys_sz, sz); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err_put_src; + } + + vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_put_dst; + } + + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(dst) / sizeof(u32)); + + if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + dst->cache_dirty = true; + + err = i915_gem_object_copy_blt(src, dst, ce); + if (err) + goto err_unpin; + + i915_gem_object_lock(dst); + err = i915_gem_object_set_to_cpu_domain(dst, false); + i915_gem_object_unlock(dst); + if (err) + goto err_unpin; + for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) { + if (vaddr[i] != val) { + pr_err("vaddr[%u]=%x, expected=%x\n", i, + vaddr[i], val); + err = -EINVAL; + goto err_unpin; + } + } + + i915_gem_object_unpin_map(dst); + + i915_gem_object_put(src); + i915_gem_object_put(dst); + } while (!time_after(jiffies, end)); + + goto err_flush; + +err_unpin: + i915_gem_object_unpin_map(dst); +err_put_dst: + i915_gem_object_put(dst); +err_put_src: + i915_gem_object_put(src); +err_flush: if (err == -ENOMEM) err = 0; + intel_context_put(ce); + return err; +} + +static int igt_threaded_blt(struct drm_i915_private *i915, + int (*blt_fn)(void *arg), + unsigned int flags) +#define SINGLE_CTX BIT(0) +{ + struct igt_thread_arg *thread; + struct task_struct **tsk; + unsigned int n_cpus, i; + I915_RND_STATE(prng); + int err = 0; + + n_cpus = num_online_cpus() + 1; + + tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL); + if (!tsk) + return 0; + + thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL); + if (!thread) + goto out_tsk; + + thread[0].file = mock_file(i915); + if (IS_ERR(thread[0].file)) { + err = PTR_ERR(thread[0].file); + goto out_thread; + } + + if (flags & SINGLE_CTX) { + thread[0].ctx = live_context(i915, thread[0].file); + if (IS_ERR(thread[0].ctx)) { + err = PTR_ERR(thread[0].ctx); + goto out_file; + } + } + + for (i = 0; i < n_cpus; ++i) { + thread[i].i915 = i915; + thread[i].file = thread[0].file; + thread[i].ctx = thread[0].ctx; + thread[i].n_cpus = n_cpus; + thread[i].prng = + I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); + + tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i); + if (IS_ERR(tsk[i])) { + err = PTR_ERR(tsk[i]); + break; + } + + get_task_struct(tsk[i]); + } + + yield(); /* start all threads before we kthread_stop() */ + + for (i = 0; i < n_cpus; ++i) { + int status; + + if (IS_ERR_OR_NULL(tsk[i])) + continue; + + status = kthread_stop(tsk[i]); + if (status && !err) + err = status; + + put_task_struct(tsk[i]); + } + +out_file: + fput(thread[0].file); +out_thread: + kfree(thread); +out_tsk: + kfree(tsk); return err; } +static int igt_fill_blt(void *arg) +{ + return igt_threaded_blt(arg, igt_fill_blt_thread, 0); +} + +static int igt_fill_blt_ctx0(void *arg) +{ + return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX); +} + +static int igt_copy_blt(void *arg) +{ + return igt_threaded_blt(arg, igt_copy_blt_thread, 
0); +} + +static int igt_copy_blt_ctx0(void *arg) +{ + return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX); +} + int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_fill_blt), + SUBTEST(igt_fill_blt_ctx0), + SUBTEST(igt_copy_blt), + SUBTEST(igt_copy_blt_ctx0), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; if (!HAS_ENGINE(i915, BCS0)) return 0; - return i915_subtests(tests, i915->engine[BCS0]->kernel_context); + return i915_live_subtests(tests, i915); +} + +int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(perf_fill_blt), + SUBTEST(perf_copy_blt), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 94a15e3f6db8..34932871b3a5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,9 +25,7 @@ static int mock_phys_object(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); goto out_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index b232e6d2cd92..6718da20f35d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -9,6 +9,9 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_gt.h" +#include "i915_vma.h" +#include "i915_drv.h" #include "i915_request.h" @@ -23,7 +26,7 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ce = i915_gem_context_get_engine(ctx, engine->id); + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); if (IS_ERR(ce)) return ERR_CAST(ce); @@ -32,3 +35,134 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) return rq; } + +struct i915_vma * +igt_emit_store_dw(struct i915_vma *vma, + u64 offset, + unsigned long count, + u32 val) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = val; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 
MI_USE_GGTT : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = val; + } else { + *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cmd++ = offset; + *cmd++ = val; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + intel_gt_chipset_flush(vma->vm->gt); + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val) +{ + struct i915_request *rq; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + + batch = igt_emit_store_dw(vma, offset, count, val); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + flags = 0; + if (INTEL_GEN(ce->vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto skip_request; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + if (err) + goto skip_request; + + i915_request_add(rq); + + i915_vma_unpin_and_release(&batch, 0); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin_and_release(&batch, 0); + return err; +} diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h index 0f17251cf75d..4221cf84d175 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -7,11 +7,26 @@ #ifndef __IGT_GEM_UTILS_H__ #define __IGT_GEM_UTILS_H__ +#include <linux/types.h> + struct i915_request; struct i915_gem_context; +struct i915_vma; + +struct intel_context; struct intel_engine_cs; struct i915_request * igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); +struct i915_vma * +igt_emit_store_dw(struct i915_vma *vma, + u64 offset, + unsigned long count, + u32 val); + +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val); + #endif /* __IGT_GEM_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index be8974ccff24..384143aa7776 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -5,6 +5,7 @@ */ #include "mock_context.h" +#include "selftests/mock_drm.h" #include "selftests/mock_gtt.h" struct i915_gem_context * @@ -13,7 +14,6 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; - int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -23,6 +23,8 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + i915_gem_context_set_persistence(ctx); + mutex_init(&ctx->engines_mutex); e = 
default_engines(ctx); if (IS_ERR(e)) @@ -30,32 +32,26 @@ mock_context(struct drm_i915_private *i915, RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); mutex_init(&ctx->mutex); - ret = i915_gem_context_pin_hw_id(ctx); - if (ret < 0) - goto err_engines; - if (name) { struct i915_ppgtt *ppgtt; - ctx->name = kstrdup(name, GFP_KERNEL); - if (!ctx->name) - goto err_put; + strncpy(ctx->name, name, sizeof(ctx->name)); ppgtt = mock_ppgtt(i915, name); if (!ppgtt) goto err_put; + mutex_lock(&ctx->mutex); __set_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } return ctx; -err_engines: - free_engines(rcu_access_pointer(ctx->engines)); err_free: kfree(ctx); return NULL; @@ -73,22 +69,21 @@ void mock_context_close(struct i915_gem_context *ctx) void mock_init_contexts(struct drm_i915_private *i915) { - init_contexts(i915); + init_contexts(&i915->gem.contexts); } struct i915_gem_context * -live_context(struct drm_i915_private *i915, struct drm_file *file) +live_context(struct drm_i915_private *i915, struct file *file) { struct i915_gem_context *ctx; int err; - - lockdep_assert_held(&i915->drm.struct_mutex); + u32 id; ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; - err = gem_context_register(ctx, file->driver_priv); + err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id); if (err < 0) goto err_ctx; @@ -102,7 +97,16 @@ err_ctx: struct i915_gem_context * kernel_context(struct drm_i915_private *i915) { - return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL); + struct i915_gem_context *ctx; + + ctx = i915_gem_create_context(i915, 0); + if (IS_ERR(ctx)) + return ctx; + + i915_gem_context_clear_bannable(ctx); + i915_gem_context_set_persistence(ctx); + + return ctx; } void kernel_context_close(struct i915_gem_context *ctx) diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h index 0b926653914f..fb83d2f09212 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h @@ -7,6 +7,9 @@ #ifndef __MOCK_CONTEXT_H #define __MOCK_CONTEXT_H +struct file; +struct drm_i915_private; + void mock_init_contexts(struct drm_i915_private *i915); struct i915_gem_context * @@ -16,7 +19,7 @@ mock_context(struct drm_i915_private *i915, void mock_context_close(struct i915_gem_context *ctx); struct i915_gem_context * -live_context(struct drm_i915_private *i915, struct drm_file *file); +live_context(struct drm_i915_private *i915, struct file *file); struct i915_gem_context *kernel_context(struct drm_i915_private *i915); void kernel_context_close(struct i915_gem_context *ctx); diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c index b9e059d4328a..9272bef57092 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c @@ -76,20 +76,6 @@ static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) vm_unmap_ram(vaddr, mock->npages); } -static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) -{ - struct mock_dmabuf *mock = to_mock(dma_buf); - - return kmap(mock->pages[page_num]); -} - -static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) -{ - struct mock_dmabuf *mock = to_mock(dma_buf); - - return kunmap(mock->pages[page_num]); -} - static int mock_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *vma) { return -ENODEV; @@ -99,8 +85,6 @@ static const struct dma_buf_ops mock_dmabuf_ops = { .map_dma_buf = mock_map_dma_buf, .unmap_dma_buf = mock_unmap_dma_buf, .release = mock_dmabuf_release, - .map = mock_dmabuf_kmap, - .unmap = mock_dmabuf_kunmap, .mmap = mock_dmabuf_mmap, .vmap = mock_dmabuf_vmap, .vunmap = mock_dmabuf_vunmap, diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h index f0f8bbd82dfc..22818bbb139d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h @@ -14,7 +14,7 @@ struct mock_dmabuf { struct page *pages[]; }; -static struct mock_dmabuf *to_mock(struct dma_buf *buf) +static inline struct mock_dmabuf *to_mock(struct dma_buf *buf) { return buf->priv; } diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h index 370360b4a148..688511afa883 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h @@ -7,6 +7,8 @@ #ifndef __MOCK_GEM_OBJECT_H__ #define __MOCK_GEM_OBJECT_H__ +#include "gem/i915_gem_object_types.h" + struct mock_object { struct drm_i915_gem_object base; }; diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile deleted file mode 100644 index 1c75b5c9790c..000000000000 --- a/drivers/gpu/drm/i915/gt/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# Extra header tests -include $(src)/Makefile.header-test diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test deleted file mode 100644 index 61e06cbb4b32..000000000000 --- a/drivers/gpu/drm/i915/gt/Makefile.header-test +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2019 Intel Corporation - -# Test the headers are compilable as standalone units -header_test := $(notdir $(wildcard $(src)/*.h)) - -quiet_cmd_header_test = HDRTEST $@ - cmd_header_test = echo "\#include \"$(<F)\"" > $@ - -header_test_%.c: %.h - $(call cmd,header_test) - -extra-$(CONFIG_DRM_I915_WERROR) += \ - $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h))) - -clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h))) diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c new file mode 100644 index 000000000000..6a5e9ab20b94 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include <drm/drm_print.h> + +#include "debugfs_engines.h" +#include "debugfs_gt.h" +#include "i915_drv.h" /* for_each_engine! 
*/ +#include "intel_engine.h" + +static int engines_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct drm_printer p; + + p = drm_seq_file_printer(m); + for_each_engine(engine, gt, id) + intel_engine_dump(engine, &p, "%s\n", engine->name); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(engines); + +void debugfs_engines_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct debugfs_gt_file files[] = { + { "engines", &engines_fops }, + }; + + debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files)); +} diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h new file mode 100644 index 000000000000..f69257eaa1cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_engines.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef DEBUGFS_ENGINES_H +#define DEBUGFS_ENGINES_H + +struct intel_gt; +struct dentry; + +void debugfs_engines_register(struct intel_gt *gt, struct dentry *root); + +#endif /* DEBUGFS_ENGINES_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c new file mode 100644 index 000000000000..75255aaacaed --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/debugfs.h> + +#include "debugfs_engines.h" +#include "debugfs_gt.h" +#include "debugfs_gt_pm.h" +#include "i915_drv.h" + +void debugfs_gt_register(struct intel_gt *gt) +{ + struct dentry *root; + + if (!gt->i915->drm.primary->debugfs_root) + return; + + root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root); + if (IS_ERR(root)) + return; + + debugfs_engines_register(gt, root); + debugfs_gt_pm_register(gt, root); +} + +void debugfs_gt_register_files(struct intel_gt *gt, + struct dentry *root, + const struct debugfs_gt_file *files, + unsigned long count) +{ + while (count--) { + if (!files->eval || files->eval(gt)) + debugfs_create_file(files->name, + 0444, root, gt, + files->fops); + + files++; + } +} diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h new file mode 100644 index 000000000000..4ea0f06cda8f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef DEBUGFS_GT_H +#define DEBUGFS_GT_H + +#include <linux/file.h> + +struct intel_gt; + +#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name) \ + static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + +void debugfs_gt_register(struct intel_gt *gt); + +struct debugfs_gt_file { + const char *name; + const struct file_operations *fops; + bool (*eval)(const struct intel_gt *gt); +}; + +void debugfs_gt_register_files(struct intel_gt *gt, + struct dentry *root, + const struct debugfs_gt_file *files, + unsigned long count); + +#endif /* DEBUGFS_GT_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c new file mode 100644 index 000000000000..059c9e5c002e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -0,0 +1,601 @@ +// 
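DEFINE_GT_DEBUGFS_ATTRIBUTE() and debugfs_gt_register_files() added above form the pattern used for every file under the new gt/ debugfs directory. As an illustration only (the "example" file below is hypothetical, not part of the patch, and assumes the usual debugfs_gt.h / i915_drv.h includes), an extra entry would be wired up like so:

static int example_show(struct seq_file *m, void *data)
{
	struct intel_gt *gt = m->private;	/* i_private set by debugfs_create_file() */

	seq_printf(m, "awake? %s\n", yesno(gt->awake));
	return 0;
}
DEFINE_GT_DEBUGFS_ATTRIBUTE(example);		/* generates example_fops */

static bool example_eval(const struct intel_gt *gt)
{
	return true;				/* always expose the file */
}

void debugfs_example_register(struct intel_gt *gt, struct dentry *root)
{
	static const struct debugfs_gt_file files[] = {
		{ "example", &example_fops, example_eval },
	};

	debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
}
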
SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/seq_file.h> + +#include "debugfs_gt.h" +#include "debugfs_gt_pm.h" +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_llc.h" +#include "intel_rc6.h" +#include "intel_rps.h" +#include "intel_runtime_pm.h" +#include "intel_sideband.h" +#include "intel_uncore.h" + +static int fw_domains_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + struct intel_uncore_forcewake_domain *fw_domain; + unsigned int tmp; + + seq_printf(m, "user.bypass_count = %u\n", + uncore->user_forcewake_count); + + for_each_fw_domain(fw_domain, uncore, tmp) + seq_printf(m, "%s.wake_count = %u\n", + intel_uncore_forcewake_domain_to_str(fw_domain->id), + READ_ONCE(fw_domain->wake_count)); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains); + +static void print_rc6_res(struct seq_file *m, + const char *title, + const i915_reg_t reg) +{ + struct intel_gt *gt = m->private; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + seq_printf(m, "%s %u (%llu us)\n", title, + intel_uncore_read(gt->uncore, reg), + intel_rc6_residency_us(>->rc6, reg)); +} + +static int vlv_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 rcctl1, pw_status; + + pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS); + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + + seq_printf(m, "RC6 Enabled: %s\n", + yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE | + GEN6_RC_CTL_EI_MODE(1)))); + seq_printf(m, "Render Power Well: %s\n", + (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? 
"Up" : "Down"); + + print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6); + print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); + + return fw_domains_show(m, NULL); +} + +static int gen6_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + u32 gt_core_status, rcctl1, rc6vids = 0; + u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; + + gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS); + + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + if (INTEL_GEN(i915) >= 9) { + gen9_powergate_enable = + intel_uncore_read(uncore, GEN9_PG_ENABLE); + gen9_powergate_status = + intel_uncore_read(uncore, GEN9_PWRGT_DOMAIN_STATUS); + } + + if (INTEL_GEN(i915) <= 7) + sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, + &rc6vids, NULL); + + seq_printf(m, "RC1e Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); + seq_printf(m, "RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (INTEL_GEN(i915) >= 9) { + seq_printf(m, "Render Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE)); + seq_printf(m, "Media Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } + seq_printf(m, "Deep RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE)); + seq_printf(m, "Deepest RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE)); + seq_puts(m, "Current RC state: "); + switch (gt_core_status & GEN6_RCn_MASK) { + case GEN6_RC0: + if (gt_core_status & GEN6_CORE_CPD_STATE_MASK) + seq_puts(m, "Core Power Down\n"); + else + seq_puts(m, "on\n"); + break; + case GEN6_RC3: + seq_puts(m, "RC3\n"); + break; + case GEN6_RC6: + seq_puts(m, "RC6\n"); + break; + case GEN6_RC7: + seq_puts(m, "RC7\n"); + break; + default: + seq_puts(m, "Unknown\n"); + break; + } + + seq_printf(m, "Core Power Down: %s\n", + yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); + if (INTEL_GEN(i915) >= 9) { + seq_printf(m, "Render Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? 
"Up" : "Down"); + } + + /* Not exactly sure what this is */ + print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", + GEN6_GT_GFX_RC6_LOCKED); + print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); + print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); + print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); + + if (INTEL_GEN(i915) <= 7) { + seq_printf(m, "RC6 voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff))); + seq_printf(m, "RC6+ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); + seq_printf(m, "RC6++ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); + } + + return fw_domains_show(m, NULL); +} + +static int ilk_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 rgvmodectl, rstdbyctl; + u16 crstandvid; + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + rstdbyctl = intel_uncore_read(uncore, RSTDBYCTL); + crstandvid = intel_uncore_read16(uncore, CRSTANDVID); + + seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN)); + seq_printf(m, "Boost freq: %d\n", + (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >> + MEMMODE_BOOST_FREQ_SHIFT); + seq_printf(m, "HW control enabled: %s\n", + yesno(rgvmodectl & MEMMODE_HWIDLE_EN)); + seq_printf(m, "SW control enabled: %s\n", + yesno(rgvmodectl & MEMMODE_SWMODE_EN)); + seq_printf(m, "Gated voltage change: %s\n", + yesno(rgvmodectl & MEMMODE_RCLK_GATE)); + seq_printf(m, "Starting frequency: P%d\n", + (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT); + seq_printf(m, "Max P-state: P%d\n", + (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT); + seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK)); + seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f)); + seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f)); + seq_printf(m, "Render standby enabled: %s\n", + yesno(!(rstdbyctl & RCX_SW_EXIT))); + seq_puts(m, "Current RS state: "); + switch (rstdbyctl & RSX_STATUS_MASK) { + case RSX_STATUS_ON: + seq_puts(m, "on\n"); + break; + case RSX_STATUS_RC1: + seq_puts(m, "RC1\n"); + break; + case RSX_STATUS_RC1E: + seq_puts(m, "RC1E\n"); + break; + case RSX_STATUS_RS1: + seq_puts(m, "RS1\n"); + break; + case RSX_STATUS_RS2: + seq_puts(m, "RS2 (RC6)\n"); + break; + case RSX_STATUS_RS3: + seq_puts(m, "RC3 (RC6+)\n"); + break; + default: + seq_puts(m, "unknown\n"); + break; + } + + return 0; +} + +static int drpc_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + intel_wakeref_t wakeref; + int err = -ENODEV; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) { + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + err = vlv_drpc(m); + else if (INTEL_GEN(i915) >= 6) + err = gen6_drpc(m); + else + err = ilk_drpc(m); + } + + return err; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc); + +static int frequency_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_rps *rps = >->rps; + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(uncore->rpm); + + if (IS_GEN(i915, 5)) { + u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK); + + seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf); + seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f); + seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> + 
MEMSTAT_VID_SHIFT); + seq_printf(m, "Current P-state: %d\n", + (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + u32 rpmodectl, freq_sts; + + rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); + + vlv_punit_get(i915); + freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(i915); + + seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); + seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq); + + seq_printf(m, "actual GPU freq: %d MHz\n", + intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); + + seq_printf(m, "current GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->cur_freq)); + + seq_printf(m, "max GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->max_freq)); + + seq_printf(m, "min GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->min_freq)); + + seq_printf(m, "idle GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->idle_freq)); + + seq_printf(m, "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, rps->efficient_freq)); + } else if (INTEL_GEN(i915) >= 6) { + u32 rp_state_limits; + u32 gt_perf_status; + u32 rp_state_cap; + u32 rpmodectl, rpinclimit, rpdeclimit; + u32 rpstat, cagf, reqf; + u32 rpupei, rpcurup, rpprevup; + u32 rpdownei, rpcurdown, rpprevdown; + u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; + int max_freq; + + rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); + if (IS_GEN9_LP(i915)) { + rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); + } else { + rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); + } + + /* RPSTAT1 is in the GT power well */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); + if (INTEL_GEN(i915) >= 9) { + reqf >>= 23; + } else { + reqf &= ~GEN6_TURBO_DISABLE; + if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + reqf >>= 24; + else + reqf >>= 25; + } + reqf = intel_gpu_freq(rps, reqf); + + rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); + rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + + rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; + rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; + rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; + rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; + rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; + rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; + cagf = intel_rps_read_actual_frequency(rps); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + if (INTEL_GEN(i915) >= 11) { + pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); + pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); + /* + * The equivalent to the PM ISR & IIR cannot be read + * without affecting the current state of the system + */ + pm_isr = 0; + pm_iir = 0; + } else if (INTEL_GEN(i915) >= 8) { + pm_ier = 
intel_uncore_read(uncore, GEN8_GT_IER(2)); + pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); + pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); + pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); + } else { + pm_ier = intel_uncore_read(uncore, GEN6_PMIER); + pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); + pm_isr = intel_uncore_read(uncore, GEN6_PMISR); + pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); + } + pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); + + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); + + seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", + pm_ier, pm_imr, pm_mask); + if (INTEL_GEN(i915) <= 10) + seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n", + pm_isr, pm_iir); + seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", + rps->pm_intrmsk_mbz); + seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); + seq_printf(m, "Render p-state ratio: %d\n", + (gt_perf_status & (INTEL_GEN(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); + seq_printf(m, "Render p-state VID: %d\n", + gt_perf_status & 0xff); + seq_printf(m, "Render p-state limit: %d\n", + rp_state_limits & 0xff); + seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat); + seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl); + seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit); + seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); + seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); + seq_printf(m, "CAGF: %dMHz\n", cagf); + seq_printf(m, "RP CUR UP EI: %d (%dus)\n", + rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei)); + seq_printf(m, "RP CUR UP: %d (%dus)\n", + rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup)); + seq_printf(m, "RP PREV UP: %d (%dus)\n", + rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup)); + seq_printf(m, "Up threshold: %d%%\n", + rps->power.up_threshold); + + seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", + rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei)); + seq_printf(m, "RP CUR DOWN: %d (%dus)\n", + rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown)); + seq_printf(m, "RP PREV DOWN: %d (%dus)\n", + rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown)); + seq_printf(m, "Down threshold: %d%%\n", + rps->power.down_threshold); + + max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : + rp_state_cap >> 16) & 0xff; + max_freq *= (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + + max_freq = (rp_state_cap & 0xff00) >> 8; + max_freq *= (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + + max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : + rp_state_cap >> 0) & 0xff; + max_freq *= (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? 
GEN9_FREQ_SCALER : 1); + seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + seq_printf(m, "Max overclocked frequency: %dMHz\n", + intel_gpu_freq(rps, rps->max_freq)); + + seq_printf(m, "Current freq: %d MHz\n", + intel_gpu_freq(rps, rps->cur_freq)); + seq_printf(m, "Actual freq: %d MHz\n", cagf); + seq_printf(m, "Idle freq: %d MHz\n", + intel_gpu_freq(rps, rps->idle_freq)); + seq_printf(m, "Min freq: %d MHz\n", + intel_gpu_freq(rps, rps->min_freq)); + seq_printf(m, "Boost freq: %d MHz\n", + intel_gpu_freq(rps, rps->boost_freq)); + seq_printf(m, "Max freq: %d MHz\n", + intel_gpu_freq(rps, rps->max_freq)); + seq_printf(m, + "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, rps->efficient_freq)); + } else { + seq_puts(m, "no P-state info available\n"); + } + + seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk); + seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq); + seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); + + intel_runtime_pm_put(uncore->rpm, wakeref); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency); + +static int llc_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + const bool edram = INTEL_GEN(i915) > 8; + struct intel_rps *rps = >->rps; + unsigned int max_gpu_freq, min_gpu_freq; + intel_wakeref_t wakeref; + int gpu_freq, ia_freq; + + seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(i915))); + seq_printf(m, "%s: %uMB\n", edram ? "eDRAM" : "eLLC", + i915->edram_size_mb); + + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; + if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + /* Convert GT frequency to 50 HZ units */ + min_gpu_freq /= GEN9_FREQ_SCALER; + max_gpu_freq /= GEN9_FREQ_SCALER; + } + + seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { + ia_freq = gpu_freq; + sandybridge_pcode_read(i915, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &ia_freq, NULL); + seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", + intel_gpu_freq(rps, + (gpu_freq * + (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? + GEN9_FREQ_SCALER : 1))), + ((ia_freq >> 0) & 0xff) * 100, + ((ia_freq >> 8) & 0xff) * 100); + } + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + + return 0; +} + +static bool llc_eval(const struct intel_gt *gt) +{ + return HAS_LLC(gt->i915); +} + +DEFINE_GT_DEBUGFS_ATTRIBUTE(llc); + +static const char *rps_power_to_str(unsigned int power) +{ + static const char * const strings[] = { + [LOW_POWER] = "low power", + [BETWEEN] = "mixed", + [HIGH_POWER] = "high power", + }; + + if (power >= ARRAY_SIZE(strings) || !strings[power]) + return "unknown"; + + return strings[power]; +} + +static int rps_boost_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_rps *rps = >->rps; + + seq_printf(m, "RPS enabled? %d\n", rps->enabled); + seq_printf(m, "GPU busy? %s\n", yesno(gt->awake)); + seq_printf(m, "Boosts outstanding? %d\n", + atomic_read(&rps->num_waiters)); + seq_printf(m, "Interactive? 
%d\n", READ_ONCE(rps->power.interactive)); + seq_printf(m, "Frequency requested %d, actual %d\n", + intel_gpu_freq(rps, rps->cur_freq), + intel_rps_read_actual_frequency(rps)); + seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", + intel_gpu_freq(rps, rps->min_freq), + intel_gpu_freq(rps, rps->min_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq)); + seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", + intel_gpu_freq(rps, rps->idle_freq), + intel_gpu_freq(rps, rps->efficient_freq), + intel_gpu_freq(rps, rps->boost_freq)); + + seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); + + if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) { + struct intel_uncore *uncore = gt->uncore; + u32 rpup, rpupei; + u32 rpdown, rpdownei; + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + rpup = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK; + rpupei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK; + rpdown = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK; + rpdownei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK; + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", + rps_power_to_str(rps->power.mode)); + seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", + rpup && rpupei ? 100 * rpup / rpupei : 0, + rps->power.up_threshold); + seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", + rpdown && rpdownei ? 100 * rpdown / rpdownei : 0, + rps->power.down_threshold); + } else { + seq_puts(m, "\nRPS Autotuning inactive\n"); + } + + return 0; +} + +static bool rps_eval(const struct intel_gt *gt) +{ + return HAS_RPS(gt->i915); +} + +DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost); + +void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct debugfs_gt_file files[] = { + { "drpc", &drpc_fops, NULL }, + { "frequency", &frequency_fops, NULL }, + { "forcewake", &fw_domains_fops, NULL }, + { "llc", &llc_fops, llc_eval }, + { "rps_boost", &rps_boost_fops, rps_eval }, + }; + + debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files)); +} diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h new file mode 100644 index 000000000000..4cf5f5c9da7d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef DEBUGFS_GT_PM_H +#define DEBUGFS_GT_PM_H + +struct intel_gt; +struct dentry; + +void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root); + +#endif /* DEBUGFS_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c new file mode 100644 index 000000000000..f4fec7eb4064 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/log2.h> + +#include "gen6_ppgtt.h" +#include "i915_scatterlist.h" +#include "i915_trace.h" +#include "i915_vgpu.h" +#include "intel_gt.h" + +/* Write pde (index) from the page directory @pd to the page table @pt */ +static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt, + const unsigned int pde, + const struct i915_page_table *pt) +{ + /* Caller needs to make sure the write completes if necessary */ + iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, + ppgtt->pd_addr + pde); +} + +void 
gen7_ppgtt_enable(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 ecochk; + + intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B); + + ecochk = intel_uncore_read(uncore, GAM_ECOCHK); + if (IS_HASWELL(i915)) { + ecochk |= ECOCHK_PPGTT_WB_HSW; + } else { + ecochk |= ECOCHK_PPGTT_LLC_IVB; + ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; + } + intel_uncore_write(uncore, GAM_ECOCHK, ecochk); + + for_each_engine(engine, gt, id) { + /* GFX_MODE is per-ring on gen7+ */ + ENGINE_WRITE(engine, + RING_MODE_GEN7, + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + } +} + +void gen6_ppgtt_enable(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + intel_uncore_rmw(uncore, + GAC_ECO_BITS, + 0, + ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B); + + intel_uncore_rmw(uncore, + GAB_CTL, + 0, + GAB_CTL_CONT_AFTER_PAGEFAULT); + + intel_uncore_rmw(uncore, + GAM_ECOCHK, + 0, + ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); + + if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */ + intel_uncore_write(uncore, + GFX_MODE, + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); +} + +/* PPGTT support for Sandybridge/Gen6 and later */ +static void gen6_ppgtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + const unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + const gen6_pte_t scratch_pte = vm->scratch[0].encode; + unsigned int pde = first_entry / GEN6_PTES; + unsigned int pte = first_entry % GEN6_PTES; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + + while (num_entries) { + struct i915_page_table * const pt = + i915_pt_entry(ppgtt->base.pd, pde++); + const unsigned int count = min(num_entries, GEN6_PTES - pte); + gen6_pte_t *vaddr; + + GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1])); + + num_entries -= count; + + GEM_BUG_ON(count > atomic_read(&pt->used)); + if (!atomic_sub_return(count, &pt->used)) + ppgtt->scan_for_unused_pt = true; + + /* + * Note that the hw doesn't support removing PDE on the fly + * (they are cached inside the context with no means to + * invalidate the cache), so we can only reset the PTE + * entries back to scratch.
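 *
 * For illustration only, assuming 4 KiB GTT pages (I915_GTT_PAGE_SIZE)
 * and 1024 four-byte PTEs per gen6 page table (GEN6_PTES), neither of
 * which is defined in this hunk, the pde/pte split computed at the top
 * of this function is plain integer arithmetic:
 *
 *	u32 first = start >> 12;	page index into the ppgtt
 *	u32 pde   = first / 1024;	which page table
 *	u32 pte   = first % 1024;	slot within that table
 *
 * so a clear walks the covered slots table by table and rewrites each
 * one with the scratch PTE, as the memset32() just below does.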
+ */ + + vaddr = kmap_atomic_px(pt); + memset32(vaddr + pte, scratch_pte, count); + kunmap_atomic(vaddr); + + pte = 0; + } +} + +static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_page_directory * const pd = ppgtt->pd; + unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE; + unsigned int act_pt = first_entry / GEN6_PTES; + unsigned int act_pte = first_entry % GEN6_PTES; + const u32 pte_encode = vm->pte_encode(0, cache_level, flags); + struct sgt_dma iter = sgt_dma(vma); + gen6_pte_t *vaddr; + + GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]); + + vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); + do { + GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE); + vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); + + iter.dma += I915_GTT_PAGE_SIZE; + if (iter.dma == iter.max) { + iter.sg = __sg_next(iter.sg); + if (!iter.sg) + break; + + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + } + + if (++act_pte == GEN6_PTES) { + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt)); + act_pte = 0; + } + } while (1); + kunmap_atomic(vaddr); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; +} + +static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) +{ + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_table *pt; + unsigned int pde; + + start = round_down(start, SZ_64K); + end = round_up(end, SZ_64K) - start; + + mutex_lock(&ppgtt->flush); + + gen6_for_each_pde(pt, pd, start, end, pde) + gen6_write_pde(ppgtt, pde, pt); + + mb(); + ioread32(ppgtt->pd_addr + pde - 1); + gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt); + mb(); + + mutex_unlock(&ppgtt->flush); +} + +static int gen6_alloc_va_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_table *pt, *alloc = NULL; + intel_wakeref_t wakeref; + u64 from = start; + unsigned int pde; + int ret = 0; + + wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); + + spin_lock(&pd->lock); + gen6_for_each_pde(pt, pd, start, length, pde) { + const unsigned int count = gen6_pte_count(start, length); + + if (px_base(pt) == px_base(&vm->scratch[1])) { + spin_unlock(&pd->lock); + + pt = fetch_and_zero(&alloc); + if (!pt) + pt = alloc_pt(vm); + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto unwind_out; + } + + fill32_px(pt, vm->scratch[0].encode); + + spin_lock(&pd->lock); + if (pd->entry[pde] == &vm->scratch[1]) { + pd->entry[pde] = pt; + } else { + alloc = pt; + pt = pd->entry[pde]; + } + } + + atomic_add(count, &pt->used); + } + spin_unlock(&pd->lock); + + if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) + gen6_flush_pd(ppgtt, from, start); + + goto out; + +unwind_out: + gen6_ppgtt_clear_range(vm, from, start - from); +out: + if (alloc) + free_px(vm, alloc); + intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); + return ret; +} + +static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) +{ + struct i915_address_space * const vm = &ppgtt->base.vm; + struct i915_page_directory * const pd = ppgtt->base.pd; + int ret; + + ret = setup_scratch_page(vm, __GFP_HIGHMEM); + if (ret) + return ret; + + vm->scratch[0].encode = + vm->pte_encode(px_dma(&vm->scratch[0]), + I915_CACHE_NONE, PTE_READ_ONLY); + + if (unlikely(setup_page_dma(vm, 
px_base(&vm->scratch[1])))) { + cleanup_scratch_page(vm); + return -ENOMEM; + } + + fill32_px(&vm->scratch[1], vm->scratch[0].encode); + memset_p(pd->entry, &vm->scratch[1], I915_PDES); + + return 0; +} + +static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) +{ + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_dma * const scratch = + px_base(&ppgtt->base.vm.scratch[1]); + struct i915_page_table *pt; + u32 pde; + + gen6_for_all_pdes(pt, pd, pde) + if (px_base(pt) != scratch) + free_px(&ppgtt->base.vm, pt); +} + +static void gen6_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + + __i915_vma_put(ppgtt->vma); + + gen6_ppgtt_free_pd(ppgtt); + free_scratch(vm); + + mutex_destroy(&ppgtt->flush); + mutex_destroy(&ppgtt->pin_mutex); + kfree(ppgtt->base.pd); +} + +static int pd_vma_set_pages(struct i915_vma *vma) +{ + vma->pages = ERR_PTR(-ENODEV); + return 0; +} + +static void pd_vma_clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + + vma->pages = NULL; +} + +static int pd_vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); + struct gen6_ppgtt *ppgtt = vma->private; + u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE; + + px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); + ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; + + gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total); + return 0; +} + +static void pd_vma_unbind(struct i915_vma *vma) +{ + struct gen6_ppgtt *ppgtt = vma->private; + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_dma * const scratch = + px_base(&ppgtt->base.vm.scratch[1]); + struct i915_page_table *pt; + unsigned int pde; + + if (!ppgtt->scan_for_unused_pt) + return; + + /* Free all no longer used page tables */ + gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { + if (px_base(pt) == scratch || atomic_read(&pt->used)) + continue; + + free_px(&ppgtt->base.vm, pt); + pd->entry[pde] = scratch; + } + + ppgtt->scan_for_unused_pt = false; +} + +static const struct i915_vma_ops pd_vma_ops = { + .set_pages = pd_vma_set_pages, + .clear_pages = pd_vma_clear_pages, + .bind_vma = pd_vma_bind, + .unbind_vma = pd_vma_unbind, +}; + +static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) +{ + struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; + struct i915_vma *vma; + + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(size > ggtt->vm.total); + + vma = i915_vma_alloc(); + if (!vma) + return ERR_PTR(-ENOMEM); + + i915_active_init(&vma->active, NULL, NULL); + + kref_init(&vma->ref); + mutex_init(&vma->pages_mutex); + vma->vm = i915_vm_get(&ggtt->vm); + vma->ops = &pd_vma_ops; + vma->private = ppgtt; + + vma->size = size; + vma->fence_size = size; + atomic_set(&vma->flags, I915_VMA_GGTT); + vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ + + INIT_LIST_HEAD(&vma->obj_link); + INIT_LIST_HEAD(&vma->closed_link); + + return vma; +} + +int gen6_ppgtt_pin(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + int err; + + GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open)); + + /* + * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt + * which will be pinned into every active context. + * (When vma->pin_count becomes atomic, I expect we will naturally + * need a larger, unpacked, type and kill this redundancy.) 
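 *
 * For illustration only, stripped of the i915 specifics, the fast path
 * below is the usual "pin again cheaply, take the lock only for the
 * first pin" idiom; do_first_pin() is a hypothetical stand-in for the
 * GGTT pin performed further down:
 *
 *	if (atomic_add_unless(&pin_count, 1, 0))
 *		return 0;		already pinned, just bump the count
 *
 *	if (mutex_lock_interruptible(&pin_mutex))
 *		return -EINTR;
 *	err = 0;
 *	if (!atomic_read(&pin_count))
 *		err = do_first_pin();
 *	if (!err)
 *		atomic_inc(&pin_count);
 *	mutex_unlock(&pin_mutex);
 *	return err;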
+ */ + if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) + return 0; + + if (mutex_lock_interruptible(&ppgtt->pin_mutex)) + return -EINTR; + + /* + * PPGTT PDEs reside in the GGTT and consists of 512 entries. The + * allocator works in address space sizes, so it's multiplied by page + * size. We allocate at the top of the GTT to avoid fragmentation. + */ + err = 0; + if (!atomic_read(&ppgtt->pin_count)) + err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH); + if (!err) + atomic_inc(&ppgtt->pin_count); + mutex_unlock(&ppgtt->pin_mutex); + + return err; +} + +void gen6_ppgtt_unpin(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + + GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); + if (atomic_dec_and_test(&ppgtt->pin_count)) + i915_vma_unpin(ppgtt->vma); +} + +void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + + if (!atomic_read(&ppgtt->pin_count)) + return; + + i915_vma_unpin(ppgtt->vma); + atomic_set(&ppgtt->pin_count, 0); +} + +struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) +{ + struct i915_ggtt * const ggtt = gt->ggtt; + struct gen6_ppgtt *ppgtt; + int err; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return ERR_PTR(-ENOMEM); + + mutex_init(&ppgtt->flush); + mutex_init(&ppgtt->pin_mutex); + + ppgtt_init(&ppgtt->base, gt); + ppgtt->base.vm.top = 1; + + ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; + ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; + ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; + ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; + ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; + + ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; + + ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd)); + if (!ppgtt->base.pd) { + err = -ENOMEM; + goto err_free; + } + + err = gen6_ppgtt_init_scratch(ppgtt); + if (err) + goto err_pd; + + ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); + if (IS_ERR(ppgtt->vma)) { + err = PTR_ERR(ppgtt->vma); + goto err_scratch; + } + + return &ppgtt->base; + +err_scratch: + free_scratch(&ppgtt->base.vm); +err_pd: + kfree(ppgtt->base.pd); +err_free: + mutex_destroy(&ppgtt->pin_mutex); + kfree(ppgtt); + return ERR_PTR(err); +} diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h new file mode 100644 index 000000000000..72e481806c96 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __GEN6_PPGTT_H__ +#define __GEN6_PPGTT_H__ + +#include "intel_gtt.h" + +struct gen6_ppgtt { + struct i915_ppgtt base; + + struct mutex flush; + struct i915_vma *vma; + gen6_pte_t __iomem *pd_addr; + + atomic_t pin_count; + struct mutex pin_mutex; + + bool scan_for_unused_pt; +}; + +static inline u32 gen6_pte_index(u32 addr) +{ + return i915_pte_index(addr, GEN6_PDE_SHIFT); +} + +static inline u32 gen6_pte_count(u32 addr, u32 length) +{ + return i915_pte_count(addr, length, GEN6_PDE_SHIFT); +} + +static inline u32 gen6_pde_index(u32 addr) +{ + return i915_pde_index(addr, GEN6_PDE_SHIFT); +} + +#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base) + +static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) +{ + BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base)); + return __to_gen6_ppgtt(base); +} + +/* + * gen6_for_each_pde() iterates over every pde from start until start+length. 
+ * If start and start+length are not perfectly divisible, the macro will round + * down and up as needed. Start=0 and length=2G effectively iterates over + * every PDE in the system. The macro modifies ALL its parameters except 'pd', + * so each of the other parameters should preferably be a simple variable, or + * at most an lvalue with no side-effects! + */ +#define gen6_for_each_pde(pt, pd, start, length, iter) \ + for (iter = gen6_pde_index(start); \ + length > 0 && iter < I915_PDES && \ + (pt = i915_pt_entry(pd, iter), true); \ + ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT); \ + temp = min(temp - start, length); \ + start += temp, length -= temp; }), ++iter) + +#define gen6_for_all_pdes(pt, pd, iter) \ + for (iter = 0; \ + iter < I915_PDES && \ + (pt = i915_pt_entry(pd, iter), true); \ + ++iter) + +int gen6_ppgtt_pin(struct i915_ppgtt *base); +void gen6_ppgtt_unpin(struct i915_ppgtt *base); +void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); +void gen6_ppgtt_enable(struct intel_gt *gt); +void gen7_ppgtt_enable(struct intel_gt *gt); +struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen6.c b/drivers/gpu/drm/i915/gt/gen6_renderstate.c index 11c8e7b3dd7c..11c8e7b3dd7c 100644 --- a/drivers/gpu/drm/i915/intel_renderstate_gen6.c +++ b/drivers/gpu/drm/i915/gt/gen6_renderstate.c diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen7.c b/drivers/gpu/drm/i915/gt/gen7_renderstate.c index 655180646152..655180646152 100644 --- a/drivers/gpu/drm/i915/intel_renderstate_gen7.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderstate.c diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c new file mode 100644 index 000000000000..4d1de2d97d5c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -0,0 +1,724 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/log2.h> + +#include "gen8_ppgtt.h" +#include "i915_scatterlist.h" +#include "i915_trace.h" +#include "i915_vgpu.h" +#include "intel_gt.h" +#include "intel_gtt.h" + +static u64 gen8_pde_encode(const dma_addr_t addr, + const enum i915_cache_level level) +{ + u64 pde = addr | _PAGE_PRESENT | _PAGE_RW; + + if (level != I915_CACHE_NONE) + pde |= PPAT_CACHED_PDE; + else + pde |= PPAT_UNCACHED; + + return pde; +} + +static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) +{ + struct drm_i915_private *i915 = ppgtt->vm.i915; + struct intel_uncore *uncore = ppgtt->vm.gt->uncore; + enum vgt_g2v_type msg; + int i; + + if (create) + atomic_inc(px_used(ppgtt->pd)); /* never remove */ + else + atomic_dec(px_used(ppgtt->pd)); + + mutex_lock(&i915->vgpu.lock); + + if (i915_vm_is_4lvl(&ppgtt->vm)) { + const u64 daddr = px_dma(ppgtt->pd); + + intel_uncore_write(uncore, + vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); + intel_uncore_write(uncore, + vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); + + msg = create ? + VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : + VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY; + } else { + for (i = 0; i < GEN8_3LVL_PDPES; i++) { + const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); + + intel_uncore_write(uncore, + vgtif_reg(pdp[i].lo), + lower_32_bits(daddr)); + intel_uncore_write(uncore, + vgtif_reg(pdp[i].hi), + upper_32_bits(daddr)); + } + + msg = create ? + VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : + VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY; + } + + /* g2v_notify atomically (via hv trap) consumes the message packet. 
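 *
 * For illustration only: each 64-bit page-directory address published
 * above is split into two 32-bit MMIO writes with the stock kernel
 * helpers,
 *
 *	lo = lower_32_bits(daddr);	daddr & 0xffffffff
 *	hi = upper_32_bits(daddr);	daddr >> 32
 *
 * and only once both halves have landed is the single g2v_notify write
 * below issued to tell the GVT-g host which message to act on.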
*/ + intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg); + + mutex_unlock(&i915->vgpu.lock); +} + +/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */ +#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */ +#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE)) +#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64)) +#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES)) +#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl)) +#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl)) +#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl)) + +#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt) + +static inline unsigned int +gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) +{ + const int shift = gen8_pd_shift(lvl); + const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); + + GEM_BUG_ON(start >= end); + end += ~mask >> gen8_pd_shift(1); + + *idx = i915_pde_index(start, shift); + if ((start ^ end) & mask) + return GEN8_PDES - *idx; + else + return i915_pde_index(end, shift) - *idx; +} + +static inline bool gen8_pd_contains(u64 start, u64 end, int lvl) +{ + const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); + + GEM_BUG_ON(start >= end); + return (start ^ end) & mask && (start & ~mask) == 0; +} + +static inline unsigned int gen8_pt_count(u64 start, u64 end) +{ + GEM_BUG_ON(start >= end); + if ((start ^ end) >> gen8_pd_shift(1)) + return GEN8_PDES - (start & (GEN8_PDES - 1)); + else + return end - start; +} + +static inline unsigned int +gen8_pd_top_count(const struct i915_address_space *vm) +{ + unsigned int shift = __gen8_pte_shift(vm->top); + return (vm->total + (1ull << shift) - 1) >> shift; +} + +static inline struct i915_page_directory * +gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) +{ + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); + + if (vm->top == 2) + return ppgtt->pd; + else + return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top)); +} + +static inline struct i915_page_directory * +gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr) +{ + return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT); +} + +static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, + struct i915_page_directory *pd, + int count, int lvl) +{ + if (lvl) { + void **pde = pd->entry; + + do { + if (!*pde) + continue; + + __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1); + } while (pde++, --count); + } + + free_px(vm, pd); +} + +static void gen8_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + + if (intel_vgpu_active(vm->i915)) + gen8_ppgtt_notify_vgt(ppgtt, false); + + __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top); + free_scratch(vm); +} + +static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, + struct i915_page_directory * const pd, + u64 start, const u64 end, int lvl) +{ + const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + unsigned int idx, len; + + GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); + + len = gen8_pd_range(start, end, lvl--, &idx); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", + __func__, vm, lvl + 1, start, end, + idx, len, atomic_read(px_used(pd))); + GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); + + do { + struct i915_page_table *pt = pd->entry[idx]; + + if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && + gen8_pd_contains(start, end, lvl)) { + DBG("%s(%p):{ 
lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", + __func__, vm, lvl + 1, idx, start, end); + clear_pd_entry(pd, idx, scratch); + __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); + start += (u64)I915_PDES << gen8_pd_shift(lvl); + continue; + } + + if (lvl) { + start = __gen8_ppgtt_clear(vm, as_pd(pt), + start, end, lvl); + } else { + unsigned int count; + u64 *vaddr; + + count = gen8_pt_count(start, end); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n", + __func__, vm, lvl, start, end, + gen8_pd_index(start, 0), count, + atomic_read(&pt->used)); + GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); + + vaddr = kmap_atomic_px(pt); + memset64(vaddr + gen8_pd_index(start, 0), + vm->scratch[0].encode, + count); + kunmap_atomic(vaddr); + + atomic_sub(count, &pt->used); + start += count; + } + + if (release_pd_entry(pd, idx, pt, scratch)) + free_px(vm, pt); + } while (idx++, --len); + + return start; +} + +static void gen8_ppgtt_clear(struct i915_address_space *vm, + u64 start, u64 length) +{ + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(range_overflows(start, length, vm->total)); + + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + GEM_BUG_ON(length == 0); + + __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, + start, start + length, vm->top); +} + +static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, + struct i915_page_directory * const pd, + u64 * const start, const u64 end, int lvl) +{ + const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + struct i915_page_table *alloc = NULL; + unsigned int idx, len; + int ret = 0; + + GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); + + len = gen8_pd_range(*start, end, lvl--, &idx); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", + __func__, vm, lvl + 1, *start, end, + idx, len, atomic_read(px_used(pd))); + GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1)); + + spin_lock(&pd->lock); + GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! 
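 *
 * For illustration of the walk this loop performs, assuming
 * i915_pde_index(addr, shift) is (addr >> shift) & (GEN8_PDES - 1):
 * with 4 KiB pages and 512 entries per level, a full GPU address
 * breaks down as
 *
 *	pml4e  = (addr >> 39) & 511;	lvl 3, __gen8_pte_shift(3)
 *	pdpe   = (addr >> 30) & 511;	lvl 2
 *	pde    = (addr >> 21) & 511;	lvl 1
 *	pte    = (addr >> 12) & 511;	lvl 0
 *	offset = addr & 4095;
 *
 * and the recursion below allocates any missing directory or table on
 * the way down before the leaf PTEs are filled.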
*/ + do { + struct i915_page_table *pt = pd->entry[idx]; + + if (!pt) { + spin_unlock(&pd->lock); + + DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", + __func__, vm, lvl + 1, idx); + + pt = fetch_and_zero(&alloc); + if (lvl) { + if (!pt) { + pt = &alloc_pd(vm)->pt; + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto out; + } + } + + fill_px(pt, vm->scratch[lvl].encode); + } else { + if (!pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto out; + } + } + + if (intel_vgpu_active(vm->i915) || + gen8_pt_count(*start, end) < I915_PDES) + fill_px(pt, vm->scratch[lvl].encode); + } + + spin_lock(&pd->lock); + if (likely(!pd->entry[idx])) + set_pd_entry(pd, idx, pt); + else + alloc = pt, pt = pd->entry[idx]; + } + + if (lvl) { + atomic_inc(&pt->used); + spin_unlock(&pd->lock); + + ret = __gen8_ppgtt_alloc(vm, as_pd(pt), + start, end, lvl); + if (unlikely(ret)) { + if (release_pd_entry(pd, idx, pt, scratch)) + free_px(vm, pt); + goto out; + } + + spin_lock(&pd->lock); + atomic_dec(&pt->used); + GEM_BUG_ON(!atomic_read(&pt->used)); + } else { + unsigned int count = gen8_pt_count(*start, end); + + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n", + __func__, vm, lvl, *start, end, + gen8_pd_index(*start, 0), count, + atomic_read(&pt->used)); + + atomic_add(count, &pt->used); + /* All other pdes may be simultaneously removed */ + GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES); + *start += count; + } + } while (idx++, --len); + spin_unlock(&pd->lock); +out: + if (alloc) + free_px(vm, alloc); + return ret; +} + +static int gen8_ppgtt_alloc(struct i915_address_space *vm, + u64 start, u64 length) +{ + u64 from; + int err; + + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(range_overflows(start, length, vm->total)); + + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + GEM_BUG_ON(length == 0); + from = start; + + err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd, + &start, start + length, vm->top); + if (unlikely(err && from != start)) + __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, + from, start, vm->top); + + return err; +} + +static __always_inline u64 +gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, + struct i915_page_directory *pdp, + struct sgt_dma *iter, + u64 idx, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_page_directory *pd; + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + gen8_pte_t *vaddr; + + pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); + vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + do { + GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE); + vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; + + iter->dma += I915_GTT_PAGE_SIZE; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) { + idx = 0; + break; + } + + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + iter->sg->length; + } + + if (gen8_pd_index(++idx, 0) == 0) { + if (gen8_pd_index(idx, 1) == 0) { + /* Limited by sg length for 3lvl */ + if (gen8_pd_index(idx, 2) == 0) + break; + + pd = pdp->entry[gen8_pd_index(idx, 2)]; + } + + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + } + } while (1); + kunmap_atomic(vaddr); + + return idx; +} + +static void gen8_ppgtt_insert_huge(struct i915_vma *vma, + struct sgt_dma *iter, + enum i915_cache_level cache_level, + u32 flags) +{ + const gen8_pte_t pte_encode = 
gen8_pte_encode(0, cache_level, flags); + u64 start = vma->node.start; + dma_addr_t rem = iter->sg->length; + + GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm)); + + do { + struct i915_page_directory * const pdp = + gen8_pdp_for_page_address(vma->vm, start); + struct i915_page_directory * const pd = + i915_pd_entry(pdp, __gen8_pte_index(start, 2)); + gen8_pte_t encode = pte_encode; + unsigned int maybe_64K = -1; + unsigned int page_size; + gen8_pte_t *vaddr; + u16 index; + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && + rem >= I915_GTT_PAGE_SIZE_2M && + !__gen8_pte_index(start, 0)) { + index = __gen8_pte_index(start, 1); + encode |= GEN8_PDE_PS_2M; + page_size = I915_GTT_PAGE_SIZE_2M; + + vaddr = kmap_atomic_px(pd); + } else { + struct i915_page_table *pt = + i915_pt_entry(pd, __gen8_pte_index(start, 1)); + + index = __gen8_pte_index(start, 0); + page_size = I915_GTT_PAGE_SIZE; + + if (!index && + vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) + maybe_64K = __gen8_pte_index(start, 1); + + vaddr = kmap_atomic_px(pt); + } + + do { + GEM_BUG_ON(iter->sg->length < page_size); + vaddr[index++] = encode | iter->dma; + + start += page_size; + iter->dma += page_size; + rem -= page_size; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) + break; + + rem = iter->sg->length; + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + rem; + + if (maybe_64K != -1 && index < I915_PDES && + !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))) + maybe_64K = -1; + + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) + break; + } + } while (rem >= page_size && index < I915_PDES); + + kunmap_atomic(vaddr); + + /* + * Is it safe to mark the 2M block as 64K? -- Either we have + * filled whole page-table with 64K entries, or filled part of + * it and have reached the end of the sg table and we have + * enough padding. + */ + if (maybe_64K != -1 && + (index == I915_PDES || + (i915_vm_has_scratch_64K(vma->vm) && + !iter->sg && IS_ALIGNED(vma->node.start + + vma->node.size, + I915_GTT_PAGE_SIZE_2M)))) { + vaddr = kmap_atomic_px(pd); + vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + page_size = I915_GTT_PAGE_SIZE_64K; + + /* + * We write all 4K page entries, even when using 64K + * pages. In order to verify that the HW isn't cheating + * by using the 4K PTE instead of the 64K PTE, we want + * to remove all the surplus entries. If the HW skipped + * the 64K PTE, it will read/write into the scratch page + * instead - which we detect as missing results during + * selftests. 
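 *
 * For illustration, a condensed restatement of the size selection
 * above: a chunk is written with 2M PTEs when the backing store offers
 * 2M pages, the dma address is 2M aligned, at least 2M remains and the
 * GTT offset sits on a PDE boundary; a 64K promotion is only remembered
 * for the PDE when the object offers 64K pages, we start at slot 0 of
 * the table and
 *
 *	IS_ALIGNED(iter->dma, SZ_64K) &&
 *	(IS_ALIGNED(rem, SZ_64K) || rem >= (512 - index) * SZ_4K)
 *
 * holds for the remainder; everything else falls back to 4K entries.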
+ */ + if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { + u16 i; + + encode = vma->vm->scratch[0].encode; + vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K)); + + for (i = 1; i < index; i += 16) + memset64(vaddr + i, encode, 15); + + kunmap_atomic(vaddr); + } + } + + vma->page_sizes.gtt |= page_size; + } while (iter->sg); +} + +static void gen8_ppgtt_insert(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); + struct sgt_dma iter = sgt_dma(vma); + + if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags); + } else { + u64 idx = vma->node.start >> GEN8_PTE_SHIFT; + + do { + struct i915_page_directory * const pdp = + gen8_pdp_for_page_index(vm, idx); + + idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx, + cache_level, flags); + } while (idx); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + } +} + +static int gen8_init_scratch(struct i915_address_space *vm) +{ + int ret; + int i; + + /* + * If everybody agrees to not to write into the scratch page, + * we can reuse it for all vm, keeping contexts and processes separate. + */ + if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) { + struct i915_address_space *clone = vm->gt->vm; + + GEM_BUG_ON(!clone->has_read_only); + + vm->scratch_order = clone->scratch_order; + memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch)); + px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */ + return 0; + } + + ret = setup_scratch_page(vm, __GFP_HIGHMEM); + if (ret) + return ret; + + vm->scratch[0].encode = + gen8_pte_encode(px_dma(&vm->scratch[0]), + I915_CACHE_LLC, vm->has_read_only); + + for (i = 1; i <= vm->top; i++) { + if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i])))) + goto free_scratch; + + fill_px(&vm->scratch[i], vm->scratch[i - 1].encode); + vm->scratch[i].encode = + gen8_pde_encode(px_dma(&vm->scratch[i]), + I915_CACHE_LLC); + } + + return 0; + +free_scratch: + free_scratch(vm); + return -ENOMEM; +} + +static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) +{ + struct i915_address_space *vm = &ppgtt->vm; + struct i915_page_directory *pd = ppgtt->pd; + unsigned int idx; + + GEM_BUG_ON(vm->top != 2); + GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES); + + for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) { + struct i915_page_directory *pde; + + pde = alloc_pd(vm); + if (IS_ERR(pde)) + return PTR_ERR(pde); + + fill_px(pde, vm->scratch[1].encode); + set_pd_entry(pd, idx, pde); + atomic_inc(px_used(pde)); /* keep pinned */ + } + wmb(); + + return 0; +} + +static struct i915_page_directory * +gen8_alloc_top_pd(struct i915_address_space *vm) +{ + const unsigned int count = gen8_pd_top_count(vm); + struct i915_page_directory *pd; + + GEM_BUG_ON(count > ARRAY_SIZE(pd->entry)); + + pd = __alloc_pd(offsetof(typeof(*pd), entry[count])); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); + + if (unlikely(setup_page_dma(vm, px_base(pd)))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + + fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); + atomic_inc(px_used(pd)); /* mark as pinned */ + return pd; +} + +/* + * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers + * with a net effect resembling a 2-level page table in normal x86 terms. Each + * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address + * space. 
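 *
 * Worked out: 512 PTEs x 4 KiB = 2 MiB per page table, 512 page tables
 * per PDP = 1 GiB, and the four PDPs together cover
 * 4 * 512 * 512 * 4096 = 4294967296 bytes, i.e. the full 4 GiB legacy
 * 32-bit address space.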
+ * + */ +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) +{ + struct i915_ppgtt *ppgtt; + int err; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return ERR_PTR(-ENOMEM); + + ppgtt_init(ppgtt, gt); + ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2; + + /* + * From bdw, there is hw support for read-only pages in the PPGTT. + * + * Gen11 has HSDES#:1807136187 unresolved. Disable ro support + * for now. + * + * Gen12 has inherited the same read-only fault issue from gen11. + */ + ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12); + + /* + * There are only few exceptions for gen >=6. chv and bxt. + * And we are not sure about the latter so play safe for now. + */ + if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915)) + ppgtt->vm.pt_kmap_wc = true; + + err = gen8_init_scratch(&ppgtt->vm); + if (err) + goto err_free; + + ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm); + if (IS_ERR(ppgtt->pd)) { + err = PTR_ERR(ppgtt->pd); + goto err_free_scratch; + } + + if (!i915_vm_is_4lvl(&ppgtt->vm)) { + err = gen8_preallocate_top_level_pdp(ppgtt); + if (err) + goto err_free_pd; + } + + ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; + ppgtt->vm.insert_entries = gen8_ppgtt_insert; + ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; + ppgtt->vm.clear_range = gen8_ppgtt_clear; + + if (intel_vgpu_active(gt->i915)) + gen8_ppgtt_notify_vgt(ppgtt, true); + + ppgtt->vm.cleanup = gen8_ppgtt_cleanup; + + return ppgtt; + +err_free_pd: + __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd, + gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top); +err_free_scratch: + free_scratch(&ppgtt->vm); +err_free: + kfree(ppgtt); + return ERR_PTR(err); +} diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h new file mode 100644 index 000000000000..76a08b9c1f5c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __GEN8_PPGTT_H__ +#define __GEN8_PPGTT_H__ + +struct intel_gt; + +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu/drm/i915/gt/gen8_renderstate.c index 95288a34c15d..95288a34c15d 100644 --- a/drivers/gpu/drm/i915/intel_renderstate_gen8.c +++ b/drivers/gpu/drm/i915/gt/gen8_renderstate.c diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen9.c b/drivers/gpu/drm/i915/gt/gen9_renderstate.c index 7d3ac02f0177..7d3ac02f0177 100644 --- a/drivers/gpu/drm/i915/intel_renderstate_gen9.c +++ b/drivers/gpu/drm/i915/gt/gen9_renderstate.c diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index c092bdf5f0bf..0ba524a414c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -27,6 +27,9 @@ #include <uapi/linux/sched/types.h> #include "i915_drv.h" +#include "i915_trace.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" static void irq_enable(struct intel_engine_cs *engine) { @@ -34,9 +37,9 @@ static void irq_enable(struct intel_engine_cs *engine) return; /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); + spin_lock(&engine->gt->irq_lock); engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); + spin_unlock(&engine->gt->irq_lock); } static void irq_disable(struct intel_engine_cs *engine) @@ -45,35 +48,38 @@ static void irq_disable(struct intel_engine_cs *engine) return; /* Caller disables interrupts */ - 
spin_lock(&engine->i915->irq_lock); + spin_lock(&engine->gt->irq_lock); engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); + spin_unlock(&engine->gt->irq_lock); } static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { + struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + lockdep_assert_held(&b->irq_lock); GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) - irq_disable(container_of(b, - struct intel_engine_cs, - breadcrumbs)); + irq_disable(engine); b->irq_armed = false; + intel_gt_pm_put_async(engine->gt); } void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; if (!b->irq_armed) return; - spin_lock_irq(&b->irq_lock); + spin_lock_irqsave(&b->irq_lock, flags); if (b->irq_armed) __intel_breadcrumbs_disarm_irq(b); - spin_unlock_irq(&b->irq_lock); + spin_unlock_irqrestore(&b->irq_lock, flags); } static inline bool __request_completed(const struct i915_request *rq) @@ -112,23 +118,30 @@ __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp) } static void -__dma_fence_signal__notify(struct dma_fence *fence) +__dma_fence_signal__notify(struct dma_fence *fence, + const struct list_head *list) { struct dma_fence_cb *cur, *tmp; lockdep_assert_held(fence->lock); - lockdep_assert_irqs_disabled(); - list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { + list_for_each_entry_safe(cur, tmp, list, node) { INIT_LIST_HEAD(&cur->node); cur->func(fence, cur); } - INIT_LIST_HEAD(&fence->cb_list); } -void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) +static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + + intel_engine_add_retire(engine, tl); +} + +static void signal_irq_work(struct irq_work *work) +{ + struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); const ktime_t timestamp = ktime_get(); struct intel_context *ce, *cn; struct list_head *pos, *next; @@ -175,8 +188,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) if (!list_is_first(pos, &ce->signals)) { /* Advance the list to the first incomplete request */ __list_del_many(&ce->signals, pos); - if (&ce->signals == pos) /* now empty */ + if (&ce->signals == pos) { /* now empty */ list_del_init(&ce->signal_link); + add_retire(b, ce->timeline); + } } } @@ -185,62 +200,29 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) list_for_each_safe(pos, next, &signal) { struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link); - - __dma_fence_signal__timestamp(&rq->fence, timestamp); + struct list_head cb_list; spin_lock(&rq->lock); - __dma_fence_signal__notify(&rq->fence); + list_replace(&rq->fence.cb_list, &cb_list); + __dma_fence_signal__timestamp(&rq->fence, timestamp); + __dma_fence_signal__notify(&rq->fence, &cb_list); spin_unlock(&rq->lock); i915_request_put(rq); } } -void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) -{ - local_irq_disable(); - intel_engine_breadcrumbs_irq(engine); - local_irq_enable(); -} - -static void signal_irq_work(struct irq_work *work) -{ - struct intel_engine_cs *engine = - container_of(work, typeof(*engine), breadcrumbs.irq_work); - - intel_engine_breadcrumbs_irq(engine); -} - -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) -{ - struct 
intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->irq_lock); - if (!b->irq_enabled++) - irq_enable(engine); - GEM_BUG_ON(!b->irq_enabled); /* no overflow! */ - spin_unlock_irq(&b->irq_lock); -} - -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->irq_lock); - GEM_BUG_ON(!b->irq_enabled); /* no underflow! */ - if (!--b->irq_enabled) - irq_disable(engine); - spin_unlock_irq(&b->irq_lock); -} - -static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) +static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); lockdep_assert_held(&b->irq_lock); if (b->irq_armed) - return; + return true; + + if (!intel_gt_pm_get_if_awake(engine->gt)) + return false; /* * The breadcrumb irq will be disarmed on the interrupt after the @@ -260,6 +242,8 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) if (!b->irq_enabled++) irq_enable(engine); + + return true; } void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) @@ -294,23 +278,23 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) bool i915_request_enable_breadcrumb(struct i915_request *rq) { lockdep_assert_held(&rq->lock); - lockdep_assert_irqs_disabled(); if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; struct list_head *pos; spin_lock(&b->irq_lock); GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); - __intel_breadcrumbs_arm_irq(b); + if (!__intel_breadcrumbs_arm_irq(b)) + goto unlock; /* * We keep the seqno in retirement order, so we can break - * inside intel_engine_breadcrumbs_irq as soon as we've passed - * the last completed request (or seen a request that hasn't - * event started). We could iterate the timeline->requests list, + * inside intel_engine_signal_breadcrumbs as soon as we've + * passed the last completed request (or seen a request that + * hasn't event started). 
We could walk the timeline->requests, * but keeping a separate signalers_list has the advantage of * hopefully being much smaller than the full list and so * provides faster iteration and detection when there are no @@ -333,6 +317,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) GEM_BUG_ON(!check_signal_order(ce, rq)); set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); +unlock: spin_unlock(&b->irq_lock); } @@ -344,7 +329,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; lockdep_assert_held(&rq->lock); - lockdep_assert_irqs_disabled(); /* * We must wait for b->irq_lock so that we know the interrupt handler @@ -354,7 +338,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) */ spin_lock(&b->irq_lock); if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; list_del(&rq->signal_link); if (list_empty(&ce->signals)) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 23120901c55f..57e8a051ddc2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -13,6 +13,7 @@ #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" +#include "intel_ring.h" static struct i915_global_context { struct i915_global base; @@ -30,8 +31,7 @@ void intel_context_free(struct intel_context *ce) } struct intel_context * -intel_context_create(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +intel_context_create(struct intel_engine_cs *engine) { struct intel_context *ce; @@ -39,164 +39,266 @@ intel_context_create(struct i915_gem_context *ctx, if (!ce) return ERR_PTR(-ENOMEM); - intel_context_init(ce, ctx, engine); + intel_context_init(ce, engine); return ce; } -int __intel_context_do_pin(struct intel_context *ce) +int intel_context_alloc_state(struct intel_context *ce) { - int err; + int err = 0; if (mutex_lock_interruptible(&ce->pin_mutex)) return -EINTR; - if (likely(!atomic_read(&ce->pin_count))) { - intel_wakeref_t wakeref; + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + err = ce->ops->alloc(ce); + if (unlikely(err)) + goto unlock; + + set_bit(CONTEXT_ALLOC_BIT, &ce->flags); + } + +unlock: + mutex_unlock(&ce->pin_mutex); + return err; +} + +static int intel_context_active_acquire(struct intel_context *ce) +{ + int err; + + __i915_active_acquire(&ce->active); + + if (intel_context_is_barrier(ce)) + return 0; + + /* Preallocate tracking nodes */ + err = i915_active_acquire_preallocate_barrier(&ce->active, + ce->engine); + if (err) + i915_active_release(&ce->active); - err = 0; - with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref) - err = ce->ops->pin(ce); + return err; +} + +static void intel_context_active_release(struct intel_context *ce) +{ + /* Nodes preallocated in intel_context_active() */ + i915_active_acquire_barrier(&ce->active); + i915_active_release(&ce->active); +} + +int __intel_context_do_pin(struct intel_context *ce) +{ + int err; + + if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { + err = intel_context_alloc_state(ce); if (err) - goto err; + return err; + } - i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */ + err = i915_active_acquire(&ce->active); + if (err) + return err; + + if (mutex_lock_interruptible(&ce->pin_mutex)) { + err = -EINTR; + goto out_release; + } + + if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) { + err = 
intel_context_active_acquire(ce); + if (unlikely(err)) + goto out_unlock; + + err = ce->ops->pin(ce); + if (unlikely(err)) + goto err_active; + + CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n", + ce->ring->head, ce->ring->tail); smp_mb__before_atomic(); /* flush pin before it is visible */ + atomic_inc(&ce->pin_count); } - atomic_inc(&ce->pin_count); GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */ + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + goto out_unlock; +err_active: + intel_context_active_release(ce); +out_unlock: mutex_unlock(&ce->pin_mutex); - return 0; - -err: - mutex_unlock(&ce->pin_mutex); +out_release: + i915_active_release(&ce->active); return err; } void intel_context_unpin(struct intel_context *ce) { - if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) + if (!atomic_dec_and_test(&ce->pin_count)) return; - /* We may be called from inside intel_context_pin() to evict another */ - intel_context_get(ce); - mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); - - if (likely(atomic_dec_and_test(&ce->pin_count))) { - ce->ops->unpin(ce); - - i915_gem_context_put(ce->gem_context); - intel_context_active_release(ce); - } + CE_TRACE(ce, "unpin\n"); + ce->ops->unpin(ce); - mutex_unlock(&ce->pin_mutex); + /* + * Once released, we may asynchronously drop the active reference. + * As that may be the only reference keeping the context alive, + * take an extra now so that it is not freed before we finish + * dereferencing it. + */ + intel_context_get(ce); + intel_context_active_release(ce); intel_context_put(ce); } -static int __context_pin_state(struct i915_vma *vma, unsigned long flags) +static int __context_pin_state(struct i915_vma *vma) { + unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; int err; - err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL); + err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH); if (err) return err; + err = i915_active_acquire(&vma->active); + if (err) + goto err_unpin; + /* * And mark it as a globally pinned object to let the shrinker know * it cannot reclaim the object until we release it. 
*/ - vma->obj->pin_global++; + i915_vma_make_unshrinkable(vma); vma->obj->mm.dirty = true; return 0; + +err_unpin: + i915_vma_unpin(vma); + return err; } static void __context_unpin_state(struct i915_vma *vma) { - vma->obj->pin_global--; + i915_vma_make_shrinkable(vma); + i915_active_release(&vma->active); __i915_vma_unpin(vma); } -static void intel_context_retire(struct i915_active *active) +static int __ring_active(struct intel_ring *ring) { - struct intel_context *ce = container_of(active, typeof(*ce), active); + int err; - if (ce->state) - __context_unpin_state(ce->state); + err = i915_active_acquire(&ring->vma->active); + if (err) + return err; - intel_ring_unpin(ce->ring); - intel_context_put(ce); + err = intel_ring_pin(ring); + if (err) + goto err_active; + + return 0; + +err_active: + i915_active_release(&ring->vma->active); + return err; } -void -intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static void __ring_retire(struct intel_ring *ring) { - GEM_BUG_ON(!engine->cops); + intel_ring_unpin(ring); + i915_active_release(&ring->vma->active); +} - kref_init(&ce->ref); +__i915_active_call +static void __intel_context_retire(struct i915_active *active) +{ + struct intel_context *ce = container_of(active, typeof(*ce), active); - ce->gem_context = ctx; - ce->engine = engine; - ce->ops = engine->cops; - ce->sseu = engine->sseu; + CE_TRACE(ce, "retire\n"); - INIT_LIST_HEAD(&ce->signal_link); - INIT_LIST_HEAD(&ce->signals); + set_bit(CONTEXT_VALID_BIT, &ce->flags); + if (ce->state) + __context_unpin_state(ce->state); - mutex_init(&ce->pin_mutex); + intel_timeline_unpin(ce->timeline); + __ring_retire(ce->ring); - i915_active_init(ctx->i915, &ce->active, intel_context_retire); + intel_context_put(ce); } -int intel_context_active_acquire(struct intel_context *ce, unsigned long flags) +static int __intel_context_active(struct i915_active *active) { + struct intel_context *ce = container_of(active, typeof(*ce), active); int err; - if (!i915_active_acquire(&ce->active)) - return 0; + CE_TRACE(ce, "active\n"); intel_context_get(ce); - err = intel_ring_pin(ce->ring); + err = __ring_active(ce->ring); if (err) goto err_put; + err = intel_timeline_pin(ce->timeline); + if (err) + goto err_ring; + if (!ce->state) return 0; - err = __context_pin_state(ce->state, flags); + err = __context_pin_state(ce->state); if (err) - goto err_ring; - - /* Preallocate tracking nodes */ - if (!i915_gem_context_is_kernel(ce->gem_context)) { - err = i915_active_acquire_preallocate_barrier(&ce->active, - ce->engine); - if (err) - goto err_state; - } + goto err_timeline; return 0; -err_state: - __context_unpin_state(ce->state); +err_timeline: + intel_timeline_unpin(ce->timeline); err_ring: - intel_ring_unpin(ce->ring); + __ring_retire(ce->ring); err_put: intel_context_put(ce); - i915_active_cancel(&ce->active); return err; } -void intel_context_active_release(struct intel_context *ce) +void +intel_context_init(struct intel_context *ce, + struct intel_engine_cs *engine) { - /* Nodes preallocated in intel_context_active() */ - i915_active_acquire_barrier(&ce->active); - i915_active_release(&ce->active); + GEM_BUG_ON(!engine->cops); + GEM_BUG_ON(!engine->gt->vm); + + kref_init(&ce->ref); + + ce->engine = engine; + ce->ops = engine->cops; + ce->sseu = engine->sseu; + ce->ring = __intel_context_ring_size(SZ_4K); + + ce->vm = i915_vm_get(engine->gt->vm); + + INIT_LIST_HEAD(&ce->signal_link); + INIT_LIST_HEAD(&ce->signals); + + mutex_init(&ce->pin_mutex); + + 
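/*
 * For illustration only: the i915_active registered just below pairs
 * __intel_context_active() with __intel_context_retire(), so the ring,
 * timeline and (if present) context state are pinned on the first pin
 * and released again once the last request using this context retires.
 * A sketch of a typical caller, using only functions introduced in this
 * file:
 *
 *	ce = intel_context_create(engine);
 *	err = intel_context_pin(ce);
 *	rq = intel_context_create_request(ce);
 *	...
 *	intel_context_unpin(ce);
 *	intel_context_put(ce);
 */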
i915_active_init(&ce->active, + __intel_context_active, __intel_context_retire); +} + +void intel_context_fini(struct intel_context *ce) +{ + if (ce->timeline) + intel_timeline_put(ce->timeline); + i915_vm_put(ce->vm); + + mutex_destroy(&ce->pin_mutex); + i915_active_fini(&ce->active); } static void i915_global_context_shrink(void) @@ -227,13 +329,42 @@ int __init i915_global_context_init(void) void intel_context_enter_engine(struct intel_context *ce) { intel_engine_pm_get(ce->engine); + intel_timeline_enter(ce->timeline); } void intel_context_exit_engine(struct intel_context *ce) { + intel_timeline_exit(ce->timeline); intel_engine_pm_put(ce->engine); } +int intel_context_prepare_remote_request(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_timeline *tl = ce->timeline; + int err; + + /* Only suitable for use in remotely modifying this context */ + GEM_BUG_ON(rq->context == ce); + + if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ + /* Queue this switch after current activity by this context. */ + err = i915_active_fence_set(&tl->last_request, rq); + if (err) + return err; + } + + /* + * Guarantee context image and the timeline remains pinned until the + * modifying request is retired by setting the ce activity tracker. + * + * But we only need to take one pin on the account of it. Or in other + * words transfer the pinned ce object to tracked active request. + */ + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + return i915_active_add_request(&ce->active, rq); +} + struct i915_request *intel_context_create_request(struct intel_context *ce) { struct i915_request *rq; @@ -248,3 +379,7 @@ struct i915_request *intel_context_create_request(struct intel_context *ce) return rq; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_context.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index a47275bc4f01..30bd248827d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -7,18 +7,31 @@ #ifndef __INTEL_CONTEXT_H__ #define __INTEL_CONTEXT_H__ +#include <linux/bitops.h> #include <linux/lockdep.h> +#include <linux/types.h> +#include "i915_active.h" #include "intel_context_types.h" #include "intel_engine_types.h" +#include "intel_ring_types.h" +#include "intel_timeline_types.h" + +#define CE_TRACE(ce, fmt, ...) 
do { \ + const struct intel_context *ce__ = (ce); \ + ENGINE_TRACE(ce__->engine, "context:%llx " fmt, \ + ce__->timeline->fence_context, \ + ##__VA_ARGS__); \ +} while (0) void intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, struct intel_engine_cs *engine); +void intel_context_fini(struct intel_context *ce); struct intel_context * -intel_context_create(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); +intel_context_create(struct intel_engine_cs *engine); + +int intel_context_alloc_state(struct intel_context *ce); void intel_context_free(struct intel_context *ce); @@ -65,9 +78,14 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce) int __intel_context_do_pin(struct intel_context *ce); +static inline bool intel_context_pin_if_active(struct intel_context *ce) +{ + return atomic_inc_not_zero(&ce->pin_count); +} + static inline int intel_context_pin(struct intel_context *ce) { - if (likely(atomic_inc_not_zero(&ce->pin_count))) + if (likely(intel_context_pin_if_active(ce))) return 0; return __intel_context_do_pin(ce); @@ -86,25 +104,25 @@ void intel_context_exit_engine(struct intel_context *ce); static inline void intel_context_enter(struct intel_context *ce) { + lockdep_assert_held(&ce->timeline->mutex); if (!ce->active_count++) ce->ops->enter(ce); } static inline void intel_context_mark_active(struct intel_context *ce) { + lockdep_assert_held(&ce->timeline->mutex); ++ce->active_count; } static inline void intel_context_exit(struct intel_context *ce) { + lockdep_assert_held(&ce->timeline->mutex); GEM_BUG_ON(!ce->active_count); if (!--ce->active_count) ce->ops->exit(ce); } -int intel_context_active_acquire(struct intel_context *ce, unsigned long flags); -void intel_context_active_release(struct intel_context *ce); - static inline struct intel_context *intel_context_get(struct intel_context *ce) { kref_get(&ce->ref); @@ -116,19 +134,94 @@ static inline void intel_context_put(struct intel_context *ce) kref_put(&ce->ref, ce->ops->destroy); } -static inline int __must_check +static inline struct intel_timeline *__must_check intel_context_timeline_lock(struct intel_context *ce) - __acquires(&ce->ring->timeline->mutex) + __acquires(&ce->timeline->mutex) { - return mutex_lock_interruptible(&ce->ring->timeline->mutex); + struct intel_timeline *tl = ce->timeline; + int err; + + err = mutex_lock_interruptible(&tl->mutex); + if (err) + return ERR_PTR(err); + + return tl; } -static inline void intel_context_timeline_unlock(struct intel_context *ce) - __releases(&ce->ring->timeline->mutex) +static inline void intel_context_timeline_unlock(struct intel_timeline *tl) + __releases(&tl->mutex) { - mutex_unlock(&ce->ring->timeline->mutex); + mutex_unlock(&tl->mutex); } +int intel_context_prepare_remote_request(struct intel_context *ce, + struct i915_request *rq); + struct i915_request *intel_context_create_request(struct intel_context *ce); +static inline struct intel_ring *__intel_context_ring_size(u64 sz) +{ + return u64_to_ptr(struct intel_ring, sz); +} + +static inline bool intel_context_is_barrier(const struct intel_context *ce) +{ + return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); +} + +static inline bool intel_context_use_semaphores(const struct intel_context *ce) +{ + return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); +} + +static inline void intel_context_set_use_semaphores(struct intel_context *ce) +{ + set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); +} + +static inline void intel_context_clear_use_semaphores(struct intel_context *ce) 
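(Aside: intel_context_timeline_lock() above now returns either the locked timeline or an ERR_PTR-encoded errno from the interruptible lock. For readers unfamiliar with that convention, here is a self-contained user-space sketch of the encoding; the ERR_PTR/IS_ERR/PTR_ERR helpers are re-implemented locally for illustration and timeline_lock() is made up.)

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long error)
{
	return (void *)(intptr_t)error;
}

static long PTR_ERR(const void *ptr)
{
	return (long)(intptr_t)ptr;
}

static int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

/* Pretend lock: fails with -EINTR when "interrupted", else returns the object. */
static void *timeline_lock(int interrupted)
{
	static int timeline = 42;

	if (interrupted)
		return ERR_PTR(-EINTR);
	return &timeline;
}

int main(void)
{
	void *tl = timeline_lock(1);

	if (IS_ERR(tl))
		printf("lock failed: %ld\n", PTR_ERR(tl));

	tl = timeline_lock(0);
	if (!IS_ERR(tl))
		printf("locked timeline: %d\n", *(int *)tl);
	return 0;
}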
+{ + clear_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); +} + +static inline bool intel_context_is_banned(const struct intel_context *ce) +{ + return test_bit(CONTEXT_BANNED, &ce->flags); +} + +static inline bool intel_context_set_banned(struct intel_context *ce) +{ + return test_and_set_bit(CONTEXT_BANNED, &ce->flags); +} + +static inline bool +intel_context_force_single_submission(const struct intel_context *ce) +{ + return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags); +} + +static inline void +intel_context_set_single_submission(struct intel_context *ce) +{ + __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags); +} + +static inline bool +intel_context_nopreempt(const struct intel_context *ce) +{ + return test_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + +static inline void +intel_context_set_nopreempt(struct intel_context *ce) +{ + set_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + +static inline void +intel_context_clear_nopreempt(struct intel_context *ce) +{ + clear_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 08049ee91cee..ca1420fb8b53 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -13,15 +13,20 @@ #include <linux/types.h> #include "i915_active_types.h" +#include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" +#define CONTEXT_REDZONE POISON_INUSE + struct i915_gem_context; struct i915_vma; struct intel_context; struct intel_ring; struct intel_context_ops { + int (*alloc)(struct intel_context *ce); + int (*pin)(struct intel_context *ce); void (*unpin)(struct intel_context *ce); @@ -35,20 +40,35 @@ struct intel_context_ops { struct intel_context { struct kref ref; - struct i915_gem_context *gem_context; struct intel_engine_cs *engine; struct intel_engine_cs *inflight; +#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2) +#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2) + + struct i915_address_space *vm; + struct i915_gem_context __rcu *gem_context; struct list_head signal_link; struct list_head signals; struct i915_vma *state; struct intel_ring *ring; + struct intel_timeline *timeline; + + unsigned long flags; +#define CONTEXT_BARRIER_BIT 0 +#define CONTEXT_ALLOC_BIT 1 +#define CONTEXT_VALID_BIT 2 +#define CONTEXT_USE_SEMAPHORES 3 +#define CONTEXT_BANNED 4 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 5 +#define CONTEXT_NOPREEMPT 6 u32 *lrc_reg_state; u64 lrc_desc; + u32 tag; /* cookie passed to HW to track this context on submission */ - unsigned int active_count; /* notionally protected by timeline->mutex */ + unsigned int active_count; /* protected by timeline->mutex */ atomic_t pin_count; struct mutex pin_mutex; /* guards pinning and associated on-gpuing */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 2f1c6871ee95..5df003061e44 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -9,17 +9,17 @@ #include <linux/random.h> #include <linux/seqlock.h> -#include "i915_gem_batch_pool.h" #include "i915_pmu.h" #include "i915_reg.h" #include "i915_request.h" #include "i915_selftest.h" -#include "i915_timeline.h" +#include "gt/intel_timeline.h" #include "intel_engine_types.h" #include "intel_gpu_commands.h" #include "intel_workarounds.h" struct drm_printer; +struct intel_gt; /* Early gen2 devices have a cacheline of just 32 bytes, 
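(Aside: the CONTEXT_* accessors above are thin wrappers over a single unsigned long of flags, built from set_bit(), test_bit() and test_and_set_bit(). A minimal user-space model of that pattern using C11 atomics follows; the bit numbers merely mirror the CONTEXT_BANNED/CONTEXT_NOPREEMPT layout and everything else is invented.)

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum {				/* mirrors the bit layout above, nothing more */
	FLAG_BANNED = 4,
	FLAG_NOPREEMPT = 6,
};

static bool test_bit(atomic_ulong *flags, int bit)
{
	return atomic_load(flags) & (1ul << bit);
}

static void set_bit(atomic_ulong *flags, int bit)
{
	atomic_fetch_or(flags, 1ul << bit);
}

/* Returns the previous value of the bit, as test_and_set_bit() does. */
static bool test_and_set_bit(atomic_ulong *flags, int bit)
{
	return atomic_fetch_or(flags, 1ul << bit) & (1ul << bit);
}

int main(void)
{
	atomic_ulong flags = 0;

	set_bit(&flags, FLAG_NOPREEMPT);
	printf("first ban already set?  %d\n", test_and_set_bit(&flags, FLAG_BANNED));
	printf("second ban already set? %d\n", test_and_set_bit(&flags, FLAG_BANNED));
	printf("nopreempt?              %d\n", test_bit(&flags, FLAG_NOPREEMPT));
	return 0;
}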
using 64 is overkill, * but keeps the logic simple. Indeed, the whole purpose of this macro is just @@ -29,6 +29,13 @@ struct drm_printer; #define CACHELINE_BYTES 64 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) +#define ENGINE_TRACE(e, fmt, ...) do { \ + const struct intel_engine_cs *e__ __maybe_unused = (e); \ + GEM_TRACE("%s %s: " fmt, \ + dev_name(e__->i915->drm.dev), e__->name, \ + ##__VA_ARGS__); \ +} while (0) + /* * The register defines to be used with the following macros need to accept a * base param, e.g: @@ -51,7 +58,7 @@ struct drm_printer; #define ENGINE_READ16(...) __ENGINE_READ_OP(read16, __VA_ARGS__) #define ENGINE_READ(...) __ENGINE_READ_OP(read, __VA_ARGS__) #define ENGINE_READ_FW(...) __ENGINE_READ_OP(read_fw, __VA_ARGS__) -#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read, __VA_ARGS__) +#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read_fw, __VA_ARGS__) #define ENGINE_POSTING_READ16(...) __ENGINE_READ_OP(posting_read16, __VA_ARGS__) #define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \ @@ -90,106 +97,36 @@ struct drm_printer; /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. */ -enum intel_engine_hangcheck_action { - ENGINE_IDLE = 0, - ENGINE_WAIT, - ENGINE_ACTIVE_SEQNO, - ENGINE_ACTIVE_HEAD, - ENGINE_ACTIVE_SUBUNITS, - ENGINE_WAIT_KICK, - ENGINE_DEAD, -}; - -static inline const char * -hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) -{ - switch (a) { - case ENGINE_IDLE: - return "idle"; - case ENGINE_WAIT: - return "wait"; - case ENGINE_ACTIVE_SEQNO: - return "active seqno"; - case ENGINE_ACTIVE_HEAD: - return "active head"; - case ENGINE_ACTIVE_SUBUNITS: - return "active subunits"; - case ENGINE_WAIT_KICK: - return "wait kick"; - case ENGINE_DEAD: - return "dead"; - } - return "unknown"; -} - -void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); - -static inline void -execlists_set_active(struct intel_engine_execlists *execlists, - unsigned int bit) +static inline unsigned int +execlists_num_ports(const struct intel_engine_execlists * const execlists) { - __set_bit(bit, (unsigned long *)&execlists->active); + return execlists->port_mask + 1; } -static inline bool -execlists_set_active_once(struct intel_engine_execlists *execlists, - unsigned int bit) +static inline struct i915_request * +execlists_active(const struct intel_engine_execlists *execlists) { - return !__test_and_set_bit(bit, (unsigned long *)&execlists->active); + return *READ_ONCE(execlists->active); } static inline void -execlists_clear_active(struct intel_engine_execlists *execlists, - unsigned int bit) +execlists_active_lock_bh(struct intel_engine_execlists *execlists) { - __clear_bit(bit, (unsigned long *)&execlists->active); + local_bh_disable(); /* prevent local softirq and lock recursion */ + tasklet_lock(&execlists->tasklet); } static inline void -execlists_clear_all_active(struct intel_engine_execlists *execlists) +execlists_active_unlock_bh(struct intel_engine_execlists *execlists) { - execlists->active = 0; + tasklet_unlock(&execlists->tasklet); + local_bh_enable(); /* restore softirq, and kick ksoftirqd! 
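(Aside: execlists_active() above returns the first entry of a NULL-terminated array of in-flight requests, and the engine dump later in this patch walks the ports with for (port = ...; (rq = *port); port++). A tiny standalone illustration of that data shape, with a made-up request struct:)

#include <stdio.h>

struct request {		/* made-up stand-in for struct i915_request */
	int seqno;
};

int main(void)
{
	struct request a = { .seqno = 1 }, b = { .seqno = 2 };
	/* Ports: pointers to in-flight requests, terminated by NULL. */
	struct request *inflight[] = { &a, &b, NULL };
	struct request **port;
	struct request *rq;

	for (port = inflight; (rq = *port); port++)
		printf("Active[%d]: seqno %d\n", (int)(port - inflight), rq->seqno);

	return 0;
}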
*/ } -static inline bool -execlists_is_active(const struct intel_engine_execlists *execlists, - unsigned int bit) -{ - return test_bit(bit, (unsigned long *)&execlists->active); -} - -void execlists_user_begin(struct intel_engine_execlists *execlists, - const struct execlist_port *port); -void execlists_user_end(struct intel_engine_execlists *execlists); - -void -execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); - struct i915_request * execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); -static inline unsigned int -execlists_num_ports(const struct intel_engine_execlists * const execlists) -{ - return execlists->port_mask + 1; -} - -static inline struct execlist_port * -execlists_port_complete(struct intel_engine_execlists * const execlists, - struct execlist_port * const port) -{ - const unsigned int m = execlists->port_mask; - - GEM_BUG_ON(port_index(port, execlists) != 0); - GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); - - memmove(port, port + 1, m * sizeof(struct execlist_port)); - memset(port + m, 0, sizeof(struct execlist_port)); - - return port; -} - static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { @@ -243,134 +180,19 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_HWS_CSB_WRITE_INDEX 0x1f #define CNL_HWS_CSB_WRITE_INDEX 0x2f -struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_timeline *timeline, - int size); -int intel_ring_pin(struct intel_ring *ring); -void intel_ring_reset(struct intel_ring *ring, u32 tail); -unsigned int intel_ring_update_space(struct intel_ring *ring); -void intel_ring_unpin(struct intel_ring *ring); -void intel_ring_free(struct kref *ref); - -static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) -{ - kref_get(&ring->ref); - return ring; -} - -static inline void intel_ring_put(struct intel_ring *ring) -{ - kref_put(&ring->ref, intel_ring_free); -} - void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine); -int __must_check intel_ring_cacheline_align(struct i915_request *rq); - -u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); - -static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) -{ - /* Dummy function. - * - * This serves as a placeholder in the code so that the reader - * can compare against the preceding intel_ring_begin() and - * check that the number of dwords emitted matches the space - * reserved for the command packet (i.e. the value passed to - * intel_ring_begin()). - */ - GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); -} - -static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) -{ - return pos & (ring->size - 1); -} - -static inline bool -intel_ring_offset_valid(const struct intel_ring *ring, - unsigned int pos) -{ - if (pos & -ring->size) /* must be strictly within the ring */ - return false; - - if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ - return false; - - return true; -} +int intel_engines_init_mmio(struct intel_gt *gt); +int intel_engines_init(struct intel_gt *gt); -static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) -{ - /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. 
*/ - u32 offset = addr - rq->ring->vaddr; - GEM_BUG_ON(offset > rq->ring->size); - return intel_ring_wrap(rq->ring, offset); -} - -static inline void -assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) -{ - GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); - - /* - * "Ring Buffer Use" - * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 - * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 - * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." - * - * We use ring->head as the last known location of the actual RING_HEAD, - * it may have advanced but in the worst case it is equally the same - * as ring->head and so we should never program RING_TAIL to advance - * into the same cacheline as ring->head. - */ -#define cacheline(a) round_down(a, CACHELINE_BYTES) - GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && - tail < ring->head); -#undef cacheline -} - -static inline unsigned int -intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) -{ - /* Whilst writes to the tail are strictly order, there is no - * serialisation between readers and the writers. The tail may be - * read by i915_request_retire() just as it is being updated - * by execlists, as although the breadcrumb is complete, the context - * switch hasn't been seen. - */ - assert_ring_tail_valid(ring, tail); - ring->tail = tail; - return tail; -} - -static inline unsigned int -__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) -{ - /* - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." 
- */ - GEM_BUG_ON(!is_power_of_2(size)); - return (head - tail - CACHELINE_BYTES) & (size - 1); -} - -int intel_engines_init_mmio(struct drm_i915_private *i915); -int intel_engines_setup(struct drm_i915_private *i915); -int intel_engines_init(struct drm_i915_private *i915); -void intel_engines_cleanup(struct drm_i915_private *i915); +void intel_engines_release(struct intel_gt *gt); +void intel_engines_free(struct intel_gt *gt); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); int intel_ring_submission_setup(struct intel_engine_cs *engine); -int intel_ring_submission_init(struct intel_engine_cs *engine); int intel_engine_stop_cs(struct intel_engine_cs *engine); void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); @@ -380,7 +202,7 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); -void intel_engine_get_instdone(struct intel_engine_cs *engine, +void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone); void intel_engine_init_execlists(struct intel_engine_cs *engine); @@ -388,20 +210,14 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine); void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); - -void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); static inline void -intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) +intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) { irq_work_queue(&engine->breadcrumbs.irq_work); } -void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine); - void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); @@ -456,19 +272,19 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) return cs; } -static inline void intel_engine_reset(struct intel_engine_cs *engine, - bool stalled) +static inline void __intel_engine_reset(struct intel_engine_cs *engine, + bool stalled) { - if (engine->reset.reset) - engine->reset.reset(engine, stalled); + if (engine->reset.rewind) + engine->reset.rewind(engine, stalled); engine->serial++; /* contexts lost */ } +bool intel_engines_are_idle(struct intel_gt *gt); bool intel_engine_is_idle(struct intel_engine_cs *engine); -bool intel_engines_are_idle(struct drm_i915_private *dev_priv); +void intel_engine_flush_submission(struct intel_engine_cs *engine); -void intel_engines_reset_default_submission(struct drm_i915_private *i915); -unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); +void intel_engines_reset_default_submission(struct intel_gt *gt); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); @@ -477,64 +293,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); -struct intel_engine_cs * -intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); - -static inline void intel_engine_context_in(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if 
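(Aside: the ring helpers deleted from this header boil down to power-of-two arithmetic: positions wrap with a size-1 mask, and the usable space deliberately keeps one cacheline free so RING_HEAD and RING_TAIL cannot land in the same cacheline with head ahead of tail. A standalone check of that arithmetic, with arbitrarily chosen values:)

#include <assert.h>
#include <stdio.h>

#define CACHELINE_BYTES 64u

static unsigned int ring_wrap(unsigned int size, unsigned int pos)
{
	return pos & (size - 1);	/* size must be a power of two */
}

static unsigned int ring_space(unsigned int head, unsigned int tail,
			       unsigned int size)
{
	/*
	 * Keep one cacheline free so the head and tail pointers never end
	 * up in the same cacheline with head greater than tail (the
	 * documented hardware restriction).
	 */
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}

int main(void)
{
	unsigned int size = 4096;

	assert(ring_wrap(size, size + 8) == 8);		/* wraps past the end */
	printf("empty ring space: %u\n", ring_space(0, 0, size));   /* 4032 */
	printf("nearly full:      %u\n", ring_space(64, 0, size));  /* 0 */
	return 0;
}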
(READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - if (engine->stats.active++ == 0) - engine->stats.start = ktime_get(); - GEM_BUG_ON(engine->stats.active == 0); - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -static inline void intel_engine_context_out(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - ktime_t last; - - if (engine->stats.active && --engine->stats.active == 0) { - /* - * Decrement the active context count and in case GPU - * is now idle add up to the running total. - */ - last = ktime_sub(ktime_get(), engine->stats.start); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } else if (engine->stats.active == 0) { - /* - * After turning on engine stats, context out might be - * the first event in which case we account from the - * time stats gathering was turned on. - */ - last = ktime_sub(ktime_get(), engine->stats.enabled_at); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - int intel_enable_engine_stats(struct intel_engine_cs *engine); void intel_disable_engine_stats(struct intel_engine_cs *engine); @@ -543,7 +301,7 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine); -u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class); +u32 intel_engine_context_size(struct intel_gt *gt, u8 class); #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) @@ -571,4 +329,22 @@ void intel_engine_init_active(struct intel_engine_cs *engine, #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 +static inline bool +intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return false; + + return intel_engine_has_preemption(engine); +} + +static inline bool +intel_engine_has_timeslices(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return false; + + return intel_engine_has_semaphores(engine); +} + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f25632c9b292..06ff7695fa29 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -28,11 +28,16 @@ #include "i915_drv.h" +#include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" -#include "intel_context.h" +#include "intel_engine_pool.h" +#include "intel_engine_user.h" +#include "intel_gt.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_reset.h" +#include "intel_ring.h" /* Haswell does have the CXT_SIZE register however it does not appear to be * valid. Now, docs explain in dwords what is in the context object. 
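(Aside: the engine-statistics helpers removed above accumulate busy time across the idle/busy transitions of the submission ports. Stripped of the seqlock and irq-safe locking, and assuming every context_out() pairs with an earlier context_in(), the bookkeeping reduces to the toy model below; the names and the time source are illustrative only.)

#include <stdio.h>
#include <time.h>

struct stats {
	unsigned int active;	/* contexts currently on the hardware */
	double start;		/* when the engine last went busy */
	double total;		/* accumulated busy seconds */
};

static double now(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec + ts.tv_nsec / 1e9;
}

static void context_in(struct stats *s)
{
	if (s->active++ == 0)		/* idle -> busy transition */
		s->start = now();
}

static void context_out(struct stats *s)
{
	if (--s->active == 0)		/* busy -> idle transition */
		s->total += now() - s->start;
}

int main(void)
{
	struct stats s = { 0 };

	context_in(&s);
	context_in(&s);		/* overlapping contexts: still one busy period */
	context_out(&s);
	context_out(&s);

	printf("busy for %.9f s\n", s.total);
	return 0;
}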
The full @@ -51,30 +56,6 @@ #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) -struct engine_class_info { - const char *name; - u8 uabi_class; -}; - -static const struct engine_class_info intel_engine_classes[] = { - [RENDER_CLASS] = { - .name = "rcs", - .uabi_class = I915_ENGINE_CLASS_RENDER, - }, - [COPY_ENGINE_CLASS] = { - .name = "bcs", - .uabi_class = I915_ENGINE_CLASS_COPY, - }, - [VIDEO_DECODE_CLASS] = { - .name = "vcs", - .uabi_class = I915_ENGINE_CLASS_VIDEO, - }, - [VIDEO_ENHANCEMENT_CLASS] = { - .name = "vecs", - .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE, - }, -}; - #define MAX_MMIO_BASES 3 struct engine_info { unsigned int hw_id; @@ -160,7 +141,7 @@ static const struct engine_info intel_engines[] = { /** * intel_engine_context_size() - return the size of the context for an engine - * @dev_priv: i915 device private + * @gt: the gt * @class: engine class * * Each engine class may require a different amount of space for a context @@ -172,18 +153,20 @@ static const struct engine_info intel_engines[] = { * in LRC mode, but does not include the "shared data page" used with * GuC submission. The caller should account for this if using the GuC. */ -u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) +u32 intel_engine_context_size(struct intel_gt *gt, u8 class) { + struct intel_uncore *uncore = gt->uncore; u32 cxt_size; BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE); switch (class) { case RENDER_CLASS: - switch (INTEL_GEN(dev_priv)) { + switch (INTEL_GEN(gt->i915)) { default: - MISSING_CASE(INTEL_GEN(dev_priv)); + MISSING_CASE(INTEL_GEN(gt->i915)); return DEFAULT_LR_CONTEXT_RENDER_SIZE; + case 12: case 11: return GEN11_LR_CONTEXT_RENDER_SIZE; case 10: @@ -193,14 +176,14 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) case 8: return GEN8_LR_CONTEXT_RENDER_SIZE; case 7: - if (IS_HASWELL(dev_priv)) + if (IS_HASWELL(gt->i915)) return HSW_CXT_TOTAL_SIZE; - cxt_size = I915_READ(GEN7_CXT_SIZE); + cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE); return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64, PAGE_SIZE); case 6: - cxt_size = I915_READ(CXT_SIZE); + cxt_size = intel_uncore_read(uncore, CXT_SIZE); return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64, PAGE_SIZE); case 5: @@ -215,9 +198,9 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) * minimum allocation anyway so it should all come * out in the wash. */ - cxt_size = I915_READ(CXT_SIZE) + 1; + cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1; DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n", - INTEL_GEN(dev_priv), + INTEL_GEN(gt->i915), cxt_size * 64, cxt_size - 1); return round_up(cxt_size * 64, PAGE_SIZE); @@ -234,7 +217,7 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) case VIDEO_DECODE_CLASS: case VIDEO_ENHANCEMENT_CLASS: case COPY_ENGINE_CLASS: - if (INTEL_GEN(dev_priv) < 8) + if (INTEL_GEN(gt->i915) < 8) return 0; return GEN8_LR_CONTEXT_OTHER_SIZE; } @@ -255,11 +238,16 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915, return bases[i].base; } -static void __sprint_engine_name(char *name, const struct engine_info *info) +static void __sprint_engine_name(struct intel_engine_cs *engine) { - WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u", - intel_engine_classes[info->class].name, - info->instance) >= INTEL_ENGINE_CS_MAX_NAME); + /* + * Before we know what the uABI name for this engine will be, + * we still would like to keep track of this engine in the debug logs. 
+ * We throw in a ' here as a reminder that this isn't its final name. + */ + GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u", + intel_engine_class_repr(engine->class), + engine->instance) >= sizeof(engine->name)); } void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask) @@ -283,28 +271,26 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine) intel_engine_set_hwsp_writemask(engine, ~0u); } -static int -intel_engine_setup(struct drm_i915_private *dev_priv, - enum intel_engine_id id) +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; struct intel_engine_cs *engine; - GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); - BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine))) + return -EINVAL; + if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS)) return -EINVAL; if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE)) return -EINVAL; - if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance])) + if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance])) return -EINVAL; - GEM_BUG_ON(dev_priv->engine[id]); engine = kzalloc(sizeof(*engine), GFP_KERNEL); if (!engine) return -ENOMEM; @@ -312,33 +298,37 @@ intel_engine_setup(struct drm_i915_private *dev_priv, BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); engine->id = id; + engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); - engine->i915 = dev_priv; - engine->uncore = &dev_priv->uncore; - __sprint_engine_name(engine->name, info); + engine->i915 = gt->i915; + engine->gt = gt; + engine->uncore = gt->uncore; engine->hw_id = engine->guc_id = info->hw_id; - engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases); + engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases); + engine->class = info->class; engine->instance = info->instance; - - /* - * To be overridden by the backend on setup. However to facilitate - * cleanup on error during setup, we always provide the destroy vfunc. 
- */ - engine->destroy = (typeof(engine->destroy))kfree; - - engine->uabi_class = intel_engine_classes[info->class].uabi_class; - - engine->context_size = intel_engine_context_size(dev_priv, - engine->class); + __sprint_engine_name(engine); + + engine->props.heartbeat_interval_ms = + CONFIG_DRM_I915_HEARTBEAT_INTERVAL; + engine->props.preempt_timeout_ms = + CONFIG_DRM_I915_PREEMPT_TIMEOUT; + engine->props.stop_timeout_ms = + CONFIG_DRM_I915_STOP_TIMEOUT; + engine->props.timeslice_duration_ms = + CONFIG_DRM_I915_TIMESLICE_DURATION; + + engine->context_size = intel_engine_context_size(gt, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; if (engine->context_size) - DRIVER_CAPS(dev_priv)->has_logical_contexts = true; + DRIVER_CAPS(gt->i915)->has_logical_contexts = true; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; + ewma__engine_latency_init(&engine->latency); seqlock_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -346,8 +336,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv, /* Scrub mmio state on takeover */ intel_engine_sanitize_mmio(engine); - dev_priv->engine_class[info->class][info->instance] = engine; - dev_priv->engine[id] = engine; + gt->engine_class[info->class][info->instance] = engine; + gt->engine[id] = engine; + + gt->i915->engine[id] = engine; + return 0; } @@ -381,38 +374,58 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) } } -static void intel_setup_engine_capabilities(struct drm_i915_private *i915) +static void intel_setup_engine_capabilities(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) __setup_engine_capabilities(engine); } /** - * intel_engines_cleanup() - free the resources allocated for Command Streamers - * @i915: the i915 devic + * intel_engines_release() - free the resources allocated for Command Streamers + * @gt: pointer to struct intel_gt */ -void intel_engines_cleanup(struct drm_i915_private *i915) +void intel_engines_release(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* Decouple the backend; but keep the layout for late GPU resets */ + for_each_engine(engine, gt, id) { + if (!engine->release) + continue; + + engine->release(engine); + engine->release = NULL; + + memset(&engine->reset, 0, sizeof(engine->reset)); + + gt->i915->engine[id] = NULL; + } +} + +void intel_engines_free(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { - engine->destroy(engine); - i915->engine[id] = NULL; + for_each_engine(engine, gt, id) { + kfree(engine); + gt->engine[id] = NULL; } } /** * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers - * @i915: the i915 device + * @gt: pointer to struct intel_gt * * Return: non-zero if the initialization failed. 
*/ -int intel_engines_init_mmio(struct drm_i915_private *i915) +int intel_engines_init_mmio(struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; struct intel_device_info *device_info = mkwrite_device_info(i915); const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask; unsigned int mask = 0; @@ -423,14 +436,14 @@ int intel_engines_init_mmio(struct drm_i915_private *i915) WARN_ON(engine_mask & GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES)); - if (i915_inject_load_failure()) + if (i915_inject_probe_failure(i915)) return -ENODEV; for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { if (!HAS_ENGINE(i915, i)) continue; - err = intel_engine_setup(i915, i); + err = intel_engine_setup(gt, i); if (err) goto cleanup; @@ -445,61 +458,19 @@ int intel_engines_init_mmio(struct drm_i915_private *i915) if (WARN_ON(mask != engine_mask)) device_info->engine_mask = mask; - /* We always presume we have at least RCS available for later probing */ - if (WARN_ON(!HAS_ENGINE(i915, RCS0))) { - err = -ENODEV; - goto cleanup; - } - RUNTIME_INFO(i915)->num_engines = hweight32(mask); - i915_check_and_clear_faults(i915); - - intel_setup_engine_capabilities(i915); - - return 0; - -cleanup: - intel_engines_cleanup(i915); - return err; -} - -/** - * intel_engines_init() - init the Engine Command Streamers - * @i915: i915 device private - * - * Return: non-zero if the initialization failed. - */ -int intel_engines_init(struct drm_i915_private *i915) -{ - int (*init)(struct intel_engine_cs *engine); - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - if (HAS_EXECLISTS(i915)) - init = intel_execlists_submission_init; - else - init = intel_ring_submission_init; + intel_gt_check_and_clear_faults(gt); - for_each_engine(engine, i915, id) { - err = init(engine); - if (err) - goto cleanup; - } + intel_setup_engine_capabilities(gt); return 0; cleanup: - intel_engines_cleanup(i915); + intel_engines_free(gt); return err; } -static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) -{ - i915_gem_batch_pool_init(&engine->batch_pool, engine); -} - void intel_engine_init_execlists(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -508,6 +479,10 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); + memset(execlists->pending, 0, sizeof(execlists->pending)); + execlists->active = + memset(execlists->inflight, 0, sizeof(execlists->inflight)); + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; } @@ -536,7 +511,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, unsigned int flags; flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915)) + if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) /* * On g33, we cannot place HWS above 256MiB, so * restrict its pinning to the low mappable arena. 
@@ -577,7 +552,7 @@ static int init_status_page(struct intel_engine_cs *engine) i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -607,7 +582,7 @@ err: return ret; } -static int intel_engine_setup_common(struct intel_engine_cs *engine) +static int engine_setup_common(struct intel_engine_cs *engine) { int err; @@ -620,104 +595,26 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); - intel_engine_init_hangcheck(engine); - intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); + intel_engine_init_retire(engine); + + intel_engine_pool_init(&engine->pool); /* Use the whole device by default */ engine->sseu = intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); - return 0; -} - -/** - * intel_engines_setup- setup engine state not requiring hw access - * @i915: Device to setup. - * - * Initializes engine structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. - */ -int intel_engines_setup(struct drm_i915_private *i915) -{ - int (*setup)(struct intel_engine_cs *engine); - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - if (HAS_EXECLISTS(i915)) - setup = intel_execlists_submission_setup; - else - setup = intel_ring_submission_setup; - - for_each_engine(engine, i915, id) { - err = intel_engine_setup_common(engine); - if (err) - goto cleanup; - - err = setup(engine); - if (err) - goto cleanup; - - /* We expect the backend to take control over its state */ - GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree); - - GEM_BUG_ON(!engine->cops); - } + intel_engine_init_workarounds(engine); + intel_engine_init_whitelist(engine); + intel_engine_init_ctx_wa(engine); return 0; - -cleanup: - intel_engines_cleanup(i915); - return err; -} - -void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) -{ - static const struct { - u8 engine; - u8 sched; - } map[] = { -#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) } - MAP(PREEMPTION, PREEMPTION), - MAP(SEMAPHORES, SEMAPHORES), -#undef MAP - }; - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 enabled, disabled; - - enabled = 0; - disabled = 0; - for_each_engine(engine, i915, id) { /* all engines must agree! 
*/ - int i; - - if (engine->schedule) - enabled |= (I915_SCHEDULER_CAP_ENABLED | - I915_SCHEDULER_CAP_PRIORITY); - else - disabled |= (I915_SCHEDULER_CAP_ENABLED | - I915_SCHEDULER_CAP_PRIORITY); - - for (i = 0; i < ARRAY_SIZE(map); i++) { - if (engine->flags & BIT(map[i].engine)) - enabled |= BIT(map[i].sched); - else - disabled |= BIT(map[i].sched); - } - } - - i915->caps.scheduler = enabled & ~disabled; - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED)) - i915->caps.scheduler = 0; } struct measure_breadcrumb { struct i915_request rq; - struct i915_timeline timeline; + struct intel_timeline timeline; struct intel_ring ring; u32 cs[1024]; }; @@ -727,19 +624,19 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) struct measure_breadcrumb *frame; int dw = -ENOMEM; - GEM_BUG_ON(!engine->i915->gt.scratch); + GEM_BUG_ON(!engine->gt->scratch); frame = kzalloc(sizeof(*frame), GFP_KERNEL); if (!frame) return -ENOMEM; - if (i915_timeline_init(engine->i915, - &frame->timeline, - engine->status_page.vma)) + if (intel_timeline_init(&frame->timeline, + engine->gt, + engine->status_page.vma)) goto out_frame; - INIT_LIST_HEAD(&frame->ring.request_list); - frame->ring.timeline = &frame->timeline; + mutex_lock(&frame->timeline.mutex); + frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); frame->ring.effective_size = frame->ring.size; @@ -748,48 +645,33 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) frame->rq.i915 = engine->i915; frame->rq.engine = engine; frame->rq.ring = &frame->ring; - frame->rq.timeline = &frame->timeline; + rcu_assign_pointer(frame->rq.timeline, &frame->timeline); - dw = i915_timeline_pin(&frame->timeline); + dw = intel_timeline_pin(&frame->timeline); if (dw < 0) goto out_timeline; + spin_lock_irq(&engine->active.lock); dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + spin_unlock_irq(&engine->active.lock); + GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ - i915_timeline_unpin(&frame->timeline); + intel_timeline_unpin(&frame->timeline); out_timeline: - i915_timeline_fini(&frame->timeline); + mutex_unlock(&frame->timeline.mutex); + intel_timeline_fini(&frame->timeline); out_frame: kfree(frame); return dw; } -static int pin_context(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_context **out) -{ - struct intel_context *ce; - int err; - - ce = i915_gem_context_get_engine(ctx, engine->id); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - err = intel_context_pin(ce); - intel_context_put(ce); - if (err) - return err; - - *out = ce; - return 0; -} - void intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) { INIT_LIST_HEAD(&engine->active.requests); + INIT_LIST_HEAD(&engine->active.hold); spin_lock_init(&engine->active.lock); lockdep_set_subclass(&engine->active.lock, subclass); @@ -807,6 +689,36 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) #endif } +static struct intel_context * +create_kernel_context(struct intel_engine_cs *engine) +{ + static struct lock_class_key kernel; + struct intel_context *ce; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ce; + + __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); + + err = intel_context_pin(ce); /* perma-pin so it is always available */ + if (err) { + intel_context_put(ce); + return ERR_PTR(err); + } + + /* + * Give our perma-pinned kernel timelines a separate lockdep class, + * so that we can use them from within the normal user timelines + 
* should we need to inject GPU operations during their request + * construction. + */ + lockdep_set_class(&ce->timeline->mutex, &kernel); + + return ce; +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -818,47 +730,65 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) * * Returns zero on success or an error code on failure. */ -int intel_engine_init_common(struct intel_engine_cs *engine) +static int engine_init_common(struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = engine->i915; + struct intel_context *ce; int ret; - /* We may need to do things with the shrinker which + engine->set_default_submission(engine); + + ret = measure_breadcrumb_dw(engine); + if (ret < 0) + return ret; + + engine->emit_fini_breadcrumb_dw = ret; + + /* + * We may need to do things with the shrinker which * require us to immediately switch back to the default * context. This can cause a problem as pinning the * default context also requires GTT space which may not * be available. To avoid this we always pin the default * context. */ - ret = pin_context(i915->kernel_context, engine, - &engine->kernel_context); - if (ret) - return ret; + ce = create_kernel_context(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); - /* - * Similarly the preempt context must always be available so that - * we can interrupt the engine at any time. However, as preemption - * is optional, we allow it to fail. - */ - if (i915->preempt_context) - pin_context(i915->preempt_context, engine, - &engine->preempt_context); + engine->kernel_context = ce; - ret = measure_breadcrumb_dw(engine); - if (ret < 0) - goto err_unpin; + return 0; +} - engine->emit_fini_breadcrumb_dw = ret; +int intel_engines_init(struct intel_gt *gt) +{ + int (*setup)(struct intel_engine_cs *engine); + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; - engine->set_default_submission(engine); + if (HAS_EXECLISTS(gt->i915)) + setup = intel_execlists_submission_setup; + else + setup = intel_ring_submission_setup; - return 0; + for_each_engine(engine, gt, id) { + err = engine_setup_common(engine); + if (err) + return err; -err_unpin: - if (engine->preempt_context) - intel_context_unpin(engine->preempt_context); - intel_context_unpin(engine->kernel_context); - return ret; + err = setup(engine); + if (err) + return err; + + err = engine_init_common(engine); + if (err) + return err; + + intel_engine_add_user(engine); + } + + return 0; } /** @@ -871,19 +801,22 @@ err_unpin: void intel_engine_cleanup_common(struct intel_engine_cs *engine) { GEM_BUG_ON(!list_empty(&engine->active.requests)); + tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ cleanup_status_page(engine); + intel_engine_fini_retire(engine); + intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); if (engine->default_state) i915_gem_object_put(engine->default_state); - if (engine->preempt_context) - intel_context_unpin(engine->preempt_context); - intel_context_unpin(engine->kernel_context); + if (engine->kernel_context) { + intel_context_unpin(engine->kernel_context); + intel_context_put(engine->kernel_context); + } GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); intel_wa_list_free(&engine->ctx_wa_list); @@ -919,6 +852,21 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) return bbaddr; } +static unsigned long 
stop_timeout(const struct intel_engine_cs *engine) +{ + if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */ + return 0; + + /* + * If we are doing a normal GPU reset, we can take our time and allow + * the engine to quiesce. We've stopped submission to the engine, and + * if we wait long enough an innocent context should complete and + * leave the engine idle. So they should not be caught unaware by + * the forthcoming GPU reset (which usually follows the stop_cs)! + */ + return READ_ONCE(engine->props.stop_timeout_ms); +} + int intel_engine_stop_cs(struct intel_engine_cs *engine) { struct intel_uncore *uncore = engine->uncore; @@ -929,16 +877,16 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) if (INTEL_GEN(engine->i915) < 3) return -ENODEV; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); err = 0; if (__intel_wait_for_register_fw(uncore, mode, MODE_IDLE, MODE_IDLE, - 1000, 0, + 1000, stop_timeout(engine), NULL)) { - GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name); + ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n"); err = -ETIMEDOUT; } @@ -950,7 +898,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) { - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); } @@ -966,57 +914,23 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type) } } -u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; - unsigned int slice = fls(sseu->slice_mask) - 1; - unsigned int subslice; - u32 mcr_s_ss_select; - - GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); - subslice = fls(sseu->subslice_mask[slice]); - GEM_BUG_ON(!subslice); - subslice--; - - if (IS_GEN(dev_priv, 10)) - mcr_s_ss_select = GEN8_MCR_SLICE(slice) | - GEN8_MCR_SUBSLICE(subslice); - else if (INTEL_GEN(dev_priv) >= 11) - mcr_s_ss_select = GEN11_MCR_SLICE(slice) | - GEN11_MCR_SUBSLICE(subslice); - else - mcr_s_ss_select = 0; - - return mcr_s_ss_select; -} - static u32 -read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice, - i915_reg_t reg) +read_subslice_reg(const struct intel_engine_cs *engine, + int slice, int subslice, i915_reg_t reg) { struct drm_i915_private *i915 = engine->i915; struct intel_uncore *uncore = engine->uncore; - u32 mcr_slice_subslice_mask; - u32 mcr_slice_subslice_select; - u32 default_mcr_s_ss_select; - u32 mcr; - u32 ret; + u32 mcr_mask, mcr_ss, mcr, old_mcr, val; enum forcewake_domains fw_domains; if (INTEL_GEN(i915) >= 11) { - mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | - GEN11_MCR_SUBSLICE_MASK; - mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) | - GEN11_MCR_SUBSLICE(subslice); + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); } else { - mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | - GEN8_MCR_SUBSLICE_MASK; - mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) | - GEN8_MCR_SUBSLICE(subslice); + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); } - default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(i915); - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); fw_domains |= intel_uncore_forcewake_for_reg(uncore, @@ -1026,33 
+940,31 @@ read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice, spin_lock_irq(&uncore->lock); intel_uncore_forcewake_get__locked(uncore, fw_domains); - mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); - WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) != - default_mcr_s_ss_select); - - mcr &= ~mcr_slice_subslice_mask; - mcr |= mcr_slice_subslice_select; + mcr &= ~mcr_mask; + mcr |= mcr_ss; intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - ret = intel_uncore_read_fw(uncore, reg); + val = intel_uncore_read_fw(uncore, reg); - mcr &= ~mcr_slice_subslice_mask; - mcr |= default_mcr_s_ss_select; + mcr &= ~mcr_mask; + mcr |= old_mcr & mcr_mask; intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); intel_uncore_forcewake_put__locked(uncore, fw_domains); spin_unlock_irq(&uncore->lock); - return ret; + return val; } /* NB: please notice the memset */ -void intel_engine_get_instdone(struct intel_engine_cs *engine, +void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; struct intel_uncore *uncore = engine->uncore; u32 mmio_base = engine->mmio_base; int slice; @@ -1070,7 +982,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); - for_each_instdone_slice_subslice(i915, slice, subslice) { + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { instdone->sampler[slice][subslice] = read_subslice_reg(engine, slice, subslice, GEN7_SAMPLER_INSTDONE); @@ -1113,16 +1025,12 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, static bool ring_is_idle(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - intel_wakeref_t wakeref; bool idle = true; if (I915_SELFTEST_ONLY(!engine->mmio_base)) return true; - /* If the whole device is asleep, the engine must be idle */ - wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm); - if (!wakeref) + if (!intel_engine_pm_get_if_awake(engine)) return true; /* First check that no commands are left in the ring */ @@ -1131,15 +1039,34 @@ static bool ring_is_idle(struct intel_engine_cs *engine) idle = false; /* No bit for gen2, so assume the CS parser is idle */ - if (INTEL_GEN(dev_priv) > 2 && + if (INTEL_GEN(engine->i915) > 2 && !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE)) idle = false; - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + intel_engine_pm_put(engine); return idle; } +void intel_engine_flush_submission(struct intel_engine_cs *engine) +{ + struct tasklet_struct *t = &engine->execlists.tasklet; + + if (__tasklet_is_scheduled(t)) { + local_bh_disable(); + if (tasklet_trylock(t)) { + /* Must wait for any GPU reset in progress. 
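(Aside: the reworked read_subslice_reg() saves the current MCR steering bits, steers to the requested slice/subslice, reads the target register, and then restores the saved bits rather than a recomputed default. The shape of that save/steer/read/restore sequence, modelled on plain variables with an invented register layout:)

#include <stdint.h>
#include <stdio.h>

static uint32_t selector;		/* stands in for the steering register */
static uint32_t unit_data[4][4];	/* per-slice/subslice instrumentation */

static uint32_t read_target(void)
{
	uint32_t slice = (selector >> 4) & 0x3;
	uint32_t subslice = selector & 0x3;

	return unit_data[slice][subslice];
}

static uint32_t read_steered(uint32_t slice, uint32_t subslice)
{
	const uint32_t mask = (0x3u << 4) | 0x3u;
	uint32_t old = selector, val;

	selector = (selector & ~mask) | (slice << 4) | subslice;
	val = read_target();
	selector = (selector & ~mask) | (old & mask);	/* restore old steering */

	return val;
}

int main(void)
{
	unit_data[1][2] = 0xdeadbeef;
	printf("slice 1 / subslice 2: 0x%08x\n", (unsigned int)read_steered(1, 2));
	printf("selector restored to: 0x%08x\n", (unsigned int)selector);
	return 0;
}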
*/ + if (__tasklet_is_enabled(t)) + t->func(t->data); + tasklet_unlock(t); + } + local_bh_enable(); + } + + /* Otherwise flush the tasklet if it was running on another cpu */ + tasklet_unlock_wait(t); +} + /** * intel_engine_is_idle() - Report if the engine has finished process all work * @engine: the intel_engine_cs @@ -1150,31 +1077,19 @@ static bool ring_is_idle(struct intel_engine_cs *engine) bool intel_engine_is_idle(struct intel_engine_cs *engine) { /* More white lies, if wedged, hw state is inconsistent */ - if (i915_reset_failed(engine->i915)) + if (intel_gt_is_wedged(engine->gt)) return true; - if (!intel_wakeref_active(&engine->wakeref)) + if (!intel_engine_pm_is_awake(engine)) return true; /* Waiting to drain ELSP? */ - if (READ_ONCE(engine->execlists.active)) { - struct tasklet_struct *t = &engine->execlists.tasklet; + if (execlists_active(&engine->execlists)) { + synchronize_hardirq(engine->i915->drm.pdev->irq); - synchronize_hardirq(engine->i915->drm.irq); + intel_engine_flush_submission(engine); - local_bh_disable(); - if (tasklet_trylock(t)) { - /* Must wait for any GPU reset in progress. */ - if (__tasklet_is_enabled(t)) - t->func(t->data); - tasklet_unlock(t); - } - local_bh_enable(); - - /* Otherwise flush the tasklet if it was on another cpu */ - tasklet_unlock_wait(t); - - if (READ_ONCE(engine->execlists.active)) + if (execlists_active(&engine->execlists)) return false; } @@ -1186,7 +1101,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return ring_is_idle(engine); } -bool intel_engines_are_idle(struct drm_i915_private *i915) +bool intel_engines_are_idle(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1195,14 +1110,14 @@ bool intel_engines_are_idle(struct drm_i915_private *i915) * If the driver is wedged, HW state may be very inconsistent and * report that it is still busy, even though we have stopped using it. */ - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return true; /* Already parked (and passed an idleness test); must still be idle */ - if (!READ_ONCE(i915->gt.awake)) + if (!READ_ONCE(gt->awake)) return true; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_is_idle(engine)) return false; } @@ -1210,12 +1125,12 @@ bool intel_engines_are_idle(struct drm_i915_private *i915) return true; } -void intel_engines_reset_default_submission(struct drm_i915_private *i915) +void intel_engines_reset_default_submission(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) engine->set_default_submission(engine); } @@ -1227,6 +1142,8 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) case 3: /* maybe only uses physical not virtual addresses */ return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915)); + case 4: + return !IS_I965G(engine->i915); /* who knows! 
*/ case 6: return engine->class != VIDEO_DECODE_CLASS; /* b0rked */ default: @@ -1234,20 +1151,6 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } -unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int which; - - which = 0; - for_each_engine(engine, i915, id) - if (engine->default_state) - which |= BIT(engine->uabi_class); - - return which; -} - static int print_sched_attr(struct drm_i915_private *i915, const struct i915_sched_attr *attr, char *buf, int x, int len) @@ -1316,16 +1219,46 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len) } } +static struct intel_timeline *get_timeline(struct i915_request *rq) +{ + struct intel_timeline *tl; + + /* + * Even though we are holding the engine->active.lock here, there + * is no control over the submission queue per-se and we are + * inspecting the active state at a random point in time, with an + * unknown queue. Play safe and make sure the timeline remains valid. + * (Only being used for pretty printing, one extra kref shouldn't + * cause a camel stampede!) + */ + rcu_read_lock(); + tl = rcu_dereference(rq->timeline); + if (!kref_get_unless_zero(&tl->kref)) + tl = NULL; + rcu_read_unlock(); + + return tl; +} + +static const char *repr_timer(const struct timer_list *t) +{ + if (!READ_ONCE(t->expires)) + return "inactive"; + + if (timer_pending(t)) + return "active"; + + return "expired"; +} + static void intel_engine_print_registers(struct intel_engine_cs *engine, struct drm_printer *m) { struct drm_i915_private *dev_priv = engine->i915; - const struct intel_engine_execlists * const execlists = - &engine->execlists; - unsigned long flags; + struct intel_engine_execlists * const execlists = &engine->execlists; u64 addr; - if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7)) + if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); drm_printf(m, "\tRING_START: 0x%08x\n", ENGINE_READ(engine, RING_START)); @@ -1372,25 +1305,28 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } if (HAS_EXECLISTS(dev_priv)) { + struct i915_request * const *port, *rq; const u32 *hws = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; const u8 num_entries = execlists->csb_size; unsigned int idx; u8 read, write; - drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n", - ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), - ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), - num_entries); + drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n", + yesno(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state)), + enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), + repr_timer(&engine->execlists.preempt), + repr_timer(&engine->execlists.timer)); read = execlists->csb_head; write = READ_ONCE(*execlists->csb_write); - drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? 
%s (%s)\n", - read, write, - yesno(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state)), - enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); + drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), + ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), + read, write, num_entries); + if (read >= num_entries) read = 0; if (write >= num_entries) @@ -1403,29 +1339,47 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, idx, hws[idx * 2], hws[idx * 2 + 1]); } - spin_lock_irqsave(&engine->active.lock, flags); - for (idx = 0; idx < execlists_num_ports(execlists); idx++) { - struct i915_request *rq; - unsigned int count; + execlists_active_lock_bh(execlists); + rcu_read_lock(); + for (port = execlists->active; (rq = *port); port++) { char hdr[80]; - - rq = port_unpack(&execlists->port[idx], &count); - if (!rq) { - drm_printf(m, "\t\tELSP[%d] idle\n", idx); - } else if (!i915_request_signaled(rq)) { - snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", - idx, count, - i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset, - hwsp_seqno(rq)); - print_request(m, rq, hdr); - } else { - print_request(m, rq, "\t\tELSP[%d] rq: "); + int len; + + len = snprintf(hdr, sizeof(hdr), + "\t\tActive[%d]: ", + (int)(port - execlists->active)); + if (!i915_request_signaled(rq)) { + struct intel_timeline *tl = get_timeline(rq); + + len += snprintf(hdr + len, sizeof(hdr) - len, + "ring:{start:%08x, hwsp:%08x, seqno:%08x}, ", + i915_ggtt_offset(rq->ring->vma), + tl ? tl->hwsp_offset : 0, + hwsp_seqno(rq)); + + if (tl) + intel_timeline_put(tl); } + snprintf(hdr + len, sizeof(hdr) - len, "rq: "); + print_request(m, rq, hdr); + } + for (port = execlists->pending; (rq = *port); port++) { + struct intel_timeline *tl = get_timeline(rq); + char hdr[80]; + + snprintf(hdr, sizeof(hdr), + "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", + (int)(port - execlists->pending), + i915_ggtt_offset(rq->ring->vma), + tl ? tl->hwsp_offset : 0, + hwsp_seqno(rq)); + print_request(m, rq, hdr); + + if (tl) + intel_timeline_put(tl); } - drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); - spin_unlock_irqrestore(&engine->active.lock, flags); + rcu_read_unlock(); + execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", ENGINE_READ(engine, RING_PP_DIR_BASE)); @@ -1469,6 +1423,17 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq) } } +static unsigned long list_count(struct list_head *list) +{ + struct list_head *pos; + unsigned long count = 0; + + list_for_each(pos, list) + count++; + + return count; +} + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) @@ -1486,12 +1451,21 @@ void intel_engine_dump(struct intel_engine_cs *engine, va_end(ap); } - if (i915_reset_failed(engine->i915)) + if (intel_gt_is_wedged(engine->gt)) drm_printf(m, "*** WEDGED ***\n"); drm_printf(m, "\tAwake? 
%d\n", atomic_read(&engine->wakeref.count)); - drm_printf(m, "\tHangcheck: %d ms ago\n", - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); + drm_printf(m, "\tBarriers?: %s\n", + yesno(!llist_empty(&engine->barrier_tasks))); + drm_printf(m, "\tLatency: %luus\n", + ewma__engine_latency_read(&engine->latency)); + + rcu_read_lock(); + rq = READ_ONCE(engine->heartbeat.systole); + if (rq) + drm_printf(m, "\tHeartbeat: %d ms ago\n", + jiffies_to_msecs(jiffies - rq->emitted_jiffies)); + rcu_read_unlock(); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); @@ -1501,6 +1475,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_lock_irqsave(&engine->active.lock, flags); rq = intel_engine_find_active_request(engine); if (rq) { + struct intel_timeline *tl = get_timeline(rq); + print_request(m, rq, "\t\tactive "); drm_printf(m, "\t\tring->start: 0x%08x\n", @@ -1513,17 +1489,28 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->ring->emit); drm_printf(m, "\t\tring->space: 0x%08x\n", rq->ring->space); - drm_printf(m, "\t\tring->hwsp: 0x%08x\n", - rq->timeline->hwsp_offset); + + if (tl) { + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + tl->hwsp_offset); + intel_timeline_put(tl); + } print_request_ring(m, rq); + + if (rq->context->lrc_reg_state) { + drm_printf(m, "Logical Ring Context:\n"); + hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); + } } + drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold)); spin_unlock_irqrestore(&engine->active.lock, flags); - wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm); + drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); + wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm); if (wakeref) { intel_engine_print_registers(engine, m); - intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref); + intel_runtime_pm_put(engine->uncore->rpm, wakeref); } else { drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } @@ -1538,29 +1525,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_engine_print_breadcrumbs(engine, m); } -static u8 user_class_map[] = { - [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS, - [I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS, - [I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS, - [I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS, -}; - -struct intel_engine_cs * -intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) -{ - if (class >= ARRAY_SIZE(user_class_map)) - return NULL; - - class = user_class_map[class]; - - GEM_BUG_ON(class > MAX_ENGINE_CLASS); - - if (instance > MAX_ENGINE_INSTANCE) - return NULL; - - return i915->engine_class[class][instance]; -} - /** * intel_enable_engine_stats() - Enable engine busy tracking on engine * @engine: engine to enable stats collection @@ -1578,8 +1542,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - spin_lock_irqsave(&engine->active.lock, flags); - write_seqlock(&engine->stats.lock); + execlists_active_lock_bh(execlists); + write_seqlock_irqsave(&engine->stats.lock, flags); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -1587,15 +1551,19 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } if (engine->stats.enabled++ == 0) { - const struct execlist_port *port = execlists->port; - unsigned int num_ports = execlists_num_ports(execlists); + struct i915_request * const *port; + struct i915_request *rq; 
engine->stats.enabled_at = ktime_get(); /* XXX submission method oblivious? */ - while (num_ports-- && port_isset(port)) { + for (port = execlists->active; (rq = *port); port++) engine->stats.active++; - port++; + + for (port = execlists->pending; (rq = *port); port++) { + /* Exclude any contexts already counted in active */ + if (!intel_context_inflight_count(rq->context)) + engine->stats.active++; } if (engine->stats.active) @@ -1603,8 +1571,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - write_sequnlock(&engine->stats.lock); - spin_unlock_irqrestore(&engine->active.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); + execlists_active_unlock_bh(execlists); return err; } @@ -1708,5 +1676,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "mock_engine.c" +#include "selftest_engine.c" #include "selftest_engine_cs.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c new file mode 100644 index 000000000000..6c6fd185457c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -0,0 +1,242 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_request.h" + +#include "intel_context.h" +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" +#include "intel_engine.h" +#include "intel_gt.h" +#include "intel_reset.h" + +/* + * While the engine is active, we send a periodic pulse along the engine + * to check on its health and to flush any idle-barriers. If that request + * is stuck, and we fail to preempt it, we declare the engine hung and + * issue a reset -- in the hope that restores progress. + */ + +static bool next_heartbeat(struct intel_engine_cs *engine) +{ + long delay; + + delay = READ_ONCE(engine->props.heartbeat_interval_ms); + if (!delay) + return false; + + delay = msecs_to_jiffies_timeout(delay); + if (delay >= HZ) + delay = round_jiffies_up_relative(delay); + schedule_delayed_work(&engine->heartbeat.work, delay); + + return true; +} + +static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq) +{ + engine->wakeref_serial = READ_ONCE(engine->serial) + 1; + i915_request_add_active_barriers(rq); +} + +static void show_heartbeat(const struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct drm_printer p = drm_debug_printer("heartbeat"); + + intel_engine_dump(engine, &p, + "%s heartbeat {prio:%d} not ticking\n", + engine->name, + rq->sched.attr.priority); +} + +static void heartbeat(struct work_struct *wrk) +{ + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN), + }; + struct intel_engine_cs *engine = + container_of(wrk, typeof(*engine), heartbeat.work.work); + struct intel_context *ce = engine->kernel_context; + struct i915_request *rq; + + rq = engine->heartbeat.systole; + if (rq && i915_request_completed(rq)) { + i915_request_put(rq); + engine->heartbeat.systole = NULL; + } + + if (!intel_engine_pm_get_if_awake(engine)) + return; + + if (intel_gt_is_wedged(engine->gt)) + goto out; + + if (engine->heartbeat.systole) { + if (engine->schedule && + rq->sched.attr.priority < I915_PRIORITY_BARRIER) { + /* + * Gradually raise the priority of the heartbeat to + * give high priority work [which presumably desires + * low latency and no jitter] the chance to naturally + * complete before being preempted. 
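The escalation described in the heartbeat comment above — re-submit the pulse at a higher priority on each missed tick and only declare a hang once even a preempting pulse makes no progress — reduces to a tiny state machine. The sketch below is a simplified model with invented priority names, not the driver's actual priority values or scheduling code.

/* build: cc heartbeat_demo.c */
#include <stdbool.h>
#include <stdio.h>

/* Illustrative priority levels, not the driver's real numeric values. */
enum pulse_prio { PULSE_MIN, PULSE_NORMAL, PULSE_HEARTBEAT, PULSE_BARRIER };

/*
 * Each time the heartbeat timer fires and the previous pulse is still
 * outstanding, bump its priority one step; only after the pulse has been
 * tried at the maximum (preempting) priority do we declare the engine hung.
 */
static bool heartbeat_tick(enum pulse_prio *prio)
{
    switch (*prio) {
    case PULSE_MIN:
    case PULSE_NORMAL:
        *prio = PULSE_HEARTBEAT;    /* ask nicely first */
        return true;
    case PULSE_HEARTBEAT:
        *prio = PULSE_BARRIER;      /* demand preemption */
        return true;
    case PULSE_BARRIER:
    default:
        return false;               /* still stuck: time to reset */
    }
}

int main(void)
{
    enum pulse_prio prio = PULSE_MIN;
    int tick = 0;

    while (heartbeat_tick(&prio))
        printf("tick %d: escalate pulse to %d\n", ++tick, prio);

    printf("tick %d: heartbeat stopped, engine declared hung\n", ++tick);
    return 0;
}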
+ */ + attr.priority = I915_PRIORITY_MASK; + if (rq->sched.attr.priority >= attr.priority) + attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT); + if (rq->sched.attr.priority >= attr.priority) + attr.priority = I915_PRIORITY_BARRIER; + + local_bh_disable(); + engine->schedule(rq, &attr); + local_bh_enable(); + } else { + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + show_heartbeat(rq, engine); + + intel_gt_handle_error(engine->gt, engine->mask, + I915_ERROR_CAPTURE, + "stopped heartbeat on %s", + engine->name); + } + goto out; + } + + if (engine->wakeref_serial == engine->serial) + goto out; + + mutex_lock(&ce->timeline->mutex); + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); + intel_context_exit(ce); + if (IS_ERR(rq)) + goto unlock; + + idle_pulse(engine, rq); + if (i915_modparams.enable_hangcheck) + engine->heartbeat.systole = i915_request_get(rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + +unlock: + mutex_unlock(&ce->timeline->mutex); +out: + if (!next_heartbeat(engine)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); + intel_engine_pm_put(engine); +} + +void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + return; + + next_heartbeat(engine); +} + +void intel_engine_park_heartbeat(struct intel_engine_cs *engine) +{ + if (cancel_delayed_work(&engine->heartbeat.work)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); +} + +void intel_engine_init_heartbeat(struct intel_engine_cs *engine) +{ + INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); +} + +int intel_engine_set_heartbeat(struct intel_engine_cs *engine, + unsigned long delay) +{ + int err; + + /* Send one last pulse before to cleanup persistent hogs */ + if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) { + err = intel_engine_pulse(engine); + if (err) + return err; + } + + WRITE_ONCE(engine->props.heartbeat_interval_ms, delay); + + if (intel_engine_pm_get_if_awake(engine)) { + if (delay) + intel_engine_unpark_heartbeat(engine); + else + intel_engine_park_heartbeat(engine); + intel_engine_pm_put(engine); + } + + return 0; +} + +int intel_engine_pulse(struct intel_engine_cs *engine) +{ + struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; + struct intel_context *ce = engine->kernel_context; + struct i915_request *rq; + int err = 0; + + if (!intel_engine_has_preemption(engine)) + return -ENODEV; + + if (!intel_engine_pm_get_if_awake(engine)) + return 0; + + if (mutex_lock_interruptible(&ce->timeline->mutex)) + goto out_rpm; + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); + intel_context_exit(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unlock; + } + + __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); + idle_pulse(engine, rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + +out_unlock: + mutex_unlock(&ce->timeline->mutex); +out_rpm: + intel_engine_pm_put(engine); + return err; +} + +int intel_engine_flush_barriers(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + int err = 0; + + if (llist_empty(&engine->barrier_tasks)) + return 0; + + if (!intel_engine_pm_get_if_awake(engine)) + return 0; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_rpm; + } + + idle_pulse(engine, rq); + i915_request_add(rq); + +out_rpm: + intel_engine_pm_put(engine); + return err; +} + +#if 
IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_engine_heartbeat.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h new file mode 100644 index 000000000000..a7b8c0f9e005 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_HEARTBEAT_H +#define INTEL_ENGINE_HEARTBEAT_H + +struct intel_engine_cs; + +void intel_engine_init_heartbeat(struct intel_engine_cs *engine); + +int intel_engine_set_heartbeat(struct intel_engine_cs *engine, + unsigned long delay); + +void intel_engine_park_heartbeat(struct intel_engine_cs *engine); +void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine); + +int intel_engine_pulse(struct intel_engine_cs *engine); +int intel_engine_flush_barriers(struct intel_engine_cs *engine); + +#endif /* INTEL_ENGINE_HEARTBEAT_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index ae5b6baf6dff..ea90ab3e396e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -6,19 +6,26 @@ #include "i915_drv.h" +#include "intel_context.h" #include "intel_engine.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" +#include "intel_engine_pool.h" +#include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_rc6.h" +#include "intel_ring.h" static int __engine_unpark(struct intel_wakeref *wf) { struct intel_engine_cs *engine = container_of(wf, typeof(*engine), wakeref); + struct intel_context *ce; void *map; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); - intel_gt_pm_get(engine->i915); + intel_gt_pm_get(engine->gt); /* Pin the default state for fast resets from atomic context. 
*/ map = NULL; @@ -28,45 +35,124 @@ static int __engine_unpark(struct intel_wakeref *wf) if (!IS_ERR_OR_NULL(map)) engine->pinned_default_state = map; + /* Discard stale context state from across idling */ + ce = engine->kernel_context; + if (ce) { + GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags)); + + /* First poison the image to verify we never fully trust it */ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) { + struct drm_i915_gem_object *obj = ce->state->obj; + int type = i915_coherent_map_type(engine->i915); + + map = i915_gem_object_pin_map(obj, type); + if (!IS_ERR(map)) { + memset(map, CONTEXT_REDZONE, obj->base.size); + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + } + } + + ce->ops->reset(ce); + } + if (engine->unpark) engine->unpark(engine); - intel_engine_init_hangcheck(engine); + intel_engine_unpark_heartbeat(engine); + return 0; +} + +#if IS_ENABLED(CONFIG_LOCKDEP) + +static inline unsigned long __timeline_mark_lock(struct intel_context *ce) +{ + unsigned long flags; + + local_irq_save(flags); + mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); + + return flags; +} + +static inline void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) +{ + mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); + local_irq_restore(flags); +} + +#else + +static inline unsigned long __timeline_mark_lock(struct intel_context *ce) +{ return 0; } -void intel_engine_pm_get(struct intel_engine_cs *engine) +static inline void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) { - intel_wakeref_get(&engine->i915->runtime_pm, &engine->wakeref, __engine_unpark); } -void intel_engine_park(struct intel_engine_cs *engine) +#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ + +static void duration(struct dma_fence *fence, struct dma_fence_cb *cb) { + struct i915_request *rq = to_request(fence); + + ewma__engine_latency_add(&rq->engine->latency, + ktime_us_delta(rq->fence.timestamp, + rq->duration.emitted)); +} + +static void +__queue_and_release_pm(struct i915_request *rq, + struct intel_timeline *tl, + struct intel_engine_cs *engine) +{ + struct intel_gt_timelines *timelines = &engine->gt->timelines; + + ENGINE_TRACE(engine, "\n"); + /* - * We are committed now to parking this engine, make sure there - * will be no more interrupts arriving later and the engine - * is truly idle. + * We have to serialise all potential retirement paths with our + * submission, as we don't want to underflow either the + * engine->wakeref.counter or our timeline->active_count. + * + * Equally, we cannot allow a new submission to start until + * after we finish queueing, nor could we allow that submitter + * to retire us before we are ready! 
*/ - if (wait_for(intel_engine_is_idle(engine), 10)) { - struct drm_printer p = drm_debug_printer(__func__); + spin_lock(&timelines->lock); - dev_err(engine->i915->drm.dev, - "%s is not idle before parking\n", - engine->name); - intel_engine_dump(engine, &p, NULL); - } + /* Let intel_gt_retire_requests() retire us (acquired under lock) */ + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); + + /* Hand the request over to HW and so engine_retire() */ + __i915_request_queue(rq, NULL); + + /* Let new submissions commence (and maybe retire this timeline) */ + __intel_wakeref_defer_park(&engine->wakeref); + + spin_unlock(&timelines->lock); } static bool switch_to_kernel_context(struct intel_engine_cs *engine) { + struct intel_context *ce = engine->kernel_context; struct i915_request *rq; + unsigned long flags; + bool result = true; - /* Already inside the kernel context, safe to power down. */ - if (engine->wakeref_serial == engine->serial) + /* GPU is pointing to the void, as good as in the kernel context. */ + if (intel_gt_is_wedged(engine->gt)) return true; - /* GPU is pointing to the void, as good as in the kernel context. */ - if (i915_reset_failed(engine->i915)) + GEM_BUG_ON(!intel_context_is_barrier(ce)); + + /* Already inside the kernel context, safe to power down. */ + if (engine->wakeref_serial == engine->serial) return true; /* @@ -80,19 +166,70 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * This should hold true as we can only park the engine after * retiring the last request, thus all rings should be empty and * all timelines idle. + * + * For unlocking, there are 2 other parties and the GPU who have a + * stake here. + * + * A new gpu user will be waiting on the engine-pm to start their + * engine_unpark. New waiters are predicated on engine->wakeref.count + * and so intel_wakeref_defer_park() acts like a mutex_unlock of the + * engine->wakeref. + * + * The other party is intel_gt_retire_requests(), which is walking the + * list of active timelines looking for completions. Meanwhile as soon + * as we call __i915_request_queue(), the GPU may complete our request. + * Ergo, if we put ourselves on the timelines.active_list + * (se intel_timeline_enter()) before we increment the + * engine->wakeref.count, we may see the request completion and retire + * it causing an undeflow of the engine->wakeref. */ - rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + flags = __timeline_mark_lock(ce); + GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); + + rq = __i915_request_create(ce, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ - return true; + goto out_unlock; /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; + i915_request_add_active_barriers(rq); - i915_request_add_barriers(rq); - __i915_request_commit(rq); + /* Install ourselves as a preemption barrier */ + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */ + /* + * Use an interrupt for precise measurement of duration, + * otherwise we rely on someone else retiring all the requests + * which may delay the signaling (i.e. we will likely wait + * until the background request retirement running every + * second or two). 
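The parking path above stamps the pulse with rq->duration.emitted and, from the duration() fence callback, feeds the measured time into the engine's latency estimator. The sketch below shows a fixed-point moving average in the spirit of the kernel's DECLARE_EWMA(); the weight and precision chosen here are illustrative, not the driver's actual tuning.

/* build: cc ewma_demo.c */
#include <stdio.h>

/*
 * Fixed-point exponentially weighted moving average: FRAC_BITS fractional
 * bits of precision, and each new sample contributes 1/WEIGHT of the result.
 */
#define FRAC_BITS    6
#define WEIGHT       4

struct ewma {
    unsigned long internal;    /* value << FRAC_BITS, 0 == unset */
};

static void ewma_add(struct ewma *e, unsigned long val)
{
    unsigned long scaled = val << FRAC_BITS;

    if (!e->internal)
        e->internal = scaled;
    else
        e->internal = (e->internal * (WEIGHT - 1) + scaled) / WEIGHT;
}

static unsigned long ewma_read(const struct ewma *e)
{
    return e->internal >> FRAC_BITS;
}

int main(void)
{
    /* Pretend these are measured pulse durations in microseconds. */
    static const unsigned long samples_us[] = { 120, 80, 95, 400, 110 };
    struct ewma latency = { 0 };
    unsigned int i;

    for (i = 0; i < sizeof(samples_us) / sizeof(samples_us[0]); i++) {
        ewma_add(&latency, samples_us[i]);
        printf("sample %luus -> average %luus\n",
               samples_us[i], ewma_read(&latency));
    }
    return 0;
}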
+ */ + BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq)); + dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration); + rq->duration.emitted = ktime_get(); + } + + /* Expose ourselves to the world */ + __queue_and_release_pm(rq, ce->timeline, engine); - return false; + result = false; +out_unlock: + __timeline_mark_unlock(ce, flags); + return result; +} + +static void call_idle_barriers(struct intel_engine_cs *engine) +{ + struct llist_node *node, *next; + + llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { + struct dma_fence_cb *cb = + container_of((struct list_head *)node, + typeof(*cb), node); + + cb->func(ERR_PTR(-EAGAIN), cb); + } } static int __engine_park(struct intel_wakeref *wf) @@ -112,9 +249,13 @@ static int __engine_park(struct intel_wakeref *wf) if (!switch_to_kernel_context(engine)) return -EBUSY; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); + + call_idle_barriers(engine); /* cleanup after wedging */ + intel_engine_park_heartbeat(engine); intel_engine_disarm_breadcrumbs(engine); + intel_engine_pool_park(&engine->pool); /* Must be reset upon idling, or we may miss the busy wakeup. */ GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); @@ -129,16 +270,24 @@ static int __engine_park(struct intel_wakeref *wf) engine->execlists.no_priolist = false; - intel_gt_pm_put(engine->i915); + /* While gt calls i915_vma_parked(), we have to break the lock cycle */ + intel_gt_pm_put_async(engine->gt); return 0; } -void intel_engine_pm_put(struct intel_engine_cs *engine) -{ - intel_wakeref_put(&engine->i915->runtime_pm, &engine->wakeref, __engine_park); -} +static const struct intel_wakeref_ops wf_ops = { + .get = __engine_unpark, + .put = __engine_park, +}; void intel_engine_init__pm(struct intel_engine_cs *engine) { - intel_wakeref_init(&engine->wakeref); + struct intel_runtime_pm *rpm = engine->uncore->rpm; + + intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); + intel_engine_init_heartbeat(engine); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_engine_pm.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index a11c893f64c6..e52c2b0cb245 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -7,21 +7,60 @@ #ifndef INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H +#include "i915_request.h" #include "intel_engine_types.h" #include "intel_wakeref.h" -struct drm_i915_private; +static inline bool +intel_engine_pm_is_awake(const struct intel_engine_cs *engine) +{ + return intel_wakeref_is_active(&engine->wakeref); +} -void intel_engine_pm_get(struct intel_engine_cs *engine); -void intel_engine_pm_put(struct intel_engine_cs *engine); +static inline void intel_engine_pm_get(struct intel_engine_cs *engine) +{ + intel_wakeref_get(&engine->wakeref); +} -static inline bool -intel_engine_pm_get_if_awake(struct intel_engine_cs *engine) +static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine) { return intel_wakeref_get_if_active(&engine->wakeref); } -void intel_engine_park(struct intel_engine_cs *engine); +static inline void intel_engine_pm_put(struct intel_engine_cs *engine) +{ + intel_wakeref_put(&engine->wakeref); +} + +static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine) +{ + intel_wakeref_put_async(&engine->wakeref); +} + +static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) +{ + intel_wakeref_unlock_wait(&engine->wakeref); +} + +static inline 
struct i915_request * +intel_engine_create_kernel_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + /* + * The engine->kernel_context is special as it is used inside + * the engine-pm barrier (see __engine_park()), circumventing + * the usual mutexes and relying on the engine-pm barrier + * instead. So whenever we use the engine->kernel_context + * outside of the barrier, we must manually handle the + * engine wakeref to serialise with the use inside. + */ + intel_engine_pm_get(engine); + rq = i915_request_create(engine->kernel_context); + intel_engine_pm_put(engine); + + return rq; +} void intel_engine_init__pm(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c new file mode 100644 index 000000000000..397186818305 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c @@ -0,0 +1,190 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#include "gem/i915_gem_object.h" + +#include "i915_drv.h" +#include "intel_engine_pm.h" +#include "intel_engine_pool.h" + +static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool) +{ + return container_of(pool, struct intel_engine_cs, pool); +} + +static struct list_head * +bucket_for_size(struct intel_engine_pool *pool, size_t sz) +{ + int n; + + /* + * Compute a power-of-two bucket, but throw everything greater than + * 16KiB into the same bucket: i.e. the buckets hold objects of + * (1 page, 2 pages, 4 pages, 8+ pages). + */ + n = fls(sz >> PAGE_SHIFT) - 1; + if (n >= ARRAY_SIZE(pool->cache_list)) + n = ARRAY_SIZE(pool->cache_list) - 1; + + return &pool->cache_list[n]; +} + +static void node_free(struct intel_engine_pool_node *node) +{ + i915_gem_object_put(node->obj); + i915_active_fini(&node->active); + kfree(node); +} + +static int pool_active(struct i915_active *ref) +{ + struct intel_engine_pool_node *node = + container_of(ref, typeof(*node), active); + struct dma_resv *resv = node->obj->base.resv; + int err; + + if (dma_resv_trylock(resv)) { + dma_resv_add_excl_fence(resv, NULL); + dma_resv_unlock(resv); + } + + err = i915_gem_object_pin_pages(node->obj); + if (err) + return err; + + /* Hide this pinned object from the shrinker until retired */ + i915_gem_object_make_unshrinkable(node->obj); + + return 0; +} + +__i915_active_call +static void pool_retire(struct i915_active *ref) +{ + struct intel_engine_pool_node *node = + container_of(ref, typeof(*node), active); + struct intel_engine_pool *pool = node->pool; + struct list_head *list = bucket_for_size(pool, node->obj->base.size); + unsigned long flags; + + GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool))); + + i915_gem_object_unpin_pages(node->obj); + + /* Return this object to the shrinker pool */ + i915_gem_object_make_purgeable(node->obj); + + spin_lock_irqsave(&pool->lock, flags); + list_add(&node->link, list); + spin_unlock_irqrestore(&pool->lock, flags); +} + +static struct intel_engine_pool_node * +node_create(struct intel_engine_pool *pool, size_t sz) +{ + struct intel_engine_cs *engine = to_engine(pool); + struct intel_engine_pool_node *node; + struct drm_i915_gem_object *obj; + + node = kmalloc(sizeof(*node), + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (!node) + return ERR_PTR(-ENOMEM); + + node->pool = pool; + i915_active_init(&node->active, pool_active, pool_retire); + + obj = i915_gem_object_create_internal(engine->i915, sz); + if (IS_ERR(obj)) { + i915_active_fini(&node->active); + 
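bucket_for_size() above spreads cached buffers across power-of-two buckets, folding everything of eight pages and more into the last one. The user-space sketch below reproduces only that size-to-bucket mapping; fls_ul() stands in for the kernel's fls() and a 4 KiB page size is assumed.

/* build: cc bucket_demo.c */
#include <stddef.h>
#include <stdio.h>

#define PAGE_SHIFT    12
#define PAGE_SIZE     (1ul << PAGE_SHIFT)
#define NUM_BUCKETS   4    /* 1 page, 2 pages, 4 pages, 8+ pages */

/* find-last-set: position of the highest set bit, 1-based (0 for 0). */
static int fls_ul(unsigned long x)
{
    int r = 0;

    while (x) {
        x >>= 1;
        r++;
    }
    return r;
}

/*
 * Map an allocation size to a power-of-two bucket, lumping everything of
 * eight pages and above into the last bucket.
 */
static int bucket_for_size(size_t sz)
{
    int n = fls_ul(sz >> PAGE_SHIFT) - 1;

    if (n >= NUM_BUCKETS)
        n = NUM_BUCKETS - 1;
    return n;
}

int main(void)
{
    static const size_t sizes[] = {
        PAGE_SIZE, 2 * PAGE_SIZE, 3 * PAGE_SIZE,
        8 * PAGE_SIZE, 64 * PAGE_SIZE,
    };
    unsigned int i;

    for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
        printf("%zu bytes -> bucket %d\n",
               sizes[i], bucket_for_size(sizes[i]));
    return 0;
}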
kfree(node); + return ERR_CAST(obj); + } + + i915_gem_object_set_readonly(obj); + + node->obj = obj; + return node; +} + +static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine) +{ + if (intel_engine_is_virtual(engine)) + engine = intel_virtual_engine_get_sibling(engine, 0); + + GEM_BUG_ON(!engine); + return &engine->pool; +} + +struct intel_engine_pool_node * +intel_engine_get_pool(struct intel_engine_cs *engine, size_t size) +{ + struct intel_engine_pool *pool = lookup_pool(engine); + struct intel_engine_pool_node *node; + struct list_head *list; + unsigned long flags; + int ret; + + GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool))); + + size = PAGE_ALIGN(size); + list = bucket_for_size(pool, size); + + spin_lock_irqsave(&pool->lock, flags); + list_for_each_entry(node, list, link) { + if (node->obj->base.size < size) + continue; + list_del(&node->link); + break; + } + spin_unlock_irqrestore(&pool->lock, flags); + + if (&node->link == list) { + node = node_create(pool, size); + if (IS_ERR(node)) + return node; + } + + ret = i915_active_acquire(&node->active); + if (ret) { + node_free(node); + return ERR_PTR(ret); + } + + return node; +} + +void intel_engine_pool_init(struct intel_engine_pool *pool) +{ + int n; + + spin_lock_init(&pool->lock); + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) + INIT_LIST_HEAD(&pool->cache_list[n]); +} + +void intel_engine_pool_park(struct intel_engine_pool *pool) +{ + int n; + + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { + struct list_head *list = &pool->cache_list[n]; + struct intel_engine_pool_node *node, *nn; + + list_for_each_entry_safe(node, nn, list, link) + node_free(node); + + INIT_LIST_HEAD(list); + } +} + +void intel_engine_pool_fini(struct intel_engine_pool *pool) +{ + int n; + + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) + GEM_BUG_ON(!list_empty(&pool->cache_list[n])); +} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h new file mode 100644 index 000000000000..1bd89cadc3b7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef INTEL_ENGINE_POOL_H +#define INTEL_ENGINE_POOL_H + +#include "intel_engine_pool_types.h" +#include "i915_active.h" +#include "i915_request.h" + +struct intel_engine_pool_node * +intel_engine_get_pool(struct intel_engine_cs *engine, size_t size); + +static inline int +intel_engine_pool_mark_active(struct intel_engine_pool_node *node, + struct i915_request *rq) +{ + return i915_active_add_request(&node->active, rq); +} + +static inline void +intel_engine_pool_put(struct intel_engine_pool_node *node) +{ + i915_active_release(&node->active); +} + +void intel_engine_pool_init(struct intel_engine_pool *pool); +void intel_engine_pool_park(struct intel_engine_pool *pool); +void intel_engine_pool_fini(struct intel_engine_pool *pool); + +#endif /* INTEL_ENGINE_POOL_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h new file mode 100644 index 000000000000..e31ee361b76f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef INTEL_ENGINE_POOL_TYPES_H +#define INTEL_ENGINE_POOL_TYPES_H + +#include <linux/list.h> +#include <linux/spinlock.h> + +#include "i915_active_types.h" + +struct drm_i915_gem_object; + +struct 
intel_engine_pool { + spinlock_t lock; + struct list_head cache_list[4]; +}; + +struct intel_engine_pool_node { + struct i915_active active; + struct drm_i915_gem_object *obj; + struct list_head link; + struct intel_engine_pool *pool; +}; + +#endif /* INTEL_ENGINE_POOL_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 43e975a26016..92be41a6903c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -7,23 +7,47 @@ #ifndef __INTEL_ENGINE_TYPES__ #define __INTEL_ENGINE_TYPES__ +#include <linux/average.h> #include <linux/hashtable.h> #include <linux/irq_work.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/llist.h> +#include <linux/rbtree.h> +#include <linux/timer.h> #include <linux/types.h> +#include <linux/workqueue.h> #include "i915_gem.h" -#include "i915_gem_batch_pool.h" #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" -#include "i915_timeline_types.h" +#include "intel_engine_pool_types.h" #include "intel_sseu.h" +#include "intel_timeline_types.h" #include "intel_wakeref.h" #include "intel_workarounds_types.h" +/* Legacy HW Engine ID */ + +#define RCS0_HW 0 +#define VCS0_HW 1 +#define BCS0_HW 2 +#define VECS0_HW 3 +#define VCS1_HW 4 +#define VCS2_HW 6 +#define VCS3_HW 7 +#define VECS1_HW 12 + +/* Gen11+ HW Engine class + instance */ +#define RENDER_CLASS 0 +#define VIDEO_DECODE_CLASS 1 +#define VIDEO_ENHANCEMENT_CLASS 2 +#define COPY_ENGINE_CLASS 3 +#define OTHER_CLASS 4 +#define MAX_ENGINE_CLASS 4 +#define MAX_ENGINE_INSTANCE 3 + #define I915_MAX_SLICES 3 #define I915_MAX_SUBSLICES 8 @@ -35,6 +59,8 @@ struct drm_i915_reg_table; struct i915_gem_context; struct i915_request; struct i915_sched_attr; +struct intel_gt; +struct intel_ring; struct intel_uncore; typedef u8 intel_engine_mask_t; @@ -53,44 +79,6 @@ struct intel_instdone { u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; }; -struct intel_engine_hangcheck { - u64 acthd; - u32 last_ring; - u32 last_head; - unsigned long action_timestamp; - struct intel_instdone instdone; -}; - -struct intel_ring { - struct kref ref; - struct i915_vma *vma; - void *vaddr; - - struct i915_timeline *timeline; - struct list_head request_list; - struct list_head active_link; - - /* - * As we have two types of rings, one global to the engine used - * by ringbuffer submission and those that are exclusive to a - * context used by execlists, we have to play safe and allow - * atomic updates to the pin_count. However, the actual pinning - * of the context is either done during initialisation for - * ringbuffer submission or serialised as part of the context - * pinning for execlists, and so we do not need a mutex ourselves - * to serialise intel_ring_pin/intel_ring_unpin. 
- */ - atomic_t pin_count; - - u32 head; - u32 tail; - u32 emit; - - u32 space; - u32 size; - u32 effective_size; -}; - /* * we use a single page to load ctx workarounds so all of these * values are referred in terms of dwords @@ -129,8 +117,12 @@ enum intel_engine_id { VECS1, #define _VECS(n) (VECS0 + (n)) I915_NUM_ENGINES +#define INVALID_ENGINE ((enum intel_engine_id)-1) }; +/* A simple estimator for the round-trip latency of an engine */ +DECLARE_EWMA(_engine_latency, 6, 4) + struct st_preempt_hang { struct completion completion; unsigned int count; @@ -150,6 +142,16 @@ struct intel_engine_execlists { struct tasklet_struct tasklet; /** + * @timer: kick the current context if its timeslice expires + */ + struct timer_list timer; + + /** + * @preempt: reset the current context if it fails to give way + */ + struct timer_list preempt; + + /** * @default_priolist: priority list for I915_PRIORITY_NORMAL */ struct i915_priolist default_priolist; @@ -172,51 +174,28 @@ struct intel_engine_execlists { */ u32 __iomem *ctrl_reg; +#define EXECLIST_MAX_PORTS 2 /** - * @port: execlist port states + * @active: the currently known context executing on HW + */ + struct i915_request * const *active; + /** + * @inflight: the set of contexts submitted and acknowleged by HW * - * For each hardware ELSP (ExecList Submission Port) we keep - * track of the last request and the number of times we submitted - * that port to hw. We then count the number of times the hw reports - * a context completion or preemption. As only one context can - * be active on hw, we limit resubmission of context to port[0]. This - * is called Lite Restore, of the context. + * The set of inflight contexts is managed by reading CS events + * from the HW. On a context-switch event (not preemption), we + * know the HW has transitioned from port0 to port1, and we + * advance our inflight/active tracking accordingly. */ - struct execlist_port { - /** - * @request_count: combined request and submission count - */ - struct i915_request *request_count; -#define EXECLIST_COUNT_BITS 2 -#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) -#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) -#define port_set(p, packed) ((p)->request_count = (packed)) -#define port_isset(p) ((p)->request_count) -#define port_index(p, execlists) ((p) - (execlists)->port) - - /** - * @context_id: context ID for port - */ - GEM_DEBUG_DECL(u32 context_id); - -#define EXECLIST_MAX_PORTS 2 - } port[EXECLIST_MAX_PORTS]; - + struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */]; /** - * @active: is the HW active? We consider the HW as active after - * submitting any context for execution and until we have seen the - * last context completion event. After that, we do not expect any - * more events until we submit, and so can park the HW. + * @pending: the next set of contexts submitted to ELSP * - * As we have a small number of different sources from which we feed - * the HW, we track the state of each inside a single bitfield. + * We store the array of contexts that we submit to HW (via ELSP) and + * promote them to the inflight array once HW has signaled the + * preemption or idle-to-active event. 
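The @pending/@inflight split documented above means submission writes the pending[] array, a CSB promotion event (preemption or idle-to-active) turns pending[] into the acknowledged set, and a context-switch event advances the active cursor by one port. The sketch below is a loose model of that bookkeeping with strings standing in for requests; struct fake_execlists and its helpers are invented for illustration.

/* build: cc csb_demo.c */
#include <stdio.h>
#include <string.h>

#define MAX_PORTS 2

/* Hypothetical stand-in for the per-engine execlists bookkeeping. */
struct fake_execlists {
    const char *inflight[MAX_PORTS + 1];    /* what HW acknowledged */
    const char *pending[MAX_PORTS + 1];     /* what we last wrote to ELSP */
    const char **active;                    /* cursor into inflight[] */
};

/* HW acknowledged the new submission: pending[] becomes the inflight set. */
static void csb_promote(struct fake_execlists *el)
{
    memcpy(el->inflight, el->pending, sizeof(el->inflight));
    memset(el->pending, 0, sizeof(el->pending));
    el->active = el->inflight;
}

/* HW finished the context in the first port: advance the active cursor. */
static void csb_complete(struct fake_execlists *el)
{
    if (*el->active)
        el->active++;
}

static void show(const struct fake_execlists *el)
{
    printf("active: %s, next: %s\n",
           el->active[0] ? el->active[0] : "<idle>",
           el->active[0] && el->active[1] ? el->active[1] : "<none>");
}

int main(void)
{
    struct fake_execlists el = { .pending = { "ctx-A", "ctx-B", NULL } };

    el.active = el.inflight;
    show(&el);            /* nothing running yet */

    csb_promote(&el);     /* idle-to-active event */
    show(&el);            /* ctx-A on HW, ctx-B queued behind it */

    csb_complete(&el);    /* context-switch event */
    show(&el);            /* ctx-B now active */
    return 0;
}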
*/ - unsigned int active; -#define EXECLISTS_ACTIVE_USER 0 -#define EXECLISTS_ACTIVE_PREEMPT 1 -#define EXECLISTS_ACTIVE_HWACK 2 + struct i915_request *pending[EXECLIST_MAX_PORTS + 1]; /** * @port_mask: number of execlist ports - 1 @@ -224,6 +203,16 @@ struct intel_engine_execlists { unsigned int port_mask; /** + * @switch_priority_hint: Second context priority. + * + * We submit multiple contexts to the HW simultaneously and would + * like to occasionally switch between them to emulate timeslicing. + * To know when timeslicing is suitable, we track the priority of + * the context submitted second. + */ + int switch_priority_hint; + + /** * @queue_priority_hint: Highest pending priority. * * When we add requests into the queue, or adjust the priority of @@ -258,11 +247,6 @@ struct intel_engine_execlists { u32 *csb_status; /** - * @preempt_complete_status: expected CSB upon completing preemption - */ - u32 preempt_complete_status; - - /** * @csb_size: context status buffer FIFO size */ u8 csb_size; @@ -279,39 +263,52 @@ struct intel_engine_execlists { struct intel_engine_cs { struct drm_i915_private *i915; + struct intel_gt *gt; struct intel_uncore *uncore; char name[INTEL_ENGINE_CS_MAX_NAME]; enum intel_engine_id id; + enum intel_engine_id legacy_idx; + unsigned int hw_id; unsigned int guc_id; - intel_engine_mask_t mask; - u8 uabi_class; + intel_engine_mask_t mask; u8 class; u8 instance; + + u16 uabi_class; + u16 uabi_instance; + + u32 uabi_capabilities; u32 context_size; u32 mmio_base; - u32 uabi_capabilities; + unsigned int context_tag; +#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) - struct intel_sseu sseu; + struct rb_node uabi_node; - struct intel_ring *buffer; + struct intel_sseu sseu; struct { spinlock_t lock; struct list_head requests; + struct list_head hold; /* ready requests, but on hold */ } active; struct llist_head barrier_tasks; struct intel_context *kernel_context; /* pinned */ - struct intel_context *preempt_context; /* pinned; optional */ intel_engine_mask_t saturated; /* submitting semaphores too late? */ + struct { + struct delayed_work work; + struct i915_request *systole; + } heartbeat; + unsigned long serial; unsigned long wakeref_serial; @@ -319,6 +316,18 @@ struct intel_engine_cs { struct drm_i915_gem_object *default_state; void *pinned_default_state; + struct { + struct intel_ring *ring; + struct intel_timeline *timeline; + } legacy; + + /* + * We track the average duration of the idle pulse on parking the + * engine to keep an estimate of the how the fast the engine is + * under ideal conditions. + */ + struct ewma__engine_latency latency; + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -375,7 +384,7 @@ struct intel_engine_cs { * when the command parser is enabled. Prevents the client from * modifying the batch contents after software parsing. 
*/ - struct i915_gem_batch_pool batch_pool; + struct intel_engine_pool pool; struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; @@ -392,7 +401,10 @@ struct intel_engine_cs { struct { void (*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, bool stalled); + + void (*rewind)(struct intel_engine_cs *engine, bool stalled); + void (*cancel)(struct intel_engine_cs *engine); + void (*finish)(struct intel_engine_cs *engine); } reset; @@ -404,7 +416,6 @@ struct intel_engine_cs { const struct intel_context_ops *cops; int (*request_alloc)(struct i915_request *rq); - int (*init_context)(struct i915_request *rq); int (*emit_flush)(struct i915_request *request, u32 mode); #define EMIT_INVALIDATE BIT(0) @@ -443,29 +454,29 @@ struct intel_engine_cs { void (*schedule)(struct i915_request *request, const struct i915_sched_attr *attr); - /* - * Cancel all requests on the hardware, or queued for execution. - * This should only cancel the ready requests that have been - * submitted to the engine (via the engine->submit_request callback). - * This is called when marking the device as wedged. - */ - void (*cancel_requests)(struct intel_engine_cs *engine); - - void (*destroy)(struct intel_engine_cs *engine); + void (*release)(struct intel_engine_cs *engine); struct intel_engine_execlists execlists; + /* + * Keep track of completed timelines on this engine for early + * retirement with the goal of quickly enabling powersaving as + * soon as the engine is idle. + */ + struct intel_timeline *retire; + struct work_struct retire_work; + /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; - struct intel_engine_hangcheck hangcheck; - -#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_USING_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) #define I915_ENGINE_IS_VIRTUAL BIT(5) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) unsigned int flags; /* @@ -523,12 +534,25 @@ struct intel_engine_cs { */ ktime_t total; } stats; + + struct { + unsigned long heartbeat_interval_ms; + unsigned long preempt_timeout_ms; + unsigned long stop_timeout_ms; + unsigned long timeslice_duration_ms; + } props; }; static inline bool -intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) +intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_USING_CMD_PARSER; +} + +static inline bool +intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) { - return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; + return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; } static inline bool @@ -561,20 +585,24 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_IS_VIRTUAL; } -#define instdone_slice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) +static inline bool +intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) +{ + return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO; +} -#define instdone_subslice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) +#define instdone_has_slice(dev_priv___, sseu___, slice___) \ + ((IS_GEN(dev_priv___, 7) ? 
1 : ((sseu___)->slice_mask)) & BIT(slice___)) -#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ - for ((slice__) = 0, (subslice__) = 0; \ - (slice__) < I915_MAX_SLICES; \ - (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \ - (slice__) += ((subslice__) == 0)) \ - for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ - (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) +#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ + (IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \ + intel_sseu_has_subslice(sseu__, 0, subslice__)) +#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ + for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ + (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ + (slice_) += ((subslice_) == 0)) \ + for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ + (instdone_has_subslice(dev_priv_, sseu_, slice_, \ + subslice_))) #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c new file mode 100644 index 000000000000..9e7f12bef828 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -0,0 +1,299 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/list.h> +#include <linux/list_sort.h> +#include <linux/llist.h> + +#include "i915_drv.h" +#include "intel_engine.h" +#include "intel_engine_user.h" +#include "intel_gt.h" + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) +{ + struct rb_node *p = i915->uabi_engines.rb_node; + + while (p) { + struct intel_engine_cs *it = + rb_entry(p, typeof(*it), uabi_node); + + if (class < it->uabi_class) + p = p->rb_left; + else if (class > it->uabi_class || + instance > it->uabi_instance) + p = p->rb_right; + else if (instance < it->uabi_instance) + p = p->rb_left; + else + return it; + } + + return NULL; +} + +void intel_engine_add_user(struct intel_engine_cs *engine) +{ + llist_add((struct llist_node *)&engine->uabi_node, + (struct llist_head *)&engine->i915->uabi_engines); +} + +static const u8 uabi_classes[] = { + [RENDER_CLASS] = I915_ENGINE_CLASS_RENDER, + [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY, + [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO, + [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE, +}; + +static int engine_cmp(void *priv, struct list_head *A, struct list_head *B) +{ + const struct intel_engine_cs *a = + container_of((struct rb_node *)A, typeof(*a), uabi_node); + const struct intel_engine_cs *b = + container_of((struct rb_node *)B, typeof(*b), uabi_node); + + if (uabi_classes[a->class] < uabi_classes[b->class]) + return -1; + if (uabi_classes[a->class] > uabi_classes[b->class]) + return 1; + + if (a->instance < b->instance) + return -1; + if (a->instance > b->instance) + return 1; + + return 0; +} + +static struct llist_node *get_engines(struct drm_i915_private *i915) +{ + return llist_del_all((struct llist_head *)&i915->uabi_engines); +} + +static void sort_engines(struct drm_i915_private *i915, + struct list_head *engines) +{ + struct llist_node *pos, *next; + + llist_for_each_safe(pos, next, get_engines(i915)) { + struct intel_engine_cs *engine = + container_of((struct rb_node *)pos, typeof(*engine), + uabi_node); + list_add((struct list_head *)&engine->uabi_node, engines); + } + list_sort(NULL, engines, engine_cmp); +} + +static void 
set_scheduler_caps(struct drm_i915_private *i915) +{ + static const struct { + u8 engine; + u8 sched; + } map[] = { +#define MAP(x, y) { ilog2(I915_ENGINE_##x), ilog2(I915_SCHEDULER_CAP_##y) } + MAP(HAS_PREEMPTION, PREEMPTION), + MAP(HAS_SEMAPHORES, SEMAPHORES), + MAP(SUPPORTS_STATS, ENGINE_BUSY_STATS), +#undef MAP + }; + struct intel_engine_cs *engine; + u32 enabled, disabled; + + enabled = 0; + disabled = 0; + for_each_uabi_engine(engine, i915) { /* all engines must agree! */ + int i; + + if (engine->schedule) + enabled |= (I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY); + else + disabled |= (I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY); + + for (i = 0; i < ARRAY_SIZE(map); i++) { + if (engine->flags & BIT(map[i].engine)) + enabled |= BIT(map[i].sched); + else + disabled |= BIT(map[i].sched); + } + } + + i915->caps.scheduler = enabled & ~disabled; + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED)) + i915->caps.scheduler = 0; +} + +const char *intel_engine_class_repr(u8 class) +{ + static const char * const uabi_names[] = { + [RENDER_CLASS] = "rcs", + [COPY_ENGINE_CLASS] = "bcs", + [VIDEO_DECODE_CLASS] = "vcs", + [VIDEO_ENHANCEMENT_CLASS] = "vecs", + }; + + if (class >= ARRAY_SIZE(uabi_names) || !uabi_names[class]) + return "xxx"; + + return uabi_names[class]; +} + +struct legacy_ring { + struct intel_gt *gt; + u8 class; + u8 instance; +}; + +static int legacy_ring_idx(const struct legacy_ring *ring) +{ + static const struct { + u8 base, max; + } map[] = { + [RENDER_CLASS] = { RCS0, 1 }, + [COPY_ENGINE_CLASS] = { BCS0, 1 }, + [VIDEO_DECODE_CLASS] = { VCS0, I915_MAX_VCS }, + [VIDEO_ENHANCEMENT_CLASS] = { VECS0, I915_MAX_VECS }, + }; + + if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map))) + return INVALID_ENGINE; + + if (GEM_DEBUG_WARN_ON(ring->instance >= map[ring->class].max)) + return INVALID_ENGINE; + + return map[ring->class].base + ring->instance; +} + +static void add_legacy_ring(struct legacy_ring *ring, + struct intel_engine_cs *engine) +{ + if (engine->gt != ring->gt || engine->class != ring->class) { + ring->gt = engine->gt; + ring->class = engine->class; + ring->instance = 0; + } + + engine->legacy_idx = legacy_ring_idx(ring); + if (engine->legacy_idx != INVALID_ENGINE) + ring->instance++; +} + +void intel_engines_driver_register(struct drm_i915_private *i915) +{ + struct legacy_ring ring = {}; + u8 uabi_instances[4] = {}; + struct list_head *it, *next; + struct rb_node **p, *prev; + LIST_HEAD(engines); + + sort_engines(i915, &engines); + + prev = NULL; + p = &i915->uabi_engines.rb_node; + list_for_each_safe(it, next, &engines) { + struct intel_engine_cs *engine = + container_of((struct rb_node *)it, typeof(*engine), + uabi_node); + char old[sizeof(engine->name)]; + + if (intel_gt_has_init_error(engine->gt)) + continue; /* ignore incomplete engines */ + + GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); + engine->uabi_class = uabi_classes[engine->class]; + + GEM_BUG_ON(engine->uabi_class >= ARRAY_SIZE(uabi_instances)); + engine->uabi_instance = uabi_instances[engine->uabi_class]++; + + /* Replace the internal name with the final user facing name */ + memcpy(old, engine->name, sizeof(engine->name)); + scnprintf(engine->name, sizeof(engine->name), "%s%u", + intel_engine_class_repr(engine->class), + engine->uabi_instance); + DRM_DEBUG_DRIVER("renamed %s to %s\n", old, engine->name); + + rb_link_node(&engine->uabi_node, prev, p); + rb_insert_color(&engine->uabi_node, &i915->uabi_engines); + + GEM_BUG_ON(intel_engine_lookup_user(i915, + 
engine->uabi_class, + engine->uabi_instance) != engine); + + /* Fix up the mapping to match default execbuf::user_map[] */ + add_legacy_ring(&ring, engine); + + prev = &engine->uabi_node; + p = &prev->rb_right; + } + + if (IS_ENABLED(CONFIG_DRM_I915_SELFTESTS) && + IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { + struct intel_engine_cs *engine; + unsigned int isolation; + int class, inst; + int errors = 0; + + for (class = 0; class < ARRAY_SIZE(uabi_instances); class++) { + for (inst = 0; inst < uabi_instances[class]; inst++) { + engine = intel_engine_lookup_user(i915, + class, inst); + if (!engine) { + pr_err("UABI engine not found for { class:%d, instance:%d }\n", + class, inst); + errors++; + continue; + } + + if (engine->uabi_class != class || + engine->uabi_instance != inst) { + pr_err("Wrong UABI engine:%s { class:%d, instance:%d } found for { class:%d, instance:%d }\n", + engine->name, + engine->uabi_class, + engine->uabi_instance, + class, inst); + errors++; + continue; + } + } + } + + /* + * Make sure that classes with multiple engine instances all + * share the same basic configuration. + */ + isolation = intel_engines_has_context_isolation(i915); + for_each_uabi_engine(engine, i915) { + unsigned int bit = BIT(engine->uabi_class); + unsigned int expected = engine->default_state ? bit : 0; + + if ((isolation & bit) != expected) { + pr_err("mismatching default context state for class %d on engine %s\n", + engine->uabi_class, engine->name); + errors++; + } + } + + if (WARN(errors, "Invalid UABI engine mapping found")) + i915->uabi_engines = RB_ROOT; + } + + set_scheduler_caps(i915); +} + +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + unsigned int which; + + which = 0; + for_each_uabi_engine(engine, i915) + if (engine->default_state) + which |= BIT(engine->uabi_class); + + return which; +} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h new file mode 100644 index 000000000000..f845ea1cbfaa --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_USER_H +#define INTEL_ENGINE_USER_H + +#include <linux/types.h> + +struct drm_i915_private; +struct intel_engine_cs; + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); + +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); + +void intel_engine_add_user(struct intel_engine_cs *engine); +void intel_engines_driver_register(struct drm_i915_private *i915); + +const char *intel_engine_class_repr(u8 class); + +#endif /* INTEL_ENGINE_USER_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c new file mode 100644 index 000000000000..531d501be01f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -0,0 +1,1486 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/stop_machine.h> + +#include <asm/set_memory.h> +#include <asm/smp.h> + +#include "intel_gt.h" +#include "i915_drv.h" +#include "i915_scatterlist.h" +#include "i915_vgpu.h" + +#include "intel_gtt.h" + +static int +i915_get_ggtt_vma_pages(struct i915_vma *vma); + +static void i915_ggtt_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, + u64 *end) +{ + if (i915_node_color_differs(node, color)) + *start += I915_GTT_PAGE_SIZE; + + /* + 
* Also leave a space between the unallocated reserved node after the + * GTT and any objects within the GTT, i.e. we use the color adjustment + * to insert a guard page to prevent prefetches crossing over the + * GTT boundary. + */ + node = list_next_entry(node, node_list); + if (node->color != color) + *end -= I915_GTT_PAGE_SIZE; +} + +static int ggtt_init_hw(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + + i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); + + ggtt->vm.is_ggtt = true; + + /* Only VLV supports read-only GGTT mappings */ + ggtt->vm.has_read_only = IS_VALLEYVIEW(i915); + + if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) + ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust; + + if (ggtt->mappable_end) { + if (!io_mapping_init_wc(&ggtt->iomap, + ggtt->gmadr.start, + ggtt->mappable_end)) { + ggtt->vm.cleanup(&ggtt->vm); + return -EIO; + } + + ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, + ggtt->mappable_end); + } + + i915_ggtt_init_fences(ggtt); + + return 0; +} + +/** + * i915_ggtt_init_hw - Initialize GGTT hardware + * @i915: i915 device + */ +int i915_ggtt_init_hw(struct drm_i915_private *i915) +{ + int ret; + + stash_init(&i915->mm.wc_stash); + + /* + * Note that we use page colouring to enforce a guard page at the + * end of the address space. This is required as the CS may prefetch + * beyond the end of the batch buffer, across the page boundary, + * and beyond the end of the GTT if we do not provide a guard. + */ + ret = ggtt_init_hw(&i915->ggtt); + if (ret) + return ret; + + return 0; +} + +/* + * Certain Gen5 chipsets require require idling the GPU before + * unmapping anything from the GTT when VT-d is enabled. + */ +static bool needs_idle_maps(struct drm_i915_private *i915) +{ + /* + * Query intel_iommu to see if we need the workaround. Presumably that + * was loaded first. + */ + return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active(); +} + +static void ggtt_suspend_mappings(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + + /* + * Don't bother messing with faults pre GEN6 as we have little + * documentation supporting that it's a good idea. + */ + if (INTEL_GEN(i915) < 6) + return; + + intel_gt_check_and_clear_faults(ggtt->vm.gt); + + ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); + + ggtt->invalidate(ggtt); +} + +void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915) +{ + ggtt_suspend_mappings(&i915->ggtt); +} + +void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + + spin_lock_irq(&uncore->lock); + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6); + spin_unlock_irq(&uncore->lock); +} + +static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + + /* + * Note that as an uncached mmio write, this will flush the + * WCB of the writes into the GGTT before it triggers the invalidate. 
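ggtt_init_hw() above installs i915_ggtt_color_adjust() so that an allocation bordering differently-coloured space is shrunk by a page on that side, leaving a guard page the command streamer can prefetch into harmlessly. A minimal sketch of that range adjustment, with the drm_mm node walk stripped away and invented names:

/* build: cc guard_demo.c */
#include <stdio.h>

#define GTT_PAGE_SIZE    4096ull

/*
 * Shrink a candidate [start, end) range by one page at either side whose
 * neighbouring node has a different colour, so prefetch past the allocation
 * cannot cross into differently-coloured space.
 */
static void color_adjust(unsigned long prev_color, unsigned long next_color,
                         unsigned long color,
                         unsigned long long *start, unsigned long long *end)
{
    if (prev_color != color)
        *start += GTT_PAGE_SIZE;
    if (next_color != color)
        *end -= GTT_PAGE_SIZE;
}

int main(void)
{
    unsigned long long start = 0, end = 16 * GTT_PAGE_SIZE;

    /* Neighbours use colour 1, we are inserting with colour 0. */
    color_adjust(1, 1, 0, &start, &end);
    printf("usable range: [%llu, %llu) (%llu pages)\n",
           start, end, (end - start) / GTT_PAGE_SIZE);
    return 0;
}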
+ */ + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); +} + +static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + struct drm_i915_private *i915 = ggtt->vm.i915; + + gen8_ggtt_invalidate(ggtt); + + if (INTEL_GEN(i915) >= 12) + intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, + GEN12_GUC_TLB_INV_CR_INVALIDATE); + else + intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); +} + +static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + intel_gtt_chipset_flush(); +} + +static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) +{ + writeq(pte, addr); +} + +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 unused) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + gen8_set_pte(pte, gen8_pte_encode(addr, level, 0)); + + ggtt->invalidate(ggtt); +} + +static void gen8_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + struct sgt_iter sgt_iter; + gen8_pte_t __iomem *gtt_entries; + const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); + dma_addr_t addr; + + /* + * Note that we ignore PTE_READ_ONLY here. The caller must be careful + * not to allow the user to override access to a read only page. + */ + + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; + gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; + for_each_sgt_daddr(addr, sgt_iter, vma->pages) + gen8_set_pte(gtt_entries++, pte_encode | addr); + + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. + */ + ggtt->invalidate(ggtt); +} + +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + iowrite32(vm->pte_encode(addr, level, flags), pte); + + ggtt->invalidate(ggtt); +} + +/* + * Binds an object into the global gtt with the specified cache level. + * The object will be accessible to the GPU via commands whose operands + * reference offsets within the global GTT as well as accessible by the GPU + * through the GMADR mapped BAR (i915->mm.gtt->gtt). + */ +static void gen6_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; + unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; + struct sgt_iter iter; + dma_addr_t addr; + + for_each_sgt_daddr(addr, iter, vma->pages) + iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. 
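 * Batching the invalidate this way amortises it over the whole object:
 * binding, say, a 2 MiB buffer writes 512 PTEs through the WC mapping
 * of the GSM but ends with a single ggtt->invalidate() call rather than
 * one per page.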
+ */ + ggtt->invalidate(ggtt); +} + +static void nop_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ +} + +static void gen8_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + const gen8_pte_t scratch_pte = vm->scratch[0].encode; + gen8_pte_t __iomem *gtt_base = + (gen8_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + for (i = 0; i < num_entries; i++) + gen8_set_pte(>t_base[i], scratch_pte); +} + +static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) +{ + /* + * Make sure the internal GAM fifo has been cleared of all GTT + * writes before exiting stop_machine(). This guarantees that + * any aperture accesses waiting to start in another process + * cannot back up behind the GTT writes causing a hang. + * The register can be any arbitrary GAM register. + */ + intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6); +} + +struct insert_page { + struct i915_address_space *vm; + dma_addr_t addr; + u64 offset; + enum i915_cache_level level; +}; + +static int bxt_vtd_ggtt_insert_page__cb(void *_arg) +{ + struct insert_page *arg = _arg; + + gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 unused) +{ + struct insert_page arg = { vm, addr, offset, level }; + + stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); +} + +struct insert_entries { + struct i915_address_space *vm; + struct i915_vma *vma; + enum i915_cache_level level; + u32 flags; +}; + +static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) +{ + struct insert_entries *arg = _arg; + + gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level level, + u32 flags) +{ + struct insert_entries arg = { vm, vma, level, flags }; + + stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); +} + +struct clear_range { + struct i915_address_space *vm; + u64 start; + u64 length; +}; + +static int bxt_vtd_ggtt_clear_range__cb(void *_arg) +{ + struct clear_range *arg = _arg; + + gen8_ggtt_clear_range(arg->vm, arg->start, arg->length); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm, + u64 start, + u64 length) +{ + struct clear_range arg = { vm, start, length }; + + stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL); +} + +static void gen6_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + gen6_pte_t scratch_pte, __iomem *gtt_base = + (gen6_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + 
first_entry, num_entries, max_entries)) + num_entries = max_entries; + + scratch_pte = vm->scratch[0].encode; + for (i = 0; i < num_entries; i++) + iowrite32(scratch_pte, >t_base[i]); +} + +static void i915_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level cache_level, + u32 unused) +{ + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); +} + +static void i915_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) +{ + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + + intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT, + flags); +} + +static void i915_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); +} + +static int ggtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + u32 pte_flags; + + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ + pte_flags = 0; + if (i915_gem_object_is_readonly(obj)) + pte_flags |= PTE_READ_ONLY; + + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + + /* + * Without aliasing PPGTT there's no difference between + * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally + * upgrade to both bound if we bind either to avoid double-binding. + */ + atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); + + return 0; +} + +static void ggtt_unbind_vma(struct i915_vma *vma) +{ + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); +} + +static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) +{ + u64 size; + int ret; + + if (!USES_GUC(ggtt->vm.i915)) + return 0; + + GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); + size = ggtt->vm.total - GUC_GGTT_TOP; + + ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size, + GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, + PIN_NOEVICT); + if (ret) + DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n"); + + return ret; +} + +static void ggtt_release_guc_top(struct i915_ggtt *ggtt) +{ + if (drm_mm_node_allocated(&ggtt->uc_fw)) + drm_mm_remove_node(&ggtt->uc_fw); +} + +static void cleanup_init_ggtt(struct i915_ggtt *ggtt) +{ + ggtt_release_guc_top(ggtt); + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_mm_remove_node(&ggtt->error_capture); + mutex_destroy(&ggtt->error_mutex); +} + +static int init_ggtt(struct i915_ggtt *ggtt) +{ + /* + * Let GEM Manage all of the aperture. + * + * However, leave one page at the end still bound to the scratch page. + * There are a number of places where the hardware apparently prefetches + * past the end of the object, and we've seen multiple hangs with the + * GPU head pointer stuck in a batchbuffer bound at the last page of the + * aperture. One page should be enough to keep any prefetching inside + * of the aperture. + */ + unsigned long hole_start, hole_end; + struct drm_mm_node *entry; + int ret; + + /* + * GuC requires all resources that we're sharing with it to be placed in + * non-WOPCM memory. If GuC is not present or not in use we still need a + * small bias as ring wraparound at offset 0 sometimes hangs. No idea + * why. 
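 * The max_t() below encodes exactly that: the bias is the GuC portion
 * of WOPCM reported by intel_wopcm_guc_size() when one exists, and
 * never less than a single I915_GTT_PAGE_SIZE page.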
+ */ + ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE, + intel_wopcm_guc_size(&ggtt->vm.i915->wopcm)); + + ret = intel_vgt_balloon(ggtt); + if (ret) + return ret; + + mutex_init(&ggtt->error_mutex); + if (ggtt->mappable_end) { + /* Reserve a mappable slot for our lockless error capture */ + ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, + &ggtt->error_capture, + PAGE_SIZE, 0, + I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (ret) + return ret; + } + + /* + * The upper portion of the GuC address space has a sizeable hole + * (several MB) that is inaccessible by GuC. Reserve this range within + * GGTT as it can comfortably hold GuC/HuC firmware images. + */ + ret = ggtt_reserve_guc_top(ggtt); + if (ret) + goto err; + + /* Clear any non-preallocated blocks */ + drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { + DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", + hole_start, hole_end); + ggtt->vm.clear_range(&ggtt->vm, hole_start, + hole_end - hole_start); + } + + /* And finally clear the reserved guard page */ + ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE); + + return 0; + +err: + cleanup_init_ggtt(ggtt); + return ret; +} + +static int aliasing_gtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + u32 pte_flags; + int ret; + + /* Currently applicable only to VLV */ + pte_flags = 0; + if (i915_gem_object_is_readonly(vma->obj)) + pte_flags |= PTE_READ_ONLY; + + if (flags & I915_VMA_LOCAL_BIND) { + struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias; + + if (flags & I915_VMA_ALLOC) { + ret = alias->vm.allocate_va_range(&alias->vm, + vma->node.start, + vma->size); + if (ret) + return ret; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); + } + + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, + __i915_vma_flags(vma))); + alias->vm.insert_entries(&alias->vm, vma, + cache_level, pte_flags); + } + + if (flags & I915_VMA_GLOBAL_BIND) + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + + return 0; +} + +static void aliasing_gtt_unbind_vma(struct i915_vma *vma) +{ + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { + struct i915_address_space *vm = vma->vm; + + vm->clear_range(vm, vma->node.start, vma->size); + } + + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { + struct i915_address_space *vm = + &i915_vm_to_ggtt(vma->vm)->alias->vm; + + vm->clear_range(vm, vma->node.start, vma->size); + } +} + +static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) +{ + struct i915_ppgtt *ppgtt; + int err; + + ppgtt = i915_ppgtt_create(ggtt->vm.gt); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); + + if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) { + err = -ENODEV; + goto err_ppgtt; + } + + /* + * Note we only pre-allocate as far as the end of the global + * GTT. On 48b / 4-level page-tables, the difference is very, + * very significant! We have to preallocate as GVT/vgpu does + * not like the page directory disappearing. 
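 * To put numbers on it: a full 4-level PPGTT covers 2^48 bytes (256 TiB)
 * of address space, whereas the GGTT tops out at 4 GiB, so only that
 * GGTT-sized prefix is preallocated here.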
+ */ + err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total); + if (err) + goto err_ppgtt; + + ggtt->alias = ppgtt; + ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; + + GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); + ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; + + GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); + ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; + + return 0; + +err_ppgtt: + i915_vm_put(&ppgtt->vm); + return err; +} + +static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt) +{ + struct i915_ppgtt *ppgtt; + + ppgtt = fetch_and_zero(&ggtt->alias); + if (!ppgtt) + return; + + i915_vm_put(&ppgtt->vm); + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; +} + +int i915_init_ggtt(struct drm_i915_private *i915) +{ + int ret; + + ret = init_ggtt(&i915->ggtt); + if (ret) + return ret; + + if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) { + ret = init_aliasing_ppgtt(&i915->ggtt); + if (ret) + cleanup_init_ggtt(&i915->ggtt); + } + + return 0; +} + +static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) +{ + struct i915_vma *vma, *vn; + + atomic_set(&ggtt->vm.open, 0); + + rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ + flush_workqueue(ggtt->vm.i915->wq); + + mutex_lock(&ggtt->vm.mutex); + + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) + WARN_ON(__i915_vma_unbind(vma)); + + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_mm_remove_node(&ggtt->error_capture); + mutex_destroy(&ggtt->error_mutex); + + ggtt_release_guc_top(ggtt); + intel_vgt_deballoon(ggtt); + + ggtt->vm.cleanup(&ggtt->vm); + + mutex_unlock(&ggtt->vm.mutex); + i915_address_space_fini(&ggtt->vm); + + arch_phys_wc_del(ggtt->mtrr); + + if (ggtt->iomap.size) + io_mapping_fini(&ggtt->iomap); +} + +/** + * i915_ggtt_driver_release - Clean up GGTT hardware initialization + * @i915: i915 device + */ +void i915_ggtt_driver_release(struct drm_i915_private *i915) +{ + struct pagevec *pvec; + + fini_aliasing_ppgtt(&i915->ggtt); + + ggtt_cleanup_hw(&i915->ggtt); + + pvec = &i915->mm.wc_stash.pvec; + if (pvec->nr) { + set_pages_array_wb(pvec->pages, pvec->nr); + __pagevec_release(pvec); + } +} + +static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) +{ + snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; + snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; + return snb_gmch_ctl << 20; +} + +static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) +{ + bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; + bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; + if (bdw_gmch_ctl) + bdw_gmch_ctl = 1 << bdw_gmch_ctl; + +#ifdef CONFIG_X86_32 + /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */ + if (bdw_gmch_ctl > 4) + bdw_gmch_ctl = 4; +#endif + + return bdw_gmch_ctl << 20; +} + +static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) +{ + gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; + gmch_ctrl &= SNB_GMCH_GGMS_MASK; + + if (gmch_ctrl) + return 1 << (20 + gmch_ctrl); + + return 0; +} + +static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = i915->drm.pdev; + phys_addr_t phys_addr; + int ret; + + /* For Modern GENs the PTEs and register space are split in the BAR */ + phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; + + /* + * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. 
For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to + * resort to an uncached mapping. The WC issue is easily caught by the + * readback check when writing GTT PTE entries. + */ + if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10) + ggtt->gsm = ioremap(phys_addr, size); + else + ggtt->gsm = ioremap_wc(phys_addr, size); + if (!ggtt->gsm) { + DRM_ERROR("Failed to map the ggtt page table\n"); + return -ENOMEM; + } + + ret = setup_scratch_page(&ggtt->vm, GFP_DMA32); + if (ret) { + DRM_ERROR("Scratch setup failed\n"); + /* iounmap will also get called at remove, but meh */ + iounmap(ggtt->gsm); + return ret; + } + + ggtt->vm.scratch[0].encode = + ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]), + I915_CACHE_NONE, 0); + + return 0; +} + +int ggtt_set_pages(struct i915_vma *vma) +{ + int ret; + + GEM_BUG_ON(vma->pages); + + ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + +static void gen6_gmch_remove(struct i915_address_space *vm) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + iounmap(ggtt->gsm); + cleanup_scratch_page(vm); +} + +static struct resource pci_resource(struct pci_dev *pdev, int bar) +{ + return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar), + pci_resource_len(pdev, bar)); +} + +static int gen8_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = i915->drm.pdev; + unsigned int size; + u16 snb_gmch_ctl; + int err; + + /* TODO: We're not aware of mappable constraints on gen8 yet */ + if (!IS_DGFX(i915)) { + ggtt->gmadr = pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + if (err) + DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); + + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + if (IS_CHERRYVIEW(i915)) + size = chv_get_total_gtt_size(snb_gmch_ctl); + else + size = gen8_get_total_gtt_size(snb_gmch_ctl); + + ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; + ggtt->vm.cleanup = gen6_gmch_remove; + ggtt->vm.insert_page = gen8_ggtt_insert_page; + ggtt->vm.clear_range = nop_clear_range; + if (intel_scanout_needs_vtd_wa(i915)) + ggtt->vm.clear_range = gen8_ggtt_clear_range; + + ggtt->vm.insert_entries = gen8_ggtt_insert_entries; + + /* Serialize GTT updates with aperture access on BXT if VT-d is on. 
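 * (The __BKL variants assigned below simply wrap the regular callbacks
 * in stop_machine(), so no other CPU can race an aperture access
 * against the PTE rewrite; see bxt_vtd_ggtt_wa() above.)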
*/ + if (intel_ggtt_update_needs_vtd_wa(i915) || + IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) { + ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; + ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; + if (ggtt->vm.clear_range != nop_clear_range) + ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; + } + + ggtt->invalidate = gen8_ggtt_invalidate; + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; + ggtt->vm.vma_ops.set_pages = ggtt_set_pages; + ggtt->vm.vma_ops.clear_pages = clear_pages; + + ggtt->vm.pte_encode = gen8_pte_encode; + + setup_private_pat(ggtt->vm.gt->uncore); + + return ggtt_probe_common(ggtt, size); +} + +static u64 snb_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_L3_LLC: + case I915_CACHE_LLC: + pte |= GEN6_PTE_CACHE_LLC; + break; + case I915_CACHE_NONE: + pte |= GEN6_PTE_UNCACHED; + break; + default: + MISSING_CASE(level); + } + + return pte; +} + +static u64 ivb_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_L3_LLC: + pte |= GEN7_PTE_CACHE_L3_LLC; + break; + case I915_CACHE_LLC: + pte |= GEN6_PTE_CACHE_LLC; + break; + case I915_CACHE_NONE: + pte |= GEN6_PTE_UNCACHED; + break; + default: + MISSING_CASE(level); + } + + return pte; +} + +static u64 byt_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + if (!(flags & PTE_READ_ONLY)) + pte |= BYT_PTE_WRITEABLE; + + if (level != I915_CACHE_NONE) + pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; + + return pte; +} + +static u64 hsw_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + if (level != I915_CACHE_NONE) + pte |= HSW_WB_LLC_AGE3; + + return pte; +} + +static u64 iris_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_NONE: + break; + case I915_CACHE_WT: + pte |= HSW_WT_ELLC_LLC_AGE3; + break; + default: + pte |= HSW_WB_ELLC_LLC_AGE3; + break; + } + + return pte; +} + +static int gen6_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = i915->drm.pdev; + unsigned int size; + u16 snb_gmch_ctl; + int err; + + ggtt->gmadr = pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + + /* + * 64/512MB is the current min/max we actually know of, but this is + * just a coarse sanity check. 
+ */ + if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { + DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end); + return -ENXIO; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + if (err) + DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + + size = gen6_get_total_gtt_size(snb_gmch_ctl); + ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; + + ggtt->vm.clear_range = nop_clear_range; + if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) + ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.insert_page = gen6_ggtt_insert_page; + ggtt->vm.insert_entries = gen6_ggtt_insert_entries; + ggtt->vm.cleanup = gen6_gmch_remove; + + ggtt->invalidate = gen6_ggtt_invalidate; + + if (HAS_EDRAM(i915)) + ggtt->vm.pte_encode = iris_pte_encode; + else if (IS_HASWELL(i915)) + ggtt->vm.pte_encode = hsw_pte_encode; + else if (IS_VALLEYVIEW(i915)) + ggtt->vm.pte_encode = byt_pte_encode; + else if (INTEL_GEN(i915) >= 7) + ggtt->vm.pte_encode = ivb_pte_encode; + else + ggtt->vm.pte_encode = snb_pte_encode; + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; + ggtt->vm.vma_ops.set_pages = ggtt_set_pages; + ggtt->vm.vma_ops.clear_pages = clear_pages; + + return ggtt_probe_common(ggtt, size); +} + +static void i915_gmch_remove(struct i915_address_space *vm) +{ + intel_gmch_remove(); +} + +static int i915_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + phys_addr_t gmadr_base; + int ret; + + ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL); + if (!ret) { + DRM_ERROR("failed to set up gmch\n"); + return -EIO; + } + + intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); + + ggtt->gmadr = + (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); + + ggtt->do_idle_maps = needs_idle_maps(i915); + ggtt->vm.insert_page = i915_ggtt_insert_page; + ggtt->vm.insert_entries = i915_ggtt_insert_entries; + ggtt->vm.clear_range = i915_ggtt_clear_range; + ggtt->vm.cleanup = i915_gmch_remove; + + ggtt->invalidate = gmch_ggtt_invalidate; + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; + ggtt->vm.vma_ops.set_pages = ggtt_set_pages; + ggtt->vm.vma_ops.clear_pages = clear_pages; + + if (unlikely(ggtt->do_idle_maps)) + dev_notice(i915->drm.dev, + "Applying Ironlake quirks for intel_iommu\n"); + + return 0; +} + +static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + int ret; + + ggtt->vm.gt = gt; + ggtt->vm.i915 = i915; + ggtt->vm.dma = &i915->drm.pdev->dev; + + if (INTEL_GEN(i915) <= 5) + ret = i915_gmch_probe(ggtt); + else if (INTEL_GEN(i915) < 8) + ret = gen6_gmch_probe(ggtt); + else + ret = gen8_gmch_probe(ggtt); + if (ret) + return ret; + + if ((ggtt->vm.total - 1) >> 32) { + DRM_ERROR("We never expected a Global GTT with more than 32bits" + " of address space! Found %lldM!\n", + ggtt->vm.total >> 20); + ggtt->vm.total = 1ULL << 32; + ggtt->mappable_end = + min_t(u64, ggtt->mappable_end, ggtt->vm.total); + } + + if (ggtt->mappable_end > ggtt->vm.total) { + DRM_ERROR("mappable aperture extends past end of GGTT," + " aperture=%pa, total=%llx\n", + &ggtt->mappable_end, ggtt->vm.total); + ggtt->mappable_end = ggtt->vm.total; + } + + /* GMADR is the PCI mmio aperture into the global GTT. 
*/ + DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20); + DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); + DRM_DEBUG_DRIVER("DSM size = %lluM\n", + (u64)resource_size(&intel_graphics_stolen_res) >> 20); + + return 0; +} + +/** + * i915_ggtt_probe_hw - Probe GGTT hardware location + * @i915: i915 device + */ +int i915_ggtt_probe_hw(struct drm_i915_private *i915) +{ + int ret; + + ret = ggtt_probe_hw(&i915->ggtt, &i915->gt); + if (ret) + return ret; + + if (intel_vtd_active()) + dev_info(i915->drm.dev, "VT-d active for gfx access\n"); + + return 0; +} + +int i915_ggtt_enable_hw(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) < 6 && !intel_enable_gtt()) + return -EIO; + + return 0; +} + +void i915_ggtt_enable_guc(struct i915_ggtt *ggtt) +{ + GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate); + + ggtt->invalidate = guc_ggtt_invalidate; + + ggtt->invalidate(ggtt); +} + +void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) +{ + /* XXX Temporary pardon for error unload */ + if (ggtt->invalidate == gen8_ggtt_invalidate) + return; + + /* We should only be called after i915_ggtt_enable_guc() */ + GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate); + + ggtt->invalidate = gen8_ggtt_invalidate; + + ggtt->invalidate(ggtt); +} + +static void ggtt_restore_mappings(struct i915_ggtt *ggtt) +{ + struct i915_vma *vma; + bool flush = false; + int open; + + intel_gt_check_and_clear_faults(ggtt->vm.gt); + + mutex_lock(&ggtt->vm.mutex); + + /* First fill our portion of the GTT with scratch pages */ + ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); + + /* Skip rewriting PTE on VMA unbind. */ + open = atomic_xchg(&ggtt->vm.open, 0); + + /* clflush objects bound into the GGTT and rebind them. */ + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + + if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + continue; + + clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); + WARN_ON(i915_vma_bind(vma, + obj ? obj->cache_level : 0, + PIN_GLOBAL, NULL)); + if (obj) { /* only used during resume => exclusive access */ + flush |= fetch_and_zero(&obj->write_domain); + obj->read_domains |= I915_GEM_DOMAIN_GTT; + } + } + + atomic_set(&ggtt->vm.open, open); + ggtt->invalidate(ggtt); + + mutex_unlock(&ggtt->vm.mutex); + + if (flush) + wbinvd_on_all_cpus(); +} + +void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + ggtt_restore_mappings(ggtt); + + if (INTEL_GEN(i915) >= 8) + setup_private_pat(ggtt->vm.gt->uncore); +} + +static struct scatterlist * +rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, + unsigned int width, unsigned int height, + unsigned int stride, + struct sg_table *st, struct scatterlist *sg) +{ + unsigned int column, row; + unsigned int src_idx; + + for (column = 0; column < width; column++) { + src_idx = stride * (height - 1) + column + offset; + for (row = 0; row < height; row++) { + st->nents++; + /* + * We don't need the pages, but need to initialize + * the entries so the sg list can be happily traversed. + * The only thing we need are DMA addresses. 
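 * As a worked example, a plane with width 2, height 2, stride 2 and
 * offset 0 is walked column by column from the bottom row up, emitting
 * the source pages in the order 2, 0, 3, 1; that reordering is what
 * produces the rotated layout in the resulting GGTT view.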
+ */ + sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0); + sg_dma_address(sg) = + i915_gem_object_get_dma_address(obj, src_idx); + sg_dma_len(sg) = I915_GTT_PAGE_SIZE; + sg = sg_next(sg); + src_idx -= stride; + } + } + + return sg; +} + +static noinline struct sg_table * +intel_rotate_pages(struct intel_rotation_info *rot_info, + struct drm_i915_gem_object *obj) +{ + unsigned int size = intel_rotation_info_size(rot_info); + struct sg_table *st; + struct scatterlist *sg; + int ret = -ENOMEM; + int i; + + /* Allocate target SG list. */ + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, size, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + st->nents = 0; + sg = st->sgl; + + for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { + sg = rotate_pages(obj, rot_info->plane[i].offset, + rot_info->plane[i].width, rot_info->plane[i].height, + rot_info->plane[i].stride, st, sg); + } + + return st; + +err_sg_alloc: + kfree(st); +err_st_alloc: + + DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); + + return ERR_PTR(ret); +} + +static struct scatterlist * +remap_pages(struct drm_i915_gem_object *obj, unsigned int offset, + unsigned int width, unsigned int height, + unsigned int stride, + struct sg_table *st, struct scatterlist *sg) +{ + unsigned int row; + + for (row = 0; row < height; row++) { + unsigned int left = width * I915_GTT_PAGE_SIZE; + + while (left) { + dma_addr_t addr; + unsigned int length; + + /* + * We don't need the pages, but need to initialize + * the entries so the sg list can be happily traversed. + * The only thing we need are DMA addresses. + */ + + addr = i915_gem_object_get_dma_address_len(obj, offset, &length); + + length = min(left, length); + + st->nents++; + + sg_set_page(sg, NULL, length, 0); + sg_dma_address(sg) = addr; + sg_dma_len(sg) = length; + sg = sg_next(sg); + + offset += length / I915_GTT_PAGE_SIZE; + left -= length; + } + + offset += stride - width; + } + + return sg; +} + +static noinline struct sg_table * +intel_remap_pages(struct intel_remapped_info *rem_info, + struct drm_i915_gem_object *obj) +{ + unsigned int size = intel_remapped_info_size(rem_info); + struct sg_table *st; + struct scatterlist *sg; + int ret = -ENOMEM; + int i; + + /* Allocate target SG list. */ + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, size, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + st->nents = 0; + sg = st->sgl; + + for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) { + sg = remap_pages(obj, rem_info->plane[i].offset, + rem_info->plane[i].width, rem_info->plane[i].height, + rem_info->plane[i].stride, st, sg); + } + + i915_sg_trim(st); + + return st; + +err_sg_alloc: + kfree(st); +err_st_alloc: + + DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! 
(%ux%u tiles, %u pages)\n", + obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size); + + return ERR_PTR(ret); +} + +static noinline struct sg_table * +intel_partial_pages(const struct i915_ggtt_view *view, + struct drm_i915_gem_object *obj) +{ + struct sg_table *st; + struct scatterlist *sg, *iter; + unsigned int count = view->partial.size; + unsigned int offset; + int ret = -ENOMEM; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, count, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); + GEM_BUG_ON(!iter); + + sg = st->sgl; + st->nents = 0; + do { + unsigned int len; + + len = min(iter->length - (offset << PAGE_SHIFT), + count << PAGE_SHIFT); + sg_set_page(sg, NULL, len, 0); + sg_dma_address(sg) = + sg_dma_address(iter) + (offset << PAGE_SHIFT); + sg_dma_len(sg) = len; + + st->nents++; + count -= len >> PAGE_SHIFT; + if (count == 0) { + sg_mark_end(sg); + i915_sg_trim(st); /* Drop any unused tail entries. */ + + return st; + } + + sg = __sg_next(sg); + iter = __sg_next(iter); + offset = 0; + } while (1); + +err_sg_alloc: + kfree(st); +err_st_alloc: + return ERR_PTR(ret); +} + +static int +i915_get_ggtt_vma_pages(struct i915_vma *vma) +{ + int ret; + + /* + * The vma->pages are only valid within the lifespan of the borrowed + * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so + * must be the vma->pages. A simple rule is that vma->pages must only + * be accessed when the obj->mm.pages are pinned. + */ + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); + + switch (vma->ggtt_view.type) { + default: + GEM_BUG_ON(vma->ggtt_view.type); + /* fall through */ + case I915_GGTT_VIEW_NORMAL: + vma->pages = vma->obj->mm.pages; + return 0; + + case I915_GGTT_VIEW_ROTATED: + vma->pages = + intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); + break; + + case I915_GGTT_VIEW_REMAPPED: + vma->pages = + intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); + break; + + case I915_GGTT_VIEW_PARTIAL: + vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); + break; + } + + ret = 0; + if (IS_ERR(vma->pages)) { + ret = PTR_ERR(vma->pages); + vma->pages = NULL; + DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", + vma->ggtt_view.type, ret); + } + return ret; +} diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index eec31e36aca7..51b8718513bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -7,6 +7,15 @@ #ifndef _INTEL_GPU_COMMANDS_H_ #define _INTEL_GPU_COMMANDS_H_ +#include <linux/bitops.h> + +/* + * Target address alignments required for GPU access e.g. + * MI_STORE_DWORD_IMM. 
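 * A hypothetical caller might assert this before emitting the command,
 * e.g. GEM_BUG_ON(!IS_ALIGNED(offset, alignof_dword)) ahead of an
 * MI_STORE_DWORD_IMM, or alignof_qword for a quadword-sized write.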
+ */ +#define alignof_dword 4 +#define alignof_qword 8 + /* * Instruction field definitions used by the command parser */ @@ -105,6 +114,7 @@ #define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ +#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */ #define MI_SEMAPHORE_POLL (1 << 15) #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) @@ -112,6 +122,8 @@ #define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) #define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) +#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5) +#define MI_SEMAPHORE_TOKEN_SHIFT 5 #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ @@ -125,7 +137,10 @@ * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ +#define MI_LRI_CS_MMIO (1<<19) #define MI_LRI_FORCE_POSTED (1<<12) +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) @@ -140,6 +155,7 @@ #define MI_FLUSH_DW_USE_PPGTT (0<<2) #define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) #define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) +#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1) #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) #define MI_BATCH_NON_SECURE (1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ @@ -149,7 +165,8 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) -#define MI_BATCH_RESOURCE_STREAMER (1<<10) +#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10) +#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/ /* * 3D instructions used by the kernel @@ -179,11 +196,12 @@ #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) #define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) -#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2)) +#define COLOR_BLT_CMD (2 << 29 | 0x40 << 22 | (5 - 2)) #define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22) -#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) +#define SRC_COPY_BLT_CMD (2 << 29 | 0x43 << 22) +#define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) +#define XY_SRC_COPY_BLT_CMD (2 << 29 | 0x53 << 22) +#define XY_MONO_SRC_COPY_IMM_BLT (2 << 29 | 0x71 << 22 | 5) #define BLT_WRITE_A (2<<20) #define BLT_WRITE_RGB (1<<20) #define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A) @@ -200,6 +218,8 @@ #define DISPLAY_PLANE_A (0<<20) #define DISPLAY_PLANE_B (1<<20) #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) /* gen11+ */ +#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) /* gen11+ */ #define PIPE_CONTROL_FLUSH_L3 (1<<27) #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ #define PIPE_CONTROL_MMIO_WRITE (1<<23) @@ -207,6 +227,7 @@ #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) #define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) +#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14) #define PIPE_CONTROL_QW_WRITE (1<<14) #define 
PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) @@ -214,7 +235,9 @@ #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ +#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ #define PIPE_CONTROL_NOTIFY (1<<8) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ #define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) @@ -225,6 +248,29 @@ #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) #define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ +#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2)) +/* Opcodes for MI_MATH_INSTR */ +#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0) +#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2) +#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2) +#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1) +#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1) +#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0) +#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0) +#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0) +#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0) +#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0) +#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2) +#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2) +/* Registers used as operands in MI_MATH_INSTR */ +#define MI_MATH_REG(x) (x) +#define MI_MATH_REG_SRCA 0x20 +#define MI_MATH_REG_SRCB 0x21 +#define MI_MATH_REG_ACCU 0x31 +#define MI_MATH_REG_ZF 0x32 +#define MI_MATH_REG_CF 0x33 + /* * Commands used only by the command parser */ @@ -241,7 +287,6 @@ #define MI_CLFLUSH MI_INSTR(0x27, 0) #define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) #define MI_REPORT_PERF_COUNT_GGTT (1<<0) -#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) #define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) #define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) #define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) @@ -276,4 +321,31 @@ #define COLOR_BLT ((0x2<<29)|(0x40<<22)) #define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) +/* + * Used to convert any address to canonical form. + * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, + * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the + * addresses to be in a canonical form: + * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct + * canonical form [63:48] == [47]." 
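 * For example, sign-extending from bit 47:
 *
 *	gen8_canonical_addr(0x0000800000000000ull) == 0xffff800000000000ull
 *	gen8_canonical_addr(0x00007ffffffff000ull) == 0x00007ffffffff000ull
 *
 * i.e. addresses with bit 47 clear pass through unchanged.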
+ */ +#define GEN8_HIGH_ADDRESS_BIT 47 +static inline u64 gen8_canonical_addr(u64 address) +{ + return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); +} + +static inline u64 gen8_noncanonical_addr(u64 address) +{ + return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); +} + +static inline u32 *__gen6_emit_bb_start(u32 *cs, u32 addr, unsigned int flags) +{ + *cs++ = MI_BATCH_BUFFER_START | flags; + *cs++ = addr; + + return cs; +} + #endif /* _INTEL_GPU_COMMANDS_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c new file mode 100644 index 000000000000..da2b6e2ae692 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -0,0 +1,658 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "debugfs_gt.h" +#include "i915_drv.h" +#include "intel_context.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_mocs.h" +#include "intel_rc6.h" +#include "intel_renderstate.h" +#include "intel_rps.h" +#include "intel_uncore.h" +#include "intel_pm.h" + +void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) +{ + gt->i915 = i915; + gt->uncore = &i915->uncore; + + spin_lock_init(>->irq_lock); + + INIT_LIST_HEAD(>->closed_vma); + spin_lock_init(>->closed_lock); + + intel_gt_init_reset(gt); + intel_gt_init_requests(gt); + intel_gt_init_timelines(gt); + intel_gt_pm_init_early(gt); + + intel_rps_init_early(>->rps); + intel_uc_init_early(>->uc); +} + +void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) +{ + gt->ggtt = ggtt; +} + +static void init_unused_ring(struct intel_gt *gt, u32 base) +{ + struct intel_uncore *uncore = gt->uncore; + + intel_uncore_write(uncore, RING_CTL(base), 0); + intel_uncore_write(uncore, RING_HEAD(base), 0); + intel_uncore_write(uncore, RING_TAIL(base), 0); + intel_uncore_write(uncore, RING_START(base), 0); +} + +static void init_unused_rings(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + if (IS_I830(i915)) { + init_unused_ring(gt, PRB1_BASE); + init_unused_ring(gt, SRB0_BASE); + init_unused_ring(gt, SRB1_BASE); + init_unused_ring(gt, SRB2_BASE); + init_unused_ring(gt, SRB3_BASE); + } else if (IS_GEN(i915, 2)) { + init_unused_ring(gt, SRB0_BASE); + init_unused_ring(gt, SRB1_BASE); + } else if (IS_GEN(i915, 3)) { + init_unused_ring(gt, PRB1_BASE); + init_unused_ring(gt, PRB2_BASE); + } +} + +int intel_gt_init_hw(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + int ret; + + gt->last_init_time = ktime_get(); + + /* Double layer security blanket, see i915_gem_init() */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9) + intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf)); + + if (IS_HASWELL(i915)) + intel_uncore_write(uncore, + MI_PREDICATE_RESULT_2, + IS_HSW_GT3(i915) ? + LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); + + /* Apply the GT workarounds... */ + intel_gt_apply_workarounds(gt); + /* ...and determine whether they are sticking. */ + intel_gt_verify_workarounds(gt, "init"); + + intel_gt_init_swizzling(gt); + + /* + * At least 830 can leave some of the unused rings + * "active" (ie. head != tail) after resume which + * will prevent c3 entry. Makes sure all unused rings + * are totally idle. 
+ */ + init_unused_rings(gt); + + ret = i915_ppgtt_init_hw(gt); + if (ret) { + DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); + goto out; + } + + /* We can't enable contexts until all firmware is loaded */ + ret = intel_uc_init_hw(>->uc); + if (ret) { + i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); + goto out; + } + + intel_mocs_init(gt); + +out: + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + return ret; +} + +static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) +{ + intel_uncore_rmw(uncore, reg, 0, set); +} + +static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) +{ + intel_uncore_rmw(uncore, reg, clr, 0); +} + +static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) +{ + intel_uncore_rmw(uncore, reg, 0, 0); +} + +static void gen8_clear_engine_error_register(struct intel_engine_cs *engine) +{ + GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); + GEN6_RING_FAULT_REG_POSTING_READ(engine); +} + +void +intel_gt_clear_error_registers(struct intel_gt *gt, + intel_engine_mask_t engine_mask) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + u32 eir; + + if (!IS_GEN(i915, 2)) + clear_register(uncore, PGTBL_ER); + + if (INTEL_GEN(i915) < 4) + clear_register(uncore, IPEIR(RENDER_RING_BASE)); + else + clear_register(uncore, IPEIR_I965); + + clear_register(uncore, EIR); + eir = intel_uncore_read(uncore, EIR); + if (eir) { + /* + * some errors might have become stuck, + * mask them. + */ + DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); + rmw_set(uncore, EMR, eir); + intel_uncore_write(uncore, GEN2_IIR, + I915_MASTER_ERROR_INTERRUPT); + } + + if (INTEL_GEN(i915) >= 12) { + rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID); + intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG); + } else if (INTEL_GEN(i915) >= 8) { + rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); + intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); + } else if (INTEL_GEN(i915) >= 6) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine_masked(engine, gt, engine_mask, id) + gen8_clear_engine_error_register(engine); + } +} + +static void gen6_check_faults(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 fault; + + for_each_engine(engine, gt, id) { + fault = GEN6_RING_FAULT_REG_READ(engine); + if (fault & RING_FAULT_VALID) { + DRM_DEBUG_DRIVER("Unexpected fault\n" + "\tAddr: 0x%08lx\n" + "\tAddress space: %s\n" + "\tSource ID: %d\n" + "\tType: %d\n", + fault & PAGE_MASK, + fault & RING_FAULT_GTTSEL_MASK ? 
+ "GGTT" : "PPGTT", + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); + } + } +} + +static void gen8_check_faults(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg; + u32 fault; + + if (INTEL_GEN(gt->i915) >= 12) { + fault_reg = GEN12_RING_FAULT_REG; + fault_data0_reg = GEN12_FAULT_TLB_DATA0; + fault_data1_reg = GEN12_FAULT_TLB_DATA1; + } else { + fault_reg = GEN8_RING_FAULT_REG; + fault_data0_reg = GEN8_FAULT_TLB_DATA0; + fault_data1_reg = GEN8_FAULT_TLB_DATA1; + } + + fault = intel_uncore_read(uncore, fault_reg); + if (fault & RING_FAULT_VALID) { + u32 fault_data0, fault_data1; + u64 fault_addr; + + fault_data0 = intel_uncore_read(uncore, fault_data0_reg); + fault_data1 = intel_uncore_read(uncore, fault_data1_reg); + + fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | + ((u64)fault_data0 << 12); + + DRM_DEBUG_DRIVER("Unexpected fault\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + upper_32_bits(fault_addr), + lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", + GEN8_RING_FAULT_ENGINE_ID(fault), + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); + } +} + +void intel_gt_check_and_clear_faults(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + /* From GEN8 onwards we only have one 'All Engine Fault Register' */ + if (INTEL_GEN(i915) >= 8) + gen8_check_faults(gt); + else if (INTEL_GEN(i915) >= 6) + gen6_check_faults(gt); + else + return; + + intel_gt_clear_error_registers(gt, ALL_ENGINES); +} + +void intel_gt_flush_ggtt_writes(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + intel_wakeref_t wakeref; + + /* + * No actual flushing is required for the GTT write domain for reads + * from the GTT domain. Writes to it "immediately" go to main memory + * as far as we know, so there's no chipset flush. It also doesn't + * land in the GPU render cache. + * + * However, we do have to enforce the order so that all writes through + * the GTT land before any writes to the device, such as updates to + * the GATT itself. + * + * We also have to wait a bit for the writes to land from the GTT. + * An uncached read (i.e. mmio) seems to be ideal for the round-trip + * timing. This issue has only been observed when switching quickly + * between GTT writes and CPU reads from inside the kernel on recent hw, + * and it appears to only affect discrete GTT blocks (i.e. on LLC + * system agents we cannot reproduce this behaviour, until Cannonlake + * that was!). 
+ */ + + wmb(); + + if (INTEL_INFO(gt->i915)->has_coherent_ggtt) + return; + + intel_gt_chipset_flush(gt); + + with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) { + unsigned long flags; + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_posting_read_fw(uncore, + RING_HEAD(RENDER_RING_BASE)); + spin_unlock_irqrestore(&uncore->lock, flags); + } +} + +void intel_gt_chipset_flush(struct intel_gt *gt) +{ + wmb(); + if (INTEL_GEN(gt->i915) < 6) + intel_gtt_chipset_flush(); +} + +void intel_gt_driver_register(struct intel_gt *gt) +{ + intel_rps_driver_register(>->rps); + + debugfs_gt_register(gt); +} + +static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) +{ + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + obj = i915_gem_object_create_stolen(i915, size); + if (IS_ERR(obj)) + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate scratch page\n"); + return PTR_ERR(obj); + } + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto err_unref; + + gt->scratch = i915_vma_make_unshrinkable(vma); + + return 0; + +err_unref: + i915_gem_object_put(obj); + return ret; +} + +static void intel_gt_fini_scratch(struct intel_gt *gt) +{ + i915_vma_unpin_and_release(>->scratch, 0); +} + +static struct i915_address_space *kernel_vm(struct intel_gt *gt) +{ + if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING) + return &i915_ppgtt_create(gt)->vm; + else + return i915_vm_get(>->ggtt->vm); +} + +static int __intel_context_flush_retire(struct intel_context *ce) +{ + struct intel_timeline *tl; + + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + intel_context_timeline_unlock(tl); + return 0; +} + +static int __engines_record_defaults(struct intel_gt *gt) +{ + struct i915_request *requests[I915_NUM_ENGINES] = {}; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * As we reset the gpu during very early sanitisation, the current + * register state on the GPU should reflect its defaults values. + * We load a context onto the hw (with restore-inhibit), then switch + * over to a second context to save that default register state. We + * can then prime every new context with that state so they all start + * from the same default HW values. + */ + + for_each_engine(engine, gt, id) { + struct intel_renderstate so; + struct intel_context *ce; + struct i915_request *rq; + + /* We must be able to switch to something! */ + GEM_BUG_ON(!engine->kernel_context); + + err = intel_renderstate_init(&so, engine); + if (err) + goto out; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + intel_context_put(ce); + goto out; + } + + err = intel_engine_emit_ctx_wa(rq); + if (err) + goto err_rq; + + err = intel_renderstate_emit(&so, rq); + if (err) + goto err_rq; + +err_rq: + requests[id] = i915_request_get(rq); + i915_request_add(rq); + intel_renderstate_fini(&so); + if (err) + goto out; + } + + /* Flush the default context image to memory, and enable powersaving. 
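 * Waiting for the GT to idle lets the requests submitted above retire,
 * which switches the hardware away from each context and so completes
 * the write-back of the default register state into each context image.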
*/ + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + err = -EIO; + goto out; + } + + for (id = 0; id < ARRAY_SIZE(requests); id++) { + struct i915_request *rq; + struct i915_vma *state; + void *vaddr; + + rq = requests[id]; + if (!rq) + continue; + + GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); + state = rq->context->state; + if (!state) + continue; + + /* Serialise with retirement on another CPU */ + GEM_BUG_ON(!i915_request_completed(rq)); + err = __intel_context_flush_retire(rq->context); + if (err) + goto out; + + /* We want to be able to unbind the state from the GGTT */ + GEM_BUG_ON(intel_context_is_pinned(rq->context)); + + /* + * As we will hold a reference to the logical state, it will + * not be torn down with the context, and importantly the + * object will hold onto its vma (making it possible for a + * stray GTT write to corrupt our defaults). Unmap the vma + * from the GTT to prevent such accidents and reclaim the + * space. + */ + err = i915_vma_unbind(state); + if (err) + goto out; + + i915_gem_object_lock(state->obj); + err = i915_gem_object_set_to_cpu_domain(state->obj, false); + i915_gem_object_unlock(state->obj); + if (err) + goto out; + + i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC); + + /* Check we can acquire the image of the context state */ + vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out; + } + + rq->engine->default_state = i915_gem_object_get(state->obj); + i915_gem_object_unpin_map(state->obj); + } + +out: + /* + * If we have to abandon now, we expect the engines to be idle + * and ready to be torn-down. The quickest way we can accomplish + * this is by declaring ourselves wedged. + */ + if (err) + intel_gt_set_wedged(gt); + + for (id = 0; id < ARRAY_SIZE(requests); id++) { + struct intel_context *ce; + struct i915_request *rq; + + rq = requests[id]; + if (!rq) + continue; + + ce = rq->context; + i915_request_put(rq); + intel_context_put(ce); + } + return err; +} + +static int __engines_verify_workarounds(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return 0; + + for_each_engine(engine, gt, id) { + if (intel_engine_verify_workarounds(engine, "load")) + err = -EIO; + } + + return err; +} + +static void __intel_gt_disable(struct intel_gt *gt) +{ + intel_gt_set_wedged_on_init(gt); + + intel_gt_suspend_prepare(gt); + intel_gt_suspend_late(gt); + + GEM_BUG_ON(intel_gt_pm_is_awake(gt)); +} + +int intel_gt_init(struct intel_gt *gt) +{ + int err; + + err = i915_inject_probe_error(gt->i915, -ENODEV); + if (err) + return err; + + /* + * This is just a security blanket to placate dragons. + * On some systems, we very sporadically observe that the first TLBs + * used by the CS may be stale, despite us poking the TLB reset. If + * we hold the forcewake during initialisation these problems + * just magically go away. + */ + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? 
SZ_256K : SZ_4K); + if (err) + goto out_fw; + + intel_gt_pm_init(gt); + + gt->vm = kernel_vm(gt); + if (!gt->vm) { + err = -ENOMEM; + goto err_pm; + } + + err = intel_engines_init(gt); + if (err) + goto err_engines; + + intel_uc_init(>->uc); + + err = intel_gt_resume(gt); + if (err) + goto err_uc_init; + + err = __engines_record_defaults(gt); + if (err) + goto err_gt; + + err = __engines_verify_workarounds(gt); + if (err) + goto err_gt; + + err = i915_inject_probe_error(gt->i915, -EIO); + if (err) + goto err_gt; + + goto out_fw; +err_gt: + __intel_gt_disable(gt); + intel_uc_fini_hw(>->uc); +err_uc_init: + intel_uc_fini(>->uc); +err_engines: + intel_engines_release(gt); + i915_vm_put(fetch_and_zero(>->vm)); +err_pm: + intel_gt_pm_fini(gt); + intel_gt_fini_scratch(gt); +out_fw: + if (err) + intel_gt_set_wedged_on_init(gt); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + return err; +} + +void intel_gt_driver_remove(struct intel_gt *gt) +{ + __intel_gt_disable(gt); + + intel_uc_fini_hw(>->uc); + intel_uc_fini(>->uc); + + intel_engines_release(gt); +} + +void intel_gt_driver_unregister(struct intel_gt *gt) +{ + intel_rps_driver_unregister(>->rps); +} + +void intel_gt_driver_release(struct intel_gt *gt) +{ + struct i915_address_space *vm; + + vm = fetch_and_zero(>->vm); + if (vm) /* FIXME being called twice on error paths :( */ + i915_vm_put(vm); + + intel_gt_pm_fini(gt); + intel_gt_fini_scratch(gt); +} + +void intel_gt_driver_late_release(struct intel_gt *gt) +{ + intel_uc_driver_late_release(>->uc); + intel_gt_fini_requests(gt); + intel_gt_fini_reset(gt); + intel_gt_fini_timelines(gt); + intel_engines_free(gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h new file mode 100644 index 000000000000..1dac441cb8f4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_GT__ +#define __INTEL_GT__ + +#include "intel_engine_types.h" +#include "intel_gt_types.h" +#include "intel_reset.h" + +struct drm_i915_private; + +#define GT_TRACE(gt, fmt, ...) 
do { \ + const struct intel_gt *gt__ __maybe_unused = (gt); \ + GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ + ##__VA_ARGS__); \ +} while (0) + +static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) +{ + return container_of(uc, struct intel_gt, uc); +} + +static inline struct intel_gt *guc_to_gt(struct intel_guc *guc) +{ + return container_of(guc, struct intel_gt, uc.guc); +} + +static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) +{ + return container_of(huc, struct intel_gt, uc.huc); +} + +void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); +void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); +int __must_check intel_gt_init_hw(struct intel_gt *gt); +int intel_gt_init(struct intel_gt *gt); +void intel_gt_driver_register(struct intel_gt *gt); + +void intel_gt_driver_unregister(struct intel_gt *gt); +void intel_gt_driver_remove(struct intel_gt *gt); +void intel_gt_driver_release(struct intel_gt *gt); + +void intel_gt_driver_late_release(struct intel_gt *gt); + +void intel_gt_check_and_clear_faults(struct intel_gt *gt); +void intel_gt_clear_error_registers(struct intel_gt *gt, + intel_engine_mask_t engine_mask); + +void intel_gt_flush_ggtt_writes(struct intel_gt *gt); +void intel_gt_chipset_flush(struct intel_gt *gt); + +static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, + enum intel_gt_scratch_field field) +{ + return i915_ggtt_offset(gt->scratch) + field; +} + +static inline bool intel_gt_is_wedged(const struct intel_gt *gt) +{ + return __intel_reset_failed(>->reset); +} + +static inline bool intel_gt_has_init_error(const struct intel_gt *gt) +{ + return test_bit(I915_WEDGED_ON_INIT, >->reset.flags); +} + +#endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c new file mode 100644 index 000000000000..f796bdf1ed30 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -0,0 +1,456 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/sched/clock.h> + +#include "i915_drv.h" +#include "i915_irq.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_uncore.h" +#include "intel_rps.h" + +static void guc_irq_handler(struct intel_guc *guc, u16 iir) +{ + if (iir & GUC_INTR_GUC2HOST) + intel_guc_to_host_event_handler(guc); +} + +static void +cs_irq_handler(struct intel_engine_cs *engine, u32 iir) +{ + bool tasklet = false; + + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) + tasklet = true; + + if (iir & GT_RENDER_USER_INTERRUPT) { + intel_engine_signal_breadcrumbs(engine); + tasklet |= intel_engine_needs_breadcrumb_tasklet(engine); + } + + if (tasklet) + tasklet_hi_schedule(&engine->execlists.tasklet); +} + +static u32 +gen11_gt_engine_identity(struct intel_gt *gt, + const unsigned int bank, const unsigned int bit) +{ + void __iomem * const regs = gt->uncore->regs; + u32 timeout_ts; + u32 ident; + + lockdep_assert_held(>->irq_lock); + + raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit)); + + /* + * NB: Specs do not specify how long to spin wait, + * so we do ~100us as an educated guess. 
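An illustrative aside (not part of this patch): the spin-wait that follows bounds itself by treating local_clock(), which returns nanoseconds, shifted right by 10 (roughly a divide by 1000) as an approximate microsecond counter, and gives up after about 100us. A self-contained userspace sketch of the same bounded-poll shape, with the register read abstracted behind a caller-supplied callback:

#include <stdint.h>
#include <time.h>

static uint64_t now_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000u + (uint64_t)ts.tv_nsec / 1000u;
}

/* Poll until the valid bit appears or ~100us elapse; the caller re-checks
 * the valid bit in the returned value to distinguish success from timeout. */
static uint32_t poll_for_valid(uint32_t (*read_status)(void), uint32_t valid_bit)
{
	uint64_t deadline = now_us() + 100;
	uint32_t val;

	do {
		val = read_status();
	} while (!(val & valid_bit) && now_us() < deadline);

	return val;
}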
+ */ + timeout_ts = (local_clock() >> 10) + 100; + do { + ident = raw_reg_read(regs, GEN11_INTR_IDENTITY_REG(bank)); + } while (!(ident & GEN11_INTR_DATA_VALID) && + !time_after32(local_clock() >> 10, timeout_ts)); + + if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) { + DRM_ERROR("INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", + bank, bit, ident); + return 0; + } + + raw_reg_write(regs, GEN11_INTR_IDENTITY_REG(bank), + GEN11_INTR_DATA_VALID); + + return ident; +} + +static void +gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, + const u16 iir) +{ + if (instance == OTHER_GUC_INSTANCE) + return guc_irq_handler(>->uc.guc, iir); + + if (instance == OTHER_GTPM_INSTANCE) + return gen11_rps_irq_handler(>->rps, iir); + + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", + instance, iir); +} + +static void +gen11_engine_irq_handler(struct intel_gt *gt, const u8 class, + const u8 instance, const u16 iir) +{ + struct intel_engine_cs *engine; + + if (instance <= MAX_ENGINE_INSTANCE) + engine = gt->engine_class[class][instance]; + else + engine = NULL; + + if (likely(engine)) + return cs_irq_handler(engine, iir); + + WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", + class, instance); +} + +static void +gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity) +{ + const u8 class = GEN11_INTR_ENGINE_CLASS(identity); + const u8 instance = GEN11_INTR_ENGINE_INSTANCE(identity); + const u16 intr = GEN11_INTR_ENGINE_INTR(identity); + + if (unlikely(!intr)) + return; + + if (class <= COPY_ENGINE_CLASS) + return gen11_engine_irq_handler(gt, class, instance, intr); + + if (class == OTHER_CLASS) + return gen11_other_irq_handler(gt, instance, intr); + + WARN_ONCE(1, "unknown interrupt class=0x%x, instance=0x%x, intr=0x%x\n", + class, instance, intr); +} + +static void +gen11_gt_bank_handler(struct intel_gt *gt, const unsigned int bank) +{ + void __iomem * const regs = gt->uncore->regs; + unsigned long intr_dw; + unsigned int bit; + + lockdep_assert_held(>->irq_lock); + + intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); + + for_each_set_bit(bit, &intr_dw, 32) { + const u32 ident = gen11_gt_engine_identity(gt, bank, bit); + + gen11_gt_identity_handler(gt, ident); + } + + /* Clear must be after shared has been served for engine */ + raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw); +} + +void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl) +{ + unsigned int bank; + + spin_lock(>->irq_lock); + + for (bank = 0; bank < 2; bank++) { + if (master_ctl & GEN11_GT_DW_IRQ(bank)) + gen11_gt_bank_handler(gt, bank); + } + + spin_unlock(>->irq_lock); +} + +bool gen11_gt_reset_one_iir(struct intel_gt *gt, + const unsigned int bank, const unsigned int bit) +{ + void __iomem * const regs = gt->uncore->regs; + u32 dw; + + lockdep_assert_held(>->irq_lock); + + dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); + if (dw & BIT(bit)) { + /* + * According to the BSpec, DW_IIR bits cannot be cleared without + * first servicing the Selector & Shared IIR registers. + */ + gen11_gt_engine_identity(gt, bank, bit); + + /* + * We locked GT INT DW by reading it. If we want to (try + * to) recover from this successfully, we need to clear + * our bit, otherwise we are locking the register for + * everybody. + */ + raw_reg_write(regs, GEN11_GT_INTR_DW(bank), BIT(bit)); + + return true; + } + + return false; +} + +void gen11_gt_irq_reset(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + /* Disable RCS, BCS, VCS and VECS class engines. 
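An illustrative aside (not part of this patch): gen11_gt_bank_handler() above walks the set bits of GT_INTR_DW with for_each_set_bit() and dispatches each one. A self-contained C version of the same walk, using the GCC/Clang __builtin_ctz() intrinsic in place of the kernel helper; the handler callback is supplied by the caller:

#include <stdint.h>

/* Visit every set bit of a 32-bit status word, lowest bit first. */
static void walk_status_bits(uint32_t dw, void (*handle)(unsigned int bit))
{
	while (dw) {
		unsigned int bit = (unsigned int)__builtin_ctz(dw); /* lowest set bit */

		handle(bit);
		dw &= dw - 1; /* clear the bit just handled */
	}
}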
*/ + intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0); + + /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ + intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~0); + intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~0); + intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~0); + intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~0); + intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~0); + + intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0); +} + +void gen11_gt_irq_postinstall(struct intel_gt *gt) +{ + const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT; + struct intel_uncore *uncore = gt->uncore; + const u32 dmask = irqs << 16 | irqs; + const u32 smask = irqs << 16; + + BUILD_BUG_ON(irqs & 0xffff0000); + + /* Enable RCS, BCS, VCS and VECS class interrupts. */ + intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask); + intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask); + + /* Unmask irqs on RCS, BCS, VCS and VECS engines. */ + intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask); + intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~smask); + intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~dmask); + intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~dmask); + intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~dmask); + + /* + * RPS interrupts will get enabled/disabled on demand when RPS itself + * is enabled/disabled. + */ + gt->pm_ier = 0x0; + gt->pm_imr = ~gt->pm_ier; + intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); + + /* Same thing for GuC interrupts */ + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0); +} + +void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) +{ + if (gt_iir & GT_RENDER_USER_INTERRUPT) + intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]); + if (gt_iir & ILK_BSD_USER_INTERRUPT) + intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]); +} + +static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) +{ + if (!HAS_L3_DPF(gt->i915)) + return; + + spin_lock(>->irq_lock); + gen5_gt_disable_irq(gt, GT_PARITY_ERROR(gt->i915)); + spin_unlock(>->irq_lock); + + if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1) + gt->i915->l3_parity.which_slice |= 1 << 1; + + if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT) + gt->i915->l3_parity.which_slice |= 1 << 0; + + schedule_work(>->i915->l3_parity.error_work); +} + +void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) +{ + if (gt_iir & GT_RENDER_USER_INTERRUPT) + intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]); + if (gt_iir & GT_BSD_USER_INTERRUPT) + intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]); + if (gt_iir & GT_BLT_USER_INTERRUPT) + intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]); + + if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | + GT_BSD_CS_ERROR_INTERRUPT | + GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) + DRM_DEBUG("Command parser error, gt_iir 0x%08x\n", gt_iir); + + if (gt_iir & GT_PARITY_ERROR(gt->i915)) + gen7_parity_error_irq_handler(gt, gt_iir); +} + +void gen8_gt_irq_ack(struct intel_gt *gt, u32 
master_ctl, u32 gt_iir[4]) +{ + void __iomem * const regs = gt->uncore->regs; + + if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { + gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0)); + if (likely(gt_iir[0])) + raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]); + } + + if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { + gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1)); + if (likely(gt_iir[1])) + raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]); + } + + if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { + gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2)); + if (likely(gt_iir[2])) + raw_reg_write(regs, GEN8_GT_IIR(2), gt_iir[2]); + } + + if (master_ctl & GEN8_GT_VECS_IRQ) { + gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3)); + if (likely(gt_iir[3])) + raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]); + } +} + +void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]) +{ + if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { + cs_irq_handler(gt->engine_class[RENDER_CLASS][0], + gt_iir[0] >> GEN8_RCS_IRQ_SHIFT); + cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0], + gt_iir[0] >> GEN8_BCS_IRQ_SHIFT); + } + + if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { + cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0], + gt_iir[1] >> GEN8_VCS0_IRQ_SHIFT); + cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1], + gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT); + } + + if (master_ctl & GEN8_GT_VECS_IRQ) { + cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0], + gt_iir[3] >> GEN8_VECS_IRQ_SHIFT); + } + + if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { + gen6_rps_irq_handler(>->rps, gt_iir[2]); + guc_irq_handler(>->uc.guc, gt_iir[2] >> 16); + } +} + +void gen8_gt_irq_reset(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + GEN8_IRQ_RESET_NDX(uncore, GT, 0); + GEN8_IRQ_RESET_NDX(uncore, GT, 1); + GEN8_IRQ_RESET_NDX(uncore, GT, 2); + GEN8_IRQ_RESET_NDX(uncore, GT, 3); +} + +void gen8_gt_irq_postinstall(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + /* These are interrupts we'll toggle with the ring mask register */ + u32 gt_interrupts[] = { + (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT), + + (GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT), + + 0, + + (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT) + }; + + gt->pm_ier = 0x0; + gt->pm_imr = ~gt->pm_ier; + GEN8_IRQ_INIT_NDX(uncore, GT, 0, ~gt_interrupts[0], gt_interrupts[0]); + GEN8_IRQ_INIT_NDX(uncore, GT, 1, ~gt_interrupts[1], gt_interrupts[1]); + /* + * RPS interrupts will get enabled/disabled on demand when RPS itself + * is enabled/disabled. Same wil be the case for GuC interrupts. 
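An illustrative aside (not part of this patch): throughout these postinstall helpers the enable register (IER) and the mask register (IMR) are kept complementary, so an event is delivered only if its IER bit is set and its IMR bit is clear, which is why the init macros are passed ~gt_interrupts[n] as the mask and gt_interrupts[n] as the enable. A minimal shadow-register model of that convention; the struct and field names are invented for illustration:

#include <stdint.h>

struct irq_shadow {
	uint32_t ier; /* events allowed to raise an interrupt (1 == enabled) */
	uint32_t imr; /* events masked at delivery time (1 == masked) */
};

static void irq_enable(struct irq_shadow *s, uint32_t events)
{
	s->ier |= events;
	s->imr &= ~events; /* unmasked bits are written as 0 */
}

static void irq_disable(struct irq_shadow *s, uint32_t events)
{
	s->ier &= ~events;
	s->imr |= events;
}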
+ */ + GEN8_IRQ_INIT_NDX(uncore, GT, 2, gt->pm_imr, gt->pm_ier); + GEN8_IRQ_INIT_NDX(uncore, GT, 3, ~gt_interrupts[3], gt_interrupts[3]); +} + +static void gen5_gt_update_irq(struct intel_gt *gt, + u32 interrupt_mask, + u32 enabled_irq_mask) +{ + lockdep_assert_held(>->irq_lock); + + GEM_BUG_ON(enabled_irq_mask & ~interrupt_mask); + + gt->gt_imr &= ~interrupt_mask; + gt->gt_imr |= (~enabled_irq_mask & interrupt_mask); + intel_uncore_write(gt->uncore, GTIMR, gt->gt_imr); +} + +void gen5_gt_enable_irq(struct intel_gt *gt, u32 mask) +{ + gen5_gt_update_irq(gt, mask, mask); + intel_uncore_posting_read_fw(gt->uncore, GTIMR); +} + +void gen5_gt_disable_irq(struct intel_gt *gt, u32 mask) +{ + gen5_gt_update_irq(gt, mask, 0); +} + +void gen5_gt_irq_reset(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + GEN3_IRQ_RESET(uncore, GT); + if (INTEL_GEN(gt->i915) >= 6) + GEN3_IRQ_RESET(uncore, GEN6_PM); +} + +void gen5_gt_irq_postinstall(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + u32 pm_irqs = 0; + u32 gt_irqs = 0; + + gt->gt_imr = ~0; + if (HAS_L3_DPF(gt->i915)) { + /* L3 parity interrupt is always unmasked. */ + gt->gt_imr = ~GT_PARITY_ERROR(gt->i915); + gt_irqs |= GT_PARITY_ERROR(gt->i915); + } + + gt_irqs |= GT_RENDER_USER_INTERRUPT; + if (IS_GEN(gt->i915, 5)) + gt_irqs |= ILK_BSD_USER_INTERRUPT; + else + gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT; + + GEN3_IRQ_INIT(uncore, GT, gt->gt_imr, gt_irqs); + + if (INTEL_GEN(gt->i915) >= 6) { + /* + * RPS interrupts will get enabled/disabled on demand when RPS + * itself is enabled/disabled. + */ + if (HAS_ENGINE(gt->i915, VECS0)) { + pm_irqs |= PM_VEBOX_USER_INTERRUPT; + gt->pm_ier |= PM_VEBOX_USER_INTERRUPT; + } + + gt->pm_imr = 0xffffffff; + GEN3_IRQ_INIT(uncore, GEN6_PM, gt->pm_imr, pm_irqs); + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h new file mode 100644 index 000000000000..8f37593712c9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_IRQ_H +#define INTEL_GT_IRQ_H + +#include <linux/types.h> + +struct intel_gt; + +#define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \ + GEN8_GT_BCS_IRQ | \ + GEN8_GT_VCS0_IRQ | \ + GEN8_GT_VCS1_IRQ | \ + GEN8_GT_VECS_IRQ | \ + GEN8_GT_PM_IRQ | \ + GEN8_GT_GUC_IRQ) + +void gen11_gt_irq_reset(struct intel_gt *gt); +void gen11_gt_irq_postinstall(struct intel_gt *gt); +void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl); + +bool gen11_gt_reset_one_iir(struct intel_gt *gt, + const unsigned int bank, + const unsigned int bit); + +void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir); + +void gen5_gt_irq_postinstall(struct intel_gt *gt); +void gen5_gt_irq_reset(struct intel_gt *gt); +void gen5_gt_disable_irq(struct intel_gt *gt, u32 mask); +void gen5_gt_enable_irq(struct intel_gt *gt, u32 mask); + +void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir); + +void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]); +void gen8_gt_irq_reset(struct intel_gt *gt); +void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]); +void gen8_gt_irq_postinstall(struct intel_gt *gt); + +#endif /* INTEL_GT_IRQ_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 9f8f7f54191f..d1c2f034296a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -4,23 +4,48 @@ * 
Copyright © 2019 Intel Corporation */ +#include <linux/suspend.h> + #include "i915_drv.h" +#include "i915_globals.h" +#include "i915_params.h" +#include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_llc.h" #include "intel_pm.h" +#include "intel_rc6.h" +#include "intel_rps.h" #include "intel_wakeref.h" -static void pm_notify(struct drm_i915_private *i915, int state) +static void user_forcewake(struct intel_gt *gt, bool suspend) { - blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915); + int count = atomic_read(>->user_wakeref); + + /* Inside suspend/resume so single threaded, no races to worry about. */ + if (likely(!count)) + return; + + intel_gt_pm_get(gt); + if (suspend) { + GEM_BUG_ON(count > atomic_read(>->wakeref.count)); + atomic_sub(count, >->wakeref.count); + } else { + atomic_add(count, >->wakeref.count); + } + intel_gt_pm_put(gt); } -static int intel_gt_unpark(struct intel_wakeref *wf) +static int __gt_unpark(struct intel_wakeref *wf) { - struct drm_i915_private *i915 = - container_of(wf, typeof(*i915), gt.wakeref); + struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); + struct drm_i915_private *i915 = gt->i915; + + GT_TRACE(gt, "\n"); - GEM_TRACE("\n"); + i915_globals_unpark(); /* * It seems that the DMC likes to transition between the DC states a lot @@ -33,97 +58,132 @@ static int intel_gt_unpark(struct intel_wakeref *wf) * Work around it by grabbing a GT IRQ power domain whilst there is any * GT activity, preventing any DC state transitions. */ - i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - GEM_BUG_ON(!i915->gt.awake); - - intel_enable_gt_powersave(i915); - - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); + gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + GEM_BUG_ON(!gt->awake); + intel_rc6_unpark(>->rc6); + intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); - i915_queue_hangcheck(i915); - - pm_notify(i915, INTEL_GT_UNPARK); + intel_gt_unpark_requests(gt); return 0; } -void intel_gt_pm_get(struct drm_i915_private *i915) +static int __gt_park(struct intel_wakeref *wf) { - intel_wakeref_get(&i915->runtime_pm, &i915->gt.wakeref, intel_gt_unpark); -} + struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); + intel_wakeref_t wakeref = fetch_and_zero(>->awake); + struct drm_i915_private *i915 = gt->i915; -static int intel_gt_park(struct intel_wakeref *wf) -{ - struct drm_i915_private *i915 = - container_of(wf, typeof(*i915), gt.wakeref); - intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake); + GT_TRACE(gt, "\n"); - GEM_TRACE("\n"); - - pm_notify(i915, INTEL_GT_PARK); + intel_gt_park_requests(gt); + i915_vma_parked(gt); i915_pmu_gt_parked(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_idle(i915); + intel_rps_park(>->rps); + intel_rc6_park(>->rc6); + + /* Everything switched off, flush any residual interrupt just in case */ + intel_synchronize_irq(i915); + /* Defer dropping the display power well for 100ms, it's slow! 
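An illustrative aside (not part of this patch): __gt_unpark() and __gt_park() above are the first-get and last-put callbacks of the GT wakeref, so the hardware is woken on the 0 -> 1 reference transition and parked again when the count returns to zero. A deliberately single-threaded toy of that pattern (the real intel_wakeref uses atomics and can sleep on the final put); every name below is invented for illustration:

struct toy_wakeref {
	int count;                              /* not atomic: toy only */
	void (*unpark)(struct toy_wakeref *wf); /* runs on the 0 -> 1 edge */
	void (*park)(struct toy_wakeref *wf);   /* runs on the 1 -> 0 edge */
};

static void toy_wakeref_get(struct toy_wakeref *wf)
{
	if (wf->count++ == 0)
		wf->unpark(wf);
}

static void toy_wakeref_put(struct toy_wakeref *wf)
{
	if (--wf->count == 0)
		wf->park(wf);
}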
*/ GEM_BUG_ON(!wakeref); - intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); + + i915_globals_park(); return 0; } -void intel_gt_pm_put(struct drm_i915_private *i915) +static const struct intel_wakeref_ops wf_ops = { + .get = __gt_unpark, + .put = __gt_park, +}; + +void intel_gt_pm_init_early(struct intel_gt *gt) { - intel_wakeref_put(&i915->runtime_pm, &i915->gt.wakeref, intel_gt_park); + intel_wakeref_init(>->wakeref, gt->uncore->rpm, &wf_ops); } -void intel_gt_pm_init(struct drm_i915_private *i915) +void intel_gt_pm_init(struct intel_gt *gt) { - intel_wakeref_init(&i915->gt.wakeref); - BLOCKING_INIT_NOTIFIER_HEAD(&i915->gt.pm_notifications); + /* + * Enabling power-management should be "self-healing". If we cannot + * enable a feature, simply leave it disabled with a notice to the + * user. + */ + intel_rc6_init(>->rc6); + intel_rps_init(>->rps); } -static bool reset_engines(struct drm_i915_private *i915) +static bool reset_engines(struct intel_gt *gt) { - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) + if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) return false; - return intel_gpu_reset(i915, ALL_ENGINES) == 0; + return __intel_gt_reset(gt, ALL_ENGINES) == 0; } -/** - * intel_gt_sanitize: called after the GPU has lost power - * @i915: the i915 device - * @force: ignore a failed reset and sanitize engine state anyway - * - * Anytime we reset the GPU, either with an explicit GPU reset or through a - * PCI power cycle, the GPU loses state and we must reset our state tracking - * to match. Note that calling intel_gt_sanitize() if the GPU has not - * been reset results in much confusion! - */ -void intel_gt_sanitize(struct drm_i915_private *i915, bool force) +static void gt_sanitize(struct intel_gt *gt, bool force) { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; - GEM_TRACE("\n"); + GT_TRACE(gt, "force:%s", yesno(force)); - if (!reset_engines(i915) && !force) - return; + /* Use a raw wakeref to avoid calling intel_display_power_get early */ + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); - for_each_engine(engine, i915, id) - intel_engine_reset(engine, false); + /* + * As we have just resumed the machine and woken the device up from + * deep PCI sleep (presumably D3_cold), assume the HW has been reset + * back to defaults, recovering from whatever wedged state we left it + * in and so worth trying to use the device once more. 
+ */ + if (intel_gt_is_wedged(gt)) + intel_gt_unset_wedged(gt); + + intel_uc_sanitize(>->uc); + + for_each_engine(engine, gt, id) + if (engine->reset.prepare) + engine->reset.prepare(engine); + + intel_uc_reset_prepare(>->uc); + + if (reset_engines(gt) || force) { + for_each_engine(engine, gt, id) + __intel_engine_reset(engine, false); + } + + for_each_engine(engine, gt, id) + if (engine->reset.finish) + engine->reset.finish(engine); + + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); +} + +void intel_gt_pm_fini(struct intel_gt *gt) +{ + intel_rc6_fini(>->rc6); } -int intel_gt_resume(struct drm_i915_private *i915) +int intel_gt_resume(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - int err = 0; + int err; + + err = intel_gt_has_init_error(gt); + if (err) + return err; + + GT_TRACE(gt, "\n"); /* * After resume, we may need to poke into the pinned kernel @@ -131,32 +191,143 @@ int intel_gt_resume(struct drm_i915_private *i915) * Only the kernel contexts should remain pinned over suspend, * allowing us to fixup the user contexts on their first pin. */ - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) { - struct intel_context *ce; + intel_gt_pm_get(gt); + + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + intel_rc6_sanitize(>->rc6); + gt_sanitize(gt, true); + if (intel_gt_is_wedged(gt)) { + err = -EIO; + goto out_fw; + } - intel_engine_pm_get(engine); + /* Only when the HW is re-initialised, can we replay the requests */ + err = intel_gt_init_hw(gt); + if (err) { + dev_err(gt->i915->drm.dev, + "Failed to initialize GPU, declaring it wedged!\n"); + goto err_wedged; + } - ce = engine->kernel_context; - if (ce) - ce->ops->reset(ce); + intel_rps_enable(>->rps); + intel_llc_enable(>->llc); - ce = engine->preempt_context; - if (ce) - ce->ops->reset(ce); + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); engine->serial++; /* kernel context lost */ err = engine->resume(engine); intel_engine_pm_put(engine); if (err) { - dev_err(i915->drm.dev, + dev_err(gt->i915->drm.dev, "Failed to restart %s (%d)\n", engine->name, err); - break; + goto err_wedged; } } - intel_gt_pm_put(i915); + intel_rc6_enable(>->rc6); + + intel_uc_resume(>->uc); + + user_forcewake(gt, false); + +out_fw: + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + intel_gt_pm_put(gt); return err; + +err_wedged: + intel_gt_set_wedged(gt); + goto out_fw; +} + +static void wait_for_suspend(struct intel_gt *gt) +{ + if (!intel_gt_pm_is_awake(gt)) + return; + + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + intel_gt_set_wedged(gt); + intel_gt_retire_requests(gt); + } + + intel_gt_pm_wait_for_idle(gt); +} + +void intel_gt_suspend_prepare(struct intel_gt *gt) +{ + user_forcewake(gt, true); + wait_for_suspend(gt); + + intel_uc_suspend(>->uc); +} + +static suspend_state_t pm_suspend_target(void) +{ +#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP) + return pm_suspend_target_state; +#else + return PM_SUSPEND_TO_IDLE; +#endif +} + +void intel_gt_suspend_late(struct intel_gt *gt) +{ + intel_wakeref_t wakeref; + + /* We expect to be idle already; but also want to be independent */ + wait_for_suspend(gt); + + if (is_mock_gt(gt)) + return; + + GEM_BUG_ON(gt->awake); + + /* + * On disabling the device, we want to turn off HW access to memory + * that we no longer own. 
+ * + * However, not all suspend-states disable the device. S0 (s2idle) + * is effectively runtime-suspend, the device is left powered on + * but needs to be put into a low power state. We need to keep + * powermanagement enabled, but we also retain system state and so + * it remains safe to keep on using our allocated memory. + */ + if (pm_suspend_target() == PM_SUSPEND_TO_IDLE) + return; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) { + intel_rps_disable(>->rps); + intel_rc6_disable(>->rc6); + intel_llc_disable(>->llc); + } + + gt_sanitize(gt, false); + + GT_TRACE(gt, "\n"); +} + +void intel_gt_runtime_suspend(struct intel_gt *gt) +{ + intel_uc_runtime_suspend(>->uc); + + GT_TRACE(gt, "\n"); } + +int intel_gt_runtime_resume(struct intel_gt *gt) +{ + GT_TRACE(gt, "\n"); + intel_gt_init_swizzling(gt); + + return intel_uc_runtime_resume(>->uc); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_gt_pm.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 53f342b20181..60f0e2fbe55c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -9,19 +9,58 @@ #include <linux/types.h> -struct drm_i915_private; +#include "intel_gt_types.h" +#include "intel_wakeref.h" -enum { - INTEL_GT_UNPARK, - INTEL_GT_PARK, -}; +static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt) +{ + return intel_wakeref_is_active(>->wakeref); +} -void intel_gt_pm_get(struct drm_i915_private *i915); -void intel_gt_pm_put(struct drm_i915_private *i915); +static inline void intel_gt_pm_get(struct intel_gt *gt) +{ + intel_wakeref_get(>->wakeref); +} -void intel_gt_pm_init(struct drm_i915_private *i915); +static inline void __intel_gt_pm_get(struct intel_gt *gt) +{ + __intel_wakeref_get(>->wakeref); +} -void intel_gt_sanitize(struct drm_i915_private *i915, bool force); -int intel_gt_resume(struct drm_i915_private *i915); +static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt) +{ + return intel_wakeref_get_if_active(>->wakeref); +} + +static inline void intel_gt_pm_put(struct intel_gt *gt) +{ + intel_wakeref_put(>->wakeref); +} + +static inline void intel_gt_pm_put_async(struct intel_gt *gt) +{ + intel_wakeref_put_async(>->wakeref); +} + +static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) +{ + return intel_wakeref_wait_for_idle(>->wakeref); +} + +void intel_gt_pm_init_early(struct intel_gt *gt); +void intel_gt_pm_init(struct intel_gt *gt); +void intel_gt_pm_fini(struct intel_gt *gt); + +void intel_gt_suspend_prepare(struct intel_gt *gt); +void intel_gt_suspend_late(struct intel_gt *gt); +int intel_gt_resume(struct intel_gt *gt); + +void intel_gt_runtime_suspend(struct intel_gt *gt); +int intel_gt_runtime_resume(struct intel_gt *gt); + +static inline bool is_mock_gt(const struct intel_gt *gt) +{ + return I915_SELFTEST_ONLY(gt->awake == -ENODEV); +} #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c new file mode 100644 index 000000000000..babe866126d7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c @@ -0,0 +1,109 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_reg.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" + +static void write_pm_imr(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + u32 mask = gt->pm_imr; + 
i915_reg_t reg; + + if (INTEL_GEN(i915) >= 11) { + reg = GEN11_GPM_WGBOXPERF_INTR_MASK; + mask <<= 16; /* pm is in upper half */ + } else if (INTEL_GEN(i915) >= 8) { + reg = GEN8_GT_IMR(2); + } else { + reg = GEN6_PMIMR; + } + + intel_uncore_write(uncore, reg, mask); +} + +static void gen6_gt_pm_update_irq(struct intel_gt *gt, + u32 interrupt_mask, + u32 enabled_irq_mask) +{ + u32 new_val; + + WARN_ON(enabled_irq_mask & ~interrupt_mask); + + lockdep_assert_held(>->irq_lock); + + new_val = gt->pm_imr; + new_val &= ~interrupt_mask; + new_val |= ~enabled_irq_mask & interrupt_mask; + + if (new_val != gt->pm_imr) { + gt->pm_imr = new_val; + write_pm_imr(gt); + } +} + +void gen6_gt_pm_unmask_irq(struct intel_gt *gt, u32 mask) +{ + gen6_gt_pm_update_irq(gt, mask, mask); +} + +void gen6_gt_pm_mask_irq(struct intel_gt *gt, u32 mask) +{ + gen6_gt_pm_update_irq(gt, mask, 0); +} + +void gen6_gt_pm_reset_iir(struct intel_gt *gt, u32 reset_mask) +{ + struct intel_uncore *uncore = gt->uncore; + i915_reg_t reg = INTEL_GEN(gt->i915) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; + + lockdep_assert_held(>->irq_lock); + + intel_uncore_write(uncore, reg, reset_mask); + intel_uncore_write(uncore, reg, reset_mask); + intel_uncore_posting_read(uncore, reg); +} + +static void write_pm_ier(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + u32 mask = gt->pm_ier; + i915_reg_t reg; + + if (INTEL_GEN(i915) >= 11) { + reg = GEN11_GPM_WGBOXPERF_INTR_ENABLE; + mask <<= 16; /* pm is in upper half */ + } else if (INTEL_GEN(i915) >= 8) { + reg = GEN8_GT_IER(2); + } else { + reg = GEN6_PMIER; + } + + intel_uncore_write(uncore, reg, mask); +} + +void gen6_gt_pm_enable_irq(struct intel_gt *gt, u32 enable_mask) +{ + lockdep_assert_held(>->irq_lock); + + gt->pm_ier |= enable_mask; + write_pm_ier(gt); + gen6_gt_pm_unmask_irq(gt, enable_mask); +} + +void gen6_gt_pm_disable_irq(struct intel_gt *gt, u32 disable_mask) +{ + lockdep_assert_held(>->irq_lock); + + gt->pm_ier &= ~disable_mask; + gen6_gt_pm_mask_irq(gt, disable_mask); + write_pm_ier(gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h new file mode 100644 index 000000000000..b29816a04809 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h @@ -0,0 +1,22 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_PM_IRQ_H +#define INTEL_GT_PM_IRQ_H + +#include <linux/types.h> + +struct intel_gt; + +void gen6_gt_pm_unmask_irq(struct intel_gt *gt, u32 mask); +void gen6_gt_pm_mask_irq(struct intel_gt *gt, u32 mask); + +void gen6_gt_pm_enable_irq(struct intel_gt *gt, u32 enable_mask); +void gen6_gt_pm_disable_irq(struct intel_gt *gt, u32 disable_mask); + +void gen6_gt_pm_reset_iir(struct intel_gt *gt, u32 reset_mask); + +#endif /* INTEL_GT_PM_IRQ_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c new file mode 100644 index 000000000000..7ef1d37970f6 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -0,0 +1,225 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/workqueue.h> + +#include "i915_drv.h" /* for_each_engine() */ +#include "i915_request.h" +#include "intel_engine_heartbeat.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_timeline.h" + +static bool retire_requests(struct intel_timeline *tl) +{ + struct i915_request *rq, *rn; + + 
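An illustrative aside (not part of this patch): gen6_gt_pm_update_irq() above keeps a software shadow of the PM interrupt mask and only performs the MMIO write when the shadow actually changes. A generic sketch of that shadow-and-write-through idea; the struct and the write hook are hypothetical:

#include <stdint.h>

struct reg_shadow {
	uint32_t value;
	void (*write_hw)(uint32_t val); /* hypothetical MMIO write hook */
};

/* Clear then set bits in the cached value; touch the hardware only when the
 * cached value really changed. */
static void shadow_update(struct reg_shadow *s, uint32_t clear, uint32_t set)
{
	uint32_t val = (s->value & ~clear) | set;

	if (val != s->value) {
		s->value = val;
		s->write_hw(val);
	}
}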
list_for_each_entry_safe(rq, rn, &tl->requests, link) + if (!i915_request_retire(rq)) + return false; + + /* And check nothing new was submitted */ + return !i915_active_fence_isset(&tl->last_request); +} + +static bool flush_submission(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + bool active = false; + + if (!intel_gt_pm_is_awake(gt)) + return false; + + for_each_engine(engine, gt, id) { + intel_engine_flush_submission(engine); + active |= flush_work(&engine->retire_work); + active |= flush_work(&engine->wakeref.work); + } + + return active; +} + +static void engine_retire(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), retire_work); + struct intel_timeline *tl = xchg(&engine->retire, NULL); + + do { + struct intel_timeline *next = xchg(&tl->retire, NULL); + + /* + * Our goal here is to retire _idle_ timelines as soon as + * possible (as they are idle, we do not expect userspace + * to be cleaning up anytime soon). + * + * If the timeline is currently locked, either it is being + * retired elsewhere or about to be! + */ + if (mutex_trylock(&tl->mutex)) { + retire_requests(tl); + mutex_unlock(&tl->mutex); + } + intel_timeline_put(tl); + + GEM_BUG_ON(!next); + tl = ptr_mask_bits(next, 1); + } while (tl); +} + +static bool add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ +#define STUB ((struct intel_timeline *)1) + struct intel_timeline *first; + + /* + * We open-code a llist here to include the additional tag [BIT(0)] + * so that we know when the timeline is already on a + * retirement queue: either this engine or another. + */ + + if (cmpxchg(&tl->retire, NULL, STUB)) /* already queued */ + return false; + + intel_timeline_get(tl); + first = READ_ONCE(engine->retire); + do + tl->retire = ptr_pack_bits(first, 1, 1); + while (!try_cmpxchg(&engine->retire, &first, tl)); + + return !first; +} + +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + if (add_retire(engine, tl)) + schedule_work(&engine->retire_work); +} + +void intel_engine_init_retire(struct intel_engine_cs *engine) +{ + INIT_WORK(&engine->retire_work, engine_retire); +} + +void intel_engine_fini_retire(struct intel_engine_cs *engine) +{ + flush_work(&engine->retire_work); + GEM_BUG_ON(engine->retire); +} + +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) +{ + struct intel_gt_timelines *timelines = >->timelines; + struct intel_timeline *tl, *tn; + unsigned long active_count = 0; + bool interruptible; + LIST_HEAD(free); + + interruptible = true; + if (unlikely(timeout < 0)) + timeout = -timeout, interruptible = false; + + flush_submission(gt); /* kick the ksoftirqd tasklets */ + spin_lock(&timelines->lock); + list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { + if (!mutex_trylock(&tl->mutex)) { + active_count++; /* report busy to caller, try again? 
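An illustrative aside (not part of this patch): add_retire() above is a lock-free singly linked push. cmpxchg(&tl->retire, NULL, STUB) claims the node, try_cmpxchg() then splices it onto engine->retire, and BIT(0) of the stored pointer serves as an "already queued" tag. A plain C11 sketch of the same push, without the tag bit; the boolean result mirrors the "list was empty, so kick the worker" decision in intel_engine_add_retire():

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	struct node *next;
};

static bool lockless_push(struct node *_Atomic *head, struct node *n)
{
	struct node *first = atomic_load(head);

	do
		n->next = first;               /* link in front of the old head */
	while (!atomic_compare_exchange_weak(head, &first, n));

	return first == NULL;                  /* true if we made the list non-empty */
}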
*/ + continue; + } + + intel_timeline_get(tl); + GEM_BUG_ON(!atomic_read(&tl->active_count)); + atomic_inc(&tl->active_count); /* pin the list element */ + spin_unlock(&timelines->lock); + + if (timeout > 0) { + struct dma_fence *fence; + + fence = i915_active_fence_get(&tl->last_request); + if (fence) { + timeout = dma_fence_wait_timeout(fence, + interruptible, + timeout); + dma_fence_put(fence); + } + } + + if (!retire_requests(tl) || flush_submission(gt)) + active_count++; + + spin_lock(&timelines->lock); + + /* Resume iteration after dropping lock */ + list_safe_reset_next(tl, tn, link); + if (atomic_dec_and_test(&tl->active_count)) + list_del(&tl->link); + + mutex_unlock(&tl->mutex); + + /* Defer the final release to after the spinlock */ + if (refcount_dec_and_test(&tl->kref.refcount)) { + GEM_BUG_ON(atomic_read(&tl->active_count)); + list_add(&tl->link, &free); + } + } + spin_unlock(&timelines->lock); + + list_for_each_entry_safe(tl, tn, &free, link) + __intel_timeline_free(&tl->kref); + + return active_count ? timeout : 0; +} + +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) +{ + /* If the device is asleep, we have no requests outstanding */ + if (!intel_gt_pm_is_awake(gt)) + return 0; + + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + } + + return timeout; +} + +static void retire_work_handler(struct work_struct *work) +{ + struct intel_gt *gt = + container_of(work, typeof(*gt), requests.retire_work.work); + + schedule_delayed_work(>->requests.retire_work, + round_jiffies_up_relative(HZ)); + intel_gt_retire_requests(gt); +} + +void intel_gt_init_requests(struct intel_gt *gt) +{ + INIT_DELAYED_WORK(>->requests.retire_work, retire_work_handler); +} + +void intel_gt_park_requests(struct intel_gt *gt) +{ + cancel_delayed_work(>->requests.retire_work); +} + +void intel_gt_unpark_requests(struct intel_gt *gt) +{ + schedule_delayed_work(>->requests.retire_work, + round_jiffies_up_relative(HZ)); +} + +void intel_gt_fini_requests(struct intel_gt *gt) +{ + /* Wait until the work is marked as finished before unloading! 
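An illustrative aside (not part of this patch): retire_work_handler() above is a self-rearming delayed work item. Each run queues the next tick before retiring requests, parking merely cancels the pending tick, and final teardown uses cancel_delayed_work_sync() so that a run already in flight is waited for before unload. A minimal kernel-style sketch of that lifecycle; the periodic struct and its payload are hypothetical:

#include <linux/timer.h>
#include <linux/workqueue.h>

struct periodic {
	struct delayed_work work;
};

static void periodic_fn(struct work_struct *wrk)
{
	struct periodic *p = container_of(wrk, struct periodic, work.work);

	/* queue the next tick first, then do this tick's payload */
	schedule_delayed_work(&p->work, round_jiffies_up_relative(HZ));
	/* ... periodic payload would go here ... */
}

static void periodic_start(struct periodic *p)
{
	INIT_DELAYED_WORK(&p->work, periodic_fn);
	schedule_delayed_work(&p->work, round_jiffies_up_relative(HZ));
}

static void periodic_stop(struct periodic *p)
{
	cancel_delayed_work_sync(&p->work); /* also waits for a running tick */
}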
*/ + cancel_delayed_work_sync(>->requests.retire_work); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h new file mode 100644 index 000000000000..dbac53baf1cb --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_REQUESTS_H +#define INTEL_GT_REQUESTS_H + +struct intel_engine_cs; +struct intel_gt; +struct intel_timeline; + +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); +static inline void intel_gt_retire_requests(struct intel_gt *gt) +{ + intel_gt_retire_requests_timeout(gt, 0); +} + +void intel_engine_init_retire(struct intel_engine_cs *engine); +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl); +void intel_engine_fini_retire(struct intel_engine_cs *engine); + +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); + +void intel_gt_init_requests(struct intel_gt *gt); +void intel_gt_park_requests(struct intel_gt *gt); +void intel_gt_unpark_requests(struct intel_gt *gt); +void intel_gt_fini_requests(struct intel_gt *gt); + +#endif /* INTEL_GT_REQUESTS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h new file mode 100644 index 000000000000..96890dd12b5f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_GT_TYPES__ +#define __INTEL_GT_TYPES__ + +#include <linux/ktime.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include "uc/intel_uc.h" + +#include "i915_vma.h" +#include "intel_engine_types.h" +#include "intel_llc_types.h" +#include "intel_reset_types.h" +#include "intel_rc6_types.h" +#include "intel_rps_types.h" +#include "intel_wakeref.h" + +struct drm_i915_private; +struct i915_ggtt; +struct intel_engine_cs; +struct intel_uncore; + +struct intel_gt { + struct drm_i915_private *i915; + struct intel_uncore *uncore; + struct i915_ggtt *ggtt; + + struct intel_uc uc; + + struct intel_gt_timelines { + spinlock_t lock; /* protects active_list */ + struct list_head active_list; + + /* Pack multiple timelines' seqnos into the same page */ + spinlock_t hwsp_lock; + struct list_head hwsp_free_list; + } timelines; + + struct intel_gt_requests { + /** + * We leave the user IRQ off as much as possible, + * but this means that requests will finish and never + * be retired once the system goes idle. Set a timer to + * fire periodically while the ring is running. When it + * fires, go retire requests. + */ + struct delayed_work retire_work; + } requests; + + struct intel_wakeref wakeref; + atomic_t user_wakeref; + + struct list_head closed_vma; + spinlock_t closed_lock; /* guards the list of closed_vma */ + + struct intel_reset reset; + + /** + * Is the GPU currently considered idle, or busy executing + * userspace requests? Whilst idle, we allow runtime power + * management to power down the hardware and display clocks. + * In order to reduce the effect on performance, there + * is a slight delay before we do so. 
+ */ + intel_wakeref_t awake; + + struct intel_llc llc; + struct intel_rc6 rc6; + struct intel_rps rps; + + ktime_t last_init_time; + + struct i915_vma *scratch; + + spinlock_t irq_lock; + u32 gt_imr; + u32 pm_ier; + u32 pm_imr; + + u32 pm_guc_events; + + struct intel_engine_cs *engine[I915_NUM_ENGINES]; + struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] + [MAX_ENGINE_INSTANCE + 1]; + + /* + * Default address space (either GGTT or ppGTT depending on arch). + * + * Reserved for exclusive use by the kernel. + */ + struct i915_address_space *vm; +}; + +enum intel_gt_scratch_field { + /* 8 bytes */ + INTEL_GT_SCRATCH_FIELD_DEFAULT = 0, + + /* 8 bytes */ + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128, + + /* 8 bytes */ + INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256, + + /* 6 * 8 bytes */ + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048, + + /* 4 bytes */ + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096, +}; + +#endif /* __INTEL_GT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c new file mode 100644 index 000000000000..16acdc5d6734 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/slab.h> /* fault-inject.h is not standalone! */ + +#include <linux/fault-inject.h> + +#include "i915_trace.h" +#include "intel_gt.h" +#include "intel_gtt.h" + +void stash_init(struct pagestash *stash) +{ + pagevec_init(&stash->pvec); + spin_lock_init(&stash->lock); +} + +static struct page *stash_pop_page(struct pagestash *stash) +{ + struct page *page = NULL; + + spin_lock(&stash->lock); + if (likely(stash->pvec.nr)) + page = stash->pvec.pages[--stash->pvec.nr]; + spin_unlock(&stash->lock); + + return page; +} + +static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec) +{ + unsigned int nr; + + spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); + + nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec)); + memcpy(stash->pvec.pages + stash->pvec.nr, + pvec->pages + pvec->nr - nr, + sizeof(pvec->pages[0]) * nr); + stash->pvec.nr += nr; + + spin_unlock(&stash->lock); + + pvec->nr -= nr; +} + +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) +{ + struct pagevec stack; + struct page *page; + + if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) + i915_gem_shrink_all(vm->i915); + + page = stash_pop_page(&vm->free_pages); + if (page) + return page; + + if (!vm->pt_kmap_wc) + return alloc_page(gfp); + + /* Look in our global stash of WC pages... */ + page = stash_pop_page(&vm->i915->mm.wc_stash); + if (page) + return page; + + /* + * Otherwise batch allocate pages to amortize cost of set_pages_wc. + * + * We have to be careful as page allocation may trigger the shrinker + * (via direct reclaim) which will fill up the WC stash underneath us. + * So we add our WB pages into a temporary pvec on the stack and merge + * them into the WC stash after all the allocations are complete. 
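An illustrative aside (not part of this patch): the page stash above is a small, spinlock-protected cache placed in front of the page allocator, so allocation pops a cached page when one is available and freeing pushes pages back up to a fixed capacity. A single-threaded userspace toy of the same idea (the real code uses pagevecs plus a second, global write-combined stash); the names and the slot count are arbitrary:

#include <stdlib.h>

#define CACHE_SLOTS 15

struct buf_cache {
	void *slot[CACHE_SLOTS];
	int nr;
};

/* Pop a cached buffer if available, else fall back to the allocator. */
static void *cache_alloc(struct buf_cache *c, size_t size)
{
	if (c->nr)
		return c->slot[--c->nr];
	return malloc(size);
}

/* Keep at most CACHE_SLOTS buffers; any overflow goes straight back. */
static void cache_free(struct buf_cache *c, void *buf)
{
	if (c->nr < CACHE_SLOTS)
		c->slot[c->nr++] = buf;
	else
		free(buf);
}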
+ */ + pagevec_init(&stack); + do { + struct page *page; + + page = alloc_page(gfp); + if (unlikely(!page)) + break; + + stack.pages[stack.nr++] = page; + } while (pagevec_space(&stack)); + + if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) { + page = stack.pages[--stack.nr]; + + /* Merge spare WC pages to the global stash */ + if (stack.nr) + stash_push_pagevec(&vm->i915->mm.wc_stash, &stack); + + /* Push any surplus WC pages onto the local VM stash */ + if (stack.nr) + stash_push_pagevec(&vm->free_pages, &stack); + } + + /* Return unwanted leftovers */ + if (unlikely(stack.nr)) { + WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr)); + __pagevec_release(&stack); + } + + return page; +} + +static void vm_free_pages_release(struct i915_address_space *vm, + bool immediate) +{ + struct pagevec *pvec = &vm->free_pages.pvec; + struct pagevec stack; + + lockdep_assert_held(&vm->free_pages.lock); + GEM_BUG_ON(!pagevec_count(pvec)); + + if (vm->pt_kmap_wc) { + /* + * When we use WC, first fill up the global stash and then + * only if full immediately free the overflow. + */ + stash_push_pagevec(&vm->i915->mm.wc_stash, pvec); + + /* + * As we have made some room in the VM's free_pages, + * we can wait for it to fill again. Unless we are + * inside i915_address_space_fini() and must + * immediately release the pages! + */ + if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1)) + return; + + /* + * We have to drop the lock to allow ourselves to sleep, + * so take a copy of the pvec and clear the stash for + * others to use it as we sleep. + */ + stack = *pvec; + pagevec_reinit(pvec); + spin_unlock(&vm->free_pages.lock); + + pvec = &stack; + set_pages_array_wb(pvec->pages, pvec->nr); + + spin_lock(&vm->free_pages.lock); + } + + __pagevec_release(pvec); +} + +static void vm_free_page(struct i915_address_space *vm, struct page *page) +{ + /* + * On !llc, we need to change the pages back to WB. We only do so + * in bulk, so we rarely need to change the page attributes here, + * but doing so requires a stop_machine() from deep inside arch/x86/mm. + * To make detection of the possible sleep more likely, use an + * unconditional might_sleep() for everybody. 
+ */ + might_sleep(); + spin_lock(&vm->free_pages.lock); + while (!pagevec_space(&vm->free_pages.pvec)) + vm_free_pages_release(vm, false); + GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE); + pagevec_add(&vm->free_pages.pvec, page); + spin_unlock(&vm->free_pages.lock); +} + +void __i915_vm_close(struct i915_address_space *vm) +{ + struct i915_vma *vma, *vn; + + mutex_lock(&vm->mutex); + list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + + /* Keep the obj (and hence the vma) alive as _we_ destroy it */ + if (!kref_get_unless_zero(&obj->base.refcount)) + continue; + + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); + __i915_vma_put(vma); + + i915_gem_object_put(obj); + } + GEM_BUG_ON(!list_empty(&vm->bound_list)); + mutex_unlock(&vm->mutex); +} + +void i915_address_space_fini(struct i915_address_space *vm) +{ + spin_lock(&vm->free_pages.lock); + if (pagevec_count(&vm->free_pages.pvec)) + vm_free_pages_release(vm, true); + GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec)); + spin_unlock(&vm->free_pages.lock); + + drm_mm_takedown(&vm->mm); + + mutex_destroy(&vm->mutex); +} + +static void __i915_vm_release(struct work_struct *work) +{ + struct i915_address_space *vm = + container_of(work, struct i915_address_space, rcu.work); + + vm->cleanup(vm); + i915_address_space_fini(vm); + + kfree(vm); +} + +void i915_vm_release(struct kref *kref) +{ + struct i915_address_space *vm = + container_of(kref, struct i915_address_space, ref); + + GEM_BUG_ON(i915_is_ggtt(vm)); + trace_i915_ppgtt_release(vm); + + queue_rcu_work(vm->i915->wq, &vm->rcu); +} + +void i915_address_space_init(struct i915_address_space *vm, int subclass) +{ + kref_init(&vm->ref); + INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + atomic_set(&vm->open, 1); + + /* + * The vm->mutex must be reclaim safe (for use in the shrinker). + * Do a dummy acquire now under fs_reclaim so that any allocation + * attempt holding the lock is immediately reported by lockdep. 
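An illustrative aside (not part of this patch): i915_vm_release() above never tears the address space down from the kref release callback itself; the final put only queues the cleanup, so the heavyweight work can sleep (the i915 version additionally routes through queue_rcu_work() to get an RCU grace period first). A generic sketch of that kref-plus-deferred-release shape; the lazy_obj type and its fields are invented for illustration:

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct lazy_obj {
	struct kref ref;
	struct work_struct release_work;
};

static void lazy_obj_release_worker(struct work_struct *work)
{
	struct lazy_obj *obj = container_of(work, struct lazy_obj, release_work);

	/* heavyweight teardown that may sleep would go here */
	kfree(obj);
}

/* Used as: kref_put(&obj->ref, lazy_obj_release); */
static void lazy_obj_release(struct kref *kref)
{
	struct lazy_obj *obj = container_of(kref, struct lazy_obj, ref);

	INIT_WORK(&obj->release_work, lazy_obj_release_worker);
	queue_work(system_wq, &obj->release_work);
}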
+ */ + mutex_init(&vm->mutex); + lockdep_set_subclass(&vm->mutex, subclass); + i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); + + GEM_BUG_ON(!vm->total); + drm_mm_init(&vm->mm, 0, vm->total); + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; + + stash_init(&vm->free_pages); + + INIT_LIST_HEAD(&vm->bound_list); +} + +void clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + + if (vma->pages != vma->obj->mm.pages) { + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + + memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); +} + +static int __setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + gfp_t gfp) +{ + p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL); + if (unlikely(!p->page)) + return -ENOMEM; + + p->daddr = dma_map_page_attrs(vm->dma, + p->page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_NO_WARN); + if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { + vm_free_page(vm, p->page); + return -ENOMEM; + } + + return 0; +} + +int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) +{ + return __setup_page_dma(vm, p, __GFP_HIGHMEM); +} + +void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) +{ + dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + vm_free_page(vm, p->page); +} + +void +fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count) +{ + kunmap_atomic(memset64(kmap_atomic(p->page), val, count)); +} + +int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) +{ + unsigned long size; + + /* + * In order to utilize 64K pages for an object with a size < 2M, we will + * need to support a 64K scratch page, given that every 16th entry for a + * page-table operating in 64K mode must point to a properly aligned 64K + * region, including any PTEs which happen to point to scratch. + * + * This is only relevant for the 48b PPGTT where we support + * huge-gtt-pages, see also i915_vma_insert(). However, as we share the + * scratch (read-only) between all vm, we create one 64k scratch page + * for all. 
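An illustrative aside (not part of this patch): the loop that follows tries the large 64K scratch size first and falls back to 4K if allocation, DMA mapping or alignment fails. The same try-big-then-fall-back shape as a self-contained sketch, with aligned_alloc() standing in for alloc_pages() plus the DMA mapping:

#include <stdlib.h>

static void *alloc_scratch(size_t preferred, size_t baseline, size_t *out_size)
{
	size_t size = preferred;

	for (;;) {
		void *p = aligned_alloc(size, size); /* alignment == size here */

		if (p) {
			*out_size = size;
			return p;
		}
		if (size == baseline)
			return NULL;        /* even the small size failed */
		size = baseline;            /* retry once at the smaller size */
	}
}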
+ */ + size = I915_GTT_PAGE_SIZE_4K; + if (i915_vm_is_4lvl(vm) && + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { + size = I915_GTT_PAGE_SIZE_64K; + gfp |= __GFP_NOWARN; + } + gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; + + do { + unsigned int order = get_order(size); + struct page *page; + dma_addr_t addr; + + page = alloc_pages(gfp, order); + if (unlikely(!page)) + goto skip; + + addr = dma_map_page_attrs(vm->dma, + page, 0, size, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_NO_WARN); + if (unlikely(dma_mapping_error(vm->dma, addr))) + goto free_page; + + if (unlikely(!IS_ALIGNED(addr, size))) + goto unmap_page; + + vm->scratch[0].base.page = page; + vm->scratch[0].base.daddr = addr; + vm->scratch_order = order; + return 0; + +unmap_page: + dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL); +free_page: + __free_pages(page, order); +skip: + if (size == I915_GTT_PAGE_SIZE_4K) + return -ENOMEM; + + size = I915_GTT_PAGE_SIZE_4K; + gfp &= ~__GFP_NOWARN; + } while (1); +} + +void cleanup_scratch_page(struct i915_address_space *vm) +{ + struct i915_page_dma *p = px_base(&vm->scratch[0]); + unsigned int order = vm->scratch_order; + + dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, + PCI_DMA_BIDIRECTIONAL); + __free_pages(p->page, order); +} + +void free_scratch(struct i915_address_space *vm) +{ + int i; + + if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */ + return; + + for (i = 1; i <= vm->top; i++) { + if (!px_dma(&vm->scratch[i])) + break; + cleanup_page_dma(vm, px_base(&vm->scratch[i])); + } + + cleanup_scratch_page(vm); +} + +void gtt_write_workarounds(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + + /* + * This function is for gtt related workarounds. This function is + * called on driver load and after a GPU reset, so you can place + * workarounds here even if they get overwritten by GPU reset. + */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ + if (IS_BROADWELL(i915)) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); + else if (IS_CHERRYVIEW(i915)) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); + else if (IS_GEN9_LP(i915)) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); + + /* + * To support 64K PTEs we need to first enable the use of the + * Intermediate-Page-Size(IPS) bit of the PDE field via some magical + * mmio, otherwise the page-walker will simply ignore the IPS bit. This + * shouldn't be needed after GEN10. + * + * 64K pages were first introduced from BDW+, although technically they + * only *work* from gen9+. For pre-BDW we instead have the option for + * 32K pages, but we don't currently have any support for it in our + * driver. + */ + if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) && + INTEL_GEN(i915) <= 10) + intel_uncore_rmw(uncore, + GEN8_GAMW_ECO_DEV_RW_IA, + 0, + GAMW_ECO_ENABLE_64K_IPS_FIELD); + + if (IS_GEN_RANGE(i915, 8, 11)) { + bool can_use_gtt_cache = true; + + /* + * According to the BSpec if we use 2M/1G pages then we also + * need to disable the GTT cache. At least on BDW we can see + * visual corruption when using 2M pages, and not disabling the + * GTT cache. 
+ */ + if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M)) + can_use_gtt_cache = false; + + /* WaGttCachingOffByDefault */ + intel_uncore_write(uncore, + HSW_GTT_CACHE_EN, + can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); + WARN_ON_ONCE(can_use_gtt_cache && + intel_uncore_read(uncore, + HSW_GTT_CACHE_EN) == 0); + } +} + +u64 gen8_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~_PAGE_RW; + + switch (level) { + case I915_CACHE_NONE: + pte |= PPAT_UNCACHED; + break; + case I915_CACHE_WT: + pte |= PPAT_DISPLAY_ELLC; + break; + default: + pte |= PPAT_CACHED; + break; + } + + return pte; +} + +static void tgl_setup_private_ppat(struct intel_uncore *uncore) +{ + /* TGL doesn't support LLC or AGE settings */ + intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC); + intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT); + intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC); + intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); +} + +static void cnl_setup_private_ppat(struct intel_uncore *uncore) +{ + intel_uncore_write(uncore, + GEN10_PAT_INDEX(0), + GEN8_PPAT_WB | GEN8_PPAT_LLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(1), + GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(2), + GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(3), + GEN8_PPAT_UC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(4), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(5), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(6), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(7), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); +} + +/* + * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability + * bits. When using advanced contexts each context stores its own PAT, but + * writing this data shouldn't be harmful even in those cases. + */ +static void bdw_setup_private_ppat(struct intel_uncore *uncore) +{ + u64 pat; + + pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ + GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ + GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ + GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ + GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | + GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | + GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | + GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); + + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); +} + +static void chv_setup_private_ppat(struct intel_uncore *uncore) +{ + u64 pat; + + /* + * Map WB on BDW to snooped on CHV. + * + * Only the snoop bit has meaning for CHV, the rest is + * ignored. 
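An illustrative aside (not part of this patch): bdw_setup_private_ppat() above ORs one small attribute field per PAT index into a single 64-bit value and then writes it out as two 32-bit halves. A generic sketch of that packing; the 8-bits-per-entry width and the write32 hook are assumptions made purely for illustration:

#include <stdint.h>

#define PAT_ENTRY(index, value) ((uint64_t)(value) << ((index) * 8))

/* e.g. uint64_t pat = PAT_ENTRY(0, wb) | PAT_ENTRY(1, wc) | PAT_ENTRY(3, uc);
 * where wb/wc/uc stand for whatever attribute encodings the hardware defines */
static void write_pat(void (*write32)(unsigned int half, uint32_t val),
		      uint64_t pat)
{
	write32(0, (uint32_t)(pat & 0xffffffff)); /* lower 32 bits */
	write32(1, (uint32_t)(pat >> 32));        /* upper 32 bits */
}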
+ * + * The hardware will never snoop for certain types of accesses: + * - CPU GTT (GMADR->GGTT->no snoop->memory) + * - PPGTT page tables + * - some other special cycles + * + * As with BDW, we also need to consider the following for GT accesses: + * "For GGTT, there is NO pat_sel[2:0] from the entry, + * so RTL will always use the value corresponding to + * pat_sel = 000". + * Which means we must set the snoop bit in PAT entry 0 + * in order to keep the global status page working. + */ + + pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | + GEN8_PPAT(1, 0) | + GEN8_PPAT(2, 0) | + GEN8_PPAT(3, 0) | + GEN8_PPAT(4, CHV_PPAT_SNOOP) | + GEN8_PPAT(5, CHV_PPAT_SNOOP) | + GEN8_PPAT(6, CHV_PPAT_SNOOP) | + GEN8_PPAT(7, CHV_PPAT_SNOOP); + + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); +} + +void setup_private_pat(struct intel_uncore *uncore) +{ + struct drm_i915_private *i915 = uncore->i915; + + GEM_BUG_ON(INTEL_GEN(i915) < 8); + + if (INTEL_GEN(i915) >= 12) + tgl_setup_private_ppat(uncore); + else if (INTEL_GEN(i915) >= 10) + cnl_setup_private_ppat(uncore); + else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) + chv_setup_private_ppat(uncore); + else + bdw_setup_private_ppat(uncore); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_gtt.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h new file mode 100644 index 000000000000..7da7681c20b1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -0,0 +1,587 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + * + * Please try to maintain the following order within this file unless it makes + * sense to do otherwise. From top to bottom: + * 1. typedefs + * 2. #defines, and macros + * 3. structure definitions + * 4. function prototypes + * + * Within each section, please try to order by generation in ascending order, + * from top to bottom (ie. gen6 on the top, gen8 on the bottom). + */ + +#ifndef __INTEL_GTT_H__ +#define __INTEL_GTT_H__ + +#include <linux/io-mapping.h> +#include <linux/kref.h> +#include <linux/mm.h> +#include <linux/pagevec.h> +#include <linux/scatterlist.h> +#include <linux/workqueue.h> + +#include <drm/drm_mm.h> + +#include "gt/intel_reset.h" +#include "i915_gem_fence_reg.h" +#include "i915_selftest.h" +#include "i915_vma_types.h" + +#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) + +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT) +#define DBG(...) trace_printk(__VA_ARGS__) +#else +#define DBG(...) 
+#endif + +#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */ + +#define I915_GTT_PAGE_SIZE_4K BIT_ULL(12) +#define I915_GTT_PAGE_SIZE_64K BIT_ULL(16) +#define I915_GTT_PAGE_SIZE_2M BIT_ULL(21) + +#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K +#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M + +#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE + +#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE + +#define I915_FENCE_REG_NONE -1 +#define I915_MAX_NUM_FENCES 32 +/* 32 fences + sign bit for FENCE_REG_NONE */ +#define I915_MAX_NUM_FENCE_BITS 6 + +typedef u32 gen6_pte_t; +typedef u64 gen8_pte_t; + +#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT) + +#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len))) +#define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) +#define I915_PDES 512 +#define I915_PDE_MASK (I915_PDES - 1) + +/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */ +#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) +#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) +#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) +#define GEN6_PTE_CACHE_LLC (2 << 1) +#define GEN6_PTE_UNCACHED (1 << 1) +#define GEN6_PTE_VALID REG_BIT(0) + +#define GEN6_PTES I915_PTES(sizeof(gen6_pte_t)) +#define GEN6_PD_SIZE (I915_PDES * PAGE_SIZE) +#define GEN6_PD_ALIGN (PAGE_SIZE * 16) +#define GEN6_PDE_SHIFT 22 +#define GEN6_PDE_VALID REG_BIT(0) +#define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT)) + +#define GEN7_PTE_CACHE_L3_LLC (3 << 1) + +#define BYT_PTE_SNOOPED_BY_CPU_CACHES REG_BIT(2) +#define BYT_PTE_WRITEABLE REG_BIT(1) + +/* + * Cacheability Control is a 4-bit value. The low three bits are stored in bits + * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE. + */ +#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \ + (((bits) & 0x8) << (11 - 3))) +#define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2) +#define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3) +#define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8) +#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) +#define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7) +#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) +#define HSW_PTE_UNCACHED (0) +#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) +#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) + +/* + * GEN8 32b style address is defined as a 3 level page table: + * 31:30 | 29:21 | 20:12 | 11:0 + * PDPE | PDE | PTE | offset + * The difference as compared to normal x86 3 level page table is the PDPEs are + * programmed via register. 
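
As an illustrative aside (not part of the commit): the comment above describes the GEN8 32b, 3-level layout as PDPE in bits 31:30, PDE in 29:21, PTE in 20:12 and the byte offset in 11:0. A tiny sketch that pulls those indices out of a GPU virtual address; the field widths come from the comment, the address itself is made up.

#include <stdio.h>

int main(void)
{
    unsigned int addr = 0x8765a321;  /* arbitrary example GPU virtual address */

    unsigned int pdpe   = (addr >> 30) & 0x3;    /* bits 31:30 - 4 PDPEs     */
    unsigned int pde    = (addr >> 21) & 0x1ff;  /* bits 29:21 - 512 PDEs    */
    unsigned int pte    = (addr >> 12) & 0x1ff;  /* bits 20:12 - 512 PTEs    */
    unsigned int offset = addr & 0xfff;          /* bits 11:0  - page offset */

    printf("addr 0x%08x -> pdpe %u, pde %u, pte %u, offset 0x%03x\n",
           addr, pdpe, pde, pte, offset);
    return 0;
}
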
+ * + * GEN8 48b style address is defined as a 4 level page table: + * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 + * PML4E | PDPE | PDE | PTE | offset + */ +#define GEN8_3LVL_PDPES 4 + +#define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD) +#define PPAT_CACHED_PDE 0 /* WB LLC */ +#define PPAT_CACHED _PAGE_PAT /* WB LLCeLLC */ +#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT eLLC */ + +#define CHV_PPAT_SNOOP REG_BIT(6) +#define GEN8_PPAT_AGE(x) ((x)<<4) +#define GEN8_PPAT_LLCeLLC (3<<2) +#define GEN8_PPAT_LLCELLC (2<<2) +#define GEN8_PPAT_LLC (1<<2) +#define GEN8_PPAT_WB (3<<0) +#define GEN8_PPAT_WT (2<<0) +#define GEN8_PPAT_WC (1<<0) +#define GEN8_PPAT_UC (0<<0) +#define GEN8_PPAT_ELLC_OVERRIDE (0<<2) +#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) + +#define GEN8_PDE_IPS_64K BIT(11) +#define GEN8_PDE_PS_2M BIT(7) + +#define for_each_sgt_daddr(__dp, __iter, __sgt) \ + __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE) + +struct i915_page_dma { + struct page *page; + union { + dma_addr_t daddr; + + /* + * For gen6/gen7 only. This is the offset in the GGTT + * where the page directory entries for PPGTT begin + */ + u32 ggtt_offset; + }; +}; + +struct i915_page_scratch { + struct i915_page_dma base; + u64 encode; +}; + +struct i915_page_table { + struct i915_page_dma base; + atomic_t used; +}; + +struct i915_page_directory { + struct i915_page_table pt; + spinlock_t lock; + void *entry[512]; +}; + +#define __px_choose_expr(x, type, expr, other) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), type) || \ + __builtin_types_compatible_p(typeof(x), const type), \ + ({ type __x = (type)(x); expr; }), \ + other) + +#define px_base(px) \ + __px_choose_expr(px, struct i915_page_dma *, __x, \ + __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \ + __px_choose_expr(px, struct i915_page_table *, &__x->base, \ + __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \ + (void)0)))) +#define px_dma(px) (px_base(px)->daddr) + +#define px_pt(px) \ + __px_choose_expr(px, struct i915_page_table *, __x, \ + __px_choose_expr(px, struct i915_page_directory *, &__x->pt, \ + (void)0)) +#define px_used(px) (&px_pt(px)->used) + +enum i915_cache_level; + +struct drm_i915_file_private; +struct drm_i915_gem_object; +struct i915_vma; +struct intel_gt; + +struct i915_vma_ops { + /* Map an object into an address space with the given cache flags. */ + int (*bind_vma)(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); + /* + * Unmap an object from an address space. This usually consists of + * setting the valid PTE entries to a reserved scratch page. + */ + void (*unbind_vma)(struct i915_vma *vma); + + int (*set_pages)(struct i915_vma *vma); + void (*clear_pages)(struct i915_vma *vma); +}; + +struct pagestash { + spinlock_t lock; + struct pagevec pvec; +}; + +void stash_init(struct pagestash *stash); + +struct i915_address_space { + struct kref ref; + struct rcu_work rcu; + + struct drm_mm mm; + struct intel_gt *gt; + struct drm_i915_private *i915; + struct device *dma; + /* + * Every address space belongs to a struct file - except for the global + * GTT that is owned by the driver (and so @file is set to NULL). In + * principle, no information should leak from one context to another + * (or between files/processes etc) unless explicitly shared by the + * owner. Tracking the owner is important in order to free up per-file + * objects along with the file, to aide resource tracking, and to + * assign blame. 
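
As an illustrative aside (not part of the commit): HSW_CACHEABILITY_CONTROL() earlier in this header splits a 4-bit cacheability value so that bits 2:0 land in PTE bits 3:1 and bit 3 lands in PTE bit 11. A short standalone check of that packing for the named encodings above; only libc is assumed.

#include <stdio.h>

/* Same packing as HSW_CACHEABILITY_CONTROL(bits) in this header. */
static unsigned int hsw_cc(unsigned int bits)
{
    return ((bits & 0x7) << 1) | ((bits & 0x8) << (11 - 3));
}

int main(void)
{
    /* The named encodings above and the PTE bit patterns they produce. */
    printf("HSW_WB_LLC_AGE3      (0x2) -> 0x%03x\n", hsw_cc(0x2)); /* 0x004 */
    printf("HSW_WB_ELLC_LLC_AGE3 (0x8) -> 0x%03x\n", hsw_cc(0x8)); /* 0x800 */
    printf("HSW_WB_ELLC_LLC_AGE0 (0xb) -> 0x%03x\n", hsw_cc(0xb)); /* 0x806 */
    printf("HSW_WT_ELLC_LLC_AGE3 (0x7) -> 0x%03x\n", hsw_cc(0x7)); /* 0x00e */
    return 0;
}
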
+ */ + struct drm_i915_file_private *file; + u64 total; /* size addr space maps (ex. 2GB for ggtt) */ + u64 reserved; /* size addr space reserved */ + + unsigned int bind_async_flags; + + /* + * Each active user context has its own address space (in full-ppgtt). + * Since the vm may be shared between multiple contexts, we count how + * many contexts keep us "open". Once open hits zero, we are closed + * and do not allow any new attachments, and proceed to shutdown our + * vma and page directories. + */ + atomic_t open; + + struct mutex mutex; /* protects vma and our lists */ +#define VM_CLASS_GGTT 0 +#define VM_CLASS_PPGTT 1 + + struct i915_page_scratch scratch[4]; + unsigned int scratch_order; + unsigned int top; + + /** + * List of vma currently bound. + */ + struct list_head bound_list; + + struct pagestash free_pages; + + /* Global GTT */ + bool is_ggtt:1; + + /* Some systems require uncached updates of the page directories */ + bool pt_kmap_wc:1; + + /* Some systems support read-only mappings for GGTT and/or PPGTT */ + bool has_read_only:1; + + u64 (*pte_encode)(dma_addr_t addr, + enum i915_cache_level level, + u32 flags); /* Create a valid PTE */ +#define PTE_READ_ONLY BIT(0) + + int (*allocate_va_range)(struct i915_address_space *vm, + u64 start, u64 length); + void (*clear_range)(struct i915_address_space *vm, + u64 start, u64 length); + void (*insert_page)(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level cache_level, + u32 flags); + void (*insert_entries)(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); + void (*cleanup)(struct i915_address_space *vm); + + struct i915_vma_ops vma_ops; + + I915_SELFTEST_DECLARE(struct fault_attr fault_attr); + I915_SELFTEST_DECLARE(bool scrub_64K); +}; + +/* + * The Graphics Translation Table is the way in which GEN hardware translates a + * Graphics Virtual Address into a Physical Address. In addition to the normal + * collateral associated with any va->pa translations GEN hardware also has a + * portion of the GTT which can be mapped by the CPU and remain both coherent + * and correct (in cases like swizzling). That region is referred to as GMADR in + * the spec. + */ +struct i915_ggtt { + struct i915_address_space vm; + + struct io_mapping iomap; /* Mapping to our CPU mappable region */ + struct resource gmadr; /* GMADR resource */ + resource_size_t mappable_end; /* End offset that we can CPU map */ + + /** "Graphics Stolen Memory" holds the global PTEs */ + void __iomem *gsm; + void (*invalidate)(struct i915_ggtt *ggtt); + + /** PPGTT used for aliasing the PPGTT with the GTT */ + struct i915_ppgtt *alias; + + bool do_idle_maps; + + int mtrr; + + /** Bit 6 swizzling required for X tiling */ + u32 bit_6_swizzle_x; + /** Bit 6 swizzling required for Y tiling */ + u32 bit_6_swizzle_y; + + u32 pin_bias; + + unsigned int num_fences; + struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; + struct list_head fence_list; + + /** + * List of all objects in gtt_space, currently mmaped by userspace. + * All objects within this list must also be on bound_list. 
+ */ + struct list_head userfault_list; + + /* Manual runtime pm autosuspend delay for user GGTT mmaps */ + struct intel_wakeref_auto userfault_wakeref; + + struct mutex error_mutex; + struct drm_mm_node error_capture; + struct drm_mm_node uc_fw; +}; + +struct i915_ppgtt { + struct i915_address_space vm; + + struct i915_page_directory *pd; +}; + +#define i915_is_ggtt(vm) ((vm)->is_ggtt) + +static inline bool +i915_vm_is_4lvl(const struct i915_address_space *vm) +{ + return (vm->total - 1) >> 32; +} + +static inline bool +i915_vm_has_scratch_64K(struct i915_address_space *vm) +{ + return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K); +} + +static inline bool +i915_vm_has_cache_coloring(struct i915_address_space *vm) +{ + return i915_is_ggtt(vm) && vm->mm.color_adjust; +} + +static inline struct i915_ggtt * +i915_vm_to_ggtt(struct i915_address_space *vm) +{ + BUILD_BUG_ON(offsetof(struct i915_ggtt, vm)); + GEM_BUG_ON(!i915_is_ggtt(vm)); + return container_of(vm, struct i915_ggtt, vm); +} + +static inline struct i915_ppgtt * +i915_vm_to_ppgtt(struct i915_address_space *vm) +{ + BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm)); + GEM_BUG_ON(i915_is_ggtt(vm)); + return container_of(vm, struct i915_ppgtt, vm); +} + +static inline struct i915_address_space * +i915_vm_get(struct i915_address_space *vm) +{ + kref_get(&vm->ref); + return vm; +} + +void i915_vm_release(struct kref *kref); + +static inline void i915_vm_put(struct i915_address_space *vm) +{ + kref_put(&vm->ref, i915_vm_release); +} + +static inline struct i915_address_space * +i915_vm_open(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + atomic_inc(&vm->open); + return i915_vm_get(vm); +} + +static inline bool +i915_vm_tryopen(struct i915_address_space *vm) +{ + if (atomic_add_unless(&vm->open, 1, 0)) + return i915_vm_get(vm); + + return false; +} + +void __i915_vm_close(struct i915_address_space *vm); + +static inline void +i915_vm_close(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + if (atomic_dec_and_test(&vm->open)) + __i915_vm_close(vm); + + i915_vm_put(vm); +} + +void i915_address_space_init(struct i915_address_space *vm, int subclass); +void i915_address_space_fini(struct i915_address_space *vm); + +static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) +{ + const u32 mask = NUM_PTE(pde_shift) - 1; + + return (address >> PAGE_SHIFT) & mask; +} + +/* + * Helper to counts the number of PTEs within the given length. This count + * does not cross a page table boundary, so the max value would be + * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. 
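
As an illustrative aside (not part of the commit): the comment above, and the i915_pte_count() body that follows, clamp a PTE count so it never crosses a page-table boundary. Below is a standalone restatement of the same arithmetic with gen6-style parameters — PAGE_SHIFT 12 and a 22-bit PDE shift, so each table spans 4 MiB and holds 1024 PTEs — plus two worked cases; this is only a sketch of the math, not the kernel helper itself.

#include <stdio.h>

#define PAGE_SHIFT  12
#define PDE_SHIFT   22                               /* gen6: 4 MiB per page table */
#define NUM_PTE     (1u << (PDE_SHIFT - PAGE_SHIFT)) /* 1024 PTEs per table */

static unsigned int pte_index(unsigned long long addr)
{
    return (addr >> PAGE_SHIFT) & (NUM_PTE - 1);
}

/* Count PTEs covering [addr, addr + length) without crossing a table boundary. */
static unsigned int pte_count(unsigned long long addr, unsigned long long length)
{
    const unsigned long long mask = ~((1ull << PDE_SHIFT) - 1);
    unsigned long long end = addr + length;

    if ((addr & mask) != (end & mask))      /* range spills into the next table */
        return NUM_PTE - pte_index(addr);

    return pte_index(end) - pte_index(addr);
}

int main(void)
{
    /* Entirely inside one table: three 4K pages starting at 4 KiB. */
    printf("pte_count(0x1000, 0x3000)   = %u\n", pte_count(0x1000, 0x3000));   /* 3 */

    /* Crosses a 4 MiB boundary: clamped to the single PTE left in this table. */
    printf("pte_count(0x3ff000, 0x2000) = %u\n", pte_count(0x3ff000, 0x2000)); /* 1 */
    return 0;
}
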
+ */ +static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift) +{ + const u64 mask = ~((1ULL << pde_shift) - 1); + u64 end; + + GEM_BUG_ON(length == 0); + GEM_BUG_ON(offset_in_page(addr | length)); + + end = addr + length; + + if ((addr & mask) != (end & mask)) + return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift); + + return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); +} + +static inline u32 i915_pde_index(u64 addr, u32 shift) +{ + return (addr >> shift) & I915_PDE_MASK; +} + +static inline struct i915_page_table * +i915_pt_entry(const struct i915_page_directory * const pd, + const unsigned short n) +{ + return pd->entry[n]; +} + +static inline struct i915_page_directory * +i915_pd_entry(const struct i915_page_directory * const pdp, + const unsigned short n) +{ + return pdp->entry[n]; +} + +static inline dma_addr_t +i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) +{ + struct i915_page_dma *pt = ppgtt->pd->entry[n]; + + return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top])); +} + +void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt); + +int i915_ggtt_probe_hw(struct drm_i915_private *i915); +int i915_ggtt_init_hw(struct drm_i915_private *i915); +int i915_ggtt_enable_hw(struct drm_i915_private *i915); +void i915_ggtt_enable_guc(struct i915_ggtt *ggtt); +void i915_ggtt_disable_guc(struct i915_ggtt *ggtt); +int i915_init_ggtt(struct drm_i915_private *i915); +void i915_ggtt_driver_release(struct drm_i915_private *i915); + +static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) +{ + return ggtt->mappable_end > 0; +} + +int i915_ppgtt_init_hw(struct intel_gt *gt); + +struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt); + +void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915); +void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915); + +u64 gen8_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags); + +int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); +void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); + +#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) + +void +fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count); + +#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64)) +#define fill32_px(px, v) do { \ + u64 v__ = lower_32_bits(v); \ + fill_px((px), v__ << 32 | v__); \ +} while (0) + +int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp); +void cleanup_scratch_page(struct i915_address_space *vm); +void free_scratch(struct i915_address_space *vm); + +struct i915_page_table *alloc_pt(struct i915_address_space *vm); +struct i915_page_directory *alloc_pd(struct i915_address_space *vm); +struct i915_page_directory *__alloc_pd(size_t sz); + +void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd); + +#define free_px(vm, px) free_pd(vm, px_base(px)) + +void +__set_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_dma * const to, + u64 (*encode)(const dma_addr_t, const enum i915_cache_level)); + +#define set_pd_entry(pd, idx, to) \ + __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode) + +void +clear_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + const struct i915_page_scratch * const scratch); + +bool +release_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_table * const pt, + 
const struct i915_page_scratch * const scratch); +void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); + +int ggtt_set_pages(struct i915_vma *vma); +int ppgtt_set_pages(struct i915_vma *vma); +void clear_pages(struct i915_vma *vma); + +void gtt_write_workarounds(struct intel_gt *gt); + +void setup_private_pat(struct intel_uncore *uncore); + +static inline struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +} sgt_dma(struct i915_vma *vma) { + struct scatterlist *sg = vma->pages->sgl; + dma_addr_t addr = sg_dma_address(sg); + + return (struct sgt_dma){ sg, addr, addr + sg->length }; +} + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c deleted file mode 100644 index 6bcfa6456c45..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "intel_reset.h" -#include "i915_drv.h" - -struct hangcheck { - u64 acthd; - u32 ring; - u32 head; - enum intel_engine_hangcheck_action action; - unsigned long action_timestamp; - int deadlock; - struct intel_instdone instdone; - bool wedged:1; - bool stalled:1; -}; - -static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) -{ - u32 tmp = current_instdone | *old_instdone; - bool unchanged; - - unchanged = tmp == *old_instdone; - *old_instdone |= tmp; - - return unchanged; -} - -static bool subunits_stuck(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_instdone instdone; - struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; - bool stuck; - int slice; - int subslice; - - if (engine->id != RCS0) - return true; - - intel_engine_get_instdone(engine, &instdone); - - /* There might be unstable subunit states even when - * actual head is not moving. Filter out the unstable ones by - * accumulating the undone -> done transitions and only - * consider those as progress. 
- */ - stuck = instdone_unchanged(instdone.instdone, - &accu_instdone->instdone); - stuck &= instdone_unchanged(instdone.slice_common, - &accu_instdone->slice_common); - - for_each_instdone_slice_subslice(dev_priv, slice, subslice) { - stuck &= instdone_unchanged(instdone.sampler[slice][subslice], - &accu_instdone->sampler[slice][subslice]); - stuck &= instdone_unchanged(instdone.row[slice][subslice], - &accu_instdone->row[slice][subslice]); - } - - return stuck; -} - -static enum intel_engine_hangcheck_action -head_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - if (acthd != engine->hangcheck.acthd) { - - /* Clear subunit states on head movement */ - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - return ENGINE_ACTIVE_HEAD; - } - - if (!subunits_stuck(engine)) - return ENGINE_ACTIVE_SUBUNITS; - - return ENGINE_DEAD; -} - -static enum intel_engine_hangcheck_action -engine_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - struct drm_i915_private *dev_priv = engine->i915; - enum intel_engine_hangcheck_action ha; - u32 tmp; - - ha = head_stuck(engine, acthd); - if (ha != ENGINE_DEAD) - return ha; - - if (IS_GEN(dev_priv, 2)) - return ENGINE_DEAD; - - /* Is the chip hanging on a WAIT_FOR_EVENT? - * If so we can simply poke the RB_WAIT bit - * and break the hang. This should work on - * all but the second generation chipsets. - */ - tmp = ENGINE_READ(engine, RING_CTL); - if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, engine->mask, 0, - "stuck wait on %s", engine->name); - ENGINE_WRITE(engine, RING_CTL, tmp); - return ENGINE_WAIT_KICK; - } - - return ENGINE_DEAD; -} - -static void hangcheck_load_sample(struct intel_engine_cs *engine, - struct hangcheck *hc) -{ - hc->acthd = intel_engine_get_active_head(engine); - hc->ring = ENGINE_READ(engine, RING_START); - hc->head = ENGINE_READ(engine, RING_HEAD); -} - -static void hangcheck_store_sample(struct intel_engine_cs *engine, - const struct hangcheck *hc) -{ - engine->hangcheck.acthd = hc->acthd; - engine->hangcheck.last_ring = hc->ring; - engine->hangcheck.last_head = hc->head; -} - -static enum intel_engine_hangcheck_action -hangcheck_get_action(struct intel_engine_cs *engine, - const struct hangcheck *hc) -{ - if (intel_engine_is_idle(engine)) - return ENGINE_IDLE; - - if (engine->hangcheck.last_ring != hc->ring) - return ENGINE_ACTIVE_SEQNO; - - if (engine->hangcheck.last_head != hc->head) - return ENGINE_ACTIVE_SEQNO; - - return engine_stuck(engine, hc->acthd); -} - -static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, - struct hangcheck *hc) -{ - unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; - - hc->action = hangcheck_get_action(engine, hc); - - /* We always increment the progress - * if the engine is busy and still processing - * the same request, so that no single request - * can run indefinitely (such as a chain of - * batches). The only time we do not increment - * the hangcheck score on this ring, if this - * engine is in a legitimate wait for another - * engine. In that case the waiting engine is a - * victim and we want to be sure we catch the - * right culprit. Then every time we do kick - * the ring, make it as a progress as the seqno - * advancement might ensure and if not, it - * will catch the hanging engine. 
- */ - - switch (hc->action) { - case ENGINE_IDLE: - case ENGINE_ACTIVE_SEQNO: - /* Clear head and subunit states on seqno movement */ - hc->acthd = 0; - - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - /* Intentional fall through */ - case ENGINE_WAIT_KICK: - case ENGINE_WAIT: - engine->hangcheck.action_timestamp = jiffies; - break; - - case ENGINE_ACTIVE_HEAD: - case ENGINE_ACTIVE_SUBUNITS: - /* - * Seqno stuck with still active engine gets leeway, - * in hopes that it is just a long shader. - */ - timeout = I915_SEQNO_DEAD_TIMEOUT; - break; - - case ENGINE_DEAD: - break; - - default: - MISSING_CASE(hc->action); - } - - hc->stalled = time_after(jiffies, - engine->hangcheck.action_timestamp + timeout); - hc->wedged = time_after(jiffies, - engine->hangcheck.action_timestamp + - I915_ENGINE_WEDGED_TIMEOUT); -} - -static void hangcheck_declare_hang(struct drm_i915_private *i915, - intel_engine_mask_t hung, - intel_engine_mask_t stuck) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - char msg[80]; - int len; - - /* If some rings hung but others were still busy, only - * blame the hanging rings in the synopsis. - */ - if (stuck != hung) - hung &= ~stuck; - len = scnprintf(msg, sizeof(msg), - "%s on ", stuck == hung ? "no progress" : "hang"); - for_each_engine_masked(engine, i915, hung, tmp) - len += scnprintf(msg + len, sizeof(msg) - len, - "%s, ", engine->name); - msg[len-2] = '\0'; - - return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg); -} - -/* - * This is called when the chip hasn't reported back with completed - * batchbuffers in a long time. We keep track per ring seqno progress and - * if there are no progress, hangcheck score for that ring is increased. - * Further, acthd is inspected to see if the ring is stuck. On stuck case - * we kick the ring. If we see no progress on three subsequent calls - * we assume chip is wedged and try to fix it by resetting the chip. - */ -static void i915_hangcheck_elapsed(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), - gpu_error.hangcheck_work.work); - intel_engine_mask_t hung = 0, stuck = 0, wedged = 0; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - - if (!i915_modparams.enable_hangcheck) - return; - - if (!READ_ONCE(dev_priv->gt.awake)) - return; - - if (i915_terminally_wedged(dev_priv)) - return; - - wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm); - if (!wakeref) - return; - - /* As enabling the GPU requires fairly extensive mmio access, - * periodically arm the mmio checker to see if we are triggering - * any invalid access. 
- */ - intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore); - - for_each_engine(engine, dev_priv, id) { - struct hangcheck hc; - - intel_engine_signal_breadcrumbs(engine); - - hangcheck_load_sample(engine, &hc); - hangcheck_accumulate_sample(engine, &hc); - hangcheck_store_sample(engine, &hc); - - if (hc.stalled) { - hung |= engine->mask; - if (hc.action != ENGINE_DEAD) - stuck |= engine->mask; - } - - if (hc.wedged) - wedged |= engine->mask; - } - - if (GEM_SHOW_DEBUG() && (hung | stuck)) { - struct drm_printer p = drm_debug_printer("hangcheck"); - - for_each_engine(engine, dev_priv, id) { - if (intel_engine_is_idle(engine)) - continue; - - intel_engine_dump(engine, &p, "%s\n", engine->name); - } - } - - if (wedged) { - dev_err(dev_priv->drm.dev, - "GPU recovery timed out," - " cancelling all in-flight rendering.\n"); - GEM_TRACE_DUMP(); - i915_gem_set_wedged(dev_priv); - } - - if (hung) - hangcheck_declare_hang(dev_priv, hung, stuck); - - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - - /* Reset timer in case GPU hangs without another request being added */ - i915_queue_hangcheck(dev_priv); -} - -void intel_engine_init_hangcheck(struct intel_engine_cs *engine) -{ - memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); - engine->hangcheck.action_timestamp = jiffies; -} - -void intel_hangcheck_init(struct drm_i915_private *i915) -{ - INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, - i915_hangcheck_elapsed); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftest_hangcheck.c" -#endif diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c new file mode 100644 index 000000000000..ceb785b75c25 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/cpufreq.h> + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_llc.h" +#include "intel_sideband.h" + +struct ia_constants { + unsigned int min_gpu_freq; + unsigned int max_gpu_freq; + + unsigned int min_ring_freq; + unsigned int max_ia_freq; +}; + +static struct intel_gt *llc_to_gt(struct intel_llc *llc) +{ + return container_of(llc, struct intel_gt, llc); +} + +static unsigned int cpu_max_MHz(void) +{ + struct cpufreq_policy *policy; + unsigned int max_khz; + + policy = cpufreq_cpu_get(0); + if (policy) { + max_khz = policy->cpuinfo.max_freq; + cpufreq_cpu_put(policy); + } else { + /* + * Default to measured freq if none found, PCU will ensure we + * don't go over + */ + max_khz = tsc_khz; + } + + return max_khz / 1000; +} + +static bool get_ia_constants(struct intel_llc *llc, + struct ia_constants *consts) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct intel_rps *rps = &llc_to_gt(llc)->rps; + + if (rps->max_freq <= rps->min_freq) + return false; + + consts->max_ia_freq = cpu_max_MHz(); + + consts->min_ring_freq = + intel_uncore_read(llc_to_gt(llc)->uncore, DCLK) & 0xf; + /* convert DDR frequency from units of 266.6MHz to bandwidth */ + consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3); + + consts->min_gpu_freq = rps->min_freq; + consts->max_gpu_freq = rps->max_freq; + if (INTEL_GEN(i915) >= 9) { + /* Convert GT frequency to 50 HZ units */ + consts->min_gpu_freq /= GEN9_FREQ_SCALER; + consts->max_gpu_freq /= GEN9_FREQ_SCALER; + } + + return true; +} + +static void calc_ia_freq(struct intel_llc *llc, + unsigned int gpu_freq, + const struct ia_constants *consts, + unsigned int *out_ia_freq, + unsigned int 
*out_ring_freq) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + const int diff = consts->max_gpu_freq - gpu_freq; + unsigned int ia_freq = 0, ring_freq = 0; + + if (INTEL_GEN(i915) >= 9) { + /* + * ring_freq = 2 * GT. ring_freq is in 100MHz units + * No floor required for ring frequency on SKL. + */ + ring_freq = gpu_freq; + } else if (INTEL_GEN(i915) >= 8) { + /* max(2 * GT, DDR). NB: GT is 50MHz units */ + ring_freq = max(consts->min_ring_freq, gpu_freq); + } else if (IS_HASWELL(i915)) { + ring_freq = mult_frac(gpu_freq, 5, 4); + ring_freq = max(consts->min_ring_freq, ring_freq); + /* leave ia_freq as the default, chosen by cpufreq */ + } else { + const int min_freq = 15; + const int scale = 180; + + /* + * On older processors, there is no separate ring + * clock domain, so in order to boost the bandwidth + * of the ring, we need to upclock the CPU (ia_freq). + * + * For GPU frequencies less than 750MHz, + * just use the lowest ring freq. + */ + if (gpu_freq < min_freq) + ia_freq = 800; + else + ia_freq = consts->max_ia_freq - diff * scale / 2; + ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); + } + + *out_ia_freq = ia_freq; + *out_ring_freq = ring_freq; +} + +static void gen6_update_ring_freq(struct intel_llc *llc) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct ia_constants consts; + unsigned int gpu_freq; + + if (!get_ia_constants(llc, &consts)) + return; + + /* + * For each potential GPU frequency, load a ring frequency we'd like + * to use for memory access. We do this by specifying the IA frequency + * the PCU should use as a reference to determine the ring frequency. + */ + for (gpu_freq = consts.max_gpu_freq; + gpu_freq >= consts.min_gpu_freq; + gpu_freq--) { + unsigned int ia_freq, ring_freq; + + calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq); + sandybridge_pcode_write(i915, + GEN6_PCODE_WRITE_MIN_FREQ_TABLE, + ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | + ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | + gpu_freq); + } +} + +void intel_llc_enable(struct intel_llc *llc) +{ + if (HAS_LLC(llc_to_gt(llc)->i915)) + gen6_update_ring_freq(llc); +} + +void intel_llc_disable(struct intel_llc *llc) +{ + /* Currently there is no HW configuration to be done to disable. 
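
As an illustrative aside (not part of the commit): calc_ia_freq() above derives a ring and IA frequency for every GPU frequency step; on the oldest path the CPU request backs off from max_ia_freq by diff * 180 / 2 MHz per 50 MHz of GT deficit and is then expressed in 100 MHz units, with a flat 800 MHz request below GT step 15. A sketch of that legacy branch with made-up CPU and GT frequencies follows.

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

/*
 * Legacy (pre-HSW) branch of the table: below GT step 15 (750 MHz in 50 MHz
 * units) just request 800 MHz; otherwise back off from max_ia_freq by
 * diff * 180 / 2 MHz, then express the result in 100 MHz units for the PCU.
 */
static unsigned int legacy_ia_freq(unsigned int gpu_freq,
                                   unsigned int max_gpu_freq,
                                   unsigned int max_ia_freq_mhz)
{
    const int min_freq = 15;
    const int scale = 180;
    const int diff = max_gpu_freq - gpu_freq;
    unsigned int ia_freq;

    if (gpu_freq < min_freq)
        ia_freq = 800;
    else
        ia_freq = max_ia_freq_mhz - diff * scale / 2;

    return DIV_ROUND_CLOSEST(ia_freq, 100);
}

int main(void)
{
    /* Hypothetical platform: CPU max 3400 MHz, GT range 10..22 in 50 MHz units. */
    unsigned int gpu_freq;

    for (gpu_freq = 22; gpu_freq >= 10; gpu_freq--)
        printf("gpu_freq %2u -> ia_freq %2u (x100 MHz)\n",
               gpu_freq, legacy_ia_freq(gpu_freq, 22, 3400));
    return 0;
}
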
*/ +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_llc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_llc.h b/drivers/gpu/drm/i915/gt/intel_llc.h new file mode 100644 index 000000000000..ef09a890d2b7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_LLC_H +#define INTEL_LLC_H + +struct intel_llc; + +void intel_llc_enable(struct intel_llc *llc); +void intel_llc_disable(struct intel_llc *llc); + +#endif /* INTEL_LLC_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_llc_types.h b/drivers/gpu/drm/i915/gt/intel_llc_types.h new file mode 100644 index 000000000000..ecad4687b930 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc_types.h @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_LLC_TYPES_H +#define INTEL_LLC_TYPES_H + +struct intel_llc { +}; + +#endif /* INTEL_LLC_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 82b7ace62d97..a13a8c4b65ab 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -133,15 +133,19 @@ */ #include <linux/interrupt.h> -#include "gem/i915_gem_context.h" - #include "i915_drv.h" -#include "i915_gem_render_state.h" +#include "i915_perf.h" +#include "i915_trace.h" #include "i915_vgpu.h" +#include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" +#include "intel_ring.h" #include "intel_workarounds.h" #define RING_EXECLIST_QFULL (1 << 0x2) @@ -161,6 +165,15 @@ #define GEN8_CTX_STATUS_COMPLETED_MASK \ (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) + +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */ +#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */ +#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15) +#define GEN12_IDLE_CTX_ID 0x7FF +#define GEN12_CSB_CTX_VALID(csb_dw) \ + (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) + /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ #define WA_TAIL_DWORDS 2 @@ -214,12 +227,65 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) return container_of(engine, struct virtual_engine, base); } -static int execlists_context_deferred_alloc(struct intel_context *ce, - struct intel_engine_cs *engine); +static int __execlists_context_alloc(struct intel_context *ce, + struct intel_engine_cs *engine); + static void execlists_init_reg_state(u32 *reg_state, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring); + const struct intel_context *ce, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool close); +static void +__execlists_update_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine); + +static void mark_eio(struct i915_request *rq) +{ + if (i915_request_completed(rq)) + return; + + GEM_BUG_ON(i915_request_signaled(rq)); + + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); +} + +static struct i915_request * +active_request(const struct intel_timeline * const tl, struct i915_request *rq) +{ + struct i915_request *active = rq; + + rcu_read_lock(); + 
list_for_each_entry_continue_reverse(rq, &tl->requests, link) { + if (i915_request_completed(rq)) + break; + + active = rq; + } + rcu_read_unlock(); + + return active; +} + +static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_PREEMPT_ADDR); +} + +static inline void +ring_set_paused(const struct intel_engine_cs *engine, int state) +{ + /* + * We inspect HWS_PREEMPT with a semaphore inside + * engine->emit_fini_breadcrumb. If the dword is true, + * the ring is paused as the semaphore will busywait + * until the dword is false. + */ + engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; + if (state) + wmb(); +} static inline struct i915_priolist *to_priolist(struct rb_node *rb) { @@ -236,6 +302,17 @@ static int effective_prio(const struct i915_request *rq) int prio = rq_prio(rq); /* + * If this request is special and must not be interrupted at any + * cost, so be it. Note we are only checking the most recent request + * in the context and so may be masking an earlier vip request. It + * is hoped that under the conditions where nopreempt is used, this + * will not matter (i.e. all requests to that context will be + * nopreempt for as long as desired). + */ + if (i915_request_has_nopreempt(rq)) + prio = I915_PRIORITY_UNPREEMPTABLE; + + /* * On unwinding the active request, we give it a priority bump * if it has completed waiting on any semaphore. If we know that * the request has already started, we can prevent an unwanted @@ -245,6 +322,7 @@ static int effective_prio(const struct i915_request *rq) prio |= I915_PRIORITY_NOSEMAPHORE; /* Restrict mere WAIT boosts from triggering preemption */ + BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */ return prio | __NO_PREEMPTION; } @@ -271,10 +349,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, { int last_prio; - if (!engine->preempt_context) - return false; - - if (i915_request_completed(rq)) + if (!intel_engine_has_semaphores(engine)) return false; /* @@ -288,10 +363,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * However, the priority hint is a mere hint that we may need to * preempt. If that hint is stale or we may be trying to preempt * ourselves, ignore the request. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. */ - last_prio = effective_prio(rq); - if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint, - last_prio)) + last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); + if (engine->execlists.queue_priority_hint <= last_prio) return false; /* @@ -338,9 +418,6 @@ __maybe_unused static inline bool assert_priority_queue(const struct i915_request *prev, const struct i915_request *next) { - const struct intel_engine_execlists *execlists = - &prev->engine->execlists; - /* * Without preemption, the prev may refer to the still active element * which we refuse to let go. @@ -348,7 +425,7 @@ assert_priority_queue(const struct i915_request *prev, * Even with preemption, there are times when we think it is better not * to preempt and leave an ostensibly lower priority request in flight. 
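
As an illustrative aside (not part of the commit): with the need_preempt() change above, a queued request only triggers preemption when its priority hint is strictly greater than the running request's effective priority, which is floored at NORMAL - 1 so that equal-priority work keeps FIFO order. A tiny sketch of that comparison; PRIO_NORMAL and the sample values are hypothetical.

#include <stdio.h>

#define PRIO_NORMAL 0   /* hypothetical stand-in for I915_PRIORITY_NORMAL */

static int max_int(int a, int b) { return a > b ? a : b; }

/* Preempt only when the hint is strictly above the running request. */
static int need_preempt(int queue_priority_hint, int running_prio)
{
    int last_prio = max_int(running_prio, PRIO_NORMAL - 1);

    return queue_priority_hint > last_prio;
}

int main(void)
{
    printf("hint  1 vs running  0 -> %d\n", need_preempt(1, 0));  /* 1: preempt */
    printf("hint  0 vs running  0 -> %d\n", need_preempt(0, 0));  /* 0: keep FIFO */
    printf("hint  0 vs running -5 -> %d\n", need_preempt(0, -5)); /* 1: floored at NORMAL - 1 */
    return 0;
}
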
*/ - if (port_request(execlists->port) == prev) + if (i915_request_is_active(prev)) return true; return rq_prio(prev) >= rq_prio(next); @@ -383,48 +460,496 @@ assert_priority_queue(const struct i915_request *prev, static u64 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - struct i915_gem_context *ctx = ce->gem_context; u64 desc; - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); + desc = INTEL_LEGACY_32B_CONTEXT; + if (i915_vm_is_4lvl(ce->vm)) + desc = INTEL_LEGACY_64B_CONTEXT; + desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; - desc = ctx->desc_template; /* bits 0-11 */ - GEM_BUG_ON(desc & GENMASK_ULL(63, 12)); - - desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; - /* bits 12-31 */ - GEM_BUG_ON(desc & GENMASK_ULL(63, 32)); + desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + if (IS_GEN(engine->i915, 8)) + desc |= GEN8_CTX_L3LLC_COHERENT; + desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ /* * The following 32bits are copied into the OA reports (dword 2). * Consider updating oa_get_render_ctx_id in i915_perf.c when changing * anything below. */ if (INTEL_GEN(engine->i915) >= 11) { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; - /* bits 37-47 */ - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; /* bits 48-53 */ - /* TODO: decide what to do with SW counter (bits 55-60) */ - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; /* bits 61-63 */ - } else { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ } return desc; } -static void unwind_wa_tail(struct i915_request *rq) +static inline unsigned int dword_in_page(void *addr) +{ + return offset_in_page(addr) / sizeof(u32); +} + +static void set_offsets(u32 *regs, + const u8 *data, + const struct intel_engine_cs *engine, + bool clear) +#define NOP(x) (BIT(7) | (x)) +#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) +#define POSTED BIT(0) +#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) +#define REG16(x) \ + (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ + (((x) >> 2) & 0x7f) +#define END(x) 0, (x) { - rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); - assert_ring_tail_valid(rq->ring, rq->tail); + const u32 base = engine->mmio_base; + + while (*data) { + u8 count, flags; + + if (*data & BIT(7)) { /* skip */ + count = *data++ & ~BIT(7); + if (clear) + memset32(regs, MI_NOOP, count); + regs += count; + continue; + } + + count = *data & 0x3f; + flags = *data >> 6; + data++; + + *regs = MI_LOAD_REGISTER_IMM(count); + if (flags & POSTED) + *regs |= MI_LRI_FORCE_POSTED; + if (INTEL_GEN(engine->i915) >= 11) + *regs |= MI_LRI_CS_MMIO; + regs++; + + GEM_BUG_ON(!count); + do { + u32 offset = 0; + u8 v; + + do { + v = *data++; + offset <<= 7; + offset |= v & ~BIT(7); + } while (v & BIT(7)); + + regs[0] = base + (offset << 2); + if (clear) + regs[1] = 0; + regs += 2; + } while (--count); + } + + if (clear) { + u8 count = *++data; + + /* Clear past the tail for HW access */ + GEM_BUG_ON(dword_in_page(regs) > count); + memset32(regs, MI_NOOP, count - dword_in_page(regs)); + + /* Close the batch; used mainly by live_lrc_layout() */ + *regs = MI_BATCH_BUFFER_END; + if (INTEL_GEN(engine->i915) >= 10) + *regs |= BIT(0); + } +} + +static const u8 gen8_xcs_offsets[] = { + NOP(1), + LRI(11, 0), + REG16(0x244), + 
REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + + NOP(9), + LRI(9, 0), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(2, 0), + REG16(0x200), + REG(0x028), + + END(80) +}; + +static const u8 gen9_xcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, POSTED), + REG16(0x200), + + NOP(13), + LRI(44, POSTED), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + + END(176) +}; + +static const u8 gen12_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END(80) +}; + +static const u8 gen8_rcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, 0), + REG(0x0c8), + + END(80) +}; + +static const u8 gen9_rcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x34), + REG(0x30), + REG(0x38), + REG(0x3c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, 0), + REG(0xc8), + + NOP(13), + LRI(44, POSTED), + REG(0x28), + REG(0x9c), + REG(0xc0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + 
REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x68), + + END(176) +}; + +static const u8 gen11_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(1, POSTED), + REG(0x1b0), + + NOP(10), + LRI(1, 0), + REG(0x0c8), + + END(80) +}; + +static const u8 gen12_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END(80) +}; + +#undef END +#undef REG16 +#undef REG +#undef LRI +#undef NOP + +static const u8 *reg_offsets(const struct intel_engine_cs *engine) +{ + /* + * The gen12+ lists only have the registers we program in the basic + * default state. We rely on the context image using relative + * addressing to automatic fixup the register state between the + * physical engines for virtual engine. + */ + GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && + !intel_engine_has_relative_mmio(engine)); + + if (engine->class == RENDER_CLASS) { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_rcs_offsets; + else if (INTEL_GEN(engine->i915) >= 11) + return gen11_rcs_offsets; + else if (INTEL_GEN(engine->i915) >= 9) + return gen9_rcs_offsets; + else + return gen8_rcs_offsets; + } else { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_xcs_offsets; + else if (INTEL_GEN(engine->i915) >= 9) + return gen9_xcs_offsets; + else + return gen8_xcs_offsets; + } } static struct i915_request * @@ -439,15 +964,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->active.requests, sched.link) { - struct intel_engine_cs *owner; - if (i915_request_completed(rq)) - break; + continue; /* XXX */ __i915_request_unsubmit(rq); - unwind_wa_tail(rq); - - GEM_BUG_ON(rq->hw_context->inflight); /* * Push the request back into the queue for later resubmission. @@ -456,8 +976,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) * engine so that it can be moved across onto another physical * engine as load dictates. 
*/ - owner = rq->hw_context->engine; - if (likely(owner == engine)) { + if (likely(rq->execution_mask == engine->mask)) { GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); @@ -466,8 +985,26 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); list_move(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + active = rq; } else { + struct intel_engine_cs *owner = rq->context->engine; + + /* + * Decouple the virtual breadcrumb before moving it + * back to the virtual engine -- we don't want the + * request to complete in the background and try + * and cancel the breadcrumb on the virtual engine + * (instead of the old engine where it is linked)! + */ + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags)) { + spin_lock_nested(&rq->lock, + SINGLE_DEPTH_NESTING); + i915_request_cancel_breadcrumb(rq); + spin_unlock(&rq->lock); + } rq->engine = owner; owner->submit_request(rq); active = NULL; @@ -500,32 +1037,223 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } -inline void -execlists_user_begin(struct intel_engine_execlists *execlists, - const struct execlist_port *port) +static void intel_engine_context_in(struct intel_engine_cs *engine) { - execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER); + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + if (engine->stats.active++ == 0) + engine->stats.start = ktime_get(); + GEM_BUG_ON(engine->stats.active == 0); + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); } -inline void -execlists_user_end(struct intel_engine_execlists *execlists) +static void intel_engine_context_out(struct intel_engine_cs *engine) { - execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + ktime_t last; + + if (engine->stats.active && --engine->stats.active == 0) { + /* + * Decrement the active context count and in case GPU + * is now idle add up to the running total. + */ + last = ktime_sub(ktime_get(), engine->stats.start); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } else if (engine->stats.active == 0) { + /* + * After turning on engine stats, context out might be + * the first event in which case we account from the + * time stats gathering was turned on. 
+ */ + last = ktime_sub(ktime_get(), engine->stats.enabled_at); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); } -static inline void -execlists_context_schedule_in(struct i915_request *rq) +static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x60; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x54; + else if (engine->class == RENDER_CLASS) + return 0x58; + else + return -1; +} + +static void +execlists_check_context(const struct intel_context *ce, + const struct intel_engine_cs *engine) { - GEM_BUG_ON(rq->hw_context->inflight); + const struct intel_ring *ring = ce->ring; + u32 *regs = ce->lrc_reg_state; + bool valid = true; + int x; + + if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { + pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", + engine->name, + regs[CTX_RING_START], + i915_ggtt_offset(ring->vma)); + regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); + valid = false; + } + + if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != + (RING_CTL_SIZE(ring->size) | RING_VALID)) { + pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", + engine->name, + regs[CTX_RING_CTL], + (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; + valid = false; + } + + x = lrc_ring_mi_mode(engine); + if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { + pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", + engine->name, regs[x + 1]); + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; + valid = false; + } + + WARN_ONCE(!valid, "Invalid lrc state found before submission\n"); +} +static void restore_default_state(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + if (engine->pinned_default_state) + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + + execlists_init_reg_state(regs, ce, engine, ce->ring, false); +} + +static void reset_active(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct intel_context * const ce = rq->context; + u32 head; + + /* + * The executing context has been cancelled. We want to prevent + * further execution along this context and propagate the error on + * to anything depending on its results. + * + * In __i915_request_submit(), we apply the -EIO and remove the + * requests' payloads for any banned requests. But first, we must + * rewind the context back to the start of the incomplete request so + * that we do not jump back into the middle of the batch. + * + * We preserve the breadcrumbs and semaphores of the incomplete + * requests so that inter-timeline dependencies (i.e other timelines) + * remain correctly ordered. And we defer to __i915_request_submit() + * so that all asynchronous waits are correctly handled. 
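
As an illustrative aside (not part of the commit): RING_MI_MODE is a masked register, so the saved context image carries the write-enable mask in the upper 16 bits; the check regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING in execlists_check_context() above therefore reads as "STOP_RING is set and its write is armed", and the fix clears the value bit while setting the mask bit. Below is a sketch of that convention; the STOP_RING bit position used here is assumed purely for illustration.

#include <stdio.h>

#define STOP_RING (1u << 8)  /* bit position assumed here for illustration only */

/* Masked register: the high 16 bits select which low bits a write touches. */
static int stop_ring_armed(unsigned int reg)
{
    return reg & (reg >> 16) & STOP_RING;
}

/* The fix applied above: disable STOP_RING via a masked write. */
static unsigned int clear_stop_ring(unsigned int reg)
{
    reg &= ~STOP_RING;          /* value bit: STOP_RING off */
    reg |= STOP_RING << 16;     /* mask bit: make the write take effect */
    return reg;
}

int main(void)
{
    unsigned int reg = (STOP_RING << 16) | STOP_RING;   /* set and armed */

    printf("before: armed=%d (reg=0x%08x)\n", !!stop_ring_armed(reg), reg);
    reg = clear_stop_ring(reg);
    printf("after:  armed=%d (reg=0x%08x)\n", !!stop_ring_armed(reg), reg);
    return 0;
}
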
+ */ + ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n", + rq->fence.context, rq->fence.seqno); + + /* On resubmission of the active request, payload will be scrubbed */ + if (i915_request_completed(rq)) + head = rq->tail; + else + head = active_request(ce->timeline, rq)->head; + ce->ring->head = intel_ring_wrap(ce->ring, head); + intel_ring_update_space(ce->ring); + + /* Scrub the context image to prevent replaying the previous batch */ + restore_default_state(ce, engine); + __execlists_update_reg_state(ce, engine); + + /* We've switched away, so this should be a no-op, but intent matters */ + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; +} + +static inline struct intel_engine_cs * +__execlists_schedule_in(struct i915_request *rq) +{ + struct intel_engine_cs * const engine = rq->engine; + struct intel_context * const ce = rq->context; + + intel_context_get(ce); + + if (unlikely(intel_context_is_banned(ce))) + reset_active(rq, engine); + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + execlists_check_context(ce, engine); + + if (ce->tag) { + /* Use a fixed tag for OA and friends */ + ce->lrc_desc |= (u64)ce->tag << 32; + } else { + /* We don't need a strict matching tag, just different values */ + ce->lrc_desc &= ~GENMASK_ULL(47, 37); + ce->lrc_desc |= + (u64)(++engine->context_tag % NUM_CONTEXT_TAG) << + GEN11_SW_CTX_ID_SHIFT; + BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + } + + __intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); - intel_engine_context_in(rq->engine); - rq->hw_context->inflight = rq->engine; + intel_engine_context_in(engine); + + return engine; } -static void kick_siblings(struct i915_request *rq) +static inline struct i915_request * +execlists_schedule_in(struct i915_request *rq, int idx) { - struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine); + struct intel_context * const ce = rq->context; + struct intel_engine_cs *old; + + GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); + trace_i915_request_in(rq, idx); + + old = READ_ONCE(ce->inflight); + do { + if (!old) { + WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq)); + break; + } + } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old))); + + GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); + return i915_request_get(rq); +} + +static void kick_siblings(struct i915_request *rq, struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); struct i915_request *next = READ_ONCE(ve->request); if (next && next->execution_mask & ~rq->execution_mask) @@ -533,32 +1261,85 @@ static void kick_siblings(struct i915_request *rq) } static inline void -execlists_context_schedule_out(struct i915_request *rq, unsigned long status) +__execlists_schedule_out(struct i915_request *rq, + struct intel_engine_cs * const engine) { - rq->hw_context->inflight = NULL; - intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, status); - trace_i915_request_out(rq); + struct intel_context * const ce = rq->context; /* - * If this is part of a virtual engine, its next request may have - * been blocked waiting for access to the active context. We have - * to kick all the siblings again in case we need to switch (e.g. - * the next request is not runnable on this engine). Hopefully, - * we will already have submitted the next request before the - * tasklet runs and do not need to rebuild each virtual tree - * and kick everyone again. 
+ * NB process_csb() is not under the engine->active.lock and hence + * schedule_out can race with schedule_in meaning that we should + * refrain from doing non-trivial work here. */ - if (rq->engine != rq->hw_context->engine) - kick_siblings(rq); + + /* + * If we have just completed this context, the engine may now be + * idle and we want to re-enter powersaving. + */ + if (list_is_last(&rq->link, &ce->timeline->requests) && + i915_request_completed(rq)) + intel_engine_add_retire(engine, ce->timeline); + + intel_engine_context_out(engine); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + intel_gt_pm_put_async(engine->gt); + + /* + * If this is part of a virtual engine, its next request may + * have been blocked waiting for access to the active context. + * We have to kick all the siblings again in case we need to + * switch (e.g. the next request is not runnable on this + * engine). Hopefully, we will already have submitted the next + * request before the tasklet runs and do not need to rebuild + * each virtual tree and kick everyone again. + */ + if (ce->engine != engine) + kick_siblings(rq, ce); + + intel_context_put(ce); +} + +static inline void +execlists_schedule_out(struct i915_request *rq) +{ + struct intel_context * const ce = rq->context; + struct intel_engine_cs *cur, *old; + + trace_i915_request_out(rq); + + old = READ_ONCE(ce->inflight); + do + cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; + while (!try_cmpxchg(&ce->inflight, &old, cur)); + if (!cur) + __execlists_schedule_out(rq, old); + + i915_request_put(rq); } static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; + u64 desc = ce->lrc_desc; + u32 tail; - ce->lrc_reg_state[CTX_RING_TAIL + 1] = - intel_ring_set_tail(rq->ring, rq->tail); + /* + * WaIdleLiteRestore:bdw,skl + * + * We should never submit the context with the same RING_TAIL twice + * just in case we submit an empty ring, which confuses the HW. + * + * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of + * the normal request to be able to always advance the RING_TAIL on + * subsequent resubmissions (for lite restore). Should that fail us, + * and we try and submit the same tail again, force the context + * reload. + */ + tail = intel_ring_set_tail(rq->ring, rq->tail); + if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail)) + desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc_reg_state[CTX_RING_TAIL] = tail; + rq->tail = rq->wa_tail; /* * Make sure the context image is complete before we submit it to HW. @@ -569,14 +1350,11 @@ static u64 execlists_update_context(struct i915_request *rq) * may not be visible to the HW prior to the completion of the UC * register write and that we may begin execution from the context * before its image is complete leading to invalid PD chasing. - * - * Furthermore, Braswell, at least, wants a full mb to be sure that - * the writes are coherent in memory (visible to the GPU) prior to - * execution, and not just visible to other CPUs (as is the result of - * wmb). 
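
Aside: execlists_update_context() above guards WaIdleLiteRestore by forcing a full context reload whenever a context would be resubmitted with an unchanged RING_TAIL. A compile-and-run sketch of just that comparison; the CTX_DESC_FORCE_RESTORE bit value is a placeholder chosen for the sketch.

/* Minimal model of the "same tail -> force restore" rule; not driver code. */
#include <stdint.h>
#include <stdio.h>

#define CTX_DESC_FORCE_RESTORE (UINT64_C(1) << 2)   /* placeholder bit */

struct ctx_state {
	uint32_t ring_tail;     /* last RING_TAIL written into the context image */
	uint64_t desc;
};

static uint64_t update_context(struct ctx_state *ce, uint32_t new_tail)
{
	uint64_t desc = ce->desc;

	/* Resubmitting an identical tail confuses the HW, so reload the context. */
	if (ce->ring_tail == new_tail)
		desc |= CTX_DESC_FORCE_RESTORE;

	ce->ring_tail = new_tail;
	return desc;
}

int main(void)
{
	struct ctx_state ce = { .ring_tail = 0x40, .desc = 0x1000 };

	printf("tail 0x80: force=%d\n",
	       !!(update_context(&ce, 0x80) & CTX_DESC_FORCE_RESTORE));
	printf("tail 0x80 again: force=%d\n",
	       !!(update_context(&ce, 0x80) & CTX_DESC_FORCE_RESTORE));
	return 0;
}
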
*/ - mb(); - return ce->lrc_desc; + wmb(); + + ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + return desc; } static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) @@ -590,12 +1368,110 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc } } +static __maybe_unused void +trace_ports(const struct intel_engine_execlists *execlists, + const char *msg, + struct i915_request * const *ports) +{ + const struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + + if (!ports[0]) + return; + + ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg, + ports[0]->fence.context, + ports[0]->fence.seqno, + i915_request_completed(ports[0]) ? "!" : + i915_request_started(ports[0]) ? "*" : + "", + ports[1] ? ports[1]->fence.context : 0, + ports[1] ? ports[1]->fence.seqno : 0); +} + +static __maybe_unused bool +assert_pending_valid(const struct intel_engine_execlists *execlists, + const char *msg) +{ + struct i915_request * const *port, *rq; + struct intel_context *ce = NULL; + + trace_ports(execlists, msg, execlists->pending); + + if (!execlists->pending[0]) { + GEM_TRACE_ERR("Nothing pending for promotion!\n"); + return false; + } + + if (execlists->pending[execlists_num_ports(execlists)]) { + GEM_TRACE_ERR("Excess pending[%d] for promotion!\n", + execlists_num_ports(execlists)); + return false; + } + + for (port = execlists->pending; (rq = *port); port++) { + unsigned long flags; + bool ok = true; + + GEM_BUG_ON(!kref_read(&rq->fence.refcount)); + GEM_BUG_ON(!i915_request_is_active(rq)); + + if (ce == rq->context) { + GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n", + ce->timeline->fence_context, + port - execlists->pending); + return false; + } + ce = rq->context; + + /* Hold tightly onto the lock to prevent concurrent retires! */ + if (!spin_trylock_irqsave(&rq->lock, flags)) + continue; + + if (i915_request_completed(rq)) + goto unlock; + + if (i915_active_is_idle(&ce->active) && + !intel_context_is_barrier(ce)) { + GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n", + ce->timeline->fence_context, + port - execlists->pending); + ok = false; + goto unlock; + } + + if (!i915_vma_is_pinned(ce->state)) { + GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n", + ce->timeline->fence_context, + port - execlists->pending); + ok = false; + goto unlock; + } + + if (!i915_vma_is_pinned(ce->ring->vma)) { + GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n", + ce->timeline->fence_context, + port - execlists->pending); + ok = false; + goto unlock; + } + +unlock: + spin_unlock_irqrestore(&rq->lock, flags); + if (!ok) + return false; + } + + return ce; +} + static void execlists_submit_ports(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; - struct execlist_port *port = execlists->port; unsigned int n; + GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); + /* * We can skip acquiring intel_runtime_pm_get() here as it was taken * on our behalf by the request (see i915_gem_mark_busy()) and it will @@ -604,7 +1480,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) * that all ELSP are drained i.e. we have processed the CSB, * before allowing ourselves to idle and calling intel_runtime_pm_put(). 
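
Aside: assert_pending_valid() above walks the pending[] array before it is written to the ELSP and rejects obviously bad states: nothing queued, a missing NULL terminator, or the same context in adjacent ports. A simplified, self-contained version of those structural checks; the request and context types are reduced to what the checks need.

/* Reduced model of the pending[] structural checks; not the driver's types. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_PORTS 2

struct context { int id; };
struct request { struct context *ctx; };

static bool pending_valid(struct request *const *pending, unsigned int num_ports)
{
	const struct context *prev = NULL;

	if (!pending[0])
		return false;                   /* nothing to promote */

	if (pending[num_ports])
		return false;                   /* excess entries, no terminator */

	for (unsigned int n = 0; pending[n]; n++) {
		if (pending[n]->ctx == prev)
			return false;           /* same context in adjacent ports */
		prev = pending[n]->ctx;
	}
	return true;
}

int main(void)
{
	struct context a = { 1 }, b = { 2 };
	struct request r0 = { &a }, r1 = { &b }, r2 = { &a };
	struct request *good[MAX_PORTS + 1] = { &r0, &r1, NULL };
	struct request *dup[MAX_PORTS + 1]  = { &r0, &r2, NULL };

	printf("good: %d, duplicate ctx: %d\n",
	       pending_valid((struct request *const *)good, MAX_PORTS),
	       pending_valid((struct request *const *)dup, MAX_PORTS));
	return 0;
}
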
*/ - GEM_BUG_ON(!intel_wakeref_active(&engine->wakeref)); + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); /* * ELSQ note: the submit queue is not cleared after being submitted @@ -613,44 +1489,22 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) * of elsq entries, keep this in mind before changing the loop below. */ for (n = execlists_num_ports(execlists); n--; ) { - struct i915_request *rq; - unsigned int count; - u64 desc; - - rq = port_unpack(&port[n], &count); - if (rq) { - GEM_BUG_ON(count > !n); - if (!count++) - execlists_context_schedule_in(rq); - port_set(&port[n], port_pack(rq, count)); - desc = execlists_update_context(rq); - GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - - GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", - engine->name, n, - port[n].context_id, count, - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq), - rq_prio(rq)); - } else { - GEM_BUG_ON(!n); - desc = 0; - } + struct i915_request *rq = execlists->pending[n]; - write_desc(execlists, desc, n); + write_desc(execlists, + rq ? execlists_update_context(rq) : 0, + n); } /* we need to manually load the submit queue */ if (execlists->ctrl_reg) writel(EL_CTRL_LOAD, execlists->ctrl_reg); - - execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } static bool ctx_single_port_submission(const struct intel_context *ce) { return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - i915_gem_context_force_single_submission(ce->gem_context)); + intel_context_force_single_submission(ce)); } static bool can_merge_ctx(const struct intel_context *prev, @@ -668,110 +1522,35 @@ static bool can_merge_ctx(const struct intel_context *prev, static bool can_merge_rq(const struct i915_request *prev, const struct i915_request *next) { + GEM_BUG_ON(prev == next); GEM_BUG_ON(!assert_priority_queue(prev, next)); - if (!can_merge_ctx(prev->hw_context, next->hw_context)) - return false; - - return true; -} - -static void port_assign(struct execlist_port *port, struct i915_request *rq) -{ - GEM_BUG_ON(rq == port_request(port)); - - if (port_isset(port)) - i915_request_put(port_request(port)); - - port_set(port, port_pack(i915_request_get(rq), port_count(port))); -} - -static void inject_preempt_context(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - struct intel_context *ce = engine->preempt_context; - unsigned int n; - - GEM_BUG_ON(execlists->preempt_complete_status != - upper_32_bits(ce->lrc_desc)); - /* - * Switch to our empty preempt context so - * the state of the GPU is known (idle). + * We do not submit known completed requests. Therefore if the next + * request is already completed, we can pretend to merge it in + * with the previous context (and we will skip updating the ELSP + * and tracking). Thus hopefully keeping the ELSP full with active + * contexts, despite the best efforts of preempt-to-busy to confuse + * us. 
*/ - GEM_TRACE("%s\n", engine->name); - for (n = execlists_num_ports(execlists); --n; ) - write_desc(execlists, 0, n); - - write_desc(execlists, ce->lrc_desc, n); - - /* we need to manually load the submit queue */ - if (execlists->ctrl_reg) - writel(EL_CTRL_LOAD, execlists->ctrl_reg); - - execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); - execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); - - (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); -} + if (i915_request_completed(next)) + return true; -static void complete_preempt_context(struct intel_engine_execlists *execlists) -{ - GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); + if (unlikely((prev->fence.flags ^ next->fence.flags) & + (BIT(I915_FENCE_FLAG_NOPREEMPT) | + BIT(I915_FENCE_FLAG_SENTINEL)))) + return false; - if (inject_preempt_hang(execlists)) - return; + if (!can_merge_ctx(prev->context, next->context)) + return false; - execlists_cancel_port_requests(execlists); - __unwind_incomplete_requests(container_of(execlists, - struct intel_engine_cs, - execlists)); + return true; } static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { - u32 base = engine->mmio_base; - - /* Must match execlists_init_reg_state()! */ - - regs[CTX_CONTEXT_CONTROL] = - i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base)); - regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base)); - regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base)); - regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base)); - regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base)); - - regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base)); - regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base)); - regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base)); - regs[CTX_SECOND_BB_HEAD_U] = - i915_mmio_reg_offset(RING_SBBADDR_UDW(base)); - regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base)); - regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base)); - - regs[CTX_CTX_TIMESTAMP] = - i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base)); - regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3)); - regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3)); - regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2)); - regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2)); - regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1)); - regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1)); - regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - - if (engine->class == RENDER_CLASS) { - regs[CTX_RCS_INDIRECT_CTX] = - i915_mmio_reg_offset(RING_INDIRECT_CTX(base)); - regs[CTX_RCS_INDIRECT_CTX_OFFSET] = - i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base)); - regs[CTX_BB_PER_CTX_PTR] = - i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base)); - - regs[CTX_R_PWR_CLK_STATE] = - i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); - } + set_offsets(regs, reg_offsets(engine), engine, false); } static bool virtual_matches(const struct virtual_engine *ve, @@ -792,7 +1571,7 @@ static bool virtual_matches(const struct virtual_engine *ve, * we reuse the register offsets). This is a very small * hystersis on the greedy seelction algorithm. 
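
Aside: can_merge_rq(), added a little above, decides whether a request may share an ELSP port with the previous one: an already-completed request merges trivially, NOPREEMPT and SENTINEL flags must match, and otherwise the contexts must be identical. A hedged reduction of that decision to a pure function; the flag bit values are illustrative and the GVT single-port rule is left out.

/* Sketch of the port-merge decision; flag bits are illustrative only. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_NOPREEMPT (1u << 0)
#define FLAG_SENTINEL  (1u << 1)

struct req {
	const void *context;
	uint32_t flags;
	bool completed;
};

static bool can_merge(const struct req *prev, const struct req *next)
{
	/* A known-completed request never reaches the HW, so "merge" it freely. */
	if (next->completed)
		return true;

	/* No-preempt and sentinel requests must not share a port with others. */
	if ((prev->flags ^ next->flags) & (FLAG_NOPREEMPT | FLAG_SENTINEL))
		return false;

	/* Otherwise only requests of the same context can share a port. */
	return prev->context == next->context;
}

int main(void)
{
	int ctx_a, ctx_b;
	struct req p = { &ctx_a, 0, false };
	struct req same = { &ctx_a, 0, false };
	struct req other = { &ctx_b, 0, false };
	struct req sentinel = { &ctx_a, FLAG_SENTINEL, false };

	printf("same ctx: %d, other ctx: %d, sentinel: %d\n",
	       can_merge(&p, &same), can_merge(&p, &other), can_merge(&p, &sentinel));
	return 0;
}
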
*/ - inflight = READ_ONCE(ve->context.inflight); + inflight = intel_context_inflight(&ve->context); if (inflight && inflight != engine) return false; @@ -810,18 +1589,171 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, if (!list_empty(&ve->context.signal_link)) { list_move_tail(&ve->context.signal_link, &engine->breadcrumbs.signalers); - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); } spin_unlock(&old->breadcrumbs.irq_lock); } +static struct i915_request * +last_active(const struct intel_engine_execlists *execlists) +{ + struct i915_request * const *last = READ_ONCE(execlists->active); + + while (*last && i915_request_completed(*last)) + last++; + + return *last; +} + +static void defer_request(struct i915_request *rq, struct list_head * const pl) +{ + LIST_HEAD(list); + + /* + * We want to move the interrupted request to the back of + * the round-robin list (i.e. its priority level), but + * in doing so, we must then move all requests that were in + * flight and were waiting for the interrupted request to + * be run after it again. + */ + do { + struct i915_dependency *p; + + GEM_BUG_ON(i915_request_is_active(rq)); + list_move_tail(&rq->sched.link, pl); + + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + /* Leave semaphores spinning on the other engines */ + if (w->engine != rq->engine) + continue; + + /* No waiter should start before its signaler */ + GEM_BUG_ON(i915_request_started(w) && + !i915_request_completed(rq)); + + GEM_BUG_ON(i915_request_is_active(w)); + if (!i915_request_is_ready(w)) + continue; + + if (rq_prio(w) < rq_prio(rq)) + continue; + + GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +static void defer_active(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = __unwind_incomplete_requests(engine); + if (!rq) + return; + + defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); +} + +static bool +need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) +{ + int hint; + + if (!intel_engine_has_timeslices(engine)) + return false; + + if (list_is_last(&rq->sched.link, &engine->active.requests)) + return false; + + hint = max(rq_prio(list_next_entry(rq, sched.link)), + engine->execlists.queue_priority_hint); + + return hint >= effective_prio(rq); +} + +static int +switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) +{ + if (list_is_last(&rq->sched.link, &engine->active.requests)) + return INT_MIN; + + return rq_prio(list_next_entry(rq, sched.link)); +} + +static inline unsigned long +timeslice(const struct intel_engine_cs *engine) +{ + return READ_ONCE(engine->props.timeslice_duration_ms); +} + +static unsigned long +active_timeslice(const struct intel_engine_cs *engine) +{ + const struct i915_request *rq = *engine->execlists.active; + + if (!rq || i915_request_completed(rq)) + return 0; + + if (engine->execlists.switch_priority_hint < effective_prio(rq)) + return 0; + + return timeslice(engine); +} + +static void set_timeslice(struct intel_engine_cs *engine) +{ + if (!intel_engine_has_timeslices(engine)) + return; + + set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); +} + +static void record_preemption(struct intel_engine_execlists *execlists) +{ + (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); +} + +static 
unsigned long active_preempt_timeout(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = last_active(&engine->execlists); + if (!rq) + return 0; + + /* Force a fast reset for terminated contexts (ignoring sysfs!) */ + if (unlikely(intel_context_is_banned(rq->context))) + return 1; + + return READ_ONCE(engine->props.preempt_timeout_ms); +} + +static void set_preempt_timeout(struct intel_engine_cs *engine) +{ + if (!intel_engine_has_preempt_reset(engine)) + return; + + set_timer_ms(&engine->execlists.preempt, + active_preempt_timeout(engine)); +} + +static inline void clear_ports(struct i915_request **ports, int count) +{ + memset_p((void **)ports, NULL, count); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - const struct execlist_port * const last_port = - &execlists->port[execlists->port_mask]; - struct i915_request *last = port_request(port); + struct i915_request **port = execlists->pending; + struct i915_request ** const last_port = port + execlists->port_mask; + struct i915_request *last; struct rb_node *rb; bool submit = false; @@ -867,65 +1799,100 @@ static void execlists_dequeue(struct intel_engine_cs *engine) break; } + /* + * If the queue is higher priority than the last + * request in the currently active context, submit afresh. + * We will resubmit again afterwards in case we need to split + * the active context to interject the preemption request, + * i.e. we will retrigger preemption following the ack in case + * of trouble. + */ + last = last_active(execlists); if (last) { - /* - * Don't resubmit or switch until all outstanding - * preemptions (lite-restore) are seen. Then we - * know the next preemption status we see corresponds - * to this ELSP update. - */ - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); - GEM_BUG_ON(!port_count(&port[0])); + if (need_preempt(engine, last, rb)) { + ENGINE_TRACE(engine, + "preempting last=%llx:%lld, prio=%d, hint=%d\n", + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); + record_preemption(execlists); - /* - * If we write to ELSP a second time before the HW has had - * a chance to respond to the previous write, we can confuse - * the HW and hit "undefined behaviour". After writing to ELSP, - * we must then wait until we see a context-switch event from - * the HW to indicate that it has had a chance to respond. - */ - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) - return; + /* + * Don't let the RING_HEAD advance past the breadcrumb + * as we unwind (and until we resubmit) so that we do + * not accidentally tell it to go backwards. + */ + ring_set_paused(engine, 1); - if (need_preempt(engine, last, rb)) { - inject_preempt_context(engine); - return; - } + /* + * Note that we have not stopped the GPU at this point, + * so we are unwinding the incomplete requests as they + * remain inflight and so by the time we do complete + * the preemption, some of the unwound requests may + * complete! + */ + __unwind_incomplete_requests(engine); - /* - * In theory, we could coalesce more requests onto - * the second port (the first port is active, with - * no preemptions pending). 
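
Aside: active_preempt_timeout() and set_preempt_timeout() above arm a timer whenever preemption is requested: a banned context gets a 1 ms forced-reset window, everything else the configured preempt_timeout_ms. A standalone sketch of that policy, with the kernel timer reduced to a returned deadline in milliseconds; 640 below is only an example configured value.

/* Sketch of the preempt-timeout policy; timer replaced by a plain deadline. */
#include <stdbool.h>
#include <stdio.h>

struct active_req {
	bool banned;            /* context has been banned/terminated */
};

static unsigned long preempt_timeout_ms(const struct active_req *rq,
					unsigned long configured_ms)
{
	if (!rq)
		return 0;       /* nothing executing: no timer */

	/* Force a fast reset for terminated contexts (ignoring the sysfs value). */
	if (rq->banned)
		return 1;

	return configured_ms;
}

int main(void)
{
	struct active_req normal = { false }, banned = { true };

	printf("normal: %lums, banned: %lums, idle: %lums\n",
	       preempt_timeout_ms(&normal, 640),
	       preempt_timeout_ms(&banned, 640),
	       preempt_timeout_ms(NULL, 640));
	return 0;
}
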
However, that means we - * then have to deal with the possible lite-restore - * of the second port (as we submit the ELSP, there - * may be a context-switch) but also we may complete - * the resubmission before the context-switch. Ergo, - * coalescing onto the second port will cause a - * preemption event, but we cannot predict whether - * that will affect port[0] or port[1]. - * - * If the second port is already active, we can wait - * until the next context-switch before contemplating - * new requests. The GPU will be busy and we should be - * able to resubmit the new ELSP before it idles, - * avoiding pipeline bubbles (momentary pauses where - * the driver is unable to keep up the supply of new - * work). However, we have to double check that the - * priorities of the ports haven't been switch. - */ - if (port_count(&port[1])) - return; + /* + * If we need to return to the preempted context, we + * need to skip the lite-restore and force it to + * reload the RING_TAIL. Otherwise, the HW has a + * tendency to ignore us rewinding the TAIL to the + * end of an earlier request. + */ + last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE; + last = NULL; + } else if (need_timeslice(engine, last) && + timer_expired(&engine->execlists.timer)) { + ENGINE_TRACE(engine, + "expired last=%llx:%lld, prio=%d, hint=%d\n", + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); + + ring_set_paused(engine, 1); + defer_active(engine); - /* - * WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_fini_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; + /* + * Unlike for preemption, if we rewind and continue + * executing the same context as previously active, + * the order of execution will remain the same and + * the tail will only advance. We do not need to + * force a full context restore, as a lite-restore + * is sufficient to resample the monotonic TAIL. + * + * If we switch to any other context, similarly we + * will not rewind TAIL of current context, and + * normal save/restore will preserve state and allow + * us to later continue executing the same request. + */ + last = NULL; + } else { + /* + * Otherwise if we already have a request pending + * for execution after the current one, we can + * just wait until the next CS event before + * queuing more. In either case we will force a + * lite-restore preemption event, but if we wait + * we hopefully coalesce several updates into a single + * submission. + */ + if (!list_is_last(&last->sched.link, + &engine->active.requests)) { + /* + * Even if ELSP[1] is occupied and not worthy + * of timeslices, our queue might be. 
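
Aside: the timeslice branch above expires the running context when the timer fires and need_timeslice() reports equal-or-higher-priority work waiting behind it. A reduced version of that priority comparison; priorities are plain ints here and the effective_prio() and list bookkeeping details are omitted.

/* Reduced need_timeslice()-style check. */
#include <stdbool.h>
#include <stdio.h>

static bool need_timeslice(int running_prio, int next_in_context_prio,
			   int queue_priority_hint, bool has_timeslices)
{
	int hint;

	if (!has_timeslices)
		return false;

	/* Highest-priority work that could run if we yielded the engine. */
	hint = next_in_context_prio > queue_priority_hint ?
	       next_in_context_prio : queue_priority_hint;

	/* Slice only if that work is at least as important as what is running. */
	return hint >= running_prio;
}

int main(void)
{
	printf("equal prio waiting: %d\n", need_timeslice(0, 0, -100, true));
	printf("only lower prio:    %d\n", need_timeslice(0, -1, -100, true));
	return 0;
}
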
+ */ + if (!execlists->timer.expires && + need_timeslice(engine, last)) + set_timer_ms(&execlists->timer, + timeslice(engine)); + + return; + } + } } while (rb) { /* XXX virtual is always taking precedence */ @@ -946,7 +1913,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq != ve->request); GEM_BUG_ON(rq->engine != &ve->base); - GEM_BUG_ON(rq->hw_context != &ve->context); + GEM_BUG_ON(rq->context != &ve->context); if (rq_prio(rq) >= queue_prio(execlists)) { if (!virtual_matches(ve, rq, engine)) { @@ -957,17 +1924,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - return; /* leave this rq for another engine */ + return; /* leave this for another */ } - GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n", - engine->name, - rq->fence.context, - rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : - "", - yesno(engine != ve->siblings[0])); + ENGINE_TRACE(engine, + "virtual rq=%llx:%lld%s, new engine? %s\n", + rq->fence.context, + rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + yesno(engine != ve->siblings[0])); ve->request = NULL; ve->base.execlists.queue_priority_hint = INT_MIN; @@ -982,7 +1949,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) unsigned int n; GEM_BUG_ON(READ_ONCE(ve->context.inflight)); - virtual_update_register_offsets(regs, engine); + + if (!intel_engine_has_relative_mmio(engine)) + virtual_update_register_offsets(regs, + engine); if (!list_empty(&ve->context.signals)) virtual_xfer_breadcrumbs(ve, engine); @@ -1005,10 +1975,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(ve->siblings[0] != engine); } - __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); - submit = true; - last = rq; + if (__i915_request_submit(rq)) { + submit = true; + last = rq; + } + i915_request_put(rq); + + /* + * Hmm, we have a bunch of virtual engine requests, + * but the first one was already completed (thanks + * preempt-to-busy!). Keep looking at the veng queue + * until we have no more relevant requests (i.e. + * the normal submit queue has higher priority). + */ + if (!submit) { + spin_unlock(&ve->base.active.lock); + rb = rb_first_cached(&execlists->virtual); + continue; + } } spin_unlock(&ve->base.active.lock); @@ -1021,6 +2005,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { + bool merge = true; + /* * Can we combine this request with the current port? * It has to be the same context/ringbuffer and not @@ -1046,7 +2032,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * same LRCA, i.e. we must submit 2 different * contexts if we submit 2 ELSP. */ - if (last->hw_context == rq->hw_context) + if (last->context == rq->context) + goto done; + + if (i915_request_has_sentinel(last)) goto done; /* @@ -1056,23 +2045,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the same context (even though a different * request) to the second port. 
*/ - if (ctx_single_port_submission(last->hw_context) || - ctx_single_port_submission(rq->hw_context)) + if (ctx_single_port_submission(last->context) || + ctx_single_port_submission(rq->context)) goto done; - - if (submit) - port_assign(port, last); - port++; - - GEM_BUG_ON(port_isset(port)); + merge = false; } - __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); + if (__i915_request_submit(rq)) { + if (!merge) { + *port = execlists_schedule_in(last, port - execlists->pending); + port++; + last = NULL; + } - last = rq; - submit = true; + GEM_BUG_ON(last && + !can_merge_ctx(last->context, + rq->context)); + + submit = true; + last = rq; + } } rb_erase_cached(&p->node, &execlists->queue); @@ -1099,52 +2092,47 @@ done: execlists->queue_priority_hint = queue_prio(execlists); if (submit) { - port_assign(port, last); - execlists_submit_ports(engine); - } + *port = execlists_schedule_in(last, port - execlists->pending); + execlists->switch_priority_hint = + switch_prio(engine, *execlists->pending); - /* We must always keep the beast fed if we have work piled up */ - GEM_BUG_ON(rb_first_cached(&execlists->queue) && - !port_isset(execlists->port)); + /* + * Skip if we ended up with exactly the same set of requests, + * e.g. trying to timeslice a pair of ordered contexts + */ + if (!memcmp(execlists->active, execlists->pending, + (port - execlists->pending + 1) * sizeof(*port))) { + do + execlists_schedule_out(fetch_and_zero(port)); + while (port-- != execlists->pending); - /* Re-evaluate the executing context setup after each preemptive kick */ - if (last) - execlists_user_begin(execlists, execlists->port); + goto skip_submit; + } + clear_ports(port + 1, last_port - port); - /* If the engine is now idle, so should be the flag; and vice versa. */ - GEM_BUG_ON(execlists_is_active(&engine->execlists, - EXECLISTS_ACTIVE_USER) == - !port_isset(engine->execlists.port)); + execlists_submit_ports(engine); + set_preempt_timeout(engine); + } else { +skip_submit: + ring_set_paused(engine, 0); + } } -void -execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) +static void +cancel_port_requests(struct intel_engine_execlists * const execlists) { - struct execlist_port *port = execlists->port; - unsigned int num_ports = execlists_num_ports(execlists); + struct i915_request * const *port; - while (num_ports-- && port_isset(port)) { - struct i915_request *rq = port_request(port); + for (port = execlists->pending; *port; port++) + execlists_schedule_out(*port); + clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)); - GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n", - rq->engine->name, - (unsigned int)(port - execlists->port), - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq)); + /* Mark the end of active before we overwrite *active */ + for (port = xchg(&execlists->active, execlists->pending); *port; port++) + execlists_schedule_out(*port); + clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); - GEM_BUG_ON(!execlists->active); - execlists_context_schedule_out(rq, - i915_request_completed(rq) ? 
- INTEL_CONTEXT_SCHEDULE_OUT : - INTEL_CONTEXT_SCHEDULE_PREEMPTED); - - i915_request_put(rq); - - memset(port, 0, sizeof(*port)); - port++; - } - - execlists_clear_all_active(execlists); + WRITE_ONCE(execlists->active, execlists->inflight); } static inline void @@ -1160,16 +2148,83 @@ reset_in_progress(const struct intel_engine_execlists *execlists) return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); } +/* + * Starting with Gen12, the status has a new format: + * + * bit 0: switched to new queue + * bit 1: reserved + * bit 2: semaphore wait mode (poll or signal), only valid when + * switch detail is set to "wait on semaphore" + * bits 3-5: engine class + * bits 6-11: engine instance + * bits 12-14: reserved + * bits 15-25: sw context id of the lrc the GT switched to + * bits 26-31: sw counter of the lrc the GT switched to + * bits 32-35: context switch detail + * - 0: ctx complete + * - 1: wait on sync flip + * - 2: wait on vblank + * - 3: wait on scanline + * - 4: wait on semaphore + * - 5: context preempted (not on SEMAPHORE_WAIT or + * WAIT_FOR_EVENT) + * bit 36: reserved + * bits 37-43: wait detail (for switch detail 1 to 4) + * bits 44-46: reserved + * bits 47-57: sw context id of the lrc the GT switched away from + * bits 58-63: sw counter of the lrc the GT switched away from + */ +static inline bool +gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) +{ + u32 lower_dw = csb[0]; + u32 upper_dw = csb[1]; + bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw); + bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw); + bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; + + /* + * The context switch detail is not guaranteed to be 5 when a preemption + * occurs, so we can't just check for that. The check below works for + * all the cases we care about, including preemptions of WAIT + * instructions and lite-restore. Preempt-to-idle via the CTRL register + * would require some extra handling, but we don't support that. + */ + if (!ctx_away_valid || new_queue) { + GEM_BUG_ON(!ctx_to_valid); + return true; + } + + /* + * switch detail = 5 is covered by the case above and we do not expect a + * context switch on an unsuccessful wait instruction since we always + * use polling mode. + */ + GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); + return false; +} + +static inline bool +gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) +{ + return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); +} + static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; const u32 * const buf = execlists->csb_status; const u8 num_entries = execlists->csb_size; u8 head, tail; - lockdep_assert_held(&engine->active.lock); - GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); + /* + * As we modify our execlists state tracking we require exclusive + * access. Either we are inside the tasklet, or the tasklet is disabled + * and we assume that is only inside the reset paths and so serialised. + */ + GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && + !reset_in_progress(execlists)); + GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); /* * Note that csb_write, csb_status may be either in HWSP or mmio. 
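
Aside: the Gen12 CSB layout documented in the comment above can be unpacked with plain shifts and masks, roughly the way gen12_csb_parse() consumes it. The field positions below are taken from that comment; the struct and helper names are local to this sketch and not driver API.

/* Decode the documented Gen12 CSB fields from one 64-bit entry (two dwords). */
#include <stdint.h>
#include <stdio.h>

struct gen12_csb {
	unsigned int new_queue;         /* bit 0: switched to new queue */
	unsigned int to_ctx_id;         /* bits 15-25: sw context id switched to */
	unsigned int to_counter;        /* bits 26-31 */
	unsigned int switch_detail;     /* bits 32-35 */
	unsigned int away_ctx_id;       /* bits 47-57: sw context id switched away from */
	unsigned int away_counter;      /* bits 58-63 */
};

static struct gen12_csb decode_csb(uint32_t lower_dw, uint32_t upper_dw)
{
	uint64_t e = ((uint64_t)upper_dw << 32) | lower_dw;
	struct gen12_csb csb = {
		.new_queue     = e & 1,
		.to_ctx_id     = (e >> 15) & 0x7ff,
		.to_counter    = (e >> 26) & 0x3f,
		.switch_detail = (e >> 32) & 0xf,
		.away_ctx_id   = (e >> 47) & 0x7ff,
		.away_counter  = (e >> 58) & 0x3f,
	};
	return csb;
}

int main(void)
{
	/* Arbitrary example entry: switched to a new queue, context id 5. */
	struct gen12_csb csb = decode_csb((5u << 15) | 1u, 0);

	printf("new_queue=%u to_ctx=%u detail=%u away_ctx=%u\n",
	       csb.new_queue, csb.to_ctx_id, csb.switch_detail, csb.away_ctx_id);
	return 0;
}
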
@@ -1183,7 +2238,7 @@ static void process_csb(struct intel_engine_cs *engine) */ head = execlists->csb_head; tail = READ_ONCE(*execlists->csb_write); - GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail); + ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); if (unlikely(head == tail)) return; @@ -1198,9 +2253,7 @@ static void process_csb(struct intel_engine_cs *engine) rmb(); do { - struct i915_request *rq; - unsigned int status; - unsigned int count; + bool promote; if (++head == num_entries) head = 0; @@ -1223,68 +2276,41 @@ static void process_csb(struct intel_engine_cs *engine) * status notifier. */ - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", - engine->name, head, - buf[2 * head + 0], buf[2 * head + 1], - execlists->active); - - status = buf[2 * head]; - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | - GEN8_CTX_STATUS_PREEMPTED)) - execlists_set_active(execlists, - EXECLISTS_ACTIVE_HWACK); - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) - execlists_clear_active(execlists, - EXECLISTS_ACTIVE_HWACK); - - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) - continue; - - /* We should never get a COMPLETED | IDLE_ACTIVE! */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); - - if (status & GEN8_CTX_STATUS_COMPLETE && - buf[2*head + 1] == execlists->preempt_complete_status) { - GEM_TRACE("%s preempt-idle\n", engine->name); - complete_preempt_context(execlists); - continue; - } - - if (status & GEN8_CTX_STATUS_PREEMPTED && - execlists_is_active(execlists, - EXECLISTS_ACTIVE_PREEMPT)) - continue; - - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); - - rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", - engine->name, - port->context_id, count, - rq ? rq->fence.context : 0, - rq ? rq->fence.seqno : 0, - rq ? hwsp_seqno(rq) : 0, - rq ? rq_prio(rq) : 0); + ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", + head, buf[2 * head + 0], buf[2 * head + 1]); - /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); + if (INTEL_GEN(engine->i915) >= 12) + promote = gen12_csb_parse(execlists, buf + 2 * head); + else + promote = gen8_csb_parse(execlists, buf + 2 * head); + if (promote) { + struct i915_request * const *old = execlists->active; + + /* Point active to the new ELSP; prevent overwriting */ + WRITE_ONCE(execlists->active, execlists->pending); + + if (!inject_preempt_hang(execlists)) + ring_set_paused(engine, 0); + + /* cancel old inflight, prepare for switch */ + trace_ports(execlists, "preempted", old); + while (*old) + execlists_schedule_out(*old++); + + /* switch pending to inflight */ + GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); + WRITE_ONCE(execlists->active, + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending))); + + WRITE_ONCE(execlists->pending[0], NULL); + } else { + GEM_BUG_ON(!*execlists->active); - GEM_BUG_ON(count == 0); - if (--count == 0) { - /* - * On the final event corresponding to the - * submission of this context, we expect either - * an element-switch event or a completion - * event (and on completion, the active-idle - * marker). No more preemptions, lite-restore - * or otherwise. 
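
Aside: on a "promote" event, process_csb() above retires whatever was in execlists->active, copies pending[] into inflight[], and points active at inflight. A userspace model of that array bookkeeping, with strings standing in for requests and the WRITE_ONCE/memory-ordering and tracing concerns of the real code deliberately left out.

/* Model of the active/pending/inflight bookkeeping on a promote event. */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define NUM_PORTS 2

struct exec_state {
	const char *inflight[NUM_PORTS + 1];
	const char *pending[NUM_PORTS + 1];
	const char **active;            /* points at inflight[] or pending[] */
};

static void promote(struct exec_state *el)
{
	/* Retire whatever the old ELSP was running. */
	for (const char **old = el->active; *old; old++)
		printf("schedule_out(%s)\n", *old);

	/* The pending submission becomes the new inflight set. */
	memcpy(el->inflight, el->pending, sizeof(el->pending));
	el->active = el->inflight;
	el->pending[0] = NULL;
}

int main(void)
{
	struct exec_state el = {
		.inflight = { "rq0", NULL },
		.pending  = { "rq1", "rq2", NULL },
	};
	el.active = el.inflight;

	promote(&el);
	printf("now running: %s, %s\n", el.active[0], el.active[1]);
	return 0;
}
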
- */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + /* port0 completed, advanced to port1 */ + trace_ports(execlists, "completed", execlists->active); /* * We rely on the hardware being strongly @@ -1292,26 +2318,17 @@ static void process_csb(struct intel_engine_cs *engine) * coherent (visible from the CPU) before the * user interrupt and CSB is processed. */ - GEM_BUG_ON(!i915_request_completed(rq)); - - execlists_context_schedule_out(rq, - INTEL_CONTEXT_SCHEDULE_OUT); - i915_request_put(rq); + GEM_BUG_ON(!i915_request_completed(*execlists->active) && + !reset_in_progress(execlists)); + execlists_schedule_out(*execlists->active++); - GEM_TRACE("%s completed ctx=%d\n", - engine->name, port->context_id); - - port = execlists_port_complete(execlists, port); - if (port_isset(port)) - execlists_user_begin(execlists, port); - else - execlists_user_end(execlists); - } else { - port_set(port, port_pack(rq, count)); + GEM_BUG_ON(execlists->active - execlists->inflight > + execlists_num_ports(execlists)); } } while (head != tail); execlists->csb_head = head; + set_timeslice(engine); /* * Gen11 has proven to fail wrt global observation point between @@ -1330,10 +2347,356 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { lockdep_assert_held(&engine->active.lock); - - process_csb(engine); - if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) + if (!engine->execlists.pending[0]) { + rcu_read_lock(); /* protect peeking at execlists->active */ execlists_dequeue(engine); + rcu_read_unlock(); + } +} + +static void __execlists_hold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + if (i915_request_is_active(rq)) + __i915_request_unsubmit(rq); + + RQ_TRACE(rq, "on hold\n"); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + list_move_tail(&rq->sched.link, &rq->engine->active.hold); + i915_request_set_hold(rq); + + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + /* Leave semaphores spinning on the other engines */ + if (w->engine != rq->engine) + continue; + + if (!i915_request_is_ready(w)) + continue; + + if (i915_request_completed(w)) + continue; + + if (i915_request_on_hold(rq)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +static bool execlists_hold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + if (i915_request_completed(rq)) { /* too late! */ + rq = NULL; + goto unlock; + } + + if (rq->engine != engine) { /* preempted virtual engine */ + struct virtual_engine *ve = to_virtual_engine(rq->engine); + + /* + * intel_context_inflight() is only protected by virtue + * of process_csb() being called only by the tasklet (or + * directly from inside reset while the tasklet is suspended). + * Assert that neither of those are allowed to run while we + * poke at the request queues. + */ + GEM_BUG_ON(!reset_in_progress(&engine->execlists)); + + /* + * An unsubmitted request along a virtual engine will + * remain on the active (this) engine until we are able + * to process the context switch away (and so mark the + * context as no longer in flight). 
That cannot have happened + * yet, otherwise we would not be hanging! + */ + spin_lock(&ve->base.active.lock); + GEM_BUG_ON(intel_context_inflight(rq->context) != engine); + GEM_BUG_ON(ve->request != rq); + ve->request = NULL; + spin_unlock(&ve->base.active.lock); + i915_request_put(rq); + + rq->engine = engine; + } + + /* + * Transfer this request onto the hold queue to prevent it + * being resumbitted to HW (and potentially completed) before we have + * released it. Since we may have already submitted following + * requests, we need to remove those as well. + */ + GEM_BUG_ON(i915_request_on_hold(rq)); + GEM_BUG_ON(rq->engine != engine); + __execlists_hold(rq); + +unlock: + spin_unlock_irq(&engine->active.lock); + return rq; +} + +static bool hold_request(const struct i915_request *rq) +{ + struct i915_dependency *p; + + /* + * If one of our ancestors is on hold, we must also be on hold, + * otherwise we will bypass it and execute before it. + */ + list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { + const struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); + + if (s->engine != rq->engine) + continue; + + if (i915_request_on_hold(s)) + return true; + } + + return false; +} + +static void __execlists_unhold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + GEM_BUG_ON(!i915_request_on_hold(rq)); + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); + + i915_request_clear_hold(rq); + list_move_tail(&rq->sched.link, + i915_sched_lookup_priolist(rq->engine, + rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + RQ_TRACE(rq, "hold release\n"); + + /* Also release any children on this engine that are ready */ + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + if (w->engine != rq->engine) + continue; + + if (!i915_request_on_hold(rq)) + continue; + + /* Check that no other parents are also on hold */ + if (hold_request(rq)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +static void execlists_unhold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + /* + * Move this request back to the priority queue, and all of its + * children and grandchildren that were suspended along with it. + */ + __execlists_unhold(rq); + + if (rq_prio(rq) > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = rq_prio(rq); + tasklet_hi_schedule(&engine->execlists.tasklet); + } + + spin_unlock_irq(&engine->active.lock); +} + +struct execlists_capture { + struct work_struct work; + struct i915_request *rq; + struct i915_gpu_coredump *error; +}; + +static void execlists_capture_work(struct work_struct *work) +{ + struct execlists_capture *cap = container_of(work, typeof(*cap), work); + const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; + struct intel_engine_cs *engine = cap->rq->engine; + struct intel_gt_coredump *gt = cap->error->gt; + struct intel_engine_capture_vma *vma; + + /* Compress all the objects attached to the request, slow! 
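
Aside: hold_request() above refuses to queue a request whose same-engine signaler is already on hold, so held work cannot be bypassed by its dependents. A reduced version over an invented signaler array; the driver walks i915_dependency lists instead.

/* Reduced hold_request(): a request inherits "on hold" from same-engine signalers. */
#include <stdbool.h>
#include <stdio.h>

struct req {
	int engine;
	bool on_hold;
	const struct req *signalers[4]; /* requests this one waits on */
	int nsignalers;
};

static bool must_hold(const struct req *rq)
{
	for (int i = 0; i < rq->nsignalers; i++) {
		const struct req *s = rq->signalers[i];

		if (s->engine != rq->engine)
			continue;       /* cross-engine waits are handled by fences */
		if (s->on_hold)
			return true;    /* do not run ahead of a held ancestor */
	}
	return false;
}

int main(void)
{
	struct req held = { .engine = 0, .on_hold = true };
	struct req child = { .engine = 0, .signalers = { &held }, .nsignalers = 1 };
	struct req unrelated = { .engine = 0 };

	printf("child: %d, unrelated: %d\n", must_hold(&child), must_hold(&unrelated));
	return 0;
}
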
*/ + vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); + if (vma) { + struct i915_vma_compress *compress = + i915_vma_capture_prepare(gt); + + intel_engine_coredump_add_vma(gt->engine, vma, compress); + i915_vma_capture_finish(gt, compress); + } + + gt->simulated = gt->engine->simulated; + cap->error->simulated = gt->simulated; + + /* Publish the error state, and announce it to the world */ + i915_error_state_store(cap->error); + i915_gpu_coredump_put(cap->error); + + /* Return this request and all that depend upon it for signaling */ + execlists_unhold(engine, cap->rq); + i915_request_put(cap->rq); + + kfree(cap); +} + +static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) +{ + const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; + struct execlists_capture *cap; + + cap = kmalloc(sizeof(*cap), gfp); + if (!cap) + return NULL; + + cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); + if (!cap->error) + goto err_cap; + + cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); + if (!cap->error->gt) + goto err_gpu; + + cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); + if (!cap->error->gt->engine) + goto err_gt; + + return cap; + +err_gt: + kfree(cap->error->gt); +err_gpu: + kfree(cap->error); +err_cap: + kfree(cap); + return NULL; +} + +static bool execlists_capture(struct intel_engine_cs *engine) +{ + struct execlists_capture *cap; + + if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) + return true; + + /* + * We need to _quickly_ capture the engine state before we reset. + * We are inside an atomic section (softirq) here and we are delaying + * the forced preemption event. + */ + cap = capture_regs(engine); + if (!cap) + return true; + + cap->rq = execlists_active(&engine->execlists); + GEM_BUG_ON(!cap->rq); + + rcu_read_lock(); + cap->rq = active_request(cap->rq->context->timeline, cap->rq); + cap->rq = i915_request_get_rcu(cap->rq); + rcu_read_unlock(); + if (!cap->rq) + goto err_free; + + /* + * Remove the request from the execlists queue, and take ownership + * of the request. We pass it to our worker who will _slowly_ compress + * all the pages the _user_ requested for debugging their batch, after + * which we return it to the queue for signaling. + * + * By removing them from the execlists queue, we also remove the + * requests from being processed by __unwind_incomplete_requests() + * during the intel_engine_reset(), and so they will *not* be replayed + * afterwards. + * + * Note that because we have not yet reset the engine at this point, + * it is possible for the request that we have identified as being + * guilty, did in fact complete and we will then hit an arbitration + * point allowing the outstanding preemption to succeed. The likelihood + * of that is very low (as capturing of the engine registers should be + * fast enough to run inside an irq-off atomic section!), so we will + * simply hold that request accountable for being non-preemptible + * long enough to force the reset. 
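
Aside: execlists_hold()/__execlists_hold() above pull the guilty request and its not-yet-started same-engine waiters off the runnable lists so the capture worker can dump them at leisure. A simplified iterative walk over an invented dependency array showing the same "hold the request, then queue its local waiters" pattern; the driver's readiness and completion checks are omitted.

/* Simplified hold walk: move a request and its same-engine waiters to a hold state. */
#include <stdbool.h>
#include <stdio.h>

#define MAX_REQS 8

struct req {
	const char *name;
	int engine;
	bool on_hold;
	struct req *waiters[MAX_REQS];  /* requests that depend on this one */
	int nwaiters;
};

static void hold(struct req *rq, int engine)
{
	/* Work array instead of the driver's LIST_HEAD + list_move_tail. */
	struct req *todo[MAX_REQS];
	int n = 0;

	todo[n++] = rq;
	while (n) {
		struct req *r = todo[--n];

		if (r->on_hold || r->engine != engine)
			continue;

		r->on_hold = true;
		printf("%s: on hold\n", r->name);

		for (int i = 0; i < r->nwaiters && n < MAX_REQS; i++)
			todo[n++] = r->waiters[i];
	}
}

int main(void)
{
	struct req leaf = { .name = "waiter", .engine = 0 };
	struct req other = { .name = "other-engine", .engine = 1 };
	struct req guilty = { .name = "guilty", .engine = 0,
			      .waiters = { &leaf, &other }, .nwaiters = 2 };

	hold(&guilty, 0);
	return 0;
}
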
+ */ + if (!execlists_hold(engine, cap->rq)) + goto err_rq; + + INIT_WORK(&cap->work, execlists_capture_work); + schedule_work(&cap->work); + return true; + +err_rq: + i915_request_put(cap->rq); +err_free: + i915_gpu_coredump_put(cap->error); + kfree(cap); + return false; +} + +static noinline void preempt_reset(struct intel_engine_cs *engine) +{ + const unsigned int bit = I915_RESET_ENGINE + engine->id; + unsigned long *lock = &engine->gt->reset.flags; + + if (i915_modparams.reset < 3) + return; + + if (test_and_set_bit(bit, lock)) + return; + + /* Mark this tasklet as disabled to avoid waiting for it to complete */ + tasklet_disable_nosync(&engine->execlists.tasklet); + + ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n", + READ_ONCE(engine->props.preempt_timeout_ms), + jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); + + ring_set_paused(engine, 1); /* Freeze the current request in place */ + if (execlists_capture(engine)) + intel_engine_reset(engine, "preemption time out"); + else + ring_set_paused(engine, 0); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); +} + +static bool preempt_timeout(const struct intel_engine_cs *const engine) +{ + const struct timer_list *t = &engine->execlists.preempt; + + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + return false; + + if (!timer_expired(t)) + return false; + + return READ_ONCE(engine->execlists.pending[0]); } /* @@ -1343,24 +2706,48 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - unsigned long flags; + bool timeout = preempt_timeout(engine); + + process_csb(engine); + if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { + unsigned long flags; - GEM_TRACE("%s awake?=%d, active=%x\n", - engine->name, - !!intel_wakeref_active(&engine->wakeref), - engine->execlists.active); + spin_lock_irqsave(&engine->active.lock, flags); + __execlists_submission_tasklet(engine); + spin_unlock_irqrestore(&engine->active.lock, flags); - spin_lock_irqsave(&engine->active.lock, flags); - __execlists_submission_tasklet(engine); - spin_unlock_irqrestore(&engine->active.lock, flags); + /* Recheck after serialising with direct-submission */ + if (timeout && preempt_timeout(engine)) + preempt_reset(engine); + } +} + +static void __execlists_kick(struct intel_engine_execlists *execlists) +{ + /* Kick the tasklet for some interrupt coalescing and reset handling */ + tasklet_hi_schedule(&execlists->tasklet); +} + +#define execlists_kick(t, member) \ + __execlists_kick(container_of(t, struct intel_engine_execlists, member)) + +static void execlists_timeslice(struct timer_list *timer) +{ + execlists_kick(timer, timer); +} + +static void execlists_preempt(struct timer_list *timer) +{ + execlists_kick(timer, preempt); } static void queue_request(struct intel_engine_cs *engine, - struct i915_sched_node *node, - int prio) + struct i915_request *rq) { - GEM_BUG_ON(!list_empty(&node->link)); - list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(engine, rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } static void __submit_queue_imm(struct intel_engine_cs *engine) @@ -1376,12 +2763,23 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) tasklet_hi_schedule(&execlists->tasklet); } -static void 
submit_queue(struct intel_engine_cs *engine, int prio) +static void submit_queue(struct intel_engine_cs *engine, + const struct i915_request *rq) { - if (prio > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = prio; - __submit_queue_imm(engine); - } + struct intel_engine_execlists *execlists = &engine->execlists; + + if (rq_prio(rq) <= execlists->queue_priority_hint) + return; + + execlists->queue_priority_hint = rq_prio(rq); + __submit_queue_imm(engine); +} + +static bool ancestor_on_hold(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + GEM_BUG_ON(i915_request_on_hold(rq)); + return !list_empty(&engine->active.hold) && hold_request(rq); } static void execlists_submit_request(struct i915_request *request) @@ -1392,12 +2790,17 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->active.lock, flags); - queue_request(engine, &request->sched, rq_prio(request)); + if (unlikely(ancestor_on_hold(engine, request))) { + list_add_tail(&request->sched.link, &engine->active.hold); + i915_request_set_hold(request); + } else { + queue_request(engine, request); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(list_empty(&request->sched.link)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(list_empty(&request->sched.link)); - submit_queue(engine, rq_prio(request)); + submit_queue(engine, request); + } spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -1405,9 +2808,7 @@ static void execlists_submit_request(struct i915_request *request) static void __execlists_context_fini(struct intel_context *ce) { intel_ring_put(ce->ring); - - GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); - i915_gem_object_put(ce->state->obj); + i915_vma_put(ce->state); } static void execlists_context_destroy(struct kref *kref) @@ -1420,18 +2821,46 @@ static void execlists_context_destroy(struct kref *kref) if (ce->state) __execlists_context_fini(ce); + intel_context_fini(ce); intel_context_free(ce); } +static void +set_redzone(void *vaddr, const struct intel_engine_cs *engine) +{ + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + vaddr += engine->context_size; + + memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE); +} + +static void +check_redzone(const void *vaddr, const struct intel_engine_cs *engine) +{ + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + vaddr += engine->context_size; + + if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) + dev_err_once(engine->i915->drm.dev, + "%s context redzone overwritten!\n", + engine->name); +} + static void execlists_context_unpin(struct intel_context *ce) { - i915_gem_context_unpin_hw_id(ce->gem_context); + check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE, + ce->engine); + i915_gem_object_unpin_map(ce->state->obj); } static void -__execlists_update_reg_state(struct intel_context *ce, - struct intel_engine_cs *engine) +__execlists_update_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) { struct intel_ring *ring = ce->ring; u32 *regs = ce->lrc_reg_state; @@ -1439,14 +2868,17 @@ __execlists_update_reg_state(struct intel_context *ce, GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); - regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); - regs[CTX_RING_HEAD + 1] = ring->head; - regs[CTX_RING_TAIL + 1] = 
ring->tail; + regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD] = ring->head; + regs[CTX_RING_TAIL] = ring->tail; /* RPCS */ - if (engine->class == RENDER_CLASS) - regs[CTX_R_PWR_CLK_STATE + 1] = + if (engine->class == RENDER_CLASS) { + regs[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(engine->i915, &ce->sseu); + + i915_oa_init_reg_state(ce, engine); + } } static int @@ -1454,46 +2886,21 @@ __execlists_context_pin(struct intel_context *ce, struct intel_engine_cs *engine) { void *vaddr; - int ret; - - GEM_BUG_ON(!ce->gem_context->vm); - ret = execlists_context_deferred_alloc(ce, engine); - if (ret) - goto err; GEM_BUG_ON(!ce->state); - - ret = intel_context_active_acquire(ce, - engine->i915->ggtt.pin_bias | - PIN_OFFSET_BIAS | - PIN_HIGH); - if (ret) - goto err; + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); vaddr = i915_gem_object_pin_map(ce->state->obj, i915_coherent_map_type(engine->i915) | I915_MAP_OVERRIDE); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto unpin_active; - } + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); - ret = i915_gem_context_pin_hw_id(ce->gem_context); - if (ret) - goto unpin_map; - - ce->lrc_desc = lrc_descriptor(ce, engine); + ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine); return 0; - -unpin_map: - i915_gem_object_unpin_map(ce->state->obj); -unpin_active: - intel_context_active_release(ce); -err: - return ret; } static int execlists_context_pin(struct intel_context *ce) @@ -1501,8 +2908,16 @@ static int execlists_context_pin(struct intel_context *ce) return __execlists_context_pin(ce, ce->engine); } +static int execlists_context_alloc(struct intel_context *ce) +{ + return __execlists_context_alloc(ce, ce->engine); +} + static void execlists_context_reset(struct intel_context *ce) { + CE_TRACE(ce, "reset\n"); + GEM_BUG_ON(!intel_context_is_pinned(ce)); + /* * Because we emit WA_TAIL_DWORDS there may be a disparity * between our bookkeeping in ce->ring->head and ce->ring->tail and @@ -1519,11 +2934,19 @@ static void execlists_context_reset(struct intel_context *ce) * So to avoid that we reset the context images upon resume. For * simplicity, we just zero everything out. 
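
Aside: set_redzone()/check_redzone(), added a little above, poison the page after the context image under CONFIG_DRM_I915_DEBUG_GEM and warn if anything scribbles over it. A tiny standalone demonstration of the same poison-then-verify idea; the hunk does not show CONTEXT_REDZONE's value, so 0x5a and the sizes below are only stand-ins.

/* Poison-and-verify redzone demo; 0x5a is a stand-in for CONTEXT_REDZONE. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define REDZONE_BYTE 0x5a
#define REDZONE_SIZE 4096

static void set_redzone(unsigned char *ctx, size_t ctx_size)
{
	memset(ctx + ctx_size, REDZONE_BYTE, REDZONE_SIZE);
}

static int check_redzone(const unsigned char *ctx, size_t ctx_size)
{
	for (size_t i = 0; i < REDZONE_SIZE; i++)
		if (ctx[ctx_size + i] != REDZONE_BYTE)
			return -1;      /* context redzone overwritten */
	return 0;
}

int main(void)
{
	size_t ctx_size = 8192;
	unsigned char *ctx = malloc(ctx_size + REDZONE_SIZE);

	if (!ctx)
		return 1;

	set_redzone(ctx, ctx_size);
	printf("clean: %d\n", check_redzone(ctx, ctx_size));

	ctx[ctx_size + 7] = 0;          /* simulate an overflow past the image */
	printf("after overflow: %d\n", check_redzone(ctx, ctx_size));

	free(ctx);
	return 0;
}
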
*/ - intel_ring_reset(ce->ring, 0); + intel_ring_reset(ce->ring, ce->ring->emit); + + /* Scrub away the garbage */ + execlists_init_reg_state(ce->lrc_reg_state, + ce, ce->engine, ce->ring, true); __execlists_update_reg_state(ce, ce->engine); + + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; } static const struct intel_context_ops execlists_context_ops = { + .alloc = execlists_context_alloc, + .pin = execlists_context_pin, .unpin = execlists_context_unpin, @@ -1538,7 +2961,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; - GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb); cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -1554,7 +2977,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) *cs++ = MI_NOOP; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = rq->timeline->hwsp_offset; + *cs++ = i915_request_timeline(rq)->hwsp_offset; *cs++ = 0; *cs++ = rq->fence.seqno - 1; @@ -1566,66 +2989,11 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) return 0; } -static int emit_pdps(struct i915_request *rq) -{ - const struct intel_engine_cs * const engine = rq->engine; - struct i915_ppgtt * const ppgtt = - i915_vm_to_ppgtt(rq->gem_context->vm); - int err, i; - u32 *cs; - - GEM_BUG_ON(intel_vgpu_active(rq->i915)); - - /* - * Beware ye of the dragons, this sequence is magic! - * - * Small changes to this sequence can cause anything from - * GPU hangs to forcewake errors and machine lockups! - */ - - /* Flush any residual operations from the context load */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; - - /* Magic required to prevent forcewake errors! */ - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - return err; - - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Ensure the LRI have landed before we invalidate & continue */ - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - u32 base = engine->mmio_base; - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - /* Be doubly sure the LRI have landed before proceeding */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; - - /* Re-invalidate the TLB for luck */ - return engine->emit_flush(rq, EMIT_INVALIDATE); -} - static int execlists_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); + GEM_BUG_ON(!intel_context_is_pinned(request->context)); /* * Flush enough space to reduce the likelihood of waiting after @@ -1643,10 +3011,7 @@ static int execlists_request_alloc(struct i915_request *request) */ /* Unconditionally invalidate GPU caches and TLBs. */ - if (i915_vm_is_4lvl(request->gem_context->vm)) - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); - else - ret = emit_pdps(request); + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) return ret; @@ -1676,7 +3041,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) /* NB no one else is allowed to scribble over scratch + 256! 
*/ *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_scratch_offset(engine->i915) + 256; + *batch++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); *batch++ = 0; *batch++ = MI_LOAD_REGISTER_IMM(1); @@ -1690,7 +3056,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_scratch_offset(engine->i915) + 256; + *batch++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); *batch++ = 0; return batch; @@ -1724,11 +3091,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) /* Actual scratch location is at 128 bytes offset */ batch = gen8_emit_pipe_control(batch, PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_STORE_DATA_INDEX | PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE, - i915_scratch_offset(engine->i915) + - 2 * CACHELINE_BYTES); + LRC_PPHWSP_SCRATCH_ADDR); *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -1794,6 +3160,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ batch = gen8_emit_flush_coherentl3_wa(engine, batch); + /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + LRC_PPHWSP_SCRATCH_ADDR); + batch = emit_lri(batch, lri, ARRAY_SIZE(lri)); /* WaMediaPoolStateCmdInWABB:bxt,glk */ @@ -1874,7 +3248,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) if (IS_ERR(obj)) return PTR_ERR(obj); - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -1914,6 +3288,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return 0; switch (INTEL_GEN(engine->i915)) { + case 12: case 11: return 0; case 10: @@ -1970,30 +3345,33 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) static void enable_execlists(struct intel_engine_cs *engine) { + u32 mode; + + assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); + intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ if (INTEL_GEN(engine->i915) >= 11) - ENGINE_WRITE(engine, - RING_MODE_GEN7, - _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); else - ENGINE_WRITE(engine, - RING_MODE_GEN7, - _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); + mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); + ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode); - ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); + ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); - ENGINE_WRITE(engine, - RING_HWS_PGA, - i915_ggtt_offset(engine->status_page.vma)); + ENGINE_WRITE_FW(engine, + RING_HWS_PGA, + i915_ggtt_offset(engine->status_page.vma)); ENGINE_POSTING_READ(engine, RING_HWS_PGA); + + engine->context_tag = 0; } static bool unexpected_starting_state(struct intel_engine_cs *engine) { bool unexpected = false; - if (ENGINE_READ(engine, RING_MI_MODE) & STOP_RING) { + if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) { DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n"); unexpected = true; } @@ -2026,8 +3404,8 @@ static void 
execlists_reset_prepare(struct intel_engine_cs *engine) struct intel_engine_execlists * const execlists = &engine->execlists; unsigned long flags; - GEM_TRACE("%s: depth<-%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); + ENGINE_TRACE(engine, "depth<-%d\n", + atomic_read(&execlists->tasklet.count)); /* * Prevent request submission to the hardware until we have @@ -2041,34 +3419,32 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) __tasklet_disable_sync_once(&execlists->tasklet); GEM_BUG_ON(!reset_in_progress(execlists)); - intel_engine_stop_cs(engine); - /* And flush any current direct submission. */ spin_lock_irqsave(&engine->active.lock, flags); spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static bool lrc_regs_ok(const struct i915_request *rq) -{ - const struct intel_ring *ring = rq->ring; - const u32 *regs = rq->hw_context->lrc_reg_state; - - /* Quick spot check for the common signs of context corruption */ - - if (regs[CTX_RING_BUFFER_CONTROL + 1] != - (RING_CTL_SIZE(ring->size) | RING_VALID)) - return false; - - if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma)) - return false; - return true; + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * FIXME: Wa for more modern gens needs to be validated + */ + intel_engine_stop_cs(engine); } -static void reset_csb_pointers(struct intel_engine_execlists *execlists) +static void reset_csb_pointers(struct intel_engine_cs *engine) { + struct intel_engine_execlists * const execlists = &engine->execlists; const unsigned int reset_value = execlists->csb_size - 1; + ring_set_paused(engine, 0); + /* * After a reset, the HW starts writing into CSB entry [0]. We * therefore have to set our HEAD pointer back one entry so that @@ -2082,27 +3458,35 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists) WRITE_ONCE(*execlists->csb_write, reset_value); wmb(); /* Make sure this is visible to HW (paranoia?) */ + /* + * Sometimes Icelake forgets to reset its pointers on a GPU reset. + * Bludgeon them with a mmio update to be sure. 
+ */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + invalidate_csb_entries(&execlists->csb_status[0], &execlists->csb_status[reset_value]); } -static struct i915_request *active_request(struct i915_request *rq) +static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) { - const struct list_head * const list = &rq->engine->active.requests; - const struct intel_context * const context = rq->hw_context; - struct i915_request *active = NULL; - - list_for_each_entry_from_reverse(rq, list, sched.link) { - if (i915_request_completed(rq)) - break; + int x; - if (rq->hw_context != context) - break; - - active = rq; + x = lrc_ring_mi_mode(engine); + if (x != -1) { + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; } +} - return active; +static void __execlists_reset_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + __reset_stop_ring(regs, engine); } static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) @@ -2110,38 +3494,42 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; struct i915_request *rq; - u32 *regs; + + mb(); /* paranoia: read the CSB pointers from after the reset */ + clflush(execlists->csb_write); + mb(); process_csb(engine); /* drain preemption events */ /* Following the reset, we need to reload the CSB read/write pointers */ - reset_csb_pointers(&engine->execlists); + reset_csb_pointers(engine); /* * Save the currently executing context, even if we completed * its request, it was still running at the time of the * reset and will have been clobbered. */ - if (!port_isset(execlists->port)) - goto out_clear; + rq = execlists_active(execlists); + if (!rq) + goto unwind; - rq = port_request(execlists->port); - ce = rq->hw_context; + /* We still have requests in-flight; the engine should be active */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); - /* - * Catch up with any missed context-switch interrupts. - * - * Ideally we would just read the remaining CSB entries now that we - * know the gpu is idle. However, the CSB registers are sometimes^W - * often trashed across a GPU reset! Instead we have to rely on - * guessing the missed context-switch events by looking at what - * requests were completed. - */ - execlists_cancel_port_requests(execlists); + ce = rq->context; + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - rq = active_request(rq); - if (!rq) + if (i915_request_completed(rq)) { + /* Idle context; tidy up the ring so we can restart afresh */ + ce->ring->head = intel_ring_wrap(ce->ring, rq->tail); goto out_replay; + } + + /* Context has requests still in-flight; it should not be idle! */ + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + rq = active_request(ce->timeline, rq); + ce->ring->head = intel_ring_wrap(ce->ring, rq->head); + GEM_BUG_ON(ce->ring->head == ce->ring->tail); /* * If this request hasn't started yet, e.g. it is waiting on a @@ -2155,7 +3543,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * Otherwise, if we have not started yet, the request should replay * perfectly and we do not need to flag the result as being erroneous. 
*/ - if (!i915_request_started(rq) && lrc_regs_ok(rq)) + if (!i915_request_started(rq)) goto out_replay; /* @@ -2169,8 +3557,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * and have to at least restore the RING register in the context * image back to the expected values to skip over the guilty request. */ - i915_reset_request(rq, stalled); - if (!stalled && lrc_regs_ok(rq)) + __i915_request_reset(rq, stalled); + if (!stalled) goto out_replay; /* @@ -2181,33 +3569,28 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * future request will be after userspace has had the opportunity * to recreate its own state. */ - regs = ce->lrc_reg_state; - if (engine->pinned_default_state) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - } - execlists_init_reg_state(regs, ce, engine, ce->ring); + GEM_BUG_ON(!intel_context_is_pinned(ce)); + restore_default_state(ce, engine); out_replay: - /* Rerun the request; its payload has been neutered (if guilty). */ - ce->ring->head = - rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail; + ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", + ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); + __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine); + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ +unwind: /* Push back any incomplete requests for replay after the reset. */ + cancel_port_requests(execlists); __unwind_incomplete_requests(engine); - -out_clear: - execlists_clear_all_active(execlists); } -static void execlists_reset(struct intel_engine_cs *engine, bool stalled) +static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) { unsigned long flags; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); spin_lock_irqsave(&engine->active.lock, flags); @@ -2221,14 +3604,14 @@ static void nop_submission_tasklet(unsigned long data) /* The driver is wedged; don't process any more events. */ } -static void execlists_cancel_requests(struct intel_engine_cs *engine) +static void execlists_reset_cancel(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -2249,12 +3632,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) __execlists_reset(engine, true); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); - - i915_request_mark_complete(rq); - } + list_for_each_entry(rq, &engine->active.requests, sched.link) + mark_eio(rq); /* Flush the queued requests to the timeline list (for retiring). 
*/ while ((rb = rb_first_cached(&execlists->queue))) { @@ -2262,16 +3641,18 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - list_del_init(&rq->sched.link); + mark_eio(rq); __i915_request_submit(rq); - dma_fence_set_error(&rq->fence, -EIO); - i915_request_mark_complete(rq); } rb_erase_cached(&p->node, &execlists->queue); i915_priolist_free(p); } + /* On-hold requests will be flushed to timeline upon their release */ + list_for_each_entry(rq, &engine->active.hold, sched.link) + mark_eio(rq); + /* Cancel all attached virtual engines */ while ((rb = rb_first_cached(&execlists->virtual))) { struct virtual_engine *ve = @@ -2281,13 +3662,15 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) RB_CLEAR_NODE(rb); spin_lock(&ve->base.active.lock); - if (ve->request) { - ve->request->engine = engine; - __i915_request_submit(ve->request); - dma_fence_set_error(&ve->request->fence, -EIO); - i915_request_mark_complete(ve->request); + rq = fetch_and_zero(&ve->request); + if (rq) { + mark_eio(rq); + + rq->engine = engine; + __i915_request_submit(rq); + i915_request_put(rq); + ve->base.execlists.queue_priority_hint = INT_MIN; - ve->request = NULL; } spin_unlock(&ve->base.active.lock); } @@ -2296,7 +3679,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; - GEM_BUG_ON(port_isset(execlists->port)); GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.func = nop_submission_tasklet; @@ -2320,13 +3702,13 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) if (__tasklet_enable(&execlists->tasklet)) /* And kick in case we missed a new request submission. 
*/ tasklet_hi_schedule(&execlists->tasklet); - GEM_TRACE("%s: depth->%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); + ENGINE_TRACE(engine, "depth->%d\n", + atomic_read(&execlists->tasklet.count)); } -static int gen8_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) +static int gen8_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) { u32 *cs; @@ -2360,7 +3742,7 @@ static int gen8_emit_bb_start(struct i915_request *rq, return 0; } -static int gen9_emit_bb_start(struct i915_request *rq, +static int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags) { @@ -2421,7 +3803,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode) } *cs++ = cmd; - *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = LRC_PPHWSP_SCRATCH_ADDR; *cs++ = 0; /* upper addr */ *cs++ = 0; /* value */ intel_ring_advance(request, cs); @@ -2432,9 +3814,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode) static int gen8_emit_flush_render(struct i915_request *request, u32 mode) { - struct intel_engine_cs *engine = request->engine; - u32 scratch_addr = - i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; u32 *cs, flags = 0; int len; @@ -2456,7 +3835,7 @@ static int gen8_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; /* * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL @@ -2489,7 +3868,7 @@ static int gen8_emit_flush_render(struct i915_request *request, cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, 0); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); if (dc_flush_wa) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); @@ -2499,6 +3878,150 @@ static int gen8_emit_flush_render(struct i915_request *request, return 0; } +static int gen11_emit_flush_render(struct i915_request *request, + u32 mode) +{ + if (mode & EMIT_FLUSH) { + u32 *cs; + u32 flags = 0; + + flags |= PIPE_CONTROL_CS_STALL; + + flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } + + if (mode & EMIT_INVALIDATE) { + u32 *cs; + u32 flags = 0; + + flags |= PIPE_CONTROL_CS_STALL; + + flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } + + return 0; +} + +static u32 
preparser_disable(bool state) +{ + return MI_ARB_CHECK | 1 << 8 | state; +} + +static int gen12_emit_flush_render(struct i915_request *request, + u32 mode) +{ + if (mode & EMIT_FLUSH) { + u32 flags = 0; + u32 *cs; + + flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + /* Wa_1409600907:tgl */ + flags |= PIPE_CONTROL_DEPTH_STALL; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + flags |= PIPE_CONTROL_CS_STALL; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } + + if (mode & EMIT_INVALIDATE) { + u32 flags = 0; + u32 *cs; + + flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + flags |= PIPE_CONTROL_CS_STALL; + + cs = intel_ring_begin(request, 8); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Prevent the pre-parser from skipping past the TLB + * invalidate and loading a stale page for the batch + * buffer / request payload. + */ + *cs++ = preparser_disable(true); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + + *cs++ = preparser_disable(false); + intel_ring_advance(request, cs); + + /* + * Wa_1604544889:tgl + */ + if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) { + flags = 0; + flags |= PIPE_CONTROL_CS_STALL; + flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, + LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } + } + + return 0; +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -2514,15 +4037,28 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) return cs; } -static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - request->timeline->hwsp_offset, - 0); + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = intel_hws_preempt_address(request->engine); + *cs++ = 0; + return cs; +} + +static __always_inline u32* +gen8_emit_fini_breadcrumb_footer(struct i915_request *request, + u32 *cs) +{ *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + if (intel_engine_has_semaphores(request->engine)) + cs = emit_preempt_busywait(request, cs); request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2530,22 +4066,92 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 
*cs) +{ + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + 0); + + return gen8_emit_fini_breadcrumb_footer(request, cs); +} + static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); + /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, - request->timeline->hwsp_offset, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); + + return gen8_emit_fini_breadcrumb_footer(request, cs); +} + +static u32 * +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) +{ + cs = gen8_emit_ggtt_write_rcs(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE); - cs = gen8_emit_pipe_control(cs, - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_CS_STALL, - 0); + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + return gen8_emit_fini_breadcrumb_footer(request, cs); +} + +/* + * Note that the CS instruction pre-parser will not stall on the breadcrumb + * flush and will continue pre-fetching the instructions after it before the + * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at + * BB_START/END instructions, so, even though we might pre-fetch the pre-amble + * of the next request before the memory has been flushed, we're guaranteed that + * we won't access the batch itself too early. + * However, on gen12+ the parser can pre-fetch across the BB_START/END commands, + * so, if the current request is modifying an instruction in the next request on + * the same intel_context, we might pre-fetch and then execute the pre-update + * instruction. To avoid this, the users of self-modifying code should either + * disable the parser around the code emitting the memory writes, via a new flag + * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For + * the in-kernel use-cases we've opted to use a separate context, see + * reloc_gpu() as an example. + * All the above applies only to the instructions themselves. Non-inline data + * used by the instructions is not pre-fetched. 
+ */ +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) +{ + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = intel_hws_preempt_address(request->engine); + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + + return cs; +} + +static __always_inline u32* +gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) +{ *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + if (intel_engine_has_semaphores(request->engine)) + cs = gen12_emit_preempt_busywait(request, cs); request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2553,57 +4159,85 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static int gen8_init_rcs_context(struct i915_request *rq) +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { - int ret; + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + 0); - ret = intel_engine_emit_ctx_wa(rq); - if (ret) - return ret; + return gen12_emit_fini_breadcrumb_footer(request, cs); +} - ret = intel_rcs_context_init_mocs(rq); - /* - * Failing to program the MOCS is non-fatal.The system will not - * run at peak performance. So generate an error and carry on. - */ - if (ret) - DRM_ERROR("MOCS failed to program: expect performance issues.\n"); +static u32 * +gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) +{ + cs = gen8_emit_ggtt_write_rcs(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_HDC_PIPELINE_FLUSH); - return i915_gem_render_state_emit(rq); + return gen12_emit_fini_breadcrumb_footer(request, cs); } static void execlists_park(struct intel_engine_cs *engine) { - intel_engine_park(engine); + cancel_timer(&engine->execlists.timer); + cancel_timer(&engine->execlists.preempt); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; - engine->cancel_requests = execlists_cancel_requests; engine->schedule = i915_schedule; engine->execlists.tasklet.func = execlists_submission_tasklet; engine->reset.prepare = execlists_reset_prepare; - engine->reset.reset = execlists_reset; + engine->reset.rewind = execlists_reset_rewind; + engine->reset.cancel = execlists_reset_cancel; engine->reset.finish = execlists_reset_finish; engine->park = execlists_park; engine->unpark = NULL; engine->flags |= I915_ENGINE_SUPPORTS_STATS; - if (!intel_vgpu_active(engine->i915)) + if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (engine->preempt_context && - HAS_LOGICAL_RING_PREEMPTION(engine->i915)) - engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + engine->flags |= I915_ENGINE_HAS_PREEMPTION; + } + + if (INTEL_GEN(engine->i915) >= 12) + engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; + + if (intel_engine_has_preemption(engine)) + engine->emit_bb_start = gen8_emit_bb_start; + else + engine->emit_bb_start = gen8_emit_bb_start_noarb; +} + +static void execlists_shutdown(struct intel_engine_cs *engine) 
+{ + /* Synchronise with residual timers and any softirq they raise */ + del_timer_sync(&engine->execlists.timer); + del_timer_sync(&engine->execlists.preempt); + tasklet_kill(&engine->execlists.tasklet); } -static void execlists_destroy(struct intel_engine_cs *engine) +static void execlists_release(struct intel_engine_cs *engine) { + execlists_shutdown(engine); + intel_engine_cleanup_common(engine); lrc_destroy_wa_ctx(engine); - kfree(engine); } static void @@ -2611,19 +4245,16 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. */ - engine->destroy = execlists_destroy; engine->resume = execlists_resume; - engine->reset.prepare = execlists_reset_prepare; - engine->reset.reset = execlists_reset; - engine->reset.finish = execlists_reset_finish; - engine->cops = &execlists_context_ops; engine->request_alloc = execlists_request_alloc; engine->emit_flush = gen8_emit_flush; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; + if (INTEL_GEN(engine->i915) >= 12) + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; engine->set_default_submission = intel_execlists_set_default_submission; @@ -2638,10 +4269,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) * until a more refined solution exists. */ } - if (IS_GEN(engine->i915, 8)) - engine->emit_bb_start = gen8_emit_bb_start; - else - engine->emit_bb_start = gen9_emit_bb_start; } static inline void @@ -2665,40 +4292,41 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } -int intel_execlists_submission_setup(struct intel_engine_cs *engine) +static void rcs_submission_override(struct intel_engine_cs *engine) { - /* Intentionally left blank. 
*/ - engine->buffer = NULL; - - tasklet_init(&engine->execlists.tasklet, - execlists_submission_tasklet, (unsigned long)engine); - - logical_ring_default_vfuncs(engine); - logical_ring_default_irqs(engine); - - if (engine->class == RENDER_CLASS) { - engine->init_context = gen8_init_rcs_context; + switch (INTEL_GEN(engine->i915)) { + case 12: + engine->emit_flush = gen12_emit_flush_render; + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; + break; + case 11: + engine->emit_flush = gen11_emit_flush_render; + engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; + break; + default: engine->emit_flush = gen8_emit_flush_render; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; + break; } - - return 0; } -int intel_execlists_submission_init(struct intel_engine_cs *engine) +int intel_execlists_submission_setup(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct drm_i915_private *i915 = engine->i915; struct intel_uncore *uncore = engine->uncore; u32 base = engine->mmio_base; - int ret; - ret = intel_engine_init_common(engine); - if (ret) - return ret; + tasklet_init(&engine->execlists.tasklet, + execlists_submission_tasklet, (unsigned long)engine); + timer_setup(&engine->execlists.timer, execlists_timeslice, 0); + timer_setup(&engine->execlists.preempt, execlists_preempt, 0); - intel_engine_init_workarounds(engine); - intel_engine_init_whitelist(engine); + logical_ring_default_vfuncs(engine); + logical_ring_default_irqs(engine); + + if (engine->class == RENDER_CLASS) + rcs_submission_override(engine); if (intel_init_workaround_bb(engine)) /* @@ -2718,11 +4346,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_ELSP(base)); } - execlists->preempt_complete_status = ~0u; - if (engine->preempt_context) - execlists->preempt_complete_status = - upper_32_bits(engine->preempt_context->lrc_desc); - execlists->csb_status = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; @@ -2734,12 +4357,15 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) else execlists->csb_size = GEN11_CSB_ENTRIES; - reset_csb_pointers(execlists); + reset_csb_pointers(engine); + + /* Finally, take ownership and responsibility for cleanup! */ + engine->release = execlists_release; return 0; } -static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) +static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) { u32 indirect_ctx_offset; @@ -2747,6 +4373,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) default: MISSING_CASE(INTEL_GEN(engine->i915)); /* fall through */ + case 12: + indirect_ctx_offset = + GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; case 11: indirect_ctx_offset = GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; @@ -2768,86 +4398,53 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static void execlists_init_reg_state(u32 *regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->gem_context->vm); - bool rcs = engine->class == RENDER_CLASS; - u32 base = engine->mmio_base; - /* - * A context is actually a big batch buffer with several - * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. 
The - * values we are setting here are only for the first context restore: - * on a subsequent save, the GPU will recreate this batchbuffer with new - * values (including all the missing MI_LOAD_REGISTER_IMM commands that - * we are not initializing here). - * - * Must keep consistent with virtual_update_register_offsets(). - */ - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | - MI_LRI_FORCE_POSTED; +static void init_common_reg_state(u32 * const regs, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool inhibit) +{ + u32 ctl; + + ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (inhibit) + ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; + if (INTEL_GEN(engine->i915) < 11) + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); + regs[CTX_CONTEXT_CONTROL] = ctl; + + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; +} - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH)); - if (INTEL_GEN(engine->i915) < 11) { - regs[CTX_CONTEXT_CONTROL + 1] |= - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | - CTX_CTRL_RS_CTX_ENABLE); - } - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), - RING_CTL_SIZE(ring->size) | RING_VALID); - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); - if (rcs) { - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, - RING_INDIRECT_CTX_OFFSET(base), 0); - if (wa_ctx->indirect_ctx.size) { - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - regs[CTX_RCS_INDIRECT_CTX + 1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset) | - (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - - regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = - intel_lr_indirect_ctx_offset(engine) << 6; - } +static void init_wa_bb_reg_state(u32 * const regs, + const struct intel_engine_cs *engine, + u32 pos_bb_per_ctx) +{ + const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); - if (wa_ctx->per_ctx.size) { - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + if (wa_ctx->per_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - regs[CTX_BB_PER_CTX_PTR + 1] = - (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; - } + regs[pos_bb_per_ctx] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; + if (wa_ctx->indirect_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); - /* PDP values well be assigned later if needed */ - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0); - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0); - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 
2), 0); - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0); - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0); - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0); - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0); - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0); + regs[pos_bb_per_ctx + 2] = + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); + regs[pos_bb_per_ctx + 4] = + intel_lr_indirect_ctx_offset(engine) << 6; + } +} + +static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt) +{ if (i915_vm_is_4lvl(&ppgtt->vm)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and @@ -2860,17 +4457,43 @@ static void execlists_init_reg_state(u32 *regs, ASSIGN_CTX_PDP(ppgtt, regs, 1); ASSIGN_CTX_PDP(ppgtt, regs, 0); } +} - if (rcs) { - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); +static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) +{ + if (i915_is_ggtt(vm)) + return i915_vm_to_ggtt(vm)->alias; + else + return i915_vm_to_ppgtt(vm); +} - i915_oa_init_reg_state(engine, ce, regs); - } +static void execlists_init_reg_state(u32 *regs, + const struct intel_context *ce, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool inhibit) +{ + /* + * A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + * + * Must keep consistent with virtual_update_register_offsets(). + */ + set_offsets(regs, reg_offsets(engine), engine, inhibit); - regs[CTX_END] = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) - regs[CTX_END] |= BIT(0); + init_common_reg_state(regs, engine, ring, inhibit); + init_ppgtt_reg_state(regs, vm_alias(ce->vm)); + + init_wa_bb_reg_state(regs, engine, + INTEL_GEN(engine->i915) >= 12 ? + GEN12_CTX_BB_PER_CTX_PTR : + CTX_BB_PER_CTX_PTR); + + __reset_stop_ring(regs, engine); } static int @@ -2879,8 +4502,8 @@ populate_lr_context(struct intel_context *ce, struct intel_engine_cs *engine, struct intel_ring *ring) { + bool inhibit = true; void *vaddr; - u32 *regs; int ret; vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); @@ -2890,13 +4513,9 @@ populate_lr_context(struct intel_context *ce, return ret; } + set_redzone(vaddr, engine); + if (engine->default_state) { - /* - * We only want to copy over the template context state; - * skipping over the headers reserved for GuC communication, - * leaving those as zero. - */ - const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; void *defaults; defaults = i915_gem_object_pin_map(engine->default_state, @@ -2906,81 +4525,62 @@ populate_lr_context(struct intel_context *ce, goto err_unpin_ctx; } - memcpy(vaddr + start, defaults + start, engine->context_size); + memcpy(vaddr, defaults, engine->context_size); i915_gem_object_unpin_map(engine->default_state); + __set_bit(CONTEXT_VALID_BIT, &ce->flags); + inhibit = false; } /* The second page of the context object contains some fields which must * be set up prior to the first execution. 
*/ - regs = vaddr + LRC_STATE_PN * PAGE_SIZE; - execlists_init_reg_state(regs, ce, engine, ring); - if (!engine->default_state) - regs[CTX_CONTEXT_CONTROL + 1] |= - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (ce->gem_context == engine->i915->preempt_context && - INTEL_GEN(engine->i915) < 11) - regs[CTX_CONTEXT_CONTROL + 1] |= - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); + execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, + ce, engine, ring, inhibit); ret = 0; err_unpin_ctx: - __i915_gem_object_flush_map(ctx_obj, - LRC_HEADER_PAGES * PAGE_SIZE, - engine->context_size); + __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); i915_gem_object_unpin_map(ctx_obj); return ret; } -static struct i915_timeline *get_timeline(struct i915_gem_context *ctx) -{ - if (ctx->timeline) - return i915_timeline_get(ctx->timeline); - else - return i915_timeline_create(ctx->i915, NULL); -} - -static int execlists_context_deferred_alloc(struct intel_context *ce, - struct intel_engine_cs *engine) +static int __execlists_context_alloc(struct intel_context *ce, + struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; + struct intel_ring *ring; struct i915_vma *vma; u32 context_size; - struct intel_ring *ring; - struct i915_timeline *timeline; int ret; - if (ce->state) - return 0; - + GEM_BUG_ON(ce->state); context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); - /* - * Before the actual start of the context image, we insert a few pages - * for our own use and for sharing with the GuC. - */ - context_size += LRC_HEADER_PAGES * PAGE_SIZE; + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + context_size += I915_GTT_PAGE_SIZE; /* for redzone */ ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size); if (IS_ERR(ctx_obj)) return PTR_ERR(ctx_obj); - vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL); + vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto error_deref_obj; } - timeline = get_timeline(ce->gem_context); - if (IS_ERR(timeline)) { - ret = PTR_ERR(timeline); - goto error_deref_obj; + if (!ce->timeline) { + struct intel_timeline *tl; + + tl = intel_timeline_create(engine->gt, NULL); + if (IS_ERR(tl)) { + ret = PTR_ERR(tl); + goto error_deref_obj; + } + + ce->timeline = tl; } - ring = intel_engine_create_ring(engine, - timeline, - ce->gem_context->ring_size); - i915_timeline_put(timeline); + ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; @@ -3022,22 +4622,24 @@ static void virtual_context_destroy(struct kref *kref) for (n = 0; n < ve->num_siblings; n++) { struct intel_engine_cs *sibling = ve->siblings[n]; struct rb_node *node = &ve->nodes[sibling->id].rb; + unsigned long flags; if (RB_EMPTY_NODE(node)) continue; - spin_lock_irq(&sibling->active.lock); + spin_lock_irqsave(&sibling->active.lock, flags); /* Detachment is lazily performed in the execlists tasklet */ if (!RB_EMPTY_NODE(node)) rb_erase_cached(node, &sibling->execlists.virtual); - spin_unlock_irq(&sibling->active.lock); + spin_unlock_irqrestore(&sibling->active.lock, flags); } GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); if (ve->context.state) __execlists_context_fini(&ve->context); + intel_context_fini(&ve->context); kfree(ve->bonds); kfree(ve); @@ -3065,8 +4667,16 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve) return; swap(ve->siblings[swp], 
ve->siblings[0]); - virtual_update_register_offsets(ve->context.lrc_reg_state, - ve->siblings[0]); + if (!intel_engine_has_relative_mmio(ve->siblings[0])) + virtual_update_register_offsets(ve->context.lrc_reg_state, + ve->siblings[0]); +} + +static int virtual_context_alloc(struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + + return __execlists_context_alloc(ce, ve->siblings[0]); } static int virtual_context_pin(struct intel_context *ce) @@ -3090,6 +4700,8 @@ static void virtual_context_enter(struct intel_context *ce) for (n = 0; n < ve->num_siblings; n++) intel_engine_pm_get(ve->siblings[n]); + + intel_timeline_enter(ce->timeline); } static void virtual_context_exit(struct intel_context *ce) @@ -3097,11 +4709,15 @@ static void virtual_context_exit(struct intel_context *ce) struct virtual_engine *ve = container_of(ce, typeof(*ve), context); unsigned int n; + intel_timeline_exit(ce->timeline); + for (n = 0; n < ve->num_siblings; n++) intel_engine_pm_put(ve->siblings[n]); } static const struct intel_context_ops virtual_context_ops = { + .alloc = virtual_context_alloc, + .pin = virtual_context_pin, .unpin = execlists_context_unpin, @@ -3128,10 +4744,9 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) mask = ve->siblings[0]->mask; } - GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n", - ve->base.name, - rq->fence.context, rq->fence.seqno, - mask, ve->base.execlists.queue_priority_hint); + ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", + rq->fence.context, rq->fence.seqno, + mask, ve->base.execlists.queue_priority_hint); return mask; } @@ -3219,23 +4834,40 @@ submit_engine: static void virtual_submit_request(struct i915_request *rq) { struct virtual_engine *ve = to_virtual_engine(rq->engine); + struct i915_request *old; + unsigned long flags; - GEM_TRACE("%s: rq=%llx:%lld\n", - ve->base.name, - rq->fence.context, - rq->fence.seqno); + ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n", + rq->fence.context, + rq->fence.seqno); GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); - GEM_BUG_ON(ve->request); - GEM_BUG_ON(!list_empty(virtual_queue(ve))); + spin_lock_irqsave(&ve->base.active.lock, flags); - ve->base.execlists.queue_priority_hint = rq_prio(rq); - WRITE_ONCE(ve->request, rq); + old = ve->request; + if (old) { /* background completion event from preempt-to-busy */ + GEM_BUG_ON(!i915_request_completed(old)); + __i915_request_submit(old); + i915_request_put(old); + } - list_move_tail(&rq->sched.link, virtual_queue(ve)); + if (i915_request_completed(rq)) { + __i915_request_submit(rq); - tasklet_schedule(&ve->base.execlists.tasklet); + ve->base.execlists.queue_priority_hint = INT_MIN; + ve->request = NULL; + } else { + ve->base.execlists.queue_priority_hint = rq_prio(rq); + ve->request = i915_request_get(rq); + + GEM_BUG_ON(!list_empty(virtual_queue(ve))); + list_move_tail(&rq->sched.link, virtual_queue(ve)); + + tasklet_schedule(&ve->base.execlists.tasklet); + } + + spin_unlock_irqrestore(&ve->base.active.lock, flags); } static struct ve_bond * @@ -3256,23 +4888,26 @@ static void virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) { struct virtual_engine *ve = to_virtual_engine(rq->engine); + intel_engine_mask_t allowed, exec; struct ve_bond *bond; + allowed = ~to_request(signal)->engine->mask; + bond = virtual_find_bond(ve, to_request(signal)->engine); - if (bond) { - intel_engine_mask_t old, new, cmp; + if (bond) + allowed &= bond->sibling_mask; - cmp = 
READ_ONCE(rq->execution_mask); - do { - old = cmp; - new = cmp & bond->sibling_mask; - } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old); - } + /* Restrict the bonded request to run on only the available engines */ + exec = READ_ONCE(rq->execution_mask); + while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) + ; + + /* Prevent the master from being re-run on the bonded engines */ + to_request(signal)->execution_mask &= ~allowed; } struct intel_context * -intel_execlists_create_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs **siblings, +intel_execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) { struct virtual_engine *ve; @@ -3283,18 +4918,21 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, return ERR_PTR(-EINVAL); if (count == 1) - return intel_context_create(ctx, siblings[0]); + return intel_context_create(siblings[0]); ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); if (!ve) return ERR_PTR(-ENOMEM); - ve->base.i915 = ctx->i915; + ve->base.i915 = siblings[0]->i915; + ve->base.gt = siblings[0]->gt; + ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; + ve->base.class = OTHER_CLASS; ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; - ve->base.flags = I915_ENGINE_IS_VIRTUAL; + ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; /* * The decision on whether to submit a request using semaphores @@ -3314,7 +4952,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); - + intel_engine_init_breadcrumbs(&ve->base); intel_engine_init_execlists(&ve->base); ve->base.cops = &virtual_context_ops; @@ -3330,7 +4968,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, virtual_submission_tasklet, (unsigned long)ve); - intel_context_init(&ve->context, ctx, &ve->base); + intel_context_init(&ve->context, &ve->base); for (n = 0; n < count; n++) { struct intel_engine_cs *sibling = siblings[n]; @@ -3391,8 +5029,12 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb; ve->base.emit_fini_breadcrumb_dw = sibling->emit_fini_breadcrumb_dw; + + ve->base.flags = sibling->flags; } + ve->base.flags |= I915_ENGINE_IS_VIRTUAL; + return &ve->context; err_put: @@ -3401,14 +5043,12 @@ err_put: } struct intel_context * -intel_execlists_clone_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs *src) +intel_execlists_clone_virtual(struct intel_engine_cs *src) { struct virtual_engine *se = to_virtual_engine(src); struct intel_context *dst; - dst = intel_execlists_create_virtual(ctx, - se->siblings, + dst = intel_execlists_create_virtual(se->siblings, se->num_siblings); if (IS_ERR(dst)) return dst; @@ -3466,6 +5106,18 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, return 0; } +struct intel_engine_cs * +intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, + unsigned int sibling) +{ + struct virtual_engine *ve = to_virtual_engine(engine); + + if (sibling >= ve->num_siblings) + return NULL; + + return ve->siblings[sibling]; +} + void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, @@ -3554,6 +5206,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, u32 head, bool scrub) { + GEM_BUG_ON(!intel_context_is_pinned(ce)); + /* * We 
want a simple context + ring to execute the breadcrumb update. * We cannot rely on the context being intact across the GPU hang, @@ -3562,16 +5216,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - if (scrub) { - u32 *regs = ce->lrc_reg_state; - - if (engine->pinned_default_state) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - } - execlists_init_reg_state(regs, ce, engine, ce->ring); - } + if (scrub) + restore_default_state(ce, engine); /* Rerun the request; its payload has been neutered (if guilty). */ ce->ring->head = head; @@ -3580,6 +5226,13 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, __execlists_update_reg_state(ce, engine); } +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) +{ + return engine->set_default_submission == + intel_execlists_set_default_submission; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_lrc.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index c2bba82bcc16..dfbc214e14f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -43,6 +43,7 @@ struct intel_engine_cs; #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) @@ -66,6 +67,12 @@ struct intel_engine_cs; #define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8) #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) +#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ +#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ +/* in Gen12 ID 0x7FF is reserved to indicate idle */ +#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) + enum { INTEL_CONTEXT_SCHEDULE_IN = 0, INTEL_CONTEXT_SCHEDULE_OUT, @@ -76,33 +83,17 @@ enum { void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int intel_execlists_submission_setup(struct intel_engine_cs *engine); -int intel_execlists_submission_init(struct intel_engine_cs *engine); /* Logical Ring Contexts */ - -/* - * We allocate a header at the start of the context image for our own - * use, therefore the actual location of the logical state is offset - * from the start of the VMA. The layout is - * - * | [guc] | [hwsp] [logical state] | - * |<- our header ->|<- context image ->| - * - */ -/* The first page is used for sharing data with the GuC */ -#define LRC_GUCSHR_PN (0) -#define LRC_GUCSHR_SZ (1) /* At the start of the context image is its per-process HWS page */ -#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ) +#define LRC_PPHWSP_PN (0) #define LRC_PPHWSP_SZ (1) -/* Finally we have the logical state for the context */ +/* After the PPHWSP we have the logical state for the context */ #define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) -/* - * Currently we include the PPHWSP in __intel_engine_context_size() so - * the size of the header is synonymous with the start of the PPHWSP. 
- */ -#define LRC_HEADER_PAGES LRC_PPHWSP_PN +/* Space within PPHWSP reserved to be used as scratch */ +#define LRC_PPHWSP_SCRATCH 0x34 +#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) void intel_execlists_set_default_submission(struct intel_engine_cs *engine); @@ -119,16 +110,21 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int max); struct intel_context * -intel_execlists_create_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs **siblings, +intel_execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); struct intel_context * -intel_execlists_clone_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs *src); +intel_execlists_clone_virtual(struct intel_engine_cs *src); int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, const struct intel_engine_cs *master, const struct intel_engine_cs *sibling); +struct intel_engine_cs * +intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, + unsigned int sibling); + +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 6bf34738b4e5..08a3be65f700 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -9,60 +9,47 @@ #include <linux/types.h> -/* GEN8+ Reg State Context */ -#define CTX_LRI_HEADER_0 0x01 -#define CTX_CONTEXT_CONTROL 0x02 -#define CTX_RING_HEAD 0x04 -#define CTX_RING_TAIL 0x06 -#define CTX_RING_BUFFER_START 0x08 -#define CTX_RING_BUFFER_CONTROL 0x0a -#define CTX_BB_HEAD_U 0x0c -#define CTX_BB_HEAD_L 0x0e -#define CTX_BB_STATE 0x10 -#define CTX_SECOND_BB_HEAD_U 0x12 -#define CTX_SECOND_BB_HEAD_L 0x14 -#define CTX_SECOND_BB_STATE 0x16 -#define CTX_BB_PER_CTX_PTR 0x18 -#define CTX_RCS_INDIRECT_CTX 0x1a -#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c -#define CTX_LRI_HEADER_1 0x21 -#define CTX_CTX_TIMESTAMP 0x22 -#define CTX_PDP3_UDW 0x24 -#define CTX_PDP3_LDW 0x26 -#define CTX_PDP2_UDW 0x28 -#define CTX_PDP2_LDW 0x2a -#define CTX_PDP1_UDW 0x2c -#define CTX_PDP1_LDW 0x2e -#define CTX_PDP0_UDW 0x30 -#define CTX_PDP0_LDW 0x32 -#define CTX_LRI_HEADER_2 0x41 -#define CTX_R_PWR_CLK_STATE 0x42 -#define CTX_END 0x44 - -#define CTX_REG(reg_state, pos, reg, val) do { \ - u32 *reg_state__ = (reg_state); \ - const u32 pos__ = (pos); \ - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \ - (reg_state__)[(pos__) + 1] = (val); \ -} while (0) +/* GEN8 to GEN11 Reg State Context */ +#define CTX_CONTEXT_CONTROL (0x02 + 1) +#define CTX_RING_HEAD (0x04 + 1) +#define CTX_RING_TAIL (0x06 + 1) +#define CTX_RING_START (0x08 + 1) +#define CTX_RING_CTL (0x0a + 1) +#define CTX_BB_STATE (0x10 + 1) +#define CTX_BB_PER_CTX_PTR (0x18 + 1) +#define CTX_PDP3_UDW (0x24 + 1) +#define CTX_PDP3_LDW (0x26 + 1) +#define CTX_PDP2_UDW (0x28 + 1) +#define CTX_PDP2_LDW (0x2a + 1) +#define CTX_PDP1_UDW (0x2c + 1) +#define CTX_PDP1_LDW (0x2e + 1) +#define CTX_PDP0_UDW (0x30 + 1) +#define CTX_PDP0_LDW (0x32 + 1) +#define CTX_R_PWR_CLK_STATE (0x42 + 1) + +#define GEN9_CTX_RING_MI_MODE 0x54 + +/* GEN12+ Reg State Context */ +#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1) #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \ - (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \ + 
(reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \ } while (0) #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = px_dma(ppgtt->pd); \ - (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \ } while (0) #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 #define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 #define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A +#define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0xD #endif /* _INTEL_LRC_REG_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 1f9db50b1869..eeef90b55c64 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -23,8 +23,10 @@ #include "i915_drv.h" #include "intel_engine.h" +#include "intel_gt.h" #include "intel_mocs.h" #include "intel_lrc.h" +#include "intel_ring.h" /* structures required */ struct drm_i915_mocs_entry { @@ -61,6 +63,10 @@ struct drm_i915_mocs_table { #define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ /* (e)LLC caching options */ +/* + * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means + * the same as LE_UC + */ #define LE_0_PAGETABLE _LE_CACHEABILITY(0) #define LE_1_UC _LE_CACHEABILITY(1) #define LE_2_WT _LE_CACHEABILITY(2) @@ -99,8 +105,9 @@ struct drm_i915_mocs_table { * of bspec. * * Entries not part of the following tables are undefined as far as - * userspace is concerned and shouldn't be relied upon. For the time - * being they will be initialized to PTE. + * userspace is concerned and shouldn't be relied upon. For Gen < 12 + * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for + * PTE and will be initialized to an invalid value. * * The last two entries are reserved by the hardware. 
For ICL+ they * should be initialized according to bspec and never used, for older @@ -120,7 +127,7 @@ struct drm_i915_mocs_table { LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ L3_3_WB) -static const struct drm_i915_mocs_entry skylake_mocs_table[] = { +static const struct drm_i915_mocs_entry skl_mocs_table[] = { GEN9_MOCS_ENTRIES, MOCS_ENTRY(I915_MOCS_CACHED, LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), @@ -136,14 +143,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { }; #define GEN11_MOCS_ENTRIES \ - /* Base - Uncached (Deprecated) */ \ - MOCS_ENTRY(I915_MOCS_UNCACHED, \ - LE_1_UC | LE_TC_1_LLC, \ - L3_1_UC), \ - /* Base - L3 + LeCC:PAT (Deprecated) */ \ - MOCS_ENTRY(I915_MOCS_PTE, \ - LE_0_PAGETABLE | LE_TC_1_LLC, \ - L3_3_WB), \ + /* Entries 0 and 1 are defined per-platform */ \ /* Base - L3 + LLC */ \ MOCS_ENTRY(2, \ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ @@ -200,14 +200,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { MOCS_ENTRY(15, \ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ L3_3_WB), \ - /* Bypass LLC - Uncached (EHL+) */ \ - MOCS_ENTRY(16, \ - LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \ - L3_1_UC), \ - /* Bypass LLC - L3 (Read-Only) (EHL+) */ \ - MOCS_ENTRY(17, \ - LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \ - L3_3_WB), \ /* Self-Snoop - L3 + LLC */ \ MOCS_ENTRY(18, \ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ @@ -241,79 +233,92 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ L3_1_UC) -static const struct drm_i915_mocs_entry icelake_mocs_table[] = { +static const struct drm_i915_mocs_entry tgl_mocs_table[] = { + /* Base - Error (Reserved for Non-Use) */ + MOCS_ENTRY(0, 0x0, 0x0), + /* Base - Reserved */ + MOCS_ENTRY(1, 0x0, 0x0), + + GEN11_MOCS_ENTRIES, + + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + +static const struct drm_i915_mocs_entry icl_mocs_table[] = { + /* Base - Uncached (Deprecated) */ + MOCS_ENTRY(I915_MOCS_UNCACHED, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* Base - L3 + LeCC:PAT (Deprecated) */ + MOCS_ENTRY(I915_MOCS_PTE, + LE_0_PAGETABLE | LE_TC_1_LLC, + L3_3_WB), + GEN11_MOCS_ENTRIES }; -/** - * get_mocs_settings() - * @dev_priv: i915 device. - * @table: Output table that will be made to point at appropriate - * MOCS values for the device. - * - * This function will return the values of the MOCS table that needs to - * be programmed for the platform. It will return the values that need - * to be programmed and if they need to be programmed. - * - * Return: true if there are applicable MOCS settings for the device. 
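The fallback described above (entries not defined by a platform table resolve to the PTE entry on pre-Gen12) is easy to see in isolation. A minimal standalone C sketch of that lookup, with a made-up three-entry table instead of the driver's real tables:

    #include <stdbool.h>
    #include <stdio.h>

    struct mocs_entry {
        unsigned int control_value;
        bool used;
    };

    #define DEFAULT_IDX 1   /* stand-in for the I915_MOCS_PTE slot */

    static const struct mocs_entry table[] = {
        [0] = { .control_value = 0x0011, .used = true },
        [1] = { .control_value = 0x0022, .used = true },  /* fallback entry */
        [2] = { .control_value = 0x0033, .used = true },
    };

    static unsigned int get_control(unsigned int index)
    {
        /* out-of-range or unused indices fall back to the default entry */
        if (index < sizeof(table) / sizeof(table[0]) && table[index].used)
            return table[index].control_value;

        return table[DEFAULT_IDX].control_value;
    }

    int main(void)
    {
        for (unsigned int i = 0; i < 6; i++)
            printf("MOCS[%u] = 0x%04x\n", i, get_control(i));
        return 0;
    }

Indices beyond the table fall through to the designated default, which is what lets the per-platform tables above stay sparse.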
- */ -static bool get_mocs_settings(struct drm_i915_private *dev_priv, +static bool get_mocs_settings(const struct drm_i915_private *i915, struct drm_i915_mocs_table *table) { - bool result = false; - - if (INTEL_GEN(dev_priv) >= 11) { - table->size = ARRAY_SIZE(icelake_mocs_table); - table->table = icelake_mocs_table; + if (INTEL_GEN(i915) >= 12) { + table->size = ARRAY_SIZE(tgl_mocs_table); + table->table = tgl_mocs_table; + table->n_entries = GEN11_NUM_MOCS_ENTRIES; + } else if (IS_GEN(i915, 11)) { + table->size = ARRAY_SIZE(icl_mocs_table); + table->table = icl_mocs_table; table->n_entries = GEN11_NUM_MOCS_ENTRIES; - result = true; - } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { - table->size = ARRAY_SIZE(skylake_mocs_table); + } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) { + table->size = ARRAY_SIZE(skl_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; - table->table = skylake_mocs_table; - result = true; - } else if (IS_GEN9_LP(dev_priv)) { + table->table = skl_mocs_table; + } else if (IS_GEN9_LP(i915)) { table->size = ARRAY_SIZE(broxton_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = broxton_mocs_table; - result = true; } else { - WARN_ONCE(INTEL_GEN(dev_priv) >= 9, + WARN_ONCE(INTEL_GEN(i915) >= 9, "Platform that should have a MOCS table does not.\n"); + return false; } + if (GEM_DEBUG_WARN_ON(table->size > table->n_entries)) + return false; + /* WaDisableSkipCaching:skl,bxt,kbl,glk */ - if (IS_GEN(dev_priv, 9)) { + if (IS_GEN(i915, 9)) { int i; for (i = 0; i < table->size; i++) - if (WARN_ON(table->table[i].l3cc_value & - (L3_ESC(1) | L3_SCC(0x7)))) + if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value & + (L3_ESC(1) | L3_SCC(0x7)))) return false; } - return result; -} - -static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) -{ - switch (engine_id) { - case RCS0: - return GEN9_GFX_MOCS(index); - case VCS0: - return GEN9_MFX0_MOCS(index); - case BCS0: - return GEN9_BLT_MOCS(index); - case VECS0: - return GEN9_VEBOX_MOCS(index); - case VCS1: - return GEN9_MFX1_MOCS(index); - case VCS2: - return GEN11_MFX2_MOCS(index); - default: - MISSING_CASE(engine_id); - return INVALID_MMIO_REG; - } + return true; } /* @@ -323,90 +328,47 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { - if (table->table[index].used) + if (index < table->size && table->table[index].used) return table->table[index].control_value; return table->table[I915_MOCS_PTE].control_value; } -/** - * intel_mocs_init_engine() - emit the mocs control table - * @engine: The engine for whom to emit the registers. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. - */ -void intel_mocs_init_engine(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct drm_i915_mocs_table table; - unsigned int index; - u32 unused_value; +#define for_each_mocs(mocs, t, i) \ + for (i = 0; \ + i < (t)->n_entries ? 
(mocs = get_entry_control((t), i)), 1 : 0;\ + i++) - if (!get_mocs_settings(dev_priv, &table)) - return; - - /* Set unused values to PTE */ - unused_value = table.table[I915_MOCS_PTE].control_value; - - for (index = 0; index < table.size; index++) { - u32 value = get_entry_control(&table, index); - - I915_WRITE(mocs_register(engine->id, index), value); - } +static void __init_mocs_table(struct intel_uncore *uncore, + const struct drm_i915_mocs_table *table, + u32 addr) +{ + unsigned int i; + u32 mocs; - /* All remaining entries are also unused */ - for (; index < table.n_entries; index++) - I915_WRITE(mocs_register(engine->id, index), unused_value); + for_each_mocs(mocs, table, i) + intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs); } -/** - * emit_mocs_control_table() - emit the mocs control table - * @rq: Request to set up the MOCS table for. - * @table: The values to program into the control regs. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. - * - * Return: 0 on success, otherwise the error status. - */ -static int emit_mocs_control_table(struct i915_request *rq, - const struct drm_i915_mocs_table *table) +static u32 mocs_offset(const struct intel_engine_cs *engine) { - enum intel_engine_id engine = rq->engine->id; - unsigned int index; - u32 unused_value; - u32 *cs; - - if (GEM_WARN_ON(table->size > table->n_entries)) - return -ENODEV; - - /* Set unused values to PTE */ - unused_value = table->table[I915_MOCS_PTE].control_value; - - cs = intel_ring_begin(rq, 2 + 2 * table->n_entries); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); - - for (index = 0; index < table->size; index++) { - u32 value = get_entry_control(table, index); - - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = value; - } - - /* All remaining entries are also unused */ - for (; index < table->n_entries; index++) { - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = unused_value; - } - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); + static const u32 offset[] = { + [RCS0] = __GEN9_RCS0_MOCS0, + [VCS0] = __GEN9_VCS0_MOCS0, + [VCS1] = __GEN9_VCS1_MOCS0, + [VECS0] = __GEN9_VECS0_MOCS0, + [BCS0] = __GEN9_BCS0_MOCS0, + [VCS2] = __GEN11_VCS2_MOCS0, + }; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset)); + return offset[engine->id]; +} - return 0; +static void init_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) +{ + __init_mocs_table(engine->uncore, table, mocs_offset(engine)); } /* @@ -416,159 +378,81 @@ static int emit_mocs_control_table(struct i915_request *rq, static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { - if (table->table[index].used) + if (index < table->size && table->table[index].used) return table->table[index].l3cc_value; return table->table[I915_MOCS_PTE].l3cc_value; } -static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, - u16 low, - u16 high) +static inline u32 l3cc_combine(u16 low, u16 high) { - return low | high << 16; + return low | (u32)high << 16; } -/** - * emit_mocs_l3cc_table() - emit the mocs control table - * @rq: Request to set up the MOCS table for. - * @table: The values to program into the control regs. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. This register set is - * programmed in pairs. - * - * Return: 0 on success, otherwise the error status. 
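The new for_each_mocs() macro above folds the value load into the loop condition through the comma operator, so the register-programming loop body stays a one-liner. A standalone demonstration of the same macro shape (simplified names, plain C, not the driver's types):

    #include <stdio.h>

    struct table {
        unsigned int n_entries;
        const unsigned int *values;
    };

    static unsigned int get_entry(const struct table *t, unsigned int i)
    {
        return t->values[i];
    }

    /* load the current value and yield "continue" (1) while in range */
    #define for_each_entry(val, t, i) \
        for (i = 0; \
             i < (t)->n_entries ? ((val) = get_entry((t), (i))), 1 : 0; \
             i++)

    int main(void)
    {
        static const unsigned int values[] = { 0x11, 0x22, 0x33 };
        const struct table t = { .n_entries = 3, .values = values };
        unsigned int i, val;

        for_each_entry(val, &t, i)
            printf("entry %u -> 0x%02x\n", i, val);

        return 0;
    }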
- */ -static int emit_mocs_l3cc_table(struct i915_request *rq, - const struct drm_i915_mocs_table *table) +#define for_each_l3cc(l3cc, t, i) \ + for (i = 0; \ + i < ((t)->n_entries + 1) / 2 ? \ + (l3cc = l3cc_combine(get_entry_l3cc((t), 2 * i), \ + get_entry_l3cc((t), 2 * i + 1))), 1 : \ + 0; \ + i++) + +static void init_l3cc_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) { - u16 unused_value; + struct intel_uncore *uncore = engine->uncore; unsigned int i; - u32 *cs; - - if (GEM_WARN_ON(table->size > table->n_entries)) - return -ENODEV; + u32 l3cc; - /* Set unused values to PTE */ - unused_value = table->table[I915_MOCS_PTE].l3cc_value; - - cs = intel_ring_begin(rq, 2 + table->n_entries); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); - - for (i = 0; i < table->size / 2; i++) { - u16 low = get_entry_l3cc(table, 2 * i); - u16 high = get_entry_l3cc(table, 2 * i + 1); + for_each_l3cc(l3cc, table, i) + intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc); +} - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, low, high); - } +void intel_mocs_init_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_mocs_table table; - /* Odd table size - 1 left over */ - if (table->size & 0x01) { - u16 low = get_entry_l3cc(table, 2 * i); + /* Called under a blanket forcewake */ + assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, low, unused_value); - i++; - } + if (!get_mocs_settings(engine->i915, &table)) + return; - /* All remaining entries are also unused */ - for (; i < table->n_entries / 2; i++) { - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, unused_value, unused_value); - } + /* Platforms with global MOCS do not need per-engine initialization. */ + if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915)) + init_mocs_table(engine, &table); - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); + if (engine->class == RENDER_CLASS) + init_l3cc_table(engine, &table); +} - return 0; +static u32 global_mocs_offset(void) +{ + return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } -/** - * intel_mocs_init_l3cc_table() - program the mocs control table - * @dev_priv: i915 device private - * - * This function simply programs the mocs registers for the given table - * starting at the given address. This register set is programmed in pairs. - * - * These registers may get programmed more than once, it is simpler to - * re-program 32 registers than maintain the state of when they were programmed. - * We are always reprogramming with the same values and this only on context - * start. - * - * Return: Nothing. 
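The L3CC registers are programmed in pairs, two 16-bit table entries per 32-bit write, which is why for_each_l3cc() above runs (n_entries + 1) / 2 times. A standalone sketch of the packing, including the odd-sized-table case (the missing high half is simply zero here, where the driver resolves it through its fallback lookup):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t l3cc_combine(uint16_t low, uint16_t high)
    {
        return low | (uint32_t)high << 16;
    }

    int main(void)
    {
        /* made-up values, odd count on purpose */
        static const uint16_t l3cc[] = { 0x0010, 0x0030, 0x0010, 0x0030, 0x0030 };
        const unsigned int n = sizeof(l3cc) / sizeof(l3cc[0]);

        for (unsigned int i = 0; i < (n + 1) / 2; i++) {
            uint16_t low = l3cc[2 * i];
            uint16_t high = (2 * i + 1 < n) ? l3cc[2 * i + 1] : 0;

            printf("LNCFCMOCS[%u] = 0x%08x\n", i, l3cc_combine(low, high));
        }

        return 0;
    }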
- */ -void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) +static void init_global_mocs(struct intel_gt *gt) { struct drm_i915_mocs_table table; - unsigned int i; - u16 unused_value; - if (!get_mocs_settings(dev_priv, &table)) + /* + * LLC and eDRAM control values are not applicable to dgfx + */ + if (IS_DGFX(gt->i915)) return; - /* Set unused values to PTE */ - unused_value = table.table[I915_MOCS_PTE].l3cc_value; - - for (i = 0; i < table.size / 2; i++) { - u16 low = get_entry_l3cc(&table, 2 * i); - u16 high = get_entry_l3cc(&table, 2 * i + 1); - - I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, high)); - } - - /* Odd table size - 1 left over */ - if (table.size & 0x01) { - u16 low = get_entry_l3cc(&table, 2 * i); - - I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, unused_value)); - i++; - } + if (!get_mocs_settings(gt->i915, &table)) + return; - /* All remaining entries are also unused */ - for (; i < table.n_entries / 2; i++) - I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, unused_value, unused_value)); + __init_mocs_table(gt->uncore, &table, global_mocs_offset()); } -/** - * intel_rcs_context_init_mocs() - program the MOCS register. - * @rq: Request to set up the MOCS tables for. - * - * This function will emit a batch buffer with the values required for - * programming the MOCS register values for all the currently supported - * rings. - * - * These registers are partially stored in the RCS context, so they are - * emitted at the same time so that when a context is created these registers - * are set up. These registers have to be emitted into the start of the - * context as setting the ELSP will re-init some of these registers back - * to the hw values. - * - * Return: 0 on success, otherwise the error status. - */ -int intel_rcs_context_init_mocs(struct i915_request *rq) +void intel_mocs_init(struct intel_gt *gt) { - struct drm_i915_mocs_table t; - int ret; - - if (get_mocs_settings(rq->i915, &t)) { - /* Program the RCS control registers */ - ret = emit_mocs_control_table(rq, &t); - if (ret) - return ret; - - /* Now program the l3cc registers */ - ret = emit_mocs_l3cc_table(rq, &t); - if (ret) - return ret; - } - - return 0; + if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) + init_global_mocs(gt); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_mocs.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index 0913704a1af2..83371f3e6ba1 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -49,12 +49,10 @@ * context handling keep the MOCS in step. 
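Putting the pieces above together: Gen12+ parts program one global MOCS table per GT, earlier parts program a copy per engine, and only render engines get the L3CC table. A rough standalone sketch of that split, using hypothetical flags rather than the driver's feature checks:

    #include <stdbool.h>
    #include <stdio.h>

    struct gpu {
        bool has_global_mocs;   /* stand-in for HAS_GLOBAL_MOCS_REGISTERS() */
        bool is_dgfx;           /* stand-in for IS_DGFX() */
    };

    static void mocs_init_gt(const struct gpu *gpu)
    {
        /* LLC/eDRAM control values do not apply to discrete parts */
        if (gpu->has_global_mocs && !gpu->is_dgfx)
            printf("program the global MOCS table once for the GT\n");
    }

    static void mocs_init_engine(const struct gpu *gpu, bool is_render)
    {
        if (!gpu->has_global_mocs)
            printf("program the per-engine MOCS registers\n");
        if (is_render)
            printf("program the L3CC (LNCFCMOCS) table\n");
    }

    int main(void)
    {
        const struct gpu tgl = { .has_global_mocs = true, .is_dgfx = false };

        mocs_init_gt(&tgl);
        mocs_init_engine(&tgl, true);   /* render engine */
        mocs_init_engine(&tgl, false);  /* e.g. a copy engine */
        return 0;
    }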
*/ -struct drm_i915_private; -struct i915_request; struct intel_engine_cs; +struct intel_gt; -int intel_rcs_context_init_mocs(struct i915_request *rq); -void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv); +void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c new file mode 100644 index 000000000000..f86f7e68ce5e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/slab.h> + +#include "i915_trace.h" +#include "intel_gtt.h" +#include "gen6_ppgtt.h" +#include "gen8_ppgtt.h" + +struct i915_page_table *alloc_pt(struct i915_address_space *vm) +{ + struct i915_page_table *pt; + + pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL); + if (unlikely(!pt)) + return ERR_PTR(-ENOMEM); + + if (unlikely(setup_page_dma(vm, &pt->base))) { + kfree(pt); + return ERR_PTR(-ENOMEM); + } + + atomic_set(&pt->used, 0); + return pt; +} + +struct i915_page_directory *__alloc_pd(size_t sz) +{ + struct i915_page_directory *pd; + + pd = kzalloc(sz, I915_GFP_ALLOW_FAIL); + if (unlikely(!pd)) + return NULL; + + spin_lock_init(&pd->lock); + return pd; +} + +struct i915_page_directory *alloc_pd(struct i915_address_space *vm) +{ + struct i915_page_directory *pd; + + pd = __alloc_pd(sizeof(*pd)); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); + + if (unlikely(setup_page_dma(vm, px_base(pd)))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + + return pd; +} + +void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd) +{ + cleanup_page_dma(vm, pd); + kfree(pd); +} + +static inline void +write_dma_entry(struct i915_page_dma * const pdma, + const unsigned short idx, + const u64 encoded_entry) +{ + u64 * const vaddr = kmap_atomic(pdma->page); + + vaddr[idx] = encoded_entry; + kunmap_atomic(vaddr); +} + +void +__set_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_dma * const to, + u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) +{ + /* Each thread pre-pins the pd, and we may have a thread per pde. 
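A standalone analogue of the page-directory bookkeeping around here: an entry is a 64-bit word encoding a child's DMA address plus flags, setting a slot takes a reference on the directory, and clearing one points the slot back at scratch instead of leaving it stale. The encoding, sizes and names below are invented for the illustration and do not match the hardware PDE format:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ENTRY_PRESENT (1ull << 0)   /* made-up flag */

    struct page_directory {
        uint64_t entry[512];
        unsigned int used;      /* children referenced by this directory */
    };

    static uint64_t encode(uint64_t daddr)
    {
        return (daddr & ~0xfffull) | ENTRY_PRESENT;
    }

    static void set_pd_entry(struct page_directory *pd, unsigned short idx,
                             uint64_t child_daddr)
    {
        assert(idx < 512);
        pd->used++;
        pd->entry[idx] = encode(child_daddr);
    }

    static void clear_pd_entry(struct page_directory *pd, unsigned short idx,
                               uint64_t scratch_encoded)
    {
        assert(pd->used > 0);
        pd->entry[idx] = scratch_encoded;   /* never leave a stale entry behind */
        pd->used--;
    }

    int main(void)
    {
        struct page_directory pd = { .used = 0 };

        set_pd_entry(&pd, 3, 0x12345000);
        printf("entry[3] = 0x%llx, used = %u\n",
               (unsigned long long)pd.entry[3], pd.used);
        clear_pd_entry(&pd, 3, encode(0xdead0000));
        return 0;
    }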
*/ + GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry)); + + atomic_inc(px_used(pd)); + pd->entry[idx] = to; + write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC)); +} + +void +clear_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + const struct i915_page_scratch * const scratch) +{ + GEM_BUG_ON(atomic_read(px_used(pd)) == 0); + + write_dma_entry(px_base(pd), idx, scratch->encode); + pd->entry[idx] = NULL; + atomic_dec(px_used(pd)); +} + +bool +release_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_table * const pt, + const struct i915_page_scratch * const scratch) +{ + bool free = false; + + if (atomic_add_unless(&pt->used, -1, 1)) + return false; + + spin_lock(&pd->lock); + if (atomic_dec_and_test(&pt->used)) { + clear_pd_entry(pd, idx, scratch); + free = true; + } + spin_unlock(&pd->lock); + + return free; +} + +int i915_ppgtt_init_hw(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + gtt_write_workarounds(gt); + + if (IS_GEN(i915, 6)) + gen6_ppgtt_enable(gt); + else if (IS_GEN(i915, 7)) + gen7_ppgtt_enable(gt); + + return 0; +} + +static struct i915_ppgtt * +__ppgtt_create(struct intel_gt *gt) +{ + if (INTEL_GEN(gt->i915) < 8) + return gen6_ppgtt_create(gt); + else + return gen8_ppgtt_create(gt); +} + +struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt) +{ + struct i915_ppgtt *ppgtt; + + ppgtt = __ppgtt_create(gt); + if (IS_ERR(ppgtt)) + return ppgtt; + + trace_i915_ppgtt_create(&ppgtt->vm); + + return ppgtt; +} + +static int ppgtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + u32 pte_flags; + int err; + + if (flags & I915_VMA_ALLOC) { + err = vma->vm->allocate_va_range(vma->vm, + vma->node.start, vma->size); + if (err) + return err; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); + } + + /* Applicable to VLV, and gen8+ */ + pte_flags = 0; + if (i915_gem_object_is_readonly(vma->obj)) + pte_flags |= PTE_READ_ONLY; + + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))); + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + wmb(); + + return 0; +} + +static void ppgtt_unbind_vma(struct i915_vma *vma) +{ + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); +} + +int ppgtt_set_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->pages); + + vma->pages = vma->obj->mm.pages; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + +void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + ppgtt->vm.gt = gt; + ppgtt->vm.i915 = i915; + ppgtt->vm.dma = &i915->drm.pdev->dev; + ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size); + + i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); + + ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma; + ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma; + ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages; + ppgtt->vm.vma_ops.clear_pages = clear_pages; +} diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c new file mode 100644 index 000000000000..9e303c29d6e3 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -0,0 +1,776 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/pm_runtime.h> + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_rc6.h" +#include "intel_sideband.h" + +/** + * DOC: 
RC6 + * + * RC6 is a special power stage which allows the GPU to enter an very + * low-voltage mode when idle, using down to 0V while at this stage. This + * stage is entered automatically when the GPU is idle when RC6 support is + * enabled, and as soon as new workload arises GPU wakes up automatically as + * well. + * + * There are different RC6 modes available in Intel GPU, which differentiate + * among each other with the latency required to enter and leave RC6 and + * voltage consumed by the GPU in different states. + * + * The combination of the following flags define which states GPU is allowed + * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and + * RC6pp is deepest RC6. Their support by hardware varies according to the + * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one + * which brings the most power savings; deeper states save more power, but + * require higher latency to switch to and wake up. + */ + +static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6) +{ + return container_of(rc6, struct intel_gt, rc6); +} + +static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc) +{ + return rc6_to_gt(rc)->uncore; +} + +static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc) +{ + return rc6_to_gt(rc)->i915; +} + +static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +{ + intel_uncore_write_fw(uncore, reg, val); +} + +static void gen11_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + + set(uncore, GEN6_RC_SLEEP, 0); + + set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from plane_state is that we + * do not want the enable hysteresis to less than the wakeup latency. + * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it under 10us for Icelake, similar to + * Broadwell+, To be conservative, we want to factor in a context + * switch on top (due to ksoftirqd). 
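The comment above boils down to: never program an idle hysteresis shorter than the worst-case time to service the CS interrupt and submit the next ELSP, otherwise the power well gates and the wake-up cost is paid on top of the service latency. A standalone sketch of that reasoning with made-up latencies, in abstract microseconds rather than register units:

    #include <stdio.h>

    static unsigned int pick_idle_hysteresis_us(unsigned int service_latency_us,
                                                unsigned int ctx_switch_us,
                                                unsigned int bspec_minimum_us)
    {
        unsigned int conservative = service_latency_us + ctx_switch_us;

        /* Never go below the documented minimum, and never below our own
         * latency estimate: gating earlier than we can resubmit just adds
         * the wake-up cost to every submission. */
        return conservative > bspec_minimum_us ? conservative : bspec_minimum_us;
    }

    int main(void)
    {
        /* e.g. ~10us measured service latency plus a context switch */
        printf("hysteresis >= %u us\n", pick_idle_hysteresis_us(10, 20, 25));
        return 0;
    }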
+ */ + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); + + /* 3a: Enable RC6 */ + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1); + + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE); +} + +static void gen9_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 rc6_mode; + + /* 2b: Program RC6 thresholds.*/ + if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) { + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { + /* + * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only + * when CPG is enabled + */ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); + } else { + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); + } + + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + + set(uncore, GEN6_RC_SLEEP, 0); + + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from plane_state is that we + * do not want the enable hysteresis to less than the wakeup latency. + * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it around 10us for Broadwell (and other + * big core) and around 40us for Broxton (and other low power cores). + * [Note that for legacy ringbuffer submission, this is less than 1us!] + * However, the wakeup latency on Broxton is closer to 100us. To be + * conservative, we have to factor in a context switch on top (due + * to ksoftirqd). + */ + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + + /* 3a: Enable RC6 */ + set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + + /* WaRsUseTimeoutMode:cnl (pre-prod) */ + if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0)) + rc6_mode = GEN7_RC_CTL_TO_MODE; + else + rc6_mode = GEN6_RC_CTL_EI_MODE(1); + + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + rc6_mode; + + /* + * WaRsDisableCoarsePowerGating:skl,cnl + * - Render/Media PG need to be disabled with RC6. 
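Each per-generation enable path above ends the same way: compose the value destined for GEN6_RC_CONTROL from a base enable, a wake mode (timeout vs. EI) and any deeper states the part supports, then stash it in rc6->ctl_enable for later. A standalone sketch of that composition; the flag names and bit positions below are invented for the illustration:

    #include <stdbool.h>
    #include <stdio.h>

    /* made-up bit positions, not the real GEN6_RC_CONTROL layout */
    #define RC_CTL_HW_ENABLE   (1u << 31)
    #define RC_CTL_RC6_ENABLE  (1u << 18)
    #define RC_CTL_RC6P_ENABLE (1u << 17)
    #define RC_CTL_TO_MODE     (1u << 28)
    #define RC_CTL_EI_MODE     (1u << 27)

    static unsigned int build_ctl_enable(bool use_timeout_mode, bool has_rc6p)
    {
        unsigned int ctl = RC_CTL_HW_ENABLE | RC_CTL_RC6_ENABLE;

        ctl |= use_timeout_mode ? RC_CTL_TO_MODE : RC_CTL_EI_MODE;
        if (has_rc6p)               /* deeper state, as in the gen6 path below */
            ctl |= RC_CTL_RC6P_ENABLE;

        return ctl;
    }

    int main(void)
    {
        printf("ctl_enable = 0x%08x\n", build_ctl_enable(false, false));
        return 0;
    }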
+ */ + if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6))) + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); +} + +static void gen8_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ + + /* 3: Enable RC6 */ + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + GEN6_RC_CTL_RC6_ENABLE; +} + +static void gen6_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 rc6vids, rc6_mask; + int ret; + + set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); + set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC1e_THRESHOLD, 1000); + if (IS_IVYBRIDGE(i915)) + set(uncore, GEN6_RC6_THRESHOLD, 125000); + else + set(uncore, GEN6_RC6_THRESHOLD, 50000); + set(uncore, GEN6_RC6p_THRESHOLD, 150000); + set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */ + + /* We don't use those on Haswell */ + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + if (HAS_RC6p(i915)) + rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; + if (HAS_RC6pp(i915)) + rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; + rc6->ctl_enable = + rc6_mask | + GEN6_RC_CTL_EI_MODE(1) | + GEN6_RC_CTL_HW_ENABLE; + + rc6vids = 0; + ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, + &rc6vids, NULL); + if (IS_GEN(i915, 6) && ret) { + DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); + } else if (IS_GEN(i915, 6) && + (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { + DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", + GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); + rc6vids &= 0xffff00; + rc6vids |= GEN6_ENCODE_RC6_VID(450); + ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); + if (ret) + DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); + } +} + +/* Check that the pcbr address is not empty. 
*/ +static int chv_rc6_init(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + resource_size_t pctx_paddr, paddr; + resource_size_t pctx_size = 32 * SZ_1K; + u32 pcbr; + + pcbr = intel_uncore_read(uncore, VLV_PCBR); + if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size; + GEM_BUG_ON(paddr > U32_MAX); + + pctx_paddr = (paddr & ~4095); + intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); + } + + return 0; +} + +static int vlv_rc6_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_gem_object *pctx; + resource_size_t pctx_paddr; + resource_size_t pctx_size = 24 * SZ_1K; + u32 pcbr; + + pcbr = intel_uncore_read(uncore, VLV_PCBR); + if (pcbr) { + /* BIOS set it up already, grab the pre-alloc'd space */ + resource_size_t pcbr_offset; + + pcbr_offset = (pcbr & ~4095) - i915->dsm.start; + pctx = i915_gem_object_create_stolen_for_preallocated(i915, + pcbr_offset, + I915_GTT_OFFSET_NONE, + pctx_size); + if (IS_ERR(pctx)) + return PTR_ERR(pctx); + + goto out; + } + + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + + /* + * From the Gunit register HAS: + * The Gfx driver is expected to program this register and ensure + * proper allocation within Gfx stolen memory. For example, this + * register should be programmed such than the PCBR range does not + * overlap with other ranges, such as the frame buffer, protected + * memory, or any other relevant ranges. + */ + pctx = i915_gem_object_create_stolen(i915, pctx_size); + if (IS_ERR(pctx)) { + DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); + return PTR_ERR(pctx); + } + + GEM_BUG_ON(range_overflows_t(u64, + i915->dsm.start, + pctx->stolen->start, + U32_MAX)); + pctx_paddr = i915->dsm.start + pctx->stolen->start; + intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); + +out: + rc6->pctx = pctx; + return 0; +} + +static void chv_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2a: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_SLEEP, 0); + + /* TO threshold set to 500 us (0x186 * 1.28 us) */ + set(uncore, GEN6_RC6_THRESHOLD, 0x186); + + /* Allows RC6 residency counter to work */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + /* 3: Enable RC6 */ + rc6->ctl_enable = GEN7_RC_CTL_TO_MODE; +} + +static void vlv_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GEN6_RC6_THRESHOLD, 0x557); + + /* Allows RC6 residency counter to work */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC0_COUNT_EN | + VLV_RENDER_RC0_COUNT_EN | + 
VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + rc6->ctl_enable = + GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; +} + +static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); + u32 rc6_ctx_base, rc_ctl, rc_sw_target; + bool enable_rc6 = true; + + rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL); + rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE); + rc_sw_target &= RC_SW_TARGET_STATE_MASK; + rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT; + DRM_DEBUG_DRIVER("BIOS enabled RC states: " + "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", + onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), + onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), + rc_sw_target); + + if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) { + DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); + enable_rc6 = false; + } + + /* + * The exact context size is not known for BXT, so assume a page size + * for this check. + */ + rc6_ctx_base = + intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK; + if (!(rc6_ctx_base >= i915->dsm_reserved.start && + rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) { + DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); + enable_rc6 = false; + } + + if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) { + DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) || + !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) || + !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) { + DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) { + DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) { + DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); + enable_rc6 = false; + } + + return enable_rc6; +} + +static bool rc6_supported(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!HAS_RC6(i915)) + return false; + + if (intel_vgpu_active(i915)) + return false; + + if (is_mock_gt(rc6_to_gt(rc6))) + return false; + + if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) { + dev_notice(i915->drm.dev, + "RC6 and powersaving disabled by BIOS\n"); + return false; + } + + return true; +} + +static void rpm_get(struct intel_rc6 *rc6) +{ + GEM_BUG_ON(rc6->wakeref); + pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev); + rc6->wakeref = true; +} + +static void rpm_put(struct intel_rc6 *rc6) +{ + GEM_BUG_ON(!rc6->wakeref); + pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev); + rc6->wakeref = false; +} + +static bool pctx_corrupted(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return false; + + if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO)) + return false; + + dev_notice(i915->drm.dev, + "RC6 context corruption, disabling runtime power management\n"); + return true; +} + +static void __intel_rc6_disable(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + 
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + if (INTEL_GEN(i915) >= 9) + set(uncore, GEN9_PG_ENABLE, 0); + set(uncore, GEN6_RC_CONTROL, 0); + set(uncore, GEN6_RC_STATE, 0); + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); +} + +void intel_rc6_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + int err; + + /* Disable runtime-pm until we can save the GPU state with rc6 pctx */ + rpm_get(rc6); + + if (!rc6_supported(rc6)) + return; + + if (IS_CHERRYVIEW(i915)) + err = chv_rc6_init(rc6); + else if (IS_VALLEYVIEW(i915)) + err = vlv_rc6_init(rc6); + else + err = 0; + + /* Sanitize rc6, ensure it is disabled before we are ready. */ + __intel_rc6_disable(rc6); + + rc6->supported = err == 0; +} + +void intel_rc6_sanitize(struct intel_rc6 *rc6) +{ + if (rc6->enabled) { /* unbalanced suspend/resume */ + rpm_get(rc6); + rc6->enabled = false; + } + + if (rc6->supported) + __intel_rc6_disable(rc6); +} + +void intel_rc6_enable(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->supported) + return; + + GEM_BUG_ON(rc6->enabled); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + if (IS_CHERRYVIEW(i915)) + chv_rc6_enable(rc6); + else if (IS_VALLEYVIEW(i915)) + vlv_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 11) + gen11_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 9) + gen9_rc6_enable(rc6); + else if (IS_BROADWELL(i915)) + gen8_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 6) + gen6_rc6_enable(rc6); + + rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE; + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + rc6->ctl_enable = 0; + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + if (unlikely(pctx_corrupted(rc6))) + return; + + /* rc6 is ready, runtime-pm is go! */ + rpm_put(rc6); + rc6->enabled = true; +} + +void intel_rc6_unpark(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->enabled) + return; + + /* Restore HW timers for automatic RC6 entry while busy */ + set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable); +} + +void intel_rc6_park(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->enabled) + return; + + if (unlikely(pctx_corrupted(rc6))) { + intel_rc6_disable(rc6); + return; + } + + if (!rc6->manual) + return; + + /* Turn off the HW timers and go directly to rc6 */ + set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE); + set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT); +} + +void intel_rc6_disable(struct intel_rc6 *rc6) +{ + if (!rc6->enabled) + return; + + rpm_get(rc6); + rc6->enabled = false; + + __intel_rc6_disable(rc6); +} + +void intel_rc6_fini(struct intel_rc6 *rc6) +{ + struct drm_i915_gem_object *pctx; + + intel_rc6_disable(rc6); + + pctx = fetch_and_zero(&rc6->pctx); + if (pctx) + i915_gem_object_put(pctx); + + if (rc6->wakeref) + rpm_put(rc6); +} + +static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg) +{ + u32 lower, upper, tmp; + int loop = 2; + + /* + * The register accessed do not need forcewake. We borrow + * uncore lock to prevent concurrent access to range reg. + */ + lockdep_assert_held(&uncore->lock); + + /* + * vlv and chv residency counters are 40 bits in width. + * With a control bit, we can choose between upper or lower + * 32bit window into this counter. 
+ * + * Although we always use the counter in high-range mode elsewhere, + * userspace may attempt to read the value before rc6 is initialised, + * before we have set the default VLV_COUNTER_CONTROL value. So always + * set the high bit to be safe. + */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = intel_uncore_read_fw(uncore, reg); + do { + tmp = upper; + + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); + lower = intel_uncore_read_fw(uncore, reg); + + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = intel_uncore_read_fw(uncore, reg); + } while (upper != tmp && --loop); + + /* + * Everywhere else we always use VLV_COUNTER_CONTROL with the + * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set + * now. + */ + + return lower | (u64)upper << 8; +} + +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + u64 time_hw, prev_hw, overflow_hw; + unsigned int fw_domains; + unsigned long flags; + unsigned int i; + u32 mul, div; + + if (!rc6->supported) + return 0; + + /* + * Store previous hw counter values for counter wrap-around handling. + * + * There are only four interesting registers and they live next to each + * other so we can use the relative address, compared to the smallest + * one as the index into driver storage. + */ + i = (i915_mmio_reg_offset(reg) - + i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); + if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency))) + return 0; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + mul = 1000000; + div = i915->czclk_freq; + overflow_hw = BIT_ULL(40); + time_hw = vlv_residency_raw(uncore, reg); + } else { + /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ + if (IS_GEN9_LP(i915)) { + mul = 10000; + div = 12; + } else { + mul = 1280; + div = 1; + } + + overflow_hw = BIT_ULL(32); + time_hw = intel_uncore_read_fw(uncore, reg); + } + + /* + * Counter wrap handling. + * + * But relying on a sufficient frequency of queries otherwise counters + * can still wrap. + */ + prev_hw = rc6->prev_hw_residency[i]; + rc6->prev_hw_residency[i] = time_hw; + + /* RC6 delta from last sample. */ + if (time_hw >= prev_hw) + time_hw -= prev_hw; + else + time_hw += overflow_hw - prev_hw; + + /* Add delta to RC6 extended raw driver copy. 
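The wrap handling above turns each raw hardware sample into a delta against the previous sample, adding back the counter width when it has wrapped, and accumulates the deltas into a 64-bit running total. A standalone sketch (a 32-bit counter is assumed here; VLV/CHV use 40 bits), relying, as the driver does, on samples being frequent enough that the counter cannot wrap twice between reads:

    #include <stdint.h>
    #include <stdio.h>

    struct residency {
        uint64_t prev_hw;       /* last raw hardware sample */
        uint64_t accumulated;   /* extended 64-bit running total */
    };

    static uint64_t update_residency(struct residency *r, uint64_t time_hw,
                                     uint64_t overflow_hw)
    {
        uint64_t prev_hw = r->prev_hw;

        r->prev_hw = time_hw;

        /* delta since the last sample, accounting for at most one wrap */
        if (time_hw >= prev_hw)
            time_hw -= prev_hw;
        else
            time_hw += overflow_hw - prev_hw;

        r->accumulated += time_hw;
        return r->accumulated;
    }

    int main(void)
    {
        struct residency r = { .prev_hw = 0xfffffff0ULL, .accumulated = 1000 };

        /* the counter wrapped from 0xfffffff0 to 0x10 since the last read */
        printf("total = %llu\n",
               (unsigned long long)update_residency(&r, 0x10, 1ULL << 32));
        return 0;
    }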
*/ + time_hw += rc6->cur_residency[i]; + rc6->cur_residency[i] = time_hw; + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irqrestore(&uncore->lock, flags); + + return mul_u64_u32_div(time_hw, mul, div); +} + +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg) +{ + return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_rc6.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h new file mode 100644 index 000000000000..9f0f23fca8af --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RC6_H +#define INTEL_RC6_H + +#include "i915_reg.h" + +struct intel_engine_cs; +struct intel_rc6; + +void intel_rc6_init(struct intel_rc6 *rc6); +void intel_rc6_fini(struct intel_rc6 *rc6); + +void intel_rc6_unpark(struct intel_rc6 *rc6); +void intel_rc6_park(struct intel_rc6 *rc6); + +void intel_rc6_sanitize(struct intel_rc6 *rc6); +void intel_rc6_enable(struct intel_rc6 *rc6); +void intel_rc6_disable(struct intel_rc6 *rc6); + +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg); +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg); + +#endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h new file mode 100644 index 000000000000..bfbb623f7a4f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RC6_TYPES_H +#define INTEL_RC6_TYPES_H + +#include <linux/spinlock.h> +#include <linux/types.h> + +#include "intel_engine_types.h" + +struct drm_i915_gem_object; + +struct intel_rc6 { + u64 prev_hw_residency[4]; + u64 cur_residency[4]; + + u32 ctl_enable; + + struct drm_i915_gem_object *pctx; + + bool supported : 1; + bool enabled : 1; + bool manual : 1; + bool wakeref : 1; +}; + +#endif /* INTEL_RC6_TYPES_H */ diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 4ee032072d4f..5954ecc3207f 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -26,23 +26,13 @@ */ #include "i915_drv.h" -#include "i915_gem_render_state.h" #include "intel_renderstate.h" - -struct intel_render_state { - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - u32 batch_offset; - u32 batch_size; - u32 aux_offset; - u32 aux_size; -}; +#include "intel_ring.h" static const struct intel_renderstate_rodata * render_state_get_rodata(const struct intel_engine_cs *engine) { - if (engine->id != RCS0) + if (engine->class != RENDER_CLASS) return NULL; switch (INTEL_GEN(engine->i915)) { @@ -75,7 +65,7 @@ render_state_get_rodata(const struct intel_engine_cs *engine) (batch)[(i)++] = (val); \ } while(0) -static int render_state_setup(struct intel_render_state *so, +static int render_state_setup(struct intel_renderstate *so, struct drm_i915_private *i915) { const struct intel_renderstate_rodata *rodata = so->rodata; @@ -84,11 +74,11 @@ static int render_state_setup(struct intel_render_state *so, u32 *d; int ret; - ret = i915_gem_object_prepare_write(so->obj, &needs_clflush); + ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush); if (ret) return ret; - d = 
kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0)); + d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0)); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -166,7 +156,7 @@ static int render_state_setup(struct intel_render_state *so, ret = 0; out: - i915_gem_object_finish_access(so->obj); + i915_gem_object_finish_access(so->vma->obj); return ret; err: @@ -177,59 +167,84 @@ err: #undef OUT_BATCH -int i915_gem_render_state_emit(struct i915_request *rq) +int intel_renderstate_init(struct intel_renderstate *so, + struct intel_engine_cs *engine) { - struct intel_engine_cs *engine = rq->engine; - struct intel_render_state so = {}; /* keep the compiler happy */ + struct drm_i915_gem_object *obj; int err; - so.rodata = render_state_get_rodata(engine); - if (!so.rodata) + memset(so, 0, sizeof(*so)); + + so->rodata = render_state_get_rodata(engine); + if (!so->rodata) return 0; - if (so.rodata->batch_items * 4 > PAGE_SIZE) + if (so->rodata->batch_items * 4 > PAGE_SIZE) return -EINVAL; - so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(so.obj)) - return PTR_ERR(so.obj); + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); - so.vma = i915_vma_instance(so.obj, &engine->i915->ggtt.vm, NULL); - if (IS_ERR(so.vma)) { - err = PTR_ERR(so.vma); + so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(so->vma)) { + err = PTR_ERR(so->vma); goto err_obj; } - err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) goto err_vma; - err = render_state_setup(&so, rq->i915); + err = render_state_setup(so, engine->i915); if (err) goto err_unpin; + return 0; + +err_unpin: + i915_vma_unpin(so->vma); +err_vma: + i915_vma_close(so->vma); +err_obj: + i915_gem_object_put(obj); + so->vma = NULL; + return err; +} + +int intel_renderstate_emit(struct intel_renderstate *so, + struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + int err; + + if (!so->vma) + return 0; + err = engine->emit_bb_start(rq, - so.batch_offset, so.batch_size, + so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); if (err) - goto err_unpin; + return err; - if (so.aux_size > 8) { + if (so->aux_size > 8) { err = engine->emit_bb_start(rq, - so.aux_offset, so.aux_size, + so->aux_offset, so->aux_size, I915_DISPATCH_SECURE); if (err) - goto err_unpin; + return err; } - i915_vma_lock(so.vma); - err = i915_vma_move_to_active(so.vma, rq, 0); - i915_vma_unlock(so.vma); -err_unpin: - i915_vma_unpin(so.vma); -err_vma: - i915_vma_close(so.vma); -err_obj: - i915_gem_object_put(so.obj); + i915_vma_lock(so->vma); + err = i915_request_await_object(rq, so->vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(so->vma, rq, 0); + i915_vma_unlock(so->vma); + return err; } + +void intel_renderstate_fini(struct intel_renderstate *so) +{ + i915_vma_unpin_and_release(&so->vma, 0); +} diff --git a/drivers/gpu/drm/i915/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h index 08f6fea05a2c..5700be69a05a 100644 --- a/drivers/gpu/drm/i915/intel_renderstate.h +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -21,11 +21,15 @@ * DEALINGS IN THE SOFTWARE. 
*/ -#ifndef _INTEL_RENDERSTATE_H -#define _INTEL_RENDERSTATE_H +#ifndef _INTEL_RENDERSTATE_H_ +#define _INTEL_RENDERSTATE_H_ #include <linux/types.h> +struct i915_request; +struct intel_engine_cs; +struct i915_vma; + struct intel_renderstate_rodata { const u32 *reloc; const u32 *batch; @@ -44,4 +48,19 @@ extern const struct intel_renderstate_rodata gen7_null_state; extern const struct intel_renderstate_rodata gen8_null_state; extern const struct intel_renderstate_rodata gen9_null_state; -#endif /* INTEL_RENDERSTATE_H */ +struct intel_renderstate { + const struct intel_renderstate_rodata *rodata; + struct i915_vma *vma; + u32 batch_offset; + u32 batch_size; + u32 aux_offset; + u32 aux_size; +}; + +int intel_renderstate_init(struct intel_renderstate *so, + struct intel_engine_cs *engine); +int intel_renderstate_emit(struct intel_renderstate *so, + struct i915_request *rq); +void intel_renderstate_fini(struct intel_renderstate *so); + +#endif /* _INTEL_RENDERSTATE_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 3f907701ef4d..beee0cf89bce 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -7,6 +7,7 @@ #include <linux/sched/mm.h> #include <linux/stop_machine.h> +#include "display/intel_display_types.h" #include "display/intel_overlay.h" #include "gem/i915_gem_context.h" @@ -15,26 +16,18 @@ #include "i915_gpu_error.h" #include "i915_irq.h" #include "intel_engine_pm.h" +#include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_reset.h" -#include "intel_guc.h" +#include "uc/intel_guc.h" +#include "uc/intel_guc_submission.h" #define RESET_MAX_RETRIES 3 /* XXX How to handle concurrent GGTT updates using tiling registers? */ #define RESET_UNDER_STOP_MACHINE 0 -static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) -{ - intel_uncore_rmw(uncore, reg, 0, set); -} - -static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) -{ - intel_uncore_rmw(uncore, reg, clr, 0); -} - static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set) { intel_uncore_rmw_fw(uncore, reg, 0, set); @@ -48,28 +41,29 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) static void engine_skip_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *hung_ctx = rq->gem_context; - - lockdep_assert_held(&engine->active.lock); + struct intel_context *hung_ctx = rq->context; if (!i915_request_is_active(rq)) return; + lockdep_assert_held(&engine->active.lock); list_for_each_entry_continue(rq, &engine->active.requests, sched.link) - if (rq->gem_context == hung_ctx) + if (rq->context == hung_ctx) i915_request_skip(rq, -EIO); } -static void client_mark_guilty(struct drm_i915_file_private *file_priv, - const struct i915_gem_context *ctx) +static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) { - unsigned int score; + struct drm_i915_file_private *file_priv = ctx->file_priv; unsigned long prev_hang; + unsigned int score; + + if (IS_ERR_OR_NULL(file_priv)) + return; - if (i915_gem_context_is_banned(ctx)) + score = 0; + if (banned) score = I915_CLIENT_SCORE_CONTEXT_BAN; - else - score = 0; prev_hang = xchg(&file_priv->hang_timestamp, jiffies); if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) @@ -84,17 +78,38 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv, } } -static bool context_mark_guilty(struct i915_gem_context *ctx) +static bool 
mark_guilty(struct i915_request *rq) { + struct i915_gem_context *ctx; unsigned long prev_hang; bool banned; int i; + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); + if (!ctx) + return false; + + if (i915_gem_context_is_closed(ctx)) { + intel_context_set_banned(rq->context); + banned = true; + goto out; + } + atomic_inc(&ctx->guilty_count); /* Cool contexts are too cool to be banned! (Used for reset testing.) */ - if (!i915_gem_context_is_bannable(ctx)) - return false; + if (!i915_gem_context_is_bannable(ctx)) { + banned = false; + goto out; + } + + dev_notice(ctx->i915->drm.dev, + "%s context reset due to GPU hang\n", + ctx->name); /* Record the timestamp for the last N hangs */ prev_hang = ctx->hang_timestamp[0]; @@ -109,81 +124,43 @@ static bool context_mark_guilty(struct i915_gem_context *ctx) if (banned) { DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n", ctx->name, atomic_read(&ctx->guilty_count)); - i915_gem_context_set_banned(ctx); + intel_context_set_banned(rq->context); } - if (!IS_ERR_OR_NULL(ctx->file_priv)) - client_mark_guilty(ctx->file_priv, ctx); + client_mark_guilty(ctx, banned); +out: + i915_gem_context_put(ctx); return banned; } -static void context_mark_innocent(struct i915_gem_context *ctx) +static void mark_innocent(struct i915_request *rq) { - atomic_inc(&ctx->active_count); + struct i915_gem_context *ctx; + + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx) + atomic_inc(&ctx->active_count); + rcu_read_unlock(); } -void i915_reset_request(struct i915_request *rq, bool guilty) +void __i915_request_reset(struct i915_request *rq, bool guilty) { - GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n", - rq->engine->name, - rq->fence.context, - rq->fence.seqno, - yesno(guilty)); + RQ_TRACE(rq, "guilty? 
%s\n", yesno(guilty)); - lockdep_assert_held(&rq->engine->active.lock); GEM_BUG_ON(i915_request_completed(rq)); + rcu_read_lock(); /* protect the GEM context */ if (guilty) { i915_request_skip(rq, -EIO); - if (context_mark_guilty(rq->gem_context)) + if (mark_guilty(rq)) engine_skip_context(rq); } else { dma_fence_set_error(&rq->fence, -EAGAIN); - context_mark_innocent(rq->gem_context); + mark_innocent(rq); } -} - -static void gen3_stop_engine(struct intel_engine_cs *engine) -{ - struct intel_uncore *uncore = engine->uncore; - const u32 base = engine->mmio_base; - - GEM_TRACE("%s\n", engine->name); - - if (intel_engine_stop_cs(engine)) - GEM_TRACE("%s: timed out on STOP_RING\n", engine->name); - - intel_uncore_write_fw(uncore, - RING_HEAD(base), - intel_uncore_read_fw(uncore, RING_TAIL(base))); - intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */ - - intel_uncore_write_fw(uncore, RING_HEAD(base), 0); - intel_uncore_write_fw(uncore, RING_TAIL(base), 0); - intel_uncore_posting_read_fw(uncore, RING_TAIL(base)); - - /* The ring must be empty before it is disabled */ - intel_uncore_write_fw(uncore, RING_CTL(base), 0); - - /* Check acts as a post */ - if (intel_uncore_read_fw(uncore, RING_HEAD(base))) - GEM_TRACE("%s: ring head [%x] not parked\n", - engine->name, - intel_uncore_read_fw(uncore, RING_HEAD(base))); -} - -static void i915_stop_engines(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (INTEL_GEN(i915) < 3) - return; - - for_each_engine_masked(engine, i915, engine_mask, tmp) - gen3_stop_engine(engine); + rcu_read_unlock(); } static bool i915_in_reset(struct pci_dev *pdev) @@ -194,11 +171,11 @@ static bool i915_in_reset(struct pci_dev *pdev) return gdrst & GRDOM_RESET_STATUS; } -static int i915_do_reset(struct drm_i915_private *i915, +static int i915_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct pci_dev *pdev = i915->drm.pdev; + struct pci_dev *pdev = gt->i915->drm.pdev; int err; /* Assert reset for at least 20 usec, and wait for acknowledgement. 
*/ @@ -223,22 +200,22 @@ static bool g4x_reset_complete(struct pci_dev *pdev) return (gdrst & GRDOM_RESET_ENABLE) == 0; } -static int g33_do_reset(struct drm_i915_private *i915, +static int g33_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct pci_dev *pdev = i915->drm.pdev; + struct pci_dev *pdev = gt->i915->drm.pdev; pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); return wait_for_atomic(g4x_reset_complete(pdev), 50); } -static int g4x_do_reset(struct drm_i915_private *i915, +static int g4x_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct pci_dev *pdev = i915->drm.pdev; - struct intel_uncore *uncore = &i915->uncore; + struct pci_dev *pdev = gt->i915->drm.pdev; + struct intel_uncore *uncore = gt->uncore; int ret; /* WaVcpClkGateDisableForMediaReset:ctg,elk */ @@ -270,11 +247,10 @@ out: return ret; } -static int ironlake_do_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, + unsigned int retry) { - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = gt->uncore; int ret; intel_uncore_write_fw(uncore, ILK_GDSR, @@ -306,10 +282,9 @@ out: } /* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ -static int gen6_hw_domain_reset(struct drm_i915_private *i915, - u32 hw_domain_mask) +static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) { - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = gt->uncore; int err; /* @@ -331,18 +306,18 @@ static int gen6_hw_domain_reset(struct drm_i915_private *i915, return err; } -static int gen6_reset_engines(struct drm_i915_private *i915, +static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct intel_engine_cs *engine; - const u32 hw_engine_mask[] = { + static const u32 hw_engine_mask[] = { [RCS0] = GEN6_GRDOM_RENDER, [BCS0] = GEN6_GRDOM_BLT, [VCS0] = GEN6_GRDOM_MEDIA, [VCS1] = GEN8_GRDOM_MEDIA2, [VECS0] = GEN6_GRDOM_VECS, }; + struct intel_engine_cs *engine; u32 hw_mask; if (engine_mask == ALL_ENGINES) { @@ -351,16 +326,16 @@ static int gen6_reset_engines(struct drm_i915_private *i915, intel_engine_mask_t tmp; hw_mask = 0; - for_each_engine_masked(engine, i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; } } - return gen6_hw_domain_reset(i915, hw_mask); + return gen6_hw_domain_reset(gt, hw_mask); } -static u32 gen11_lock_sfc(struct intel_engine_cs *engine) +static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) { struct intel_uncore *uncore = engine->uncore; u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access; @@ -369,6 +344,7 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine) i915_reg_t sfc_usage; u32 sfc_usage_bit; u32 sfc_reset_bit; + int ret; switch (engine->class) { case VIDEO_DECODE_CLASS: @@ -403,27 +379,33 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine) } /* - * Tell the engine that a software reset is going to happen. The engine - * will then try to force lock the SFC (if currently locked, it will - * remain so until we tell the engine it is safe to unlock; if currently - * unlocked, it will ignore this and all new lock requests). 
If SFC - * ends up being locked to the engine we want to reset, we have to reset - * it as well (we will unlock it once the reset sequence is completed). + * If the engine is using a SFC, tell the engine that a software reset + * is going to happen. The engine will then try to force lock the SFC. + * If SFC ends up being locked to the engine we want to reset, we have + * to reset it as well (we will unlock it once the reset sequence is + * completed). */ + if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)) + return 0; + rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); - if (__intel_wait_for_register_fw(uncore, - sfc_forced_lock_ack, - sfc_forced_lock_ack_bit, - sfc_forced_lock_ack_bit, - 1000, 0, NULL)) { - DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + ret = __intel_wait_for_register_fw(uncore, + sfc_forced_lock_ack, + sfc_forced_lock_ack_bit, + sfc_forced_lock_ack_bit, + 1000, 0, NULL); + + /* Was the SFC released while we were trying to lock it? */ + if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)) return 0; - } - if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit) - return sfc_reset_bit; + if (ret) { + DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + return ret; + } + *hw_mask |= sfc_reset_bit; return 0; } @@ -455,11 +437,11 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); } -static int gen11_reset_engines(struct drm_i915_private *i915, +static int gen11_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - const u32 hw_engine_mask[] = { + static const u32 hw_engine_mask[] = { [RCS0] = GEN11_GRDOM_RENDER, [BCS0] = GEN11_GRDOM_BLT, [VCS0] = GEN11_GRDOM_MEDIA, @@ -478,17 +460,26 @@ static int gen11_reset_engines(struct drm_i915_private *i915, hw_mask = GEN11_GRDOM_FULL; } else { hw_mask = 0; - for_each_engine_masked(engine, i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; - hw_mask |= gen11_lock_sfc(engine); + ret = gen11_lock_sfc(engine, &hw_mask); + if (ret) + goto sfc_unlock; } } - ret = gen6_hw_domain_reset(i915, hw_mask); + ret = gen6_hw_domain_reset(gt, hw_mask); +sfc_unlock: + /* + * We unlock the SFC based on the lock status and not the result of + * gen11_lock_sfc to make sure that we clean properly if something + * wrong happened during the lock (e.g. lock acquired after timeout + * expiration). + */ if (engine_mask != ALL_ENGINES) - for_each_engine_masked(engine, i915, engine_mask, tmp) + for_each_engine_masked(engine, gt, engine_mask, tmp) gen11_unlock_sfc(engine); return ret; @@ -538,7 +529,7 @@ static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); } -static int gen8_reset_engines(struct drm_i915_private *i915, +static int gen8_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { @@ -547,7 +538,7 @@ static int gen8_reset_engines(struct drm_i915_private *i915, intel_engine_mask_t tmp; int ret; - for_each_engine_masked(engine, i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) goto skip_reset; @@ -563,34 +554,45 @@ static int gen8_reset_engines(struct drm_i915_private *i915, * We rather take context corruption instead of * failed reset with a wedged driver/gpu. 
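The reworked gen11_lock_sfc() above boils down to: skip engines whose SFC is idle, ask the engine to force-lock its SFC, wait for the ack, re-check whether the SFC was released in the meantime, and only on success add the SFC reset domain to the accumulated mask; the caller later unlocks based on the lock status rather than this return value. A compressed sketch of that control flow, with sfc_in_use(), force_lock_sfc() and wait_lock_ack() as hypothetical stand-ins for the MMIO accessors.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-ins for the per-engine SFC register accessors. */
bool sfc_in_use(int engine);
void force_lock_sfc(int engine);
int wait_lock_ack(int engine);		/* 0 on ack, negative on timeout */

static int lock_sfc_for_reset(int engine, uint32_t sfc_reset_bit,
			      uint32_t *hw_mask)
{
	int ret;

	if (!sfc_in_use(engine))
		return 0;		/* SFC idle: nothing to lock, nothing extra to reset */

	force_lock_sfc(engine);
	ret = wait_lock_ack(engine);

	/* The SFC may have been released while we waited for the ack. */
	if (!sfc_in_use(engine))
		return 0;

	if (ret)
		return ret;		/* caller unwinds; unlock keys off lock status */

	*hw_mask |= sfc_reset_bit;	/* reset the SFC together with its engine */
	return 0;
}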
And * active bb execution case should be covered by - * i915_stop_engines we have before the reset. + * stop_engines() we have before the reset. */ } - if (INTEL_GEN(i915) >= 11) - ret = gen11_reset_engines(i915, engine_mask, retry); + if (INTEL_GEN(gt->i915) >= 11) + ret = gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(i915, engine_mask, retry); + ret = gen6_reset_engines(gt, engine_mask, retry); skip_reset: - for_each_engine_masked(engine, i915, engine_mask, tmp) + for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); return ret; } -typedef int (*reset_func)(struct drm_i915_private *, +static int mock_reset(struct intel_gt *gt, + intel_engine_mask_t mask, + unsigned int retry) +{ + return 0; +} + +typedef int (*reset_func)(struct intel_gt *, intel_engine_mask_t engine_mask, unsigned int retry); -static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) +static reset_func intel_get_gpu_reset(const struct intel_gt *gt) { - if (INTEL_GEN(i915) >= 8) + struct drm_i915_private *i915 = gt->i915; + + if (is_mock_gt(gt)) + return mock_reset; + else if (INTEL_GEN(i915) >= 8) return gen8_reset_engines; else if (INTEL_GEN(i915) >= 6) return gen6_reset_engines; else if (INTEL_GEN(i915) >= 5) - return ironlake_do_reset; + return ilk_do_reset; else if (IS_G4X(i915)) return g4x_do_reset; else if (IS_G33(i915) || IS_PINEVIEW(i915)) @@ -601,15 +603,14 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) return NULL; } -int intel_gpu_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask) +int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) { const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; reset_func reset; int ret = -ETIMEDOUT; int retry; - reset = intel_get_gpu_reset(i915); + reset = intel_get_gpu_reset(gt); if (!reset) return -ENODEV; @@ -617,59 +618,45 @@ int intel_gpu_reset(struct drm_i915_private *i915, * If the power well sleeps during the reset, the reset * request may be dropped and never completes (causing -EIO). */ - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { - /* - * We stop engines, otherwise we might get failed reset and a - * dead gpu (on elk). Also as modern gpu as kbl can suffer - * from system hang if batchbuffer is progressing when - * the reset is issued, regardless of READY_TO_RESET ack. - * Thus assume it is best to stop engines on all gens - * where we have a gpu reset. 
- * - * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) - * - * WaMediaResetMainRingCleanup:ctg,elk (presumably) - * - * FIXME: Wa for more modern gens needs to be validated - */ - if (retry) - i915_stop_engines(i915, engine_mask); - - GEM_TRACE("engine_mask=%x\n", engine_mask); + GT_TRACE(gt, "engine_mask=%x\n", engine_mask); preempt_disable(); - ret = reset(i915, engine_mask, retry); + ret = reset(gt, engine_mask, retry); preempt_enable(); } - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); return ret; } -bool intel_has_gpu_reset(struct drm_i915_private *i915) +bool intel_has_gpu_reset(const struct intel_gt *gt) { if (!i915_modparams.reset) return NULL; - return intel_get_gpu_reset(i915); + return intel_get_gpu_reset(gt); } -bool intel_has_reset_engine(struct drm_i915_private *i915) +bool intel_has_reset_engine(const struct intel_gt *gt) { - return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2; + if (i915_modparams.reset < 2) + return false; + + return INTEL_INFO(gt->i915)->has_reset_engine; } -int intel_reset_guc(struct drm_i915_private *i915) +int intel_reset_guc(struct intel_gt *gt) { u32 guc_domain = - INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; + INTEL_GEN(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; int ret; - GEM_BUG_ON(!HAS_GUC(i915)); + GEM_BUG_ON(!HAS_GT_UC(gt->i915)); - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - ret = gen6_hw_domain_reset(i915, guc_domain); - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + ret = gen6_hw_domain_reset(gt, guc_domain); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); return ret; } @@ -688,59 +675,64 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * GPU state upon resume, i.e. fail to restart after a reset. 
*/ intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); - engine->reset.prepare(engine); + if (engine->reset.prepare) + engine->reset.prepare(engine); } -static void revoke_mmaps(struct drm_i915_private *i915) +static void revoke_mmaps(struct intel_gt *gt) { int i; - for (i = 0; i < i915->ggtt.num_fences; i++) { + for (i = 0; i < gt->ggtt->num_fences; i++) { struct drm_vma_offset_node *node; struct i915_vma *vma; u64 vma_offset; - vma = READ_ONCE(i915->ggtt.fence_regs[i].vma); + vma = READ_ONCE(gt->ggtt->fence_regs[i].vma); if (!vma) continue; if (!i915_vma_has_userfault(vma)) continue; - GEM_BUG_ON(vma->fence != &i915->ggtt.fence_regs[i]); - node = &vma->obj->base.vma_node; + GEM_BUG_ON(vma->fence != >->ggtt->fence_regs[i]); + + if (!vma->mmo) + continue; + + node = &vma->mmo->vma_node; vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; - unmap_mapping_range(i915->drm.anon_inode->i_mapping, + + unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping, drm_vma_node_offset_addr(node) + vma_offset, vma->size, 1); } } -static intel_engine_mask_t reset_prepare(struct drm_i915_private *i915) +static intel_engine_mask_t reset_prepare(struct intel_gt *gt) { struct intel_engine_cs *engine; intel_engine_mask_t awake = 0; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (intel_engine_pm_get_if_awake(engine)) awake |= engine->mask; reset_prepare_engine(engine); } - intel_uc_reset_prepare(i915); + intel_uc_reset_prepare(>->uc); return awake; } -static void gt_revoke(struct drm_i915_private *i915) +static void gt_revoke(struct intel_gt *gt) { - revoke_mmaps(i915); + revoke_mmaps(gt); } -static int gt_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask) +static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -750,33 +742,33 @@ static int gt_reset(struct drm_i915_private *i915, * Everything depends on having the GTT running, so we need to start * there. 
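reset_prepare() and reset_finish() above use an easy-to-miss pattern: conditionally take a power reference on each engine that is already awake, record which attempts succeeded in a bitmask, and later drop exactly those references. A sketch of that bookkeeping, assuming a simple try-get/put wakeref API (all helper names here are illustrative).

#include <stdbool.h>

#define MAX_ENGINES 8

/* Hypothetical per-engine helpers. */
bool engine_get_if_awake(int id);
void engine_put(int id);
void engine_reset_prepare(int id);
void engine_reset_finish(int id);

static unsigned int prepare_all_engines(void)
{
	unsigned int awake = 0;
	int id;

	for (id = 0; id < MAX_ENGINES; id++) {
		if (engine_get_if_awake(id))
			awake |= 1u << id;	/* remember which refs we hold */
		engine_reset_prepare(id);
	}
	return awake;
}

static void finish_all_engines(unsigned int awake)
{
	int id;

	for (id = 0; id < MAX_ENGINES; id++) {
		engine_reset_finish(id);
		if (awake & (1u << id))
			engine_put(id);		/* balance only the refs we took */
	}
}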
*/ - err = i915_ggtt_enable_hw(i915); + err = i915_ggtt_enable_hw(gt->i915); if (err) return err; - for_each_engine(engine, i915, id) - intel_engine_reset(engine, stalled_mask & engine->mask); + for_each_engine(engine, gt, id) + __intel_engine_reset(engine, stalled_mask & engine->mask); - i915_gem_restore_fences(i915); + i915_gem_restore_fences(gt->ggtt); return err; } static void reset_finish_engine(struct intel_engine_cs *engine) { - engine->reset.finish(engine); + if (engine->reset.finish) + engine->reset.finish(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); intel_engine_signal_breadcrumbs(engine); } -static void reset_finish(struct drm_i915_private *i915, - intel_engine_mask_t awake) +static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { reset_finish_engine(engine); if (awake & engine->mask) intel_engine_pm_put(engine); @@ -788,8 +780,7 @@ static void nop_submit_request(struct i915_request *request) struct intel_engine_cs *engine = request->engine; unsigned long flags; - GEM_TRACE("%s fence %llx:%lld -> -EIO\n", - engine->name, request->fence.context, request->fence.seqno); + RQ_TRACE(request, "-EIO\n"); dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&engine->active.lock, flags); @@ -797,44 +788,40 @@ static void nop_submit_request(struct i915_request *request) i915_request_mark_complete(request); spin_unlock_irqrestore(&engine->active.lock, flags); - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); } -static void __i915_gem_set_wedged(struct drm_i915_private *i915) +static void __intel_gt_set_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; intel_engine_mask_t awake; enum intel_engine_id id; - if (test_bit(I915_WEDGED, &error->flags)) + if (test_bit(I915_WEDGED, >->reset.flags)) return; - if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { + if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) { struct drm_printer p = drm_debug_printer(__func__); - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) intel_engine_dump(engine, &p, "%s\n", engine->name); } - GEM_TRACE("start\n"); + GT_TRACE(gt, "start\n"); /* * First, stop submission to hw, but do not yet complete requests by * rolling the global seqno forward (since this would complete requests * for which we haven't set the fence error to EIO yet). */ - awake = reset_prepare(i915); + awake = reset_prepare(gt); /* Even if the GPU reset fails, it should still stop the engines */ - if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_gpu_reset(i915, ALL_ENGINES); + if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + __intel_gt_reset(gt, ALL_ENGINES); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) engine->submit_request = nop_submit_request; - engine->schedule = NULL; - } - i915->caps.scheduler = 0; /* * Make sure no request can slip through without getting completed by @@ -842,40 +829,42 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * in nop_submit_request. 
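Wedging above works by swapping every engine's submit_request hook for nop_submit_request(), which completes the fence immediately with -EIO instead of touching the hardware; only after an RCU barrier (so no submitter can still be running the old hook) is the I915_WEDGED bit published. A minimal sketch of that hook swap, using a toy request/engine layout rather than the driver's types.

#include <errno.h>

struct request {
	int fence_error;
	int completed;
};

struct engine {
	void (*submit_request)(struct engine *e, struct request *rq);
};

/* Wedged path: fail the request immediately, never touch the hardware. */
static void nop_submit(struct engine *e, struct request *rq)
{
	(void)e;
	rq->fence_error = -EIO;
	rq->completed = 1;
}

static void set_wedged(struct engine *engines, int count)
{
	int i;

	for (i = 0; i < count; i++)
		engines[i].submit_request = nop_submit;

	/*
	 * The driver follows this with synchronize_rcu_expedited() so that
	 * nothing can slip past the old hook before I915_WEDGED is set.
	 */
}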
*/ synchronize_rcu_expedited(); - set_bit(I915_WEDGED, &error->flags); + set_bit(I915_WEDGED, >->reset.flags); /* Mark all executing requests as skipped */ - for_each_engine(engine, i915, id) - engine->cancel_requests(engine); + for_each_engine(engine, gt, id) + if (engine->reset.cancel) + engine->reset.cancel(engine); - reset_finish(i915, awake); + reset_finish(gt, awake); - GEM_TRACE("end\n"); + GT_TRACE(gt, "end\n"); } -void i915_gem_set_wedged(struct drm_i915_private *i915) +void intel_gt_set_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; intel_wakeref_t wakeref; - mutex_lock(&error->wedge_mutex); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - __i915_gem_set_wedged(i915); - mutex_unlock(&error->wedge_mutex); + mutex_lock(>->reset.mutex); + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + __intel_gt_set_wedged(gt); + mutex_unlock(>->reset.mutex); } -static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) +static bool __intel_gt_unset_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; - struct i915_timeline *tl; + struct intel_gt_timelines *timelines = >->timelines; + struct intel_timeline *tl; + bool ok; - if (!test_bit(I915_WEDGED, &error->flags)) + if (!test_bit(I915_WEDGED, >->reset.flags)) return true; - if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ + /* Never fully initialised, recovery impossible */ + if (test_bit(I915_WEDGED_ON_INIT, >->reset.flags)) return false; - GEM_TRACE("start\n"); + GT_TRACE(gt, "start\n"); /* * Before unwedging, make sure that all pending operations @@ -887,14 +876,16 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) * * No more can be submitted until we reset the wedged bit. */ - mutex_lock(&i915->gt.timelines.mutex); - list_for_each_entry(tl, &i915->gt.timelines.active_list, link) { - struct i915_request *rq; + spin_lock(&timelines->lock); + list_for_each_entry(tl, &timelines->active_list, link) { + struct dma_fence *fence; - rq = i915_active_request_get_unlocked(&tl->last_request); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) continue; + spin_unlock(&timelines->lock); + /* * All internal dependencies (i915_requests) will have * been flushed by the set-wedge, but we may be stuck waiting @@ -902,12 +893,27 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) * (I915_FENCE_TIMEOUT) so this wait should not be unbounded * in the worst case. */ - dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT); + dma_fence_put(fence); + + /* Restart iteration after droping lock */ + spin_lock(&timelines->lock); + tl = list_entry(&timelines->active_list, typeof(*tl), link); } - mutex_unlock(&i915->gt.timelines.mutex); + spin_unlock(&timelines->lock); - intel_gt_sanitize(i915, false); + /* We must reset pending GPU events before restoring our submission */ + ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ + if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + ok = __intel_gt_reset(gt, ALL_ENGINES) == 0; + if (!ok) { + /* + * Warn CI about the unrecoverable wedged condition. + * Time for a reboot. + */ + add_taint_for_CI(TAINT_WARN); + return false; + } /* * Undo nop_submit_request. 
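The unwedge path above must wait for each active timeline's last fence, but it cannot sleep under the timelines spinlock, so it drops the lock around the wait and restarts the walk from the head of the list (the "Restart iteration after dropping lock" step). A sketch of that drop-and-restart shape with a plain mutex and a singly linked list; the real code additionally pins the fence with i915_active_fence_get() before dropping the lock.

#include <pthread.h>
#include <stddef.h>

struct timeline {
	struct timeline *next;
	int has_pending;		/* stands in for tl->last_request */
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct timeline *active_list;

void wait_for_timeline(struct timeline *tl);	/* hypothetical blocking wait */

static void drain_active_timelines(void)
{
	struct timeline *tl;

	pthread_mutex_lock(&lock);
restart:
	for (tl = active_list; tl; tl = tl->next) {
		if (!tl->has_pending)
			continue;

		pthread_mutex_unlock(&lock);	/* never sleep under the lock */
		wait_for_timeline(tl);		/* assumed to clear has_pending */
		pthread_mutex_lock(&lock);
		goto restart;			/* the list may have changed meanwhile */
	}
	pthread_mutex_unlock(&lock);
}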
We prevent all new i915 requests from @@ -918,53 +924,51 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) * the nop_submit_request on reset, we can do this from normal * context and do not require stop_machine(). */ - intel_engines_reset_default_submission(i915); + intel_engines_reset_default_submission(gt); - GEM_TRACE("end\n"); + GT_TRACE(gt, "end\n"); smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ - clear_bit(I915_WEDGED, &i915->gpu_error.flags); + clear_bit(I915_WEDGED, >->reset.flags); return true; } -bool i915_gem_unset_wedged(struct drm_i915_private *i915) +bool intel_gt_unset_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; bool result; - mutex_lock(&error->wedge_mutex); - result = __i915_gem_unset_wedged(i915); - mutex_unlock(&error->wedge_mutex); + mutex_lock(>->reset.mutex); + result = __intel_gt_unset_wedged(gt); + mutex_unlock(>->reset.mutex); return result; } -static int do_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask) +static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) { int err, i; - gt_revoke(i915); + gt_revoke(gt); - err = intel_gpu_reset(i915, ALL_ENGINES); + err = __intel_gt_reset(gt, ALL_ENGINES); for (i = 0; err && i < RESET_MAX_RETRIES; i++) { msleep(10 * (i + 1)); - err = intel_gpu_reset(i915, ALL_ENGINES); + err = __intel_gt_reset(gt, ALL_ENGINES); } if (err) return err; - return gt_reset(i915, stalled_mask); + return gt_reset(gt, stalled_mask); } -static int resume(struct drm_i915_private *i915) +static int resume(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; int ret; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { ret = engine->resume(engine); if (ret) return ret; @@ -974,8 +978,8 @@ static int resume(struct drm_i915_private *i915) } /** - * i915_reset - reset chip after a hang - * @i915: #drm_i915_private to reset + * intel_gt_reset - reset chip after a hang + * @gt: #intel_gt to reset * @stalled_mask: mask of the stalled engines with the guilty requests * @reason: user error message for why we are resetting * @@ -990,50 +994,50 @@ static int resume(struct drm_i915_private *i915) * - re-init interrupt state * - re-init display */ -void i915_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask, - const char *reason) +void intel_gt_reset(struct intel_gt *gt, + intel_engine_mask_t stalled_mask, + const char *reason) { - struct i915_gpu_error *error = &i915->gpu_error; intel_engine_mask_t awake; int ret; - GEM_TRACE("flags=%lx\n", error->flags); + GT_TRACE(gt, "flags=%lx\n", gt->reset.flags); might_sleep(); - GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - mutex_lock(&error->wedge_mutex); + GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, >->reset.flags)); + mutex_lock(>->reset.mutex); /* Clear any previous failed attempts at recovery. Time to try again. 
*/ - if (!__i915_gem_unset_wedged(i915)) + if (!__intel_gt_unset_wedged(gt)) goto unlock; if (reason) - dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); - error->reset_count++; + dev_notice(gt->i915->drm.dev, + "Resetting chip for %s\n", reason); + atomic_inc(>->i915->gpu_error.reset_count); - awake = reset_prepare(i915); + awake = reset_prepare(gt); - if (!intel_has_gpu_reset(i915)) { + if (!intel_has_gpu_reset(gt)) { if (i915_modparams.reset) - dev_err(i915->drm.dev, "GPU reset not supported\n"); + dev_err(gt->i915->drm.dev, "GPU reset not supported\n"); else DRM_DEBUG_DRIVER("GPU reset disabled\n"); goto error; } - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_runtime_pm_disable_interrupts(i915); + if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + intel_runtime_pm_disable_interrupts(gt->i915); - if (do_reset(i915, stalled_mask)) { - dev_err(i915->drm.dev, "Failed to reset chip\n"); + if (do_reset(gt, stalled_mask)) { + dev_err(gt->i915->drm.dev, "Failed to reset chip\n"); goto taint; } - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_runtime_pm_enable_interrupts(i915); + if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + intel_runtime_pm_enable_interrupts(gt->i915); - intel_overlay_reset(i915); + intel_overlay_reset(gt->i915); /* * Next we need to restore the context, but we don't use those @@ -1043,23 +1047,21 @@ void i915_reset(struct drm_i915_private *i915, * was running at the time of the reset (i.e. we weren't VT * switched away). */ - ret = i915_gem_init_hw(i915); + ret = intel_gt_init_hw(gt); if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); goto taint; } - ret = resume(i915); + ret = resume(gt); if (ret) goto taint; - i915_queue_hangcheck(i915); - finish: - reset_finish(i915, awake); + reset_finish(gt, awake); unlock: - mutex_unlock(&error->wedge_mutex); + mutex_unlock(>->reset.mutex); return; taint: @@ -1077,18 +1079,17 @@ taint: */ add_taint_for_CI(TAINT_WARN); error: - __i915_gem_set_wedged(i915); + __intel_gt_set_wedged(gt); goto finish; } -static inline int intel_gt_reset_engine(struct drm_i915_private *i915, - struct intel_engine_cs *engine) +static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) { - return intel_gpu_reset(i915, engine->mask); + return __intel_gt_reset(engine->gt, engine->mask); } /** - * i915_reset_engine - reset GPU engine to recover from a hang + * intel_engine_reset - reset GPU engine to recover from a hang * @engine: engine to reset * @msg: reason for GPU reset; or NULL for no dev_notice() * @@ -1100,13 +1101,14 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915, * - reset engine (which will force the engine to idle) * - re-init/configure engine */ -int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) +int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) { - struct i915_gpu_error *error = &engine->i915->gpu_error; + struct intel_gt *gt = engine->gt; + bool uses_guc = intel_engine_in_guc_submission_mode(engine); int ret; - GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); - GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags); + GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)); if (!intel_engine_pm_get_if_awake(engine)) return 0; @@ -1116,16 +1118,16 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) if (msg) dev_notice(engine->i915->drm.dev, "Resetting %s for %s\n", engine->name, 
msg); - error->reset_engine_count[engine->id]++; + atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (!engine->i915->guc.execbuf_client) - ret = intel_gt_reset_engine(engine->i915, engine); + if (!uses_guc) + ret = intel_gt_reset_engine(engine); else - ret = intel_guc_reset_engine(&engine->i915->guc, engine); + ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", - engine->i915->guc.execbuf_client ? "GuC " : "", + uses_guc ? "GuC " : "", engine->name, ret); goto out; } @@ -1135,7 +1137,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * active request and can drop it, adjust head to skip the offending * request to resume executing remaining requests in the queue. */ - intel_engine_reset(engine, true); + __intel_engine_reset(engine, true); /* * The engine and its registers (and workarounds in case of render) @@ -1147,20 +1149,19 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); return ret; } -static void i915_reset_device(struct drm_i915_private *i915, - u32 engine_mask, - const char *reason) +static void intel_gt_reset_global(struct intel_gt *gt, + u32 engine_mask, + const char *reason) { - struct i915_gpu_error *error = &i915->gpu_error; - struct kobject *kobj = &i915->drm.primary->kdev->kobj; + struct kobject *kobj = >->i915->drm.primary->kdev->kobj; char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; - struct i915_wedge_me w; + struct intel_wedge_me w; kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); @@ -1168,137 +1169,24 @@ static void i915_reset_device(struct drm_i915_private *i915, kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); /* Use a watchdog to ensure that our reset completes */ - i915_wedge_on_timeout(&w, i915, 5 * HZ) { - intel_prepare_reset(i915); + intel_wedge_on_timeout(&w, gt, 5 * HZ) { + intel_prepare_reset(gt->i915); /* Flush everyone using a resource about to be clobbered */ - synchronize_srcu_expedited(&error->reset_backoff_srcu); + synchronize_srcu_expedited(>->reset.backoff_srcu); - i915_reset(i915, engine_mask, reason); + intel_gt_reset(gt, engine_mask, reason); - intel_finish_reset(i915); + intel_finish_reset(gt->i915); } - if (!test_bit(I915_WEDGED, &error->flags)) + if (!test_bit(I915_WEDGED, >->reset.flags)) kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); } -static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) -{ - intel_uncore_rmw(uncore, reg, 0, 0); -} - -static void gen8_clear_engine_error_register(struct intel_engine_cs *engine) -{ - GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); - GEN6_RING_FAULT_REG_POSTING_READ(engine); -} - -static void clear_error_registers(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask) -{ - struct intel_uncore *uncore = &i915->uncore; - u32 eir; - - if (!IS_GEN(i915, 2)) - clear_register(uncore, PGTBL_ER); - - if (INTEL_GEN(i915) < 4) - clear_register(uncore, IPEIR(RENDER_RING_BASE)); - else - clear_register(uncore, IPEIR_I965); - - clear_register(uncore, EIR); - eir = intel_uncore_read(uncore, EIR); - if (eir) { - /* - * some errors might have become stuck, - * mask them. 
- */ - DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); - rmw_set(uncore, EMR, eir); - intel_uncore_write(uncore, GEN2_IIR, - I915_MASTER_ERROR_INTERRUPT); - } - - if (INTEL_GEN(i915) >= 8) { - rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); - intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); - } else if (INTEL_GEN(i915) >= 6) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine_masked(engine, i915, engine_mask, id) - gen8_clear_engine_error_register(engine); - } -} - -static void gen6_check_faults(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 fault; - - for_each_engine(engine, dev_priv, id) { - fault = GEN6_RING_FAULT_REG_READ(engine); - if (fault & RING_FAULT_VALID) { - DRM_DEBUG_DRIVER("Unexpected fault\n" - "\tAddr: 0x%08lx\n" - "\tAddress space: %s\n" - "\tSource ID: %d\n" - "\tType: %d\n", - fault & PAGE_MASK, - fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); - } - } -} - -static void gen8_check_faults(struct drm_i915_private *dev_priv) -{ - u32 fault = I915_READ(GEN8_RING_FAULT_REG); - - if (fault & RING_FAULT_VALID) { - u32 fault_data0, fault_data1; - u64 fault_addr; - - fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0); - fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1); - fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | - ((u64)fault_data0 << 12); - - DRM_DEBUG_DRIVER("Unexpected fault\n" - "\tAddr: 0x%08x_%08x\n" - "\tAddress space: %s\n" - "\tEngine ID: %d\n" - "\tSource ID: %d\n" - "\tType: %d\n", - upper_32_bits(fault_addr), - lower_32_bits(fault_addr), - fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", - GEN8_RING_FAULT_ENGINE_ID(fault), - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); - } -} - -void i915_check_and_clear_faults(struct drm_i915_private *i915) -{ - /* From GEN8 onwards we only have one 'All Engine Fault Register' */ - if (INTEL_GEN(i915) >= 8) - gen8_check_faults(i915); - else if (INTEL_GEN(i915) >= 6) - gen6_check_faults(i915); - else - return; - - clear_error_registers(i915, ALL_ENGINES); -} - /** - * i915_handle_error - handle a gpu error - * @i915: i915 device private + * intel_gt_handle_error - handle a gpu error + * @gt: the intel_gt * @engine_mask: mask representing engines that are hung * @flags: control flags * @fmt: Error message format string @@ -1309,12 +1197,11 @@ void i915_check_and_clear_faults(struct drm_i915_private *i915) * so userspace knows something bad happened (should trigger collection * of a ring dump etc.). */ -void i915_handle_error(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned long flags, - const char *fmt, ...) +void intel_gt_handle_error(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned long flags, + const char *fmt, ...) { - struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; intel_wakeref_t wakeref; intel_engine_mask_t tmp; @@ -1338,33 +1225,31 @@ void i915_handle_error(struct drm_i915_private *i915, * isn't the case at least when we get here by doing a * simulated reset via debugfs, so get an RPM reference. 
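The gen8_check_faults() code removed from this file here (the error-register handling now goes through intel_gt_clear_error_registers(), as the hunk below shows) reassembles the faulting virtual address from two registers: the low data register supplies bits 12..43 and the masked high data register supplies the bits above. A standalone illustration of that reassembly; the 0xf mask width is assumed purely for the example.

#include <stdint.h>
#include <stdio.h>

#define FAULT_VA_HIGH_BITS 0xfu		/* assumed mask, for illustration only */

static uint64_t fault_address(uint32_t fault_data0, uint32_t fault_data1)
{
	return ((uint64_t)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
	       ((uint64_t)fault_data0 << 12);
}

int main(void)
{
	/* data0 = 0x12345, data1 low bits = 0x3  ->  0x300012345000 */
	printf("0x%llx\n", (unsigned long long)fault_address(0x12345, 0x3));
	return 0;
}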
*/ - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - engine_mask &= INTEL_INFO(i915)->engine_mask; + engine_mask &= INTEL_INFO(gt->i915)->engine_mask; if (flags & I915_ERROR_CAPTURE) { - i915_capture_error_state(i915, engine_mask, msg); - clear_error_registers(i915, engine_mask); + i915_capture_error_state(gt->i915); + intel_gt_clear_error_registers(gt, engine_mask); } /* * Try engine reset when available. We fall back to full reset if * single reset fails. */ - if (intel_has_reset_engine(i915) && !__i915_wedged(error)) { - for_each_engine_masked(engine, i915, engine_mask, tmp) { + if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &error->flags)) + >->reset.flags)) continue; - if (i915_reset_engine(engine, msg) == 0) + if (intel_engine_reset(engine, msg) == 0) engine_mask &= ~engine->mask; - clear_bit(I915_RESET_ENGINE + engine->id, - &error->flags); - wake_up_bit(&error->flags, - I915_RESET_ENGINE + engine->id); + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags); } } @@ -1372,9 +1257,9 @@ void i915_handle_error(struct drm_i915_private *i915, goto out; /* Full reset needs the mutex, stop any other user trying to do so. */ - if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) { - wait_event(error->reset_queue, - !test_bit(I915_RESET_BACKOFF, &error->flags)); + if (test_and_set_bit(I915_RESET_BACKOFF, >->reset.flags)) { + wait_event(gt->reset.queue, + !test_bit(I915_RESET_BACKOFF, >->reset.flags)); goto out; /* piggy-back on the other reset */ } @@ -1382,115 +1267,127 @@ void i915_handle_error(struct drm_i915_private *i915, synchronize_rcu_expedited(); /* Prevent any other reset-engine attempt. 
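intel_gt_handle_error() above serialises concurrent recovery with nothing but bits in gt->reset.flags: each per-engine reset is gated by test_and_set_bit(I915_RESET_ENGINE + id) and released with clear_and_wake_up_bit(), and anything left over falls back to the full-chip path guarded by I915_RESET_BACKOFF. A compressed sketch of that bit gating, using C11 atomics in place of the kernel bitops; engine_reset() is a hypothetical callback.

#include <stdatomic.h>
#include <stdbool.h>

#define RESET_BACKOFF_BIT	0
#define RESET_ENGINE_BIT(id)	(2 + (id))	/* bit 1 stays reserved, as in the driver */

static atomic_ulong reset_flags;

int engine_reset(int id);			/* hypothetical: 0 on success */

/* Returns true if we won the race and now own that reset slot. */
static bool try_claim(int bit)
{
	unsigned long mask = 1ul << bit;

	return !(atomic_fetch_or(&reset_flags, mask) & mask);
}

static void release_slot(int bit)
{
	atomic_fetch_and(&reset_flags, ~(1ul << bit));
	/* the driver also wakes waiters here (clear_and_wake_up_bit) */
}

static unsigned int reset_hung_engines(unsigned int hung_mask)
{
	int id;

	for (id = 0; id < 8; id++) {
		if (!(hung_mask & (1u << id)) || !try_claim(RESET_ENGINE_BIT(id)))
			continue;		/* someone else is resetting it already */
		if (engine_reset(id) == 0)
			hung_mask &= ~(1u << id);
		release_slot(RESET_ENGINE_BIT(id));
	}
	return hung_mask;	/* anything left needs the I915_RESET_BACKOFF path */
}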
*/ - for_each_engine(engine, i915, tmp) { + for_each_engine(engine, gt, tmp) { while (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &error->flags)) - wait_on_bit(&error->flags, + >->reset.flags)) + wait_on_bit(>->reset.flags, I915_RESET_ENGINE + engine->id, TASK_UNINTERRUPTIBLE); } - i915_reset_device(i915, engine_mask, msg); - - for_each_engine(engine, i915, tmp) { - clear_bit(I915_RESET_ENGINE + engine->id, - &error->flags); - } + intel_gt_reset_global(gt, engine_mask, msg); - clear_bit(I915_RESET_BACKOFF, &error->flags); - wake_up_all(&error->reset_queue); + for_each_engine(engine, gt, tmp) + clear_bit_unlock(I915_RESET_ENGINE + engine->id, + >->reset.flags); + clear_bit_unlock(I915_RESET_BACKOFF, >->reset.flags); + smp_mb__after_atomic(); + wake_up_all(>->reset.queue); out: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); } -int i915_reset_trylock(struct drm_i915_private *i915) +int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu) { - struct i915_gpu_error *error = &i915->gpu_error; - int srcu; - - might_lock(&error->reset_backoff_srcu); + might_lock(>->reset.backoff_srcu); might_sleep(); rcu_read_lock(); - while (test_bit(I915_RESET_BACKOFF, &error->flags)) { + while (test_bit(I915_RESET_BACKOFF, >->reset.flags)) { rcu_read_unlock(); - if (wait_event_interruptible(error->reset_queue, + if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, - &error->flags))) + >->reset.flags))) return -EINTR; rcu_read_lock(); } - srcu = srcu_read_lock(&error->reset_backoff_srcu); + *srcu = srcu_read_lock(>->reset.backoff_srcu); rcu_read_unlock(); - return srcu; + return 0; } -void i915_reset_unlock(struct drm_i915_private *i915, int tag) -__releases(&i915->gpu_error.reset_backoff_srcu) +void intel_gt_reset_unlock(struct intel_gt *gt, int tag) +__releases(>->reset.backoff_srcu) { - struct i915_gpu_error *error = &i915->gpu_error; - - srcu_read_unlock(&error->reset_backoff_srcu, tag); + srcu_read_unlock(>->reset.backoff_srcu, tag); } -int i915_terminally_wedged(struct drm_i915_private *i915) +int intel_gt_terminally_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; - might_sleep(); - if (!__i915_wedged(error)) + if (!intel_gt_is_wedged(gt)) return 0; - /* Reset still in progress? Maybe we will recover? */ - if (!test_bit(I915_RESET_BACKOFF, &error->flags)) + if (intel_gt_has_init_error(gt)) return -EIO; - /* XXX intel_reset_finish() still takes struct_mutex!!! */ - if (mutex_is_locked(&i915->drm.struct_mutex)) - return -EAGAIN; - - if (wait_event_interruptible(error->reset_queue, + /* Reset still in progress? Maybe we will recover? */ + if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, - &error->flags))) + >->reset.flags))) return -EINTR; - return __i915_wedged(error) ? -EIO : 0; + return intel_gt_is_wedged(gt) ? -EIO : 0; +} + +void intel_gt_set_wedged_on_init(struct intel_gt *gt) +{ + BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES > + I915_WEDGED_ON_INIT); + intel_gt_set_wedged(gt); + set_bit(I915_WEDGED_ON_INIT, >->reset.flags); +} + +void intel_gt_init_reset(struct intel_gt *gt) +{ + init_waitqueue_head(>->reset.queue); + mutex_init(>->reset.mutex); + init_srcu_struct(>->reset.backoff_srcu); + + /* no GPU until we are ready! 
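intel_gt_reset_trylock() above now hands the SRCU cookie back through an out parameter and returns 0 on success: it waits out I915_RESET_BACKOFF (so a full reset in progress excludes new users) and then enters the reset-backoff SRCU read side. A sketch of that shape with a pthread rwlock standing in for SRCU, purely to show the control flow; wait_until_reset_done() is a hypothetical interruptible wait.

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>

static atomic_int reset_in_progress;		/* stands in for I915_RESET_BACKOFF */
static pthread_rwlock_t backoff_srcu = PTHREAD_RWLOCK_INITIALIZER;

int wait_until_reset_done(void);		/* hypothetical: 0, or -EINTR on signal */

static int reset_trylock(int *cookie)
{
	while (atomic_load(&reset_in_progress)) {
		int err = wait_until_reset_done();

		if (err)
			return err;		/* -EINTR: caller gives up */
	}

	pthread_rwlock_rdlock(&backoff_srcu);	/* read side, like srcu_read_lock() */
	*cookie = 0;				/* SRCU hands back a real index here */
	return 0;
}

static void reset_unlock(int cookie)
{
	(void)cookie;
	pthread_rwlock_unlock(&backoff_srcu);
}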
*/ + __set_bit(I915_WEDGED, >->reset.flags); +} + +void intel_gt_fini_reset(struct intel_gt *gt) +{ + cleanup_srcu_struct(>->reset.backoff_srcu); } -static void i915_wedge_me(struct work_struct *work) +static void intel_wedge_me(struct work_struct *work) { - struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); + struct intel_wedge_me *w = container_of(work, typeof(*w), work.work); - dev_err(w->i915->drm.dev, + dev_err(w->gt->i915->drm.dev, "%s timed out, cancelling all in-flight rendering.\n", w->name); - i915_gem_set_wedged(w->i915); + intel_gt_set_wedged(w->gt); } -void __i915_init_wedge(struct i915_wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name) +void __intel_init_wedge(struct intel_wedge_me *w, + struct intel_gt *gt, + long timeout, + const char *name) { - w->i915 = i915; + w->gt = gt; w->name = name; - INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me); + INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me); schedule_delayed_work(&w->work, timeout); } -void __i915_fini_wedge(struct i915_wedge_me *w) +void __intel_fini_wedge(struct intel_wedge_me *w) { cancel_delayed_work_sync(&w->work); destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; + w->gt = NULL; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_reset.c" +#include "selftest_hangcheck.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 580ebdb59eca..8e8d5f761166 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -11,58 +11,75 @@ #include <linux/types.h> #include <linux/srcu.h> -#include "gt/intel_engine_types.h" +#include "intel_engine_types.h" +#include "intel_reset_types.h" -struct drm_i915_private; struct i915_request; struct intel_engine_cs; +struct intel_gt; struct intel_guc; +void intel_gt_init_reset(struct intel_gt *gt); +void intel_gt_fini_reset(struct intel_gt *gt); + __printf(4, 5) -void i915_handle_error(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned long flags, - const char *fmt, ...); +void intel_gt_handle_error(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned long flags, + const char *fmt, ...); #define I915_ERROR_CAPTURE BIT(0) -void i915_check_and_clear_faults(struct drm_i915_private *i915); - -void i915_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask, - const char *reason); -int i915_reset_engine(struct intel_engine_cs *engine, - const char *reason); +void intel_gt_reset(struct intel_gt *gt, + intel_engine_mask_t stalled_mask, + const char *reason); +int intel_engine_reset(struct intel_engine_cs *engine, + const char *reason); -void i915_reset_request(struct i915_request *rq, bool guilty); +void __i915_request_reset(struct i915_request *rq, bool guilty); -int __must_check i915_reset_trylock(struct drm_i915_private *i915); -void i915_reset_unlock(struct drm_i915_private *i915, int tag); +int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu); +void intel_gt_reset_unlock(struct intel_gt *gt, int tag); -int i915_terminally_wedged(struct drm_i915_private *i915); +void intel_gt_set_wedged(struct intel_gt *gt); +bool intel_gt_unset_wedged(struct intel_gt *gt); +int intel_gt_terminally_wedged(struct intel_gt *gt); -bool intel_has_gpu_reset(struct drm_i915_private *i915); -bool intel_has_reset_engine(struct drm_i915_private *i915); +/* + * There's no unset_wedged_on_init paired with this one. + * Once we're wedged on init, there's no going back. 
+ */ +void intel_gt_set_wedged_on_init(struct intel_gt *gt); -int intel_gpu_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask); +int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask); -int intel_reset_guc(struct drm_i915_private *i915); +int intel_reset_guc(struct intel_gt *gt); -struct i915_wedge_me { +struct intel_wedge_me { struct delayed_work work; - struct drm_i915_private *i915; + struct intel_gt *gt; const char *name; }; -void __i915_init_wedge(struct i915_wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name); -void __i915_fini_wedge(struct i915_wedge_me *w); +void __intel_init_wedge(struct intel_wedge_me *w, + struct intel_gt *gt, + long timeout, + const char *name); +void __intel_fini_wedge(struct intel_wedge_me *w); + +#define intel_wedge_on_timeout(W, GT, TIMEOUT) \ + for (__intel_init_wedge((W), (GT), (TIMEOUT), __func__); \ + (W)->gt; \ + __intel_fini_wedge((W))) + +static inline bool __intel_reset_failed(const struct intel_reset *reset) +{ + GEM_BUG_ON(test_bit(I915_WEDGED_ON_INIT, &reset->flags) ? + !test_bit(I915_WEDGED, &reset->flags) : false); + + return unlikely(test_bit(I915_WEDGED, &reset->flags)); +} -#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \ - (W)->i915; \ - __i915_fini_wedge((W))) +bool intel_has_gpu_reset(const struct intel_gt *gt); +bool intel_has_reset_engine(const struct intel_gt *gt); #endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h new file mode 100644 index 000000000000..f43bc3a0fe4f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_RESET_TYPES_H_ +#define __INTEL_RESET_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/wait.h> +#include <linux/srcu.h> + +struct intel_reset { + /** + * flags: Control various stages of the GPU reset + * + * #I915_RESET_BACKOFF - When we start a global reset, we need to + * serialise with any other users attempting to do the same, and + * any global resources that may be clobber by the reset (such as + * FENCE registers). + * + * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to + * acquire the struct_mutex to reset an engine, we need an explicit + * flag to prevent two concurrent reset attempts in the same engine. + * As the number of engines continues to grow, allocate the flags from + * the most significant bits. + * + * #I915_WEDGED - If reset fails and we can no longer use the GPU, + * we set the #I915_WEDGED bit. Prior to command submission, e.g. + * i915_request_alloc(), this bit is checked and the sequence + * aborted (with -EIO reported to userspace) if set. + * + * #I915_WEDGED_ON_INIT - If we fail to initialize the GPU we can no + * longer use the GPU - similar to #I915_WEDGED bit. The difference in + * in the way we're handling "forced" unwedged (e.g. through debugfs), + * which is not allowed in case we failed to initialize. + */ + unsigned long flags; +#define I915_RESET_BACKOFF 0 +#define I915_RESET_MODESET 1 +#define I915_RESET_ENGINE 2 +#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 2) +#define I915_WEDGED (BITS_PER_LONG - 1) + + struct mutex mutex; /* serialises wedging/unwedging */ + + /** + * Waitqueue to signal when the reset has completed. Used by clients + * that wait for dev_priv->mm.wedged to settle. 
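The intel_wedge_on_timeout() macro above is the usual for-loop-as-scope-guard trick: the init expression arms a delayed worker, the condition holds exactly once (until the fini step clears w->gt), and the increment expression cancels the worker, so the guarded block runs once with a watchdog armed around it. A tiny standalone illustration of the same construction; start_watchdog() and stop_watchdog() are hypothetical helpers.

struct watchdog {
	void *armed;		/* non-NULL while the guard is active */
};

void start_watchdog(struct watchdog *w, long timeout_ms);	/* hypothetical: arms a timer, sets w->armed */
void stop_watchdog(struct watchdog *w);				/* cancels it and clears w->armed */

#define WATCHDOG_ON_TIMEOUT(w, ms) \
	for (start_watchdog((w), (ms)); (w)->armed; stop_watchdog(w))

static void guarded_operation(void)
{
	struct watchdog w;

	WATCHDOG_ON_TIMEOUT(&w, 5000) {
		/*
		 * Body runs exactly once.  If it stalls past the timeout,
		 * the worker fires, the way intel_wedge_me() wedges the GT.
		 */
	}
}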
+ */ + wait_queue_head_t queue; + + struct srcu_struct backoff_srcu; +}; + +#endif /* _INTEL_RESET_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c new file mode 100644 index 000000000000..374b28f13ca0 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -0,0 +1,318 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "gem/i915_gem_object.h" +#include "i915_drv.h" +#include "i915_vma.h" +#include "intel_engine.h" +#include "intel_ring.h" +#include "intel_timeline.h" + +unsigned int intel_ring_update_space(struct intel_ring *ring) +{ + unsigned int space; + + space = __intel_ring_space(ring->head, ring->emit, ring->size); + + ring->space = space; + return space; +} + +int intel_ring_pin(struct intel_ring *ring) +{ + struct i915_vma *vma = ring->vma; + unsigned int flags; + void *addr; + int ret; + + if (atomic_fetch_inc(&ring->pin_count)) + return 0; + + flags = PIN_GLOBAL; + + /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ + flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + + if (vma->obj->stolen) + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; + + ret = i915_vma_pin(vma, 0, 0, flags); + if (unlikely(ret)) + goto err_unpin; + + if (i915_vma_is_map_and_fenceable(vma)) + addr = (void __force *)i915_vma_pin_iomap(vma); + else + addr = i915_gem_object_pin_map(vma->obj, + i915_coherent_map_type(vma->vm->i915)); + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto err_ring; + } + + i915_vma_make_unshrinkable(vma); + + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->emit); + + ring->vaddr = addr; + return 0; + +err_ring: + i915_vma_unpin(vma); +err_unpin: + atomic_dec(&ring->pin_count); + return ret; +} + +void intel_ring_reset(struct intel_ring *ring, u32 tail) +{ + tail = intel_ring_wrap(ring, tail); + ring->tail = tail; + ring->head = tail; + ring->emit = tail; + intel_ring_update_space(ring); +} + +void intel_ring_unpin(struct intel_ring *ring) +{ + struct i915_vma *vma = ring->vma; + + if (!atomic_dec_and_test(&ring->pin_count)) + return; + + i915_vma_unset_ggtt_write(vma); + if (i915_vma_is_map_and_fenceable(vma)) + i915_vma_unpin_iomap(vma); + else + i915_gem_object_unpin_map(vma->obj); + + i915_vma_make_purgeable(vma); + i915_vma_unpin(vma); +} + +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) +{ + struct i915_address_space *vm = &ggtt->vm; + struct drm_i915_private *i915 = vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = ERR_PTR(-ENODEV); + if (i915_ggtt_has_aperture(ggtt)) + obj = i915_gem_object_create_stolen(i915, size); + if (IS_ERR(obj)) + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Mark ring buffers as read-only from GPU side (so no stray overwrites) + * if supported by the platform's GGTT. 
+ */ + if (vm->has_read_only) + i915_gem_object_set_readonly(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} + +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size) +{ + struct drm_i915_private *i915 = engine->i915; + struct intel_ring *ring; + struct i915_vma *vma; + + GEM_BUG_ON(!is_power_of_2(size)); + GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) + return ERR_PTR(-ENOMEM); + + kref_init(&ring->ref); + ring->size = size; + + /* + * Workaround an erratum on the i830 which causes a hang if + * the TAIL pointer points to within the last 2 cachelines + * of the buffer. + */ + ring->effective_size = size; + if (IS_I830(i915) || IS_I845G(i915)) + ring->effective_size -= 2 * CACHELINE_BYTES; + + intel_ring_update_space(ring); + + vma = create_ring_vma(engine->gt->ggtt, size); + if (IS_ERR(vma)) { + kfree(ring); + return ERR_CAST(vma); + } + ring->vma = vma; + + return ring; +} + +void intel_ring_free(struct kref *ref) +{ + struct intel_ring *ring = container_of(ref, typeof(*ring), ref); + + i915_vma_put(ring->vma); + kfree(ring); +} + +static noinline int +wait_for_space(struct intel_ring *ring, + struct intel_timeline *tl, + unsigned int bytes) +{ + struct i915_request *target; + long timeout; + + if (intel_ring_update_space(ring) >= bytes) + return 0; + + GEM_BUG_ON(list_empty(&tl->requests)); + list_for_each_entry(target, &tl->requests, link) { + if (target->ring != ring) + continue; + + /* Would completion of this request free enough space? */ + if (bytes <= __intel_ring_space(target->postfix, + ring->emit, ring->size)) + break; + } + + if (GEM_WARN_ON(&target->link == &tl->requests)) + return -ENOSPC; + + timeout = i915_request_wait(target, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) + return timeout; + + i915_request_retire_upto(target); + + intel_ring_update_space(ring); + GEM_BUG_ON(ring->space < bytes); + return 0; +} + +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) +{ + struct intel_ring *ring = rq->ring; + const unsigned int remain_usable = ring->effective_size - ring->emit; + const unsigned int bytes = num_dwords * sizeof(u32); + unsigned int need_wrap = 0; + unsigned int total_bytes; + u32 *cs; + + /* Packets must be qword aligned. */ + GEM_BUG_ON(num_dwords & 1); + + total_bytes = bytes + rq->reserved_space; + GEM_BUG_ON(total_bytes > ring->effective_size); + + if (unlikely(total_bytes > remain_usable)) { + const int remain_actual = ring->size - ring->emit; + + if (bytes > remain_usable) { + /* + * Not enough space for the basic request. So need to + * flush out the remainder and then wait for + * base + reserved. + */ + total_bytes += remain_actual; + need_wrap = remain_actual | 1; + } else { + /* + * The base request will fit but the reserved space + * falls off the end. So we don't need an immediate + * wrap and only need to effectively wait for the + * reserved size from the start of ringbuffer. + */ + total_bytes = rq->reserved_space + remain_actual; + } + } + + if (unlikely(total_bytes > ring->space)) { + int ret; + + /* + * Space is reserved in the ringbuffer for finalising the + * request, as that cannot be allowed to fail. During request + * finalisation, reserved_space is set to 0 to stop the + * overallocation and the assumption is that then we never need + * to wait (which has the risk of failing with EINTR). 
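intel_ring_begin() above decides whether a request fits before the end of the usable ring or has to wrap first. Because emit and size are both multiples of eight, remain_actual always has its low bit clear, so "need_wrap = remain_actual | 1" packs the wrap flag and the number of filler bytes into one variable. A compact sketch of just that wrap decision, with the wait-for-space step stubbed out as a hypothetical helper.

struct ring {
	unsigned int size;		/* power of two */
	unsigned int effective_size;	/* size minus the i830 tail workaround */
	unsigned int emit;		/* byte offset of the next write */
	unsigned int space;		/* bytes known to be free */
};

int wait_for_ring_space(struct ring *r, unsigned int bytes);	/* hypothetical */

static int ring_begin(struct ring *r, unsigned int bytes, unsigned int reserved)
{
	const unsigned int remain_usable = r->effective_size - r->emit;
	unsigned int total = bytes + reserved;
	unsigned int need_wrap = 0;

	if (total > remain_usable) {
		const unsigned int remain_actual = r->size - r->emit;

		if (bytes > remain_usable) {
			/* Pad out to the end of the ring and restart at 0. */
			total += remain_actual;
			need_wrap = remain_actual | 1;	/* low bit doubles as the flag */
		} else {
			/* The packet fits; only the reserved tail would spill over. */
			total = reserved + remain_actual;
		}
	}

	if (total > r->space && wait_for_ring_space(r, total))
		return -1;

	if (need_wrap) {
		need_wrap &= ~1u;
		/* the driver fills [emit, size) with MI_NOOP here */
		r->space -= need_wrap;
		r->emit = 0;
	}

	r->emit += bytes;
	r->space -= bytes;
	return 0;
}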
+ * + * See also i915_request_alloc() and i915_request_add(). + */ + GEM_BUG_ON(!rq->reserved_space); + + ret = wait_for_space(ring, + i915_request_timeline(rq), + total_bytes); + if (unlikely(ret)) + return ERR_PTR(ret); + } + + if (unlikely(need_wrap)) { + need_wrap &= ~1; + GEM_BUG_ON(need_wrap > ring->space); + GEM_BUG_ON(ring->emit + need_wrap > ring->size); + GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); + + /* Fill the tail with MI_NOOP */ + memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); + ring->space -= need_wrap; + ring->emit = 0; + } + + GEM_BUG_ON(ring->emit > ring->size - bytes); + GEM_BUG_ON(ring->space < bytes); + cs = ring->vaddr + ring->emit; + GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); + ring->emit += bytes; + ring->space -= bytes; + + return cs; +} + +/* Align the ring tail to a cacheline boundary */ +int intel_ring_cacheline_align(struct i915_request *rq) +{ + int num_dwords; + void *cs; + + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); + if (num_dwords == 0) + return 0; + + num_dwords = CACHELINE_DWORDS - num_dwords; + GEM_BUG_ON(num_dwords & 1); + + cs = intel_ring_begin(rq, num_dwords); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); + intel_ring_advance(rq, cs + num_dwords); + + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h new file mode 100644 index 000000000000..ea2839d9e044 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -0,0 +1,131 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RING_H +#define INTEL_RING_H + +#include "i915_gem.h" /* GEM_BUG_ON */ +#include "i915_request.h" +#include "intel_ring_types.h" + +struct intel_engine_cs; + +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size); + +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords); +int intel_ring_cacheline_align(struct i915_request *rq); + +unsigned int intel_ring_update_space(struct intel_ring *ring); + +int intel_ring_pin(struct intel_ring *ring); +void intel_ring_unpin(struct intel_ring *ring); +void intel_ring_reset(struct intel_ring *ring, u32 tail); + +void intel_ring_free(struct kref *ref); + +static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) +{ + kref_get(&ring->ref); + return ring; +} + +static inline void intel_ring_put(struct intel_ring *ring) +{ + kref_put(&ring->ref, intel_ring_free); +} + +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) +{ + /* Dummy function. + * + * This serves as a placeholder in the code so that the reader + * can compare against the preceding intel_ring_begin() and + * check that the number of dwords emitted matches the space + * reserved for the command packet (i.e. the value passed to + * intel_ring_begin()). 
+ */ + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); +} + +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) +{ + return pos & (ring->size - 1); +} + +static inline bool +intel_ring_offset_valid(const struct intel_ring *ring, + unsigned int pos) +{ + if (pos & -ring->size) /* must be strictly within the ring */ + return false; + + if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ + return false; + + return true; +} + +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) +{ + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ + u32 offset = addr - rq->ring->vaddr; + GEM_BUG_ON(offset > rq->ring->size); + return intel_ring_wrap(rq->ring, offset); +} + +static inline void +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) +{ + GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); + + /* + * "Ring Buffer Use" + * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 + * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 + * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + * + * We use ring->head as the last known location of the actual RING_HEAD, + * it may have advanced but in the worst case it is equally the same + * as ring->head and so we should never program RING_TAIL to advance + * into the same cacheline as ring->head. + */ +#define cacheline(a) round_down(a, CACHELINE_BYTES) + GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && + tail < ring->head); +#undef cacheline +} + +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly order, there is no + * serialisation between readers and the writers. The tail may be + * read by i915_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} + +static inline unsigned int +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) +{ + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." 
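As a quick, standalone illustration of the power-of-two ring arithmetic used by intel_ring_wrap() above and by __intel_ring_space() just below (this sketch is not part of the patch): the helpers mirror those formulas in plain userspace C, the 16 KiB size matches the SZ_16K ring created later in intel_ring_submission_setup(), and the sample offsets are made up.

	/* Minimal sketch mirroring intel_ring_wrap()/__intel_ring_space(). */
	#include <stdio.h>

	#define CACHELINE_BYTES 64

	static unsigned int ring_wrap(unsigned int size, unsigned int pos)
	{
		return pos & (size - 1);	/* only valid for power-of-two sizes */
	}

	static unsigned int ring_space(unsigned int head, unsigned int tail,
				       unsigned int size)
	{
		/* one cacheline is held back so RING_TAIL never lands on RING_HEAD */
		return (head - tail - CACHELINE_BYTES) & (size - 1);
	}

	int main(void)
	{
		const unsigned int size = 16384;	/* 16 KiB legacy ring */

		printf("empty ring: %u bytes free\n",
		       ring_space(0, 0, size));				/* 16320 */
		printf("after emitting 4096 bytes: %u bytes free\n",
		       ring_space(0, ring_wrap(size, 4096), size));	/* 12224 */
		return 0;
	}

With these inputs the free space is the full ring minus the guard cacheline (16320 bytes) and then minus the 4096 emitted bytes (12224), which is the kind of headroom intel_ring_begin() compares its total_bytes against.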
+ */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); +} + +#endif /* INTEL_RING_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 12010e798868..bc44fe8e5ffa 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -33,11 +33,15 @@ #include "gem/i915_gem_context.h" +#include "gen6_ppgtt.h" #include "i915_drv.h" -#include "i915_gem_render_state.h" #include "i915_trace.h" #include "intel_context.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" #include "intel_reset.h" +#include "intel_ring.h" #include "intel_workarounds.h" /* Rough estimate of the typical request size, performing a flush, @@ -45,16 +49,6 @@ */ #define LEGACY_REQUEST_SIZE 200 -unsigned int intel_ring_update_space(struct intel_ring *ring) -{ - unsigned int space; - - space = __intel_ring_space(ring->head, ring->emit, ring->size); - - ring->space = space; - return space; -} - static int gen2_render_ring_flush(struct i915_request *rq, u32 mode) { @@ -75,7 +69,8 @@ gen2_render_ring_flush(struct i915_request *rq, u32 mode) *cs++ = cmd; while (num_store_dw--) { *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cs++ = i915_scratch_offset(rq->i915); + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); *cs++ = 0; } *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; @@ -148,7 +143,9 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) */ if (mode & EMIT_INVALIDATE) { *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; *cs++ = 0; @@ -156,7 +153,9 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) *cs++ = MI_FLUSH; *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; *cs++ = 0; } @@ -208,7 +207,9 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) static int gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) { - u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); u32 *cs; cs = intel_ring_begin(rq, 6); @@ -241,7 +242,9 @@ gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) static int gen6_render_ring_flush(struct i915_request *rq, u32 mode) { - u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); u32 *cs, flags = 0; int ret; @@ -299,7 +302,9 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = PIPE_CONTROL_QW_WRITE; - *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; /* Finally we can flush and with it emit the breadcrumb */ @@ -309,7 +314,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 
i915_request_active_timeline(rq)->hwsp_offset | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -342,7 +348,9 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq) static int gen7_render_ring_flush(struct i915_request *rq, u32 mode) { - u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); u32 *cs, flags = 0; /* @@ -355,6 +363,12 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) */ flags |= PIPE_CONTROL_CS_STALL; + /* + * CS_STALL suggests at least a post-sync write. + */ + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + /* Just flush everything. Experiments have shown that reducing the * number of bits based on the write domains has little performance * impact. @@ -373,13 +387,6 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; - /* - * TLB invalidate requires a post-sync write. - */ - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; - - flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache @@ -410,7 +417,7 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset; + *cs++ = i915_request_active_timeline(rq)->hwsp_offset; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -424,8 +431,8 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; @@ -444,10 +451,11 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB | + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->fence.seqno; @@ -489,14 +497,13 @@ static void set_hwstam(struct intel_engine_cs *engine, u32 mask) static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) { - struct drm_i915_private *dev_priv = engine->i915; u32 addr; addr = lower_32_bits(phys); - if (INTEL_GEN(dev_priv) >= 4) + if (INTEL_GEN(engine->i915) >= 4) addr |= (phys >> 28) & 0xf0; - I915_WRITE(HWS_PGA, addr); + intel_uncore_write(engine->uncore, HWS_PGA, addr); } static struct page *status_page(struct intel_engine_cs *engine) @@ -515,14 +522,13 @@ 
static void ring_setup_phys_status_page(struct intel_engine_cs *engine) static void set_hwsp(struct intel_engine_cs *engine, u32 offset) { - struct drm_i915_private *dev_priv = engine->i915; i915_reg_t hwsp; /* * The ring status page addresses are no longer next to the rest of * the ring registers as of gen7. */ - if (IS_GEN(dev_priv, 7)) { + if (IS_GEN(engine->i915, 7)) { switch (engine->id) { /* * No more rings exist on Gen7. Default case is only to shut up @@ -544,14 +550,14 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) hwsp = VEBOX_HWS_PGA_GEN7; break; } - } else if (IS_GEN(dev_priv, 6)) { + } else if (IS_GEN(engine->i915, 6)) { hwsp = RING_HWS_PGA_GEN6(engine->mmio_base); } else { hwsp = RING_HWS_PGA(engine->mmio_base); } - I915_WRITE(hwsp, offset); - POSTING_READ(hwsp); + intel_uncore_write(engine->uncore, hwsp, offset); + intel_uncore_posting_read(engine->uncore, hwsp); } static void flush_cs_tlb(struct intel_engine_cs *engine) @@ -623,14 +629,15 @@ static bool stop_ring(struct intel_engine_cs *engine) static int xcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct intel_ring *ring = engine->buffer; + struct intel_ring *ring = engine->legacy.ring; int ret = 0; - GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n", - engine->name, ring->head, ring->tail); + ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n", + ring->head, ring->tail); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); + /* WaClearRingBufHeadRegAtInit:ctg,elk */ if (!stop_ring(engine)) { /* G45 ring initialization often fails to reset head to zero */ DRM_DEBUG_DRIVER("%s head not reset to zero " @@ -662,19 +669,16 @@ static int xcs_resume(struct intel_engine_cs *engine) intel_engine_reset_breadcrumbs(engine); /* Enforce ordering by reading HEAD register back */ - ENGINE_READ(engine, RING_HEAD); + ENGINE_POSTING_READ(engine, RING_HEAD); - /* Initialize the ring. This must happen _after_ we've cleared the ring + /* + * Initialize the ring. This must happen _after_ we've cleared the ring * registers with the above sequence (the readback of the HEAD registers * also enforces ordering), otherwise the hw might lose the new ring - * register values. */ + * register values. + */ ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma)); - /* WaClearRingBufHeadRegAtInit:ctg,elk */ - if (ENGINE_READ(engine, RING_HEAD)) - DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n", - engine->name, ENGINE_READ(engine, RING_HEAD)); - /* Check that the ring offsets point within the ring! */ GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); @@ -716,7 +720,7 @@ static int xcs_resume(struct intel_engine_cs *engine) } /* Papering over lost _interrupts_ immediately following the restart */ - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); out: intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); @@ -725,10 +729,47 @@ out: static void reset_prepare(struct intel_engine_cs *engine) { - intel_engine_stop_cs(engine); + struct intel_uncore *uncore = engine->uncore; + const u32 base = engine->mmio_base; + + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. 
+ * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * WaMediaResetMainRingCleanup:ctg,elk (presumably) + * + * FIXME: Wa for more modern gens needs to be validated + */ + ENGINE_TRACE(engine, "\n"); + + if (intel_engine_stop_cs(engine)) + ENGINE_TRACE(engine, "timed out on STOP_RING\n"); + + intel_uncore_write_fw(uncore, + RING_HEAD(base), + intel_uncore_read_fw(uncore, RING_TAIL(base))); + intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */ + + intel_uncore_write_fw(uncore, RING_HEAD(base), 0); + intel_uncore_write_fw(uncore, RING_TAIL(base), 0); + intel_uncore_posting_read_fw(uncore, RING_TAIL(base)); + + /* The ring must be empty before it is disabled */ + intel_uncore_write_fw(uncore, RING_CTL(base), 0); + + /* Check acts as a post */ + if (intel_uncore_read_fw(uncore, RING_HEAD(base))) + ENGINE_TRACE(engine, "ring head [%x] not parked\n", + intel_uncore_read_fw(uncore, RING_HEAD(base))); } -static void reset_ring(struct intel_engine_cs *engine, bool stalled) +static void reset_rewind(struct intel_engine_cs *engine, bool stalled) { struct i915_request *pos, *rq; unsigned long flags; @@ -781,14 +822,14 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) * If the request was innocent, we try to replay the request * with the restored context. */ - i915_reset_request(rq, stalled); + __i915_request_reset(rq, stalled); - GEM_BUG_ON(rq->ring != engine->buffer); + GEM_BUG_ON(rq->ring != engine->legacy.ring); head = rq->head; } else { - head = engine->buffer->tail; + head = engine->legacy.ring->tail; } - engine->buffer->head = intel_ring_wrap(engine->buffer, head); + engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head); spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -797,24 +838,10 @@ static void reset_finish(struct intel_engine_cs *engine) { } -static int intel_rcs_ctx_init(struct i915_request *rq) -{ - int ret; - - ret = intel_engine_emit_ctx_wa(rq); - if (ret != 0) - return ret; - - ret = i915_gem_render_state_emit(rq); - if (ret) - return ret; - - return 0; -} - static int rcs_resume(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_private *i915 = engine->i915; + struct intel_uncore *uncore = engine->uncore; /* * Disable CONSTANT_BUFFER before it is loaded from the context @@ -826,13 +853,14 @@ static int rcs_resume(struct intel_engine_cs *engine) * they are already accustomed to from before contexts were * enabled. */ - if (IS_GEN(dev_priv, 4)) - I915_WRITE(ECOSKPD, + if (IS_GEN(i915, 4)) + intel_uncore_write(uncore, ECOSKPD, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE)); /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ - if (IS_GEN_RANGE(dev_priv, 4, 6)) - I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); + if (IS_GEN_RANGE(i915, 4, 6)) + intel_uncore_write(uncore, MI_MODE, + _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); /* We need to disable the AsyncFlip performance optimisations in order * to use MI_WAIT_FOR_EVENT within the CS. 
It should already be @@ -840,38 +868,40 @@ static int rcs_resume(struct intel_engine_cs *engine) * * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv */ - if (IS_GEN_RANGE(dev_priv, 6, 7)) - I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); + if (IS_GEN_RANGE(i915, 6, 7)) + intel_uncore_write(uncore, MI_MODE, + _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); /* Required for the hardware to program scanline values for waiting */ /* WaEnableFlushTlbInvalidationMode:snb */ - if (IS_GEN(dev_priv, 6)) - I915_WRITE(GFX_MODE, + if (IS_GEN(i915, 6)) + intel_uncore_write(uncore, GFX_MODE, _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT)); /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ - if (IS_GEN(dev_priv, 7)) - I915_WRITE(GFX_MODE_GEN7, + if (IS_GEN(i915, 7)) + intel_uncore_write(uncore, GFX_MODE_GEN7, _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) | _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); - if (IS_GEN(dev_priv, 6)) { + if (IS_GEN(i915, 6)) { /* From the Sandybridge PRM, volume 1 part 3, page 24: * "If this bit is set, STCunit will have LRA as replacement * policy. [...] This bit must be reset. LRA replacement * policy is not supported." */ - I915_WRITE(CACHE_MODE_0, + intel_uncore_write(uncore, CACHE_MODE_0, _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); } - if (IS_GEN_RANGE(dev_priv, 6, 7)) - I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); + if (IS_GEN_RANGE(i915, 6, 7)) + intel_uncore_write(uncore, INSTPM, + _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); return xcs_resume(engine); } -static void cancel_requests(struct intel_engine_cs *engine) +static void reset_cancel(struct intel_engine_cs *engine) { struct i915_request *request; unsigned long flags; @@ -894,6 +924,7 @@ static void cancel_requests(struct intel_engine_cs *engine) static void i9xx_submit_request(struct i915_request *request) { i915_request_submit(request); + wmb(); /* paranoid flush writes out of the WCB before mmio */ ENGINE_WRITE(request->engine, RING_TAIL, intel_ring_set_tail(request->ring, request->tail)); @@ -901,8 +932,8 @@ static void i9xx_submit_request(struct i915_request *request) static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; @@ -924,8 +955,8 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; @@ -948,13 +979,13 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) static void gen5_irq_enable(struct intel_engine_cs *engine) { - gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); + gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); } static void gen5_irq_disable(struct intel_engine_cs *engine) { - gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); + gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); } static void @@ -1015,14 +1046,14 @@ gen6_irq_enable(struct intel_engine_cs *engine) 
/* Flush/delay to ensure the RING_IMR is active before the GT IMR */ ENGINE_POSTING_READ(engine, RING_IMR); - gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); + gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); } static void gen6_irq_disable(struct intel_engine_cs *engine) { ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); - gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); + gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); } static void @@ -1033,14 +1064,14 @@ hsw_vebox_irq_enable(struct intel_engine_cs *engine) /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ ENGINE_POSTING_READ(engine, RING_IMR); - gen6_unmask_pm_irq(engine->i915, engine->irq_enable_mask); + gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask); } static void hsw_vebox_irq_disable(struct intel_engine_cs *engine) { ENGINE_WRITE(engine, RING_IMR, ~0); - gen6_mask_pm_irq(engine->i915, engine->irq_enable_mask); + gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask); } static int @@ -1071,9 +1102,11 @@ i830_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { - u32 *cs, cs_offset = i915_scratch_offset(rq->i915); + u32 *cs, cs_offset = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); - GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE); + GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE); cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -1100,7 +1133,7 @@ i830_emit_bb_start(struct i915_request *rq, * stable batch scratch bo area (so that the CS never * stumbles over its tlb invalidation bug) ... */ - *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; *cs++ = cs_offset; @@ -1146,179 +1179,9 @@ i915_emit_bb_start(struct i915_request *rq, return 0; } -int intel_ring_pin(struct intel_ring *ring) -{ - struct i915_vma *vma = ring->vma; - unsigned int flags; - void *addr; - int ret; - - if (atomic_fetch_inc(&ring->pin_count)) - return 0; - - ret = i915_timeline_pin(ring->timeline); - if (ret) - goto err_unpin; - - flags = PIN_GLOBAL; - - /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - - if (vma->obj->stolen) - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; - - ret = i915_vma_pin(vma, 0, 0, flags); - if (unlikely(ret)) - goto err_timeline; - - if (i915_vma_is_map_and_fenceable(vma)) - addr = (void __force *)i915_vma_pin_iomap(vma); - else - addr = i915_gem_object_pin_map(vma->obj, - i915_coherent_map_type(vma->vm->i915)); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_ring; - } - - vma->obj->pin_global++; - - GEM_BUG_ON(ring->vaddr); - ring->vaddr = addr; - - return 0; - -err_ring: - i915_vma_unpin(vma); -err_timeline: - i915_timeline_unpin(ring->timeline); -err_unpin: - atomic_dec(&ring->pin_count); - return ret; -} - -void intel_ring_reset(struct intel_ring *ring, u32 tail) -{ - GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); - - ring->tail = tail; - ring->head = tail; - ring->emit = tail; - intel_ring_update_space(ring); -} - -void intel_ring_unpin(struct intel_ring *ring) -{ - if (!atomic_dec_and_test(&ring->pin_count)) - return; - - /* Discard any unused bytes beyond that submitted to hw. 
*/ - intel_ring_reset(ring, ring->tail); - - GEM_BUG_ON(!ring->vma); - if (i915_vma_is_map_and_fenceable(ring->vma)) - i915_vma_unpin_iomap(ring->vma); - else - i915_gem_object_unpin_map(ring->vma->obj); - - GEM_BUG_ON(!ring->vaddr); - ring->vaddr = NULL; - - ring->vma->obj->pin_global--; - i915_vma_unpin(ring->vma); - - i915_timeline_unpin(ring->timeline); -} - -static struct i915_vma * -intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) -{ - struct i915_address_space *vm = &dev_priv->ggtt.vm; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = i915_gem_object_create_stolen(dev_priv, size); - if (!obj) - obj = i915_gem_object_create_internal(dev_priv, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - /* - * Mark ring buffers as read-only from GPU side (so no stray overwrites) - * if supported by the platform's GGTT. - */ - if (vm->has_read_only) - i915_gem_object_set_readonly(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return vma; -} - -struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_timeline *timeline, - int size) -{ - struct intel_ring *ring; - struct i915_vma *vma; - - GEM_BUG_ON(!is_power_of_2(size)); - GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); - - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - return ERR_PTR(-ENOMEM); - - kref_init(&ring->ref); - INIT_LIST_HEAD(&ring->request_list); - ring->timeline = i915_timeline_get(timeline); - - ring->size = size; - /* Workaround an erratum on the i830 which causes a hang if - * the TAIL pointer points to within the last 2 cachelines - * of the buffer. - */ - ring->effective_size = size; - if (IS_I830(engine->i915) || IS_I845G(engine->i915)) - ring->effective_size -= 2 * CACHELINE_BYTES; - - intel_ring_update_space(ring); - - vma = intel_ring_create_vma(engine->i915, size); - if (IS_ERR(vma)) { - kfree(ring); - return ERR_CAST(vma); - } - ring->vma = vma; - - return ring; -} - -void intel_ring_free(struct kref *ref) -{ - struct intel_ring *ring = container_of(ref, typeof(*ring), ref); - - i915_vma_close(ring->vma); - i915_vma_put(ring->vma); - - i915_timeline_put(ring->timeline); - kfree(ring); -} - static void __ring_context_fini(struct intel_context *ce) { - GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); - i915_gem_object_put(ce->state->obj); + i915_vma_put(ce->state); } static void ring_context_destroy(struct kref *ref) @@ -1330,33 +1193,45 @@ static void ring_context_destroy(struct kref *ref) if (ce->state) __ring_context_fini(ce); + intel_context_fini(ce); intel_context_free(ce); } -static int __context_pin_ppgtt(struct i915_gem_context *ctx) +static struct i915_address_space *vm_alias(struct intel_context *ce) +{ + struct i915_address_space *vm; + + vm = ce->vm; + if (i915_is_ggtt(vm)) + vm = &i915_vm_to_ggtt(vm)->alias->vm; + + return vm; +} + +static int __context_pin_ppgtt(struct intel_context *ce) { struct i915_address_space *vm; int err = 0; - vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm; + vm = vm_alias(ce); if (vm) err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm))); return err; } -static void __context_unpin_ppgtt(struct i915_gem_context *ctx) +static void __context_unpin_ppgtt(struct intel_context *ce) { struct i915_address_space *vm; - vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm; + vm = vm_alias(ce); if (vm) gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm)); } static void ring_context_unpin(struct intel_context *ce) { - 
__context_unpin_ppgtt(ce->gem_context); + __context_unpin_ppgtt(ce); } static struct i915_vma * @@ -1412,7 +1287,7 @@ alloc_context_vma(struct intel_engine_cs *engine) i915_gem_object_unpin_map(obj); } - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -1427,16 +1302,17 @@ err_obj: return ERR_PTR(err); } -static int ring_context_pin(struct intel_context *ce) +static int ring_context_alloc(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; - int err; /* One ringbuffer to rule them all */ - GEM_BUG_ON(!engine->buffer); - ce->ring = engine->buffer; + GEM_BUG_ON(!engine->legacy.ring); + ce->ring = engine->legacy.ring; + ce->timeline = intel_timeline_get(engine->legacy.timeline); - if (!ce->state && engine->context_size) { + GEM_BUG_ON(ce->state); + if (engine->context_size) { struct i915_vma *vma; vma = alloc_context_vma(engine); @@ -1444,29 +1320,26 @@ static int ring_context_pin(struct intel_context *ce) return PTR_ERR(vma); ce->state = vma; + if (engine->default_state) + __set_bit(CONTEXT_VALID_BIT, &ce->flags); } - err = intel_context_active_acquire(ce, PIN_HIGH); - if (err) - return err; - - err = __context_pin_ppgtt(ce->gem_context); - if (err) - goto err_active; - return 0; +} -err_active: - intel_context_active_release(ce); - return err; +static int ring_context_pin(struct intel_context *ce) +{ + return __context_pin_ppgtt(ce); } static void ring_context_reset(struct intel_context *ce) { - intel_ring_reset(ce->ring, 0); + intel_ring_reset(ce->ring, ce->ring->emit); } static const struct intel_context_ops ring_context_ops = { + .alloc = ring_context_alloc, + .pin = ring_context_pin, .unpin = ring_context_unpin, @@ -1477,45 +1350,38 @@ static const struct intel_context_ops ring_context_ops = { .destroy = ring_context_destroy, }; -static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt) +static int load_pd_dir(struct i915_request *rq, + const struct i915_ppgtt *ppgtt, + u32 valid) { const struct intel_engine_cs * const engine = rq->engine; u32 *cs; - cs = intel_ring_begin(rq, 6); + cs = intel_ring_begin(rq, 12); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); - *cs++ = PP_DIR_DCLV_2G; + *cs++ = valid; *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = ppgtt->pd->base.ggtt_offset << 10; - - intel_ring_advance(rq, cs); - - return 0; -} - -static int flush_pd_dir(struct i915_request *rq) -{ - const struct intel_engine_cs * const engine = rq->engine; - u32 *cs; + *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10; - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Stall until the page table load is complete */ + /* Stall until the page table load is complete? 
*/ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = i915_scratch_offset(rq->i915); - *cs++ = MI_NOOP; + *cs++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE); intel_ring_advance(rq, cs); - return 0; + + return rq->engine->emit_flush(rq, EMIT_FLUSH); } static inline int mi_set_context(struct i915_request *rq, u32 flags) @@ -1524,19 +1390,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct intel_engine_cs *engine = rq->engine; enum intel_engine_id id; const int num_engines = - IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; + IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; bool force_restore = false; int len; u32 *cs; - flags |= MI_MM_SPACE_GTT; - if (IS_HASWELL(i915)) - /* These flags are for resource streamer on HSW+ */ - flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; - else - /* We need to save the extended state for powersaving modes */ - flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; - len = 4; if (IS_GEN(i915, 7)) len += 2 + (num_engines ? 4 * num_engines + 6 : 0); @@ -1560,7 +1418,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct intel_engine_cs *signaller; *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1601,7 +1459,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags; + *cs++ = i915_ggtt_offset(rq->context->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1614,7 +1472,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) i915_reg_t last_reg = {}; /* keep gcc quiet */ *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1627,7 +1485,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* Insert a delay before the next switch! 
*/ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = i915_scratch_offset(rq->i915); + *cs++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); *cs++ = MI_NOOP; } *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -1640,7 +1499,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) return 0; } -static int remap_l3(struct i915_request *rq, int slice) +static int remap_l3_slice(struct i915_request *rq, int slice) { u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; int i; @@ -1668,120 +1527,97 @@ static int remap_l3(struct i915_request *rq, int slice) return 0; } -static int switch_context(struct i915_request *rq) +static int remap_l3(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *ctx = rq->gem_context; - struct i915_address_space *vm = - ctx->vm ?: &rq->i915->mm.aliasing_ppgtt->vm; - unsigned int unwind_mm = 0; - u32 hw_flags = 0; - int ret, i; + struct i915_gem_context *ctx = i915_request_gem_context(rq); + int i, err; - GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); + if (!ctx || !ctx->remap_slice) + return 0; - if (vm) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - int loops; + for (i = 0; i < MAX_L3_SLICES; i++) { + if (!(ctx->remap_slice & BIT(i))) + continue; - /* - * Baytail takes a little more convincing that it really needs - * to reload the PD between contexts. It is not just a little - * longer, as adding more stalls after the load_pd_dir (i.e. - * adding a long loop around flush_pd_dir) is not as effective - * as reloading the PD umpteen times. 32 is derived from - * experimentation (gem_exec_parallel/fds) and has no good - * explanation. - */ - loops = 1; - if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915)) - loops = 32; - - do { - ret = load_pd_dir(rq, ppgtt); - if (ret) - goto err; - } while (--loops); - - if (ppgtt->pd_dirty_engines & engine->mask) { - unwind_mm = engine->mask; - ppgtt->pd_dirty_engines &= ~unwind_mm; - hw_flags = MI_FORCE_RESTORE; - } + err = remap_l3_slice(rq, i); + if (err) + return err; } - if (rq->hw_context->state) { - GEM_BUG_ON(engine->id != RCS0); + ctx->remap_slice = 0; + return 0; +} - /* - * The kernel context(s) is treated as pure scratch and is not - * expected to retain any state (as we sacrifice it during - * suspend and on resume it may be corrupted). This is ok, - * as nothing actually executes using the kernel context; it - * is purely used for flushing user contexts. - */ - if (i915_gem_context_is_kernel(ctx)) - hw_flags = MI_RESTORE_INHIBIT; +static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) +{ + int ret; - ret = mi_set_context(rq, hw_flags); - if (ret) - goto err_mm; - } + if (!vm) + return 0; - if (vm) { - ret = engine->emit_flush(rq, EMIT_INVALIDATE); - if (ret) - goto err_mm; + ret = rq->engine->emit_flush(rq, EMIT_FLUSH); + if (ret) + return ret; - ret = flush_pd_dir(rq); - if (ret) - goto err_mm; + /* + * Not only do we need a full barrier (post-sync write) after + * invalidating the TLBs, but we need to wait a little bit + * longer. Whether this is merely delaying us, or the + * subsequent flush is a key part of serialising with the + * post-sync op, this extra pass appears vital before a + * mm switch! + */ + ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G); + if (ret) + return ret; - /* - * Not only do we need a full barrier (post-sync write) after - * invalidating the TLBs, but we need to wait a little bit - * longer. 
Whether this is merely delaying us, or the - * subsequent flush is a key part of serialising with the - * post-sync op, this extra pass appears vital before a - * mm switch! - */ - ret = engine->emit_flush(rq, EMIT_INVALIDATE); - if (ret) - goto err_mm; + return rq->engine->emit_flush(rq, EMIT_INVALIDATE); +} - ret = engine->emit_flush(rq, EMIT_FLUSH); - if (ret) - goto err_mm; - } +static int switch_context(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + int ret; - if (ctx->remap_slice) { - for (i = 0; i < MAX_L3_SLICES; i++) { - if (!(ctx->remap_slice & BIT(i))) - continue; + GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); - ret = remap_l3(rq, i); - if (ret) - goto err_mm; - } + ret = switch_mm(rq, vm_alias(ce)); + if (ret) + return ret; + + if (ce->state) { + u32 flags; + + GEM_BUG_ON(rq->engine->id != RCS0); - ctx->remap_slice = 0; + /* For resource streamer on HSW+ and power context elsewhere */ + BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN); + BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN); + + flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT; + if (test_bit(CONTEXT_VALID_BIT, &ce->flags)) + flags |= MI_RESTORE_EXT_STATE_EN; + else + flags |= MI_RESTORE_INHIBIT; + + ret = mi_set_context(rq, flags); + if (ret) + return ret; } - return 0; + ret = remap_l3(rq); + if (ret) + return ret; -err_mm: - if (unwind_mm) - i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm; -err: - return ret; + return 0; } static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); - GEM_BUG_ON(request->timeline->has_initial_breadcrumb); + GEM_BUG_ON(!intel_context_is_pinned(request->context)); + GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb); /* * Flush enough space to reduce the likelihood of waiting after @@ -1803,140 +1639,6 @@ static int ring_request_alloc(struct i915_request *request) return 0; } -static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) -{ - struct i915_request *target; - long timeout; - - if (intel_ring_update_space(ring) >= bytes) - return 0; - - GEM_BUG_ON(list_empty(&ring->request_list)); - list_for_each_entry(target, &ring->request_list, ring_link) { - /* Would completion of this request free enough space? */ - if (bytes <= __intel_ring_space(target->postfix, - ring->emit, ring->size)) - break; - } - - if (WARN_ON(&target->ring_link == &ring->request_list)) - return -ENOSPC; - - timeout = i915_request_wait(target, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) - return timeout; - - i915_request_retire_upto(target); - - intel_ring_update_space(ring); - GEM_BUG_ON(ring->space < bytes); - return 0; -} - -u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) -{ - struct intel_ring *ring = rq->ring; - const unsigned int remain_usable = ring->effective_size - ring->emit; - const unsigned int bytes = num_dwords * sizeof(u32); - unsigned int need_wrap = 0; - unsigned int total_bytes; - u32 *cs; - - /* Packets must be qword aligned. */ - GEM_BUG_ON(num_dwords & 1); - - total_bytes = bytes + rq->reserved_space; - GEM_BUG_ON(total_bytes > ring->effective_size); - - if (unlikely(total_bytes > remain_usable)) { - const int remain_actual = ring->size - ring->emit; - - if (bytes > remain_usable) { - /* - * Not enough space for the basic request. So need to - * flush out the remainder and then wait for - * base + reserved. 
- */ - total_bytes += remain_actual; - need_wrap = remain_actual | 1; - } else { - /* - * The base request will fit but the reserved space - * falls off the end. So we don't need an immediate - * wrap and only need to effectively wait for the - * reserved size from the start of ringbuffer. - */ - total_bytes = rq->reserved_space + remain_actual; - } - } - - if (unlikely(total_bytes > ring->space)) { - int ret; - - /* - * Space is reserved in the ringbuffer for finalising the - * request, as that cannot be allowed to fail. During request - * finalisation, reserved_space is set to 0 to stop the - * overallocation and the assumption is that then we never need - * to wait (which has the risk of failing with EINTR). - * - * See also i915_request_alloc() and i915_request_add(). - */ - GEM_BUG_ON(!rq->reserved_space); - - ret = wait_for_space(ring, total_bytes); - if (unlikely(ret)) - return ERR_PTR(ret); - } - - if (unlikely(need_wrap)) { - need_wrap &= ~1; - GEM_BUG_ON(need_wrap > ring->space); - GEM_BUG_ON(ring->emit + need_wrap > ring->size); - GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); - - /* Fill the tail with MI_NOOP */ - memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); - ring->space -= need_wrap; - ring->emit = 0; - } - - GEM_BUG_ON(ring->emit > ring->size - bytes); - GEM_BUG_ON(ring->space < bytes); - cs = ring->vaddr + ring->emit; - GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); - ring->emit += bytes; - ring->space -= bytes; - - return cs; -} - -/* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct i915_request *rq) -{ - int num_dwords; - void *cs; - - num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - if (num_dwords == 0) - return 0; - - num_dwords = CACHELINE_DWORDS - num_dwords; - GEM_BUG_ON(num_dwords & 1); - - cs = intel_ring_begin(rq, num_dwords); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); - intel_ring_advance(rq, cs); - - GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); - return 0; -} - static void gen6_bsd_submit_request(struct i915_request *request) { struct intel_uncore *uncore = request->engine->uncore; @@ -2070,7 +1772,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode) static void i9xx_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i9xx_submit_request; - engine->cancel_requests = cancel_requests; engine->park = NULL; engine->unpark = NULL; @@ -2082,7 +1783,7 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = gen6_bsd_submit_request; } -static void ring_destroy(struct intel_engine_cs *engine) +static void ring_release(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -2091,10 +1792,11 @@ static void ring_destroy(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - intel_ring_unpin(engine->buffer); - intel_ring_put(engine->buffer); + intel_ring_unpin(engine->legacy.ring); + intel_ring_put(engine->legacy.ring); - kfree(engine); + intel_timeline_unpin(engine->legacy.timeline); + intel_timeline_put(engine->legacy.timeline); } static void setup_irq(struct intel_engine_cs *engine) @@ -2125,11 +1827,10 @@ static void setup_common(struct intel_engine_cs *engine) setup_irq(engine); - engine->destroy = ring_destroy; - engine->resume = xcs_resume; engine->reset.prepare = reset_prepare; - engine->reset.reset = reset_ring; + engine->reset.rewind = reset_rewind; + 
engine->reset.cancel = reset_cancel; engine->reset.finish = reset_finish; engine->cops = &ring_context_ops; @@ -2166,11 +1867,9 @@ static void setup_rcs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; if (INTEL_GEN(i915) >= 7) { - engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; } else if (IS_GEN(i915, 6)) { - engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen6_render_ring_flush; engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; } else if (IS_GEN(i915, 5)) { @@ -2242,6 +1941,10 @@ static void setup_vecs(struct intel_engine_cs *engine) int intel_ring_submission_setup(struct intel_engine_cs *engine) { + struct intel_timeline *timeline; + struct intel_ring *ring; + int err; + setup_common(engine); switch (engine->class) { @@ -2262,48 +1965,44 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) return -ENODEV; } - return 0; -} - -int intel_ring_submission_init(struct intel_engine_cs *engine) -{ - struct i915_timeline *timeline; - struct intel_ring *ring; - int err; - - timeline = i915_timeline_create(engine->i915, engine->status_page.vma); + timeline = intel_timeline_create(engine->gt, engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; } GEM_BUG_ON(timeline->has_initial_breadcrumb); - ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); - i915_timeline_put(timeline); + err = intel_timeline_pin(timeline); + if (err) + goto err_timeline; + + ring = intel_engine_create_ring(engine, SZ_16K); if (IS_ERR(ring)) { err = PTR_ERR(ring); - goto err; + goto err_timeline_unpin; } err = intel_ring_pin(ring); if (err) goto err_ring; - GEM_BUG_ON(engine->buffer); - engine->buffer = ring; + GEM_BUG_ON(engine->legacy.ring); + engine->legacy.ring = ring; + engine->legacy.timeline = timeline; - err = intel_engine_init_common(engine); - if (err) - goto err_unpin; + GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); - GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); + /* Finally, take ownership and responsibility for cleanup! */ + engine->release = ring_release; return 0; -err_unpin: - intel_ring_unpin(ring); err_ring: intel_ring_put(ring); +err_timeline_unpin: + intel_timeline_unpin(timeline); +err_timeline: + intel_timeline_put(timeline); err: intel_engine_cleanup_common(engine); return err; diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h new file mode 100644 index 000000000000..d9f17f38e0cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RING_TYPES_H +#define INTEL_RING_TYPES_H + +#include <linux/atomic.h> +#include <linux/kref.h> +#include <linux/types.h> + +/* + * Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! 
+ */ +#define CACHELINE_BYTES 64 +#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) + +struct i915_vma; + +struct intel_ring { + struct kref ref; + struct i915_vma *vma; + void *vaddr; + + /* + * As we have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. + */ + atomic_t pin_count; + + u32 head; + u32 tail; + u32 emit; + + u32 space; + u32 size; + u32 effective_size; +}; + +#endif /* INTEL_RING_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c new file mode 100644 index 000000000000..d2a3d935d186 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -0,0 +1,1903 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" +#include "intel_rps.h" +#include "intel_sideband.h" +#include "../../../platform/x86/intel_ips.h" + +/* + * Lock protecting IPS related data structures + */ +static DEFINE_SPINLOCK(mchdev_lock); + +static struct intel_gt *rps_to_gt(struct intel_rps *rps) +{ + return container_of(rps, struct intel_gt, rps); +} + +static struct drm_i915_private *rps_to_i915(struct intel_rps *rps) +{ + return rps_to_gt(rps)->i915; +} + +static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) +{ + return rps_to_gt(rps)->uncore; +} + +static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) +{ + return mask & ~rps->pm_intrmsk_mbz; +} + +static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +{ + intel_uncore_write_fw(uncore, reg, val); +} + +static u32 rps_pm_mask(struct intel_rps *rps, u8 val) +{ + u32 mask = 0; + + /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ + if (val > rps->min_freq_softlimit) + mask |= (GEN6_PM_RP_UP_EI_EXPIRED | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + if (val < rps->max_freq_softlimit) + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; + + mask &= rps->pm_events; + + return rps_pm_sanitize_mask(rps, ~mask); +} + +static void rps_reset_ei(struct intel_rps *rps) +{ + memset(&rps->ei, 0, sizeof(rps->ei)); +} + +static void rps_enable_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + rps_reset_ei(rps); + + if (IS_VALLEYVIEW(gt->i915)) + /* WaGsvRC0ResidencyMethod:vlv */ + rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; + else + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_enable_irq(gt, rps->pm_events); + spin_unlock_irq(>->irq_lock); + + set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq)); +} + +static void gen6_rps_reset_interrupts(struct intel_rps *rps) +{ + gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS); +} + +static void gen11_rps_reset_interrupts(struct intel_rps *rps) +{ + while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM)) + ; +} + +static void rps_reset_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + spin_lock_irq(>->irq_lock); + if (INTEL_GEN(gt->i915) >= 11) + 
gen11_rps_reset_interrupts(rps); + else + gen6_rps_reset_interrupts(rps); + + rps->pm_iir = 0; + spin_unlock_irq(>->irq_lock); +} + +static void rps_disable_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + rps->pm_events = 0; + + set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); + spin_unlock_irq(>->irq_lock); + + intel_synchronize_irq(gt->i915); + + /* + * Now that we will not be generating any more work, flush any + * outstanding tasks. As we are called on the RPS idle path, + * we will reset the GPU to minimum frequencies, so the current + * state of the worker can be discarded. + */ + cancel_work_sync(&rps->work); + + rps_reset_interrupts(rps); +} + +static const struct cparams { + u16 i; + u16 t; + u16 m; + u16 c; +} cparams[] = { + { 1, 1333, 301, 28664 }, + { 1, 1066, 294, 24460 }, + { 1, 800, 294, 25192 }, + { 0, 1333, 276, 27605 }, + { 0, 1066, 276, 27605 }, + { 0, 800, 231, 23784 }, +}; + +static void gen5_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + u8 fmax, fmin, fstart; + u32 rgvmodectl; + int c_m, i; + + if (i915->fsb_freq <= 3200) + c_m = 0; + else if (i915->fsb_freq <= 4800) + c_m = 1; + else + c_m = 2; + + for (i = 0; i < ARRAY_SIZE(cparams); i++) { + if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { + rps->ips.m = cparams[i].m; + rps->ips.c = cparams[i].c; + break; + } + } + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Set up min, max, and cur for interrupt handling */ + fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; + fmin = (rgvmodectl & MEMMODE_FMIN_MASK); + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", + fmax, fmin, fstart); + + rps->min_freq = fmax; + rps->max_freq = fmin; + + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; +} + +static unsigned long +__ips_chipset_val(struct intel_ips *ips) +{ + struct intel_uncore *uncore = + rps_to_uncore(container_of(ips, struct intel_rps, ips)); + unsigned long now = jiffies_to_msecs(jiffies), dt; + unsigned long result; + u64 total, delta; + + lockdep_assert_held(&mchdev_lock); + + /* + * Prevent division-by-zero if we are asking too fast. + * Also, we don't get interesting results if we are polling + * faster than once in 10ms, so just return the saved value + * in such cases. 
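To make the chipset-power arithmetic just below concrete (a hedged, standalone sketch, not part of the patch): with the m and c coefficients taken from the cparams table above, e.g. the { 1, 1333, 301, 28664 } row, the estimate is ((m * delta) / dt + c) / 10, where delta is the growth of the DMIEC + DDREC + CSIEC counters and dt the elapsed time in milliseconds; the counter and time values here are invented purely for illustration.

	/* Standalone sketch of the __ips_chipset_val() computation. */
	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		const uint64_t m = 301, c = 28664;  /* from the { 1, 1333, 301, 28664 } row */
		const uint64_t delta = 2000000;     /* invented counter growth */
		const uint64_t dt = 50;             /* invented elapsed time, in ms */

		const uint64_t result = (m * delta / dt + c) / 10;

		printf("chipset power estimate: %llu\n",
		       (unsigned long long)result); /* 1206866 with these inputs */
		return 0;
	}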
+ */ + dt = now - ips->last_time1; + if (dt <= 10) + return ips->chipset_power; + + /* FIXME: handle per-counter overflow */ + total = intel_uncore_read(uncore, DMIEC); + total += intel_uncore_read(uncore, DDREC); + total += intel_uncore_read(uncore, CSIEC); + + delta = total - ips->last_count1; + + result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); + + ips->last_count1 = total; + ips->last_time1 = now; + + ips->chipset_power = result; + + return result; +} + +static unsigned long ips_mch_val(struct intel_uncore *uncore) +{ + unsigned int m, x, b; + u32 tsfs; + + tsfs = intel_uncore_read(uncore, TSFS); + x = intel_uncore_read8(uncore, TR1); + + b = tsfs & TSFS_INTR_MASK; + m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; + + return m * x / 127 - b; +} + +static int _pxvid_to_vd(u8 pxvid) +{ + if (pxvid == 0) + return 0; + + if (pxvid >= 8 && pxvid < 31) + pxvid = 31; + + return (pxvid + 2) * 125; +} + +static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) +{ + const int vd = _pxvid_to_vd(pxvid); + + if (INTEL_INFO(i915)->is_mobile) + return max(vd - 1125, 0); + + return vd; +} + +static void __gen5_ips_update(struct intel_ips *ips) +{ + struct intel_uncore *uncore = + rps_to_uncore(container_of(ips, struct intel_rps, ips)); + u64 now, delta, dt; + u32 count; + + lockdep_assert_held(&mchdev_lock); + + now = ktime_get_raw_ns(); + dt = now - ips->last_time2; + do_div(dt, NSEC_PER_MSEC); + + /* Don't divide by 0 */ + if (dt <= 10) + return; + + count = intel_uncore_read(uncore, GFXEC); + delta = count - ips->last_count2; + + ips->last_count2 = count; + ips->last_time2 = now; + + /* More magic constants... */ + ips->gfx_power = div_u64(delta * 1181, dt * 10); +} + +static void gen5_rps_update(struct intel_rps *rps) +{ + spin_lock_irq(&mchdev_lock); + __gen5_ips_update(&rps->ips); + spin_unlock_irq(&mchdev_lock); +} + +static bool gen5_rps_set(struct intel_rps *rps, u8 val) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u16 rgvswctl; + + lockdep_assert_held(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + if (rgvswctl & MEMCTL_CMD_STS) { + DRM_DEBUG("gpu busy, RCS change rejected\n"); + return false; /* still busy with another command */ + } + + /* Invert the frequency bin into an ips delay */ + val = rps->max_freq - val; + val = rps->min_freq + val; + + rgvswctl = + (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | + (val << MEMCTL_FREQ_SHIFT) | + MEMCTL_SFCAVM; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + intel_uncore_posting_read16(uncore, MEMSWCTL); + + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + + return true; +} + +static unsigned long intel_pxfreq(u32 vidfreq) +{ + int div = (vidfreq & 0x3f0000) >> 16; + int post = (vidfreq & 0x3000) >> 12; + int pre = (vidfreq & 0x7); + + if (!pre) + return 0; + + return div * 133333 / (pre << post); +} + +static unsigned int init_emon(struct intel_uncore *uncore) +{ + u8 pxw[16]; + int i; + + /* Disable to program */ + intel_uncore_write(uncore, ECR, 0); + intel_uncore_posting_read(uncore, ECR); + + /* Program energy weights for various events */ + intel_uncore_write(uncore, SDEW, 0x15040d00); + intel_uncore_write(uncore, CSIEW0, 0x007f0000); + intel_uncore_write(uncore, CSIEW1, 0x1e220004); + intel_uncore_write(uncore, CSIEW2, 0x04000004); + + for (i = 0; i < 5; i++) + intel_uncore_write(uncore, PEW(i), 0); + for (i = 0; i < 3; i++) + intel_uncore_write(uncore, DEW(i), 0); + + /* Program P-state weights to account for frequency power adjustment */ + for (i = 
0; i < 16; i++) { + u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); + unsigned int freq = intel_pxfreq(pxvidfreq); + unsigned int vid = + (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + unsigned int val; + + val = vid * vid * freq / 1000 * 255; + val /= 127 * 127 * 900; + + pxw[i] = val; + } + /* Render standby states get 0 weight */ + pxw[14] = 0; + pxw[15] = 0; + + for (i = 0; i < 4; i++) { + intel_uncore_write(uncore, PXW(i), + pxw[i * 4 + 0] << 24 | + pxw[i * 4 + 1] << 16 | + pxw[i * 4 + 2] << 8 | + pxw[i * 4 + 3] << 0); + } + + /* Adjust magic regs to magic values (more experimental results) */ + intel_uncore_write(uncore, OGW0, 0); + intel_uncore_write(uncore, OGW1, 0); + intel_uncore_write(uncore, EG0, 0x00007f00); + intel_uncore_write(uncore, EG1, 0x0000000e); + intel_uncore_write(uncore, EG2, 0x000e0000); + intel_uncore_write(uncore, EG3, 0x68000300); + intel_uncore_write(uncore, EG4, 0x42000000); + intel_uncore_write(uncore, EG5, 0x00140031); + intel_uncore_write(uncore, EG6, 0); + intel_uncore_write(uncore, EG7, 0); + + for (i = 0; i < 8; i++) + intel_uncore_write(uncore, PXWL(i), 0); + + /* Enable PMON + select events */ + intel_uncore_write(uncore, ECR, 0x80000019); + + return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; +} + +static bool gen5_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u8 fstart, vstart; + u32 rgvmodectl; + + spin_lock_irq(&mchdev_lock); + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Enable temp reporting */ + intel_uncore_write16(uncore, PMMISC, + intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); + intel_uncore_write16(uncore, TSC1, + intel_uncore_read16(uncore, TSC1) | TSE); + + /* 100ms RC evaluation intervals */ + intel_uncore_write(uncore, RCUPEI, 100000); + intel_uncore_write(uncore, RCDNEI, 100000); + + /* Set max/min thresholds to 90ms and 80ms respectively */ + intel_uncore_write(uncore, RCBMAXAVG, 90000); + intel_uncore_write(uncore, RCBMINAVG, 80000); + + intel_uncore_write(uncore, MEMIHYST, 1); + + /* Set up min, max, and cur for interrupt handling */ + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + + vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & + PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + + intel_uncore_write(uncore, + MEMINTREN, + MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); + + intel_uncore_write(uncore, VIDSTART, vstart); + intel_uncore_posting_read(uncore, VIDSTART); + + rgvmodectl |= MEMMODE_SWMODE_EN; + intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); + + if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & + MEMCTL_CMD_STS) == 0, 10)) + DRM_ERROR("stuck trying to change perf mode\n"); + mdelay(1); + + gen5_rps_set(rps, rps->cur_freq); + + rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); + rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); + rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); + rps->ips.last_time1 = jiffies_to_msecs(jiffies); + + rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); + rps->ips.last_time2 = ktime_get_raw_ns(); + + spin_unlock_irq(&mchdev_lock); + + rps->ips.corr = init_emon(uncore); + + return true; +} + +static void gen5_rps_disable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u16 rgvswctl; + + spin_lock_irq(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + + /* Ack interrupts, disable EFC interrupt */ + intel_uncore_write(uncore, MEMINTREN, + intel_uncore_read(uncore, MEMINTREN) & + ~MEMINT_EVAL_CHG_EN); + 
intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + intel_uncore_write(uncore, DEIER, + intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); + intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); + intel_uncore_write(uncore, DEIMR, + intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); + + /* Go back to the starting frequency */ + gen5_rps_set(rps, rps->idle_freq); + mdelay(1); + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write(uncore, MEMSWCTL, rgvswctl); + mdelay(1); + + spin_unlock_irq(&mchdev_lock); +} + +static u32 rps_limits(struct intel_rps *rps, u8 val) +{ + u32 limits; + + /* + * Only set the down limit when we've reached the lowest level to avoid + * getting more interrupts, otherwise leave this clear. This prevents a + * race in the hw when coming out of rc6: There's a tiny window where + * the hw runs at the minimal clock before selecting the desired + * frequency, if the down threshold expires in that window we will not + * receive a down interrupt. + */ + if (INTEL_GEN(rps_to_i915(rps)) >= 9) { + limits = rps->max_freq_softlimit << 23; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 14; + } else { + limits = rps->max_freq_softlimit << 24; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 16; + } + + return limits; +} + +static void rps_set_power(struct intel_rps *rps, int new_power) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 threshold_up = 0, threshold_down = 0; /* in % */ + u32 ei_up = 0, ei_down = 0; + + lockdep_assert_held(&rps->power.mutex); + + if (new_power == rps->power.mode) + return; + + /* Note the units here are not exactly 1us, but 1280ns. */ + switch (new_power) { + case LOW_POWER: + /* Upclock if more than 95% busy over 16ms */ + ei_up = 16000; + threshold_up = 95; + + /* Downclock if less than 85% busy over 32ms */ + ei_down = 32000; + threshold_down = 85; + break; + + case BETWEEN: + /* Upclock if more than 90% busy over 13ms */ + ei_up = 13000; + threshold_up = 90; + + /* Downclock if less than 75% busy over 32ms */ + ei_down = 32000; + threshold_down = 75; + break; + + case HIGH_POWER: + /* Upclock if more than 85% busy over 10ms */ + ei_up = 10000; + threshold_up = 85; + + /* Downclock if less than 60% busy over 32ms */ + ei_down = 32000; + threshold_down = 60; + break; + } + + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(i915)) + goto skip_hw_write; + + set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up)); + set(uncore, GEN6_RP_UP_THRESHOLD, + GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100)); + + set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down)); + set(uncore, GEN6_RP_DOWN_THRESHOLD, + GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100)); + + set(uncore, GEN6_RP_CONTROL, + (INTEL_GEN(i915) > 9 ? 
0 : GEN6_RP_MEDIA_TURBO) | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + +skip_hw_write: + rps->power.mode = new_power; + rps->power.up_threshold = threshold_up; + rps->power.down_threshold = threshold_down; +} + +static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) +{ + int new_power; + + new_power = rps->power.mode; + switch (rps->power.mode) { + case LOW_POWER: + if (val > rps->efficient_freq + 1 && + val > rps->cur_freq) + new_power = BETWEEN; + break; + + case BETWEEN: + if (val <= rps->efficient_freq && + val < rps->cur_freq) + new_power = LOW_POWER; + else if (val >= rps->rp0_freq && + val > rps->cur_freq) + new_power = HIGH_POWER; + break; + + case HIGH_POWER: + if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && + val < rps->cur_freq) + new_power = BETWEEN; + break; + } + /* Max/min bins are special */ + if (val <= rps->min_freq_softlimit) + new_power = LOW_POWER; + if (val >= rps->max_freq_softlimit) + new_power = HIGH_POWER; + + mutex_lock(&rps->power.mutex); + if (rps->power.interactive) + new_power = HIGH_POWER; + rps_set_power(rps, new_power); + mutex_unlock(&rps->power.mutex); +} + +void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) +{ + mutex_lock(&rps->power.mutex); + if (interactive) { + if (!rps->power.interactive++ && rps->active) + rps_set_power(rps, HIGH_POWER); + } else { + GEM_BUG_ON(!rps->power.interactive); + rps->power.interactive--; + } + mutex_unlock(&rps->power.mutex); +} + +static int gen6_rps_set(struct intel_rps *rps, u8 val) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 swreq; + + if (INTEL_GEN(i915) >= 9) + swreq = GEN9_FREQUENCY(val); + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + swreq = HSW_FREQUENCY(val); + else + swreq = (GEN6_FREQUENCY(val) | + GEN6_OFFSET(0) | + GEN6_AGGRESSIVE_TURBO); + set(uncore, GEN6_RPNSWREQ, swreq); + + return 0; +} + +static int vlv_rps_set(struct intel_rps *rps, u8 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int err; + + vlv_punit_get(i915); + err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); + vlv_punit_put(i915); + + return err; +} + +static int rps_set(struct intel_rps *rps, u8 val, bool update) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int err; + + if (INTEL_GEN(i915) < 6) + return 0; + + if (val == rps->last_freq) + return 0; + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + err = vlv_rps_set(rps, val); + else + err = gen6_rps_set(rps, val); + if (err) + return err; + + if (update) + gen6_rps_set_thresholds(rps, val); + rps->last_freq = val; + + return 0; +} + +void intel_rps_unpark(struct intel_rps *rps) +{ + u8 freq; + + if (!rps->enabled) + return; + + /* + * Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. 
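The three power zones handled by rps_set_power() above differ only in their evaluation intervals and busy thresholds. A small table-driven sketch of those same tuning values is below; the conversion to register units via GT_INTERVAL_FROM_US() is deliberately left out, since its scaling is platform dependent (the code notes the interval unit is roughly 1280 ns rather than 1 us).

	#include <stdio.h>

	enum rps_power_mode { LOW_POWER, BETWEEN, HIGH_POWER };

	struct rps_power_tuning {
		unsigned int ei_up;		/* upclock evaluation interval (~1.28us units) */
		unsigned int up_threshold;	/* %busy needed to upclock */
		unsigned int ei_down;		/* downclock evaluation interval */
		unsigned int down_threshold;	/* %busy below which we downclock */
	};

	/* The same tuning values rps_set_power() programs for each zone. */
	static const struct rps_power_tuning tunings[] = {
		[LOW_POWER]  = { 16000, 95, 32000, 85 },
		[BETWEEN]    = { 13000, 90, 32000, 75 },
		[HIGH_POWER] = { 10000, 85, 32000, 60 },
	};

	int main(void)
	{
		static const char *names[] = { "LOW_POWER", "BETWEEN", "HIGH_POWER" };
		int mode;

		for (mode = LOW_POWER; mode <= HIGH_POWER; mode++)
			printf("%-10s up: >%u%% over %u, down: <%u%% over %u\n",
			       names[mode],
			       tunings[mode].up_threshold, tunings[mode].ei_up,
			       tunings[mode].down_threshold, tunings[mode].ei_down);

		return 0;
	}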
+ */ + mutex_lock(&rps->lock); + rps->active = true; + freq = max(rps->cur_freq, rps->efficient_freq), + freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit); + intel_rps_set(rps, freq); + rps->last_adj = 0; + mutex_unlock(&rps->lock); + + if (INTEL_GEN(rps_to_i915(rps)) >= 6) + rps_enable_interrupts(rps); + + if (IS_GEN(rps_to_i915(rps), 5)) + gen5_rps_update(rps); +} + +void intel_rps_park(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (!rps->enabled) + return; + + if (INTEL_GEN(i915) >= 6) + rps_disable_interrupts(rps); + + rps->active = false; + if (rps->last_freq <= rps->idle_freq) + return; + + /* + * The punit delays the write of the frequency and voltage until it + * determines the GPU is awake. During normal usage we don't want to + * waste power changing the frequency if the GPU is sleeping (rc6). + * However, the GPU and driver is now idle and we do not want to delay + * switching to minimum voltage (reducing power whilst idle) as we do + * not expect to be woken in the near future and so must flush the + * change by waking the device. + * + * We choose to take the media powerwell (either would do to trick the + * punit into committing the voltage change) as that takes a lot less + * power than the render powerwell. + */ + intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); + rps_set(rps, rps->idle_freq, false); + intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); +} + +void intel_rps_boost(struct i915_request *rq) +{ + struct intel_rps *rps = &rq->engine->gt->rps; + unsigned long flags; + + if (i915_request_signaled(rq) || !rps->active) + return; + + /* Serializes with i915_request_retire() */ + spin_lock_irqsave(&rq->lock, flags); + if (!i915_request_has_waitboost(rq) && + !dma_fence_is_signaled_locked(&rq->fence)) { + set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); + + if (!atomic_fetch_inc(&rps->num_waiters) && + READ_ONCE(rps->cur_freq) < rps->boost_freq) + schedule_work(&rps->work); + + atomic_inc(&rps->boosts); + } + spin_unlock_irqrestore(&rq->lock, flags); +} + +int intel_rps_set(struct intel_rps *rps, u8 val) +{ + int err; + + lockdep_assert_held(&rps->lock); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); + + if (rps->active) { + err = rps_set(rps, val, true); + if (err) + return err; + + /* + * Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. 
+ */ + if (INTEL_GEN(rps_to_i915(rps)) >= 6) { + struct intel_uncore *uncore = rps_to_uncore(rps); + + set(uncore, + GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val)); + + set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val)); + } + } + + rps->cur_freq = val; + return 0; +} + +static void gen6_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* All of these values are in units of 50MHz */ + + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ + if (IS_GEN9_LP(i915)) { + u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 0) & 0xff; + } else { + u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 16) & 0xff; + } + + /* hw_max = RP0 until we check for overclocking */ + rps->max_freq = rps->rp0_freq; + + rps->efficient_freq = rps->rp1_freq; + if (IS_HASWELL(i915) || IS_BROADWELL(i915) || + IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(i915, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status, NULL) == 0) + rps->efficient_freq = + clamp_t(u8, + (ddcc_status >> 8) & 0xff, + rps->min_freq, + rps->max_freq); + } + + if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + /* Store the frequency values in 16.66 MHZ units, which is + * the natural hardware unit for SKL + */ + rps->rp0_freq *= GEN9_FREQ_SCALER; + rps->rp1_freq *= GEN9_FREQ_SCALER; + rps->min_freq *= GEN9_FREQ_SCALER; + rps->max_freq *= GEN9_FREQ_SCALER; + rps->efficient_freq *= GEN9_FREQ_SCALER; + } +} + +static bool rps_reset(struct intel_rps *rps) +{ + /* force a reset */ + rps->power.mode = -1; + rps->last_freq = -1; + + if (rps_set(rps, rps->min_freq, true)) { + DRM_ERROR("Failed to reset RPS to initial values\n"); + return false; + } + + rps->cur_freq = rps->min_freq; + return true; +} + +/* See the Gen9_GT_PM_Programming_Guide doc for the below */ +static bool gen9_rps_enable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* Program defaults and thresholds for RPS */ + if (IS_GEN(i915, 9)) + intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, + GEN9_FREQUENCY(rps->rp1_freq)); + + /* 1 second timeout */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, + GT_INTERVAL_FROM_US(i915, 1000000)); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); + + return rps_reset(rps); +} + +static bool gen8_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, + HSW_FREQUENCY(rps->rp1_freq)); + + /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, + 100000000 / 128); /* 1 second timeout */ + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + return rps_reset(rps); +} + +static bool gen6_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* Power down if completely idle for over 50ms */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + return rps_reset(rps); +} + +static int chv_rps_max_freq(struct intel_rps *rps) +{ + struct drm_i915_private 
*i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); + + switch (RUNTIME_INFO(i915)->sseu.eu_total) { + case 8: + /* (2 * 4) config */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; + break; + case 12: + /* (2 * 6) config */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; + break; + case 16: + /* (2 * 8) config */ + default: + /* Setting (2 * 8) Min RP0 for any other combination */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; + break; + } + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static int chv_rps_rpe_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); + val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; + + return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; +} + +static int chv_rps_guar_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static u32 chv_rps_min_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); + val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static bool chv_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + /* 1: Program defaults and thresholds for RPS*/ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + /* 2: Enable RPS */ + intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + + /* Setting Fixed Bias */ + vlv_punit_get(i915); + + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; + vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(i915); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + return rps_reset(rps); +} + +static int vlv_rps_guar_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rp1; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; + rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; + + return rp1; +} + +static int vlv_rps_max_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rp0; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; + /* Clamp to max */ + rp0 = min_t(u32, rp0, 0xea); + + return rp0; +} + +static int vlv_rps_rpe_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rpe; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); + rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); + rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; + + return rpe; +} + +static int vlv_rps_min_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; + /* + * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value + * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on + * a BYT-M B0 the above register contains 0xbf. Moreover when setting + * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 + * to make sure it matches what Punit accepts. + */ + return max_t(u32, val, 0xc0); +} + +static bool vlv_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); + + vlv_punit_get(i915); + + /* Setting Fixed Bias */ + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; + vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(i915); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + return rps_reset(rps); +} + +static unsigned long __ips_gfx_val(struct intel_ips *ips) +{ + struct intel_rps *rps = container_of(ips, typeof(*rps), ips); + struct intel_uncore *uncore = rps_to_uncore(rps); + unsigned long t, corr, state1, corr2, state2; + u32 pxvid, ext_v; + + lockdep_assert_held(&mchdev_lock); + + pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); + pxvid = (pxvid >> 24) & 0x7f; + ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); + + state1 = ext_v; + + /* Revel in the empirically derived constants */ + + /* Correction factor in 1/100000 units */ + t = ips_mch_val(uncore); + if (t > 80) + corr = t * 2349 + 135940; + else if (t >= 50) + corr = t * 964 + 29317; + else /* < 50 */ + corr = t * 301 + 1004; + + corr = corr * 150142 * state1 / 10000 - 78642; + corr /= 100000; + corr2 = corr * ips->corr; + + state2 = corr2 * state1 / 10000; + state2 /= 100; /* convert to mW */ + + __gen5_ips_update(ips); + + return ips->gfx_power + state2; +} + +void intel_rps_enable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + if (IS_CHERRYVIEW(i915)) + rps->enabled = chv_rps_enable(rps); + else if (IS_VALLEYVIEW(i915)) + rps->enabled = vlv_rps_enable(rps); + else if (INTEL_GEN(i915) >= 9) + rps->enabled = gen9_rps_enable(rps); + else if (INTEL_GEN(i915) >= 8) + rps->enabled = gen8_rps_enable(rps); + else if (INTEL_GEN(i915) >= 6) + rps->enabled = gen6_rps_enable(rps); + else if (IS_IRONLAKE_M(i915)) + rps->enabled = gen5_rps_enable(rps); + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + if (!rps->enabled) + return; + + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); + + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); +} + +static void gen6_rps_disable(struct intel_rps *rps) +{ + set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); +} + +void intel_rps_disable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + rps->enabled = false; + + if (INTEL_GEN(i915) >= 6) + gen6_rps_disable(rps); + else if (IS_IRONLAKE_M(i915)) + gen5_rps_disable(rps); +} + +static int byt_gpu_freq(struct intel_rps *rps, int val) +{ + /* + * N = val - 0xb7 + * Slow = Fast = GPLL ref * N + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); +} + +static int byt_freq_opcode(struct intel_rps *rps, int val) +{ + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; +} + +static int chv_gpu_freq(struct intel_rps *rps, int val) +{ + /* + * N = val / 2 + * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); +} + +static int chv_freq_opcode(struct intel_rps *rps, int val) +{ + /* CHV needs even values */ + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; +} + +int intel_gpu_freq(struct intel_rps *rps, int val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (INTEL_GEN(i915) >= 9) + return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, + GEN9_FREQ_SCALER); + else if (IS_CHERRYVIEW(i915)) + return chv_gpu_freq(rps, val); + else if (IS_VALLEYVIEW(i915)) + return byt_gpu_freq(rps, val); + else + return val * GT_FREQUENCY_MULTIPLIER; +} + +int intel_freq_opcode(struct intel_rps *rps, int val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if 
(INTEL_GEN(i915) >= 9) + return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, + GT_FREQUENCY_MULTIPLIER); + else if (IS_CHERRYVIEW(i915)) + return chv_freq_opcode(rps, val); + else if (IS_VALLEYVIEW(i915)) + return byt_freq_opcode(rps, val); + else + return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); +} + +static void vlv_init_gpll_ref_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + rps->gpll_ref_freq = + vlv_get_cck_clock(i915, "GPLL ref", + CCK_GPLL_CLOCK_CONTROL, + i915->czclk_freq); + + DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq); +} + +static void vlv_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + vlv_iosf_sb_get(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(rps); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + switch ((val >> 6) & 3) { + case 0: + case 1: + i915->mem_freq = 800; + break; + case 2: + i915->mem_freq = 1066; + break; + case 3: + i915->mem_freq = 1333; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + + rps->max_freq = vlv_rps_max_freq(rps); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = vlv_rps_rpe_freq(rps); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = vlv_rps_guar_freq(rps); + DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = vlv_rps_min_freq(rps); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); +} + +static void chv_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + vlv_iosf_sb_get(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(rps); + + val = vlv_cck_read(i915, CCK_FUSE_REG); + + switch ((val >> 2) & 0x7) { + case 3: + i915->mem_freq = 2000; + break; + default: + i915->mem_freq = 1600; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + + rps->max_freq = chv_rps_max_freq(rps); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = chv_rps_rpe_freq(rps); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = chv_rps_guar_freq(rps); + DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = chv_rps_min_freq(rps); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | + rps->min_freq) & 1, + "Odd GPU freq values\n"); +} + +static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) +{ + ei->ktime = ktime_get_raw(); + ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); + ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); +} + +static u32 vlv_wa_c0_ei(struct 
intel_rps *rps, u32 pm_iir) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + const struct intel_rps_ei *prev = &rps->ei; + struct intel_rps_ei now; + u32 events = 0; + + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) + return 0; + + vlv_c0_read(uncore, &now); + + if (prev->ktime) { + u64 time, c0; + u32 render, media; + + time = ktime_us_delta(now.ktime, prev->ktime); + + time *= rps_to_i915(rps)->czclk_freq; + + /* Workload can be split between render + media, + * e.g. SwapBuffers being blitted in X after being rendered in + * mesa. To account for this we need to combine both engines + * into our activity counter. + */ + render = now.render_c0 - prev->render_c0; + media = now.media_c0 - prev->media_c0; + c0 = max(render, media); + c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ + + if (c0 > time * rps->power.up_threshold) + events = GEN6_PM_RP_UP_THRESHOLD; + else if (c0 < time * rps->power.down_threshold) + events = GEN6_PM_RP_DOWN_THRESHOLD; + } + + rps->ei = now; + return events; +} + +static void rps_work(struct work_struct *work) +{ + struct intel_rps *rps = container_of(work, typeof(*rps), work); + struct intel_gt *gt = rps_to_gt(rps); + bool client_boost = false; + int new_freq, adj, min, max; + u32 pm_iir = 0; + + spin_lock_irq(>->irq_lock); + pm_iir = fetch_and_zero(&rps->pm_iir); + client_boost = atomic_read(&rps->num_waiters); + spin_unlock_irq(>->irq_lock); + + /* Make sure we didn't queue anything we're not going to process. */ + if ((pm_iir & rps->pm_events) == 0 && !client_boost) + goto out; + + mutex_lock(&rps->lock); + + pm_iir |= vlv_wa_c0_ei(rps, pm_iir); + + adj = rps->last_adj; + new_freq = rps->cur_freq; + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; + if (client_boost) + max = rps->max_freq; + if (client_boost && new_freq < rps->boost_freq) { + new_freq = rps->boost_freq; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { + if (adj > 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; + + if (new_freq >= rps->max_freq_softlimit) + adj = 0; + } else if (client_boost) { + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { + if (rps->cur_freq > rps->efficient_freq) + new_freq = rps->efficient_freq; + else if (rps->cur_freq > rps->min_freq_softlimit) + new_freq = rps->min_freq_softlimit; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { + if (adj < 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1; + + if (new_freq <= rps->min_freq_softlimit) + adj = 0; + } else { /* unknown event */ + adj = 0; + } + + rps->last_adj = adj; + + /* + * Limit deboosting and boosting to keep ourselves at the extremes + * when in the respective power modes (i.e. slowly decrease frequencies + * while in the HIGH_POWER zone and slowly increase frequencies while + * in the LOW_POWER zone). On idle, we will hit the timeout and drop + * to the next level quickly, and conversely if busy we expect to + * hit a waitboost and rapidly switch into max power. 
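vlv_wa_c0_ei() above synthesises up/down events in software by comparing the larger of the render/media C0 residency deltas against the wall-clock delta scaled by the current zone's thresholds. A plain-C sketch of just that comparison is below; the event bit values, the czclk_freq of 320000 and the counter deltas in main() are invented for illustration, only the arithmetic mirrors the driver.

	#include <stdio.h>
	#include <stdint.h>

	#define RP_UP_EVENT	(1u << 1)	/* stand-ins, not the real PM bits */
	#define RP_DOWN_EVENT	(1u << 0)

	/*
	 * Mirror of the comparison in vlv_wa_c0_ei(): the busier of the two C0
	 * deltas is scaled to the same units as (wall-clock delta * czclk) and
	 * compared against the %busy thresholds of the current power zone.
	 */
	static uint32_t c0_events(uint64_t time_us, uint64_t czclk_freq,
				  uint32_t render_delta, uint32_t media_delta,
				  uint32_t up_threshold, uint32_t down_threshold)
	{
		uint64_t time = time_us * czclk_freq;
		uint64_t c0 = render_delta > media_delta ? render_delta : media_delta;

		c0 *= 1000 * 100 << 8;	/* to usecs and scale to threshold% */

		if (c0 > time * up_threshold)
			return RP_UP_EVENT;
		if (c0 < time * down_threshold)
			return RP_DOWN_EVENT;
		return 0;
	}

	int main(void)
	{
		/* ~90% busy over 10ms with czclk_freq = 320000: expect an up event */
		printf("events: %#x\n", c0_events(10000, 320000, 11250, 0, 85, 60));
		return 0;
	}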
+ */ + if ((adj < 0 && rps->power.mode == HIGH_POWER) || + (adj > 0 && rps->power.mode == LOW_POWER)) + rps->last_adj = 0; + + /* sysfs frequency interfaces may have snuck in while servicing the + * interrupt + */ + new_freq += adj; + new_freq = clamp_t(int, new_freq, min, max); + + if (intel_rps_set(rps, new_freq)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + rps->last_adj = 0; + } + + mutex_unlock(&rps->lock); + +out: + spin_lock_irq(>->irq_lock); + gen6_gt_pm_unmask_irq(gt, rps->pm_events); + spin_unlock_irq(>->irq_lock); +} + +void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_gt *gt = rps_to_gt(rps); + const u32 events = rps->pm_events & pm_iir; + + lockdep_assert_held(>->irq_lock); + + if (unlikely(!events)) + return; + + gen6_gt_pm_mask_irq(gt, events); + + rps->pm_iir |= events; + schedule_work(&rps->work); +} + +void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_gt *gt = rps_to_gt(rps); + + if (pm_iir & rps->pm_events) { + spin_lock(>->irq_lock); + gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events); + rps->pm_iir |= pm_iir & rps->pm_events; + schedule_work(&rps->work); + spin_unlock(>->irq_lock); + } + + if (INTEL_GEN(gt->i915) >= 8) + return; + + if (pm_iir & PM_VEBOX_USER_INTERRUPT) + intel_engine_signal_breadcrumbs(gt->engine[VECS0]); + + if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) + DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); +} + +void gen5_rps_irq_handler(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 busy_up, busy_down, max_avg, min_avg; + u8 new_freq; + + spin_lock(&mchdev_lock); + + intel_uncore_write16(uncore, + MEMINTRSTS, + intel_uncore_read(uncore, MEMINTRSTS)); + + intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); + busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); + max_avg = intel_uncore_read(uncore, RCBMAXAVG); + min_avg = intel_uncore_read(uncore, RCBMINAVG); + + /* Handle RCS change request from hw */ + new_freq = rps->cur_freq; + if (busy_up > max_avg) + new_freq++; + else if (busy_down < min_avg) + new_freq--; + new_freq = clamp(new_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq)) + rps->cur_freq = new_freq; + + spin_unlock(&mchdev_lock); +} + +void intel_rps_init_early(struct intel_rps *rps) +{ + mutex_init(&rps->lock); + mutex_init(&rps->power.mutex); + + INIT_WORK(&rps->work, rps_work); + + atomic_set(&rps->num_waiters, 0); +} + +void intel_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (IS_CHERRYVIEW(i915)) + chv_rps_init(rps); + else if (IS_VALLEYVIEW(i915)) + vlv_rps_init(rps); + else if (INTEL_GEN(i915) >= 6) + gen6_rps_init(rps); + else if (IS_IRONLAKE_M(i915)) + gen5_rps_init(rps); + + /* Derive initial user preferences/limits from the hardware limits */ + rps->max_freq_softlimit = rps->max_freq; + rps->min_freq_softlimit = rps->min_freq; + + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { + u32 params = 0; + + sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS, + ¶ms, NULL); + if (params & BIT(31)) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (rps->max_freq & 0xff) * 50, + (params & 0xff) * 50); + rps->max_freq = params & 0xff; + } + } + + /* Finally allow us to boost to max by default */ + rps->boost_freq 
= rps->max_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; + + rps->pm_intrmsk_mbz = 0; + + /* + * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. + * + * TODO: verify if this can be reproduced on VLV,CHV. + */ + if (INTEL_GEN(i915) <= 7) + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; + + if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11) + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; +} + +u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 cagf; + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + cagf = (rpstat >> 8) & 0xff; + else if (INTEL_GEN(i915) >= 9) + cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + else + cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + + return cagf; +} + +static u32 read_cagf(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 freq; + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + vlv_punit_get(i915); + freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(i915); + } else { + freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1); + } + + return intel_rps_get_cagf(rps, freq); +} + +u32 intel_rps_read_actual_frequency(struct intel_rps *rps) +{ + struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm; + intel_wakeref_t wakeref; + u32 freq = 0; + + with_intel_runtime_pm_if_in_use(rpm, wakeref) + freq = intel_gpu_freq(rps, read_cagf(rps)); + + return freq; +} + +/* External interface for intel_ips.ko */ + +static struct drm_i915_private __rcu *ips_mchdev; + +/** + * Tells the intel_ips driver that the i915 driver is now loaded, if + * IPS got loaded first. + * + * This awkward dance is so that neither module has to depend on the + * other in order for IPS to do the appropriate communication of + * GPU turbo limits to i915. + */ +static void +ips_ping_for_i915_load(void) +{ + void (*link)(void); + + link = symbol_get(ips_link_to_i915_driver); + if (link) { + link(); + symbol_put(ips_link_to_i915_driver); + } +} + +void intel_rps_driver_register(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + /* + * We only register the i915 ips part with intel-ips once everything is + * set up, to avoid intel-ips sneaking in and reading bogus values. + */ + if (IS_GEN(gt->i915, 5)) { + GEM_BUG_ON(ips_mchdev); + rcu_assign_pointer(ips_mchdev, gt->i915); + ips_ping_for_i915_load(); + } +} + +void intel_rps_driver_unregister(struct intel_rps *rps) +{ + if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps)) + rcu_assign_pointer(ips_mchdev, NULL); +} + +static struct drm_i915_private *mchdev_get(void) +{ + struct drm_i915_private *i915; + + rcu_read_lock(); + i915 = rcu_dereference(ips_mchdev); + if (!kref_get_unless_zero(&i915->drm.ref)) + i915 = NULL; + rcu_read_unlock(); + + return i915; +} + +/** + * i915_read_mch_val - return value for IPS use + * + * Calculate and return a value for the IPS driver to use when deciding whether + * we have thermal and power headroom to increase CPU or GPU power budget. 
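intel_rps_read_actual_frequency() a little earlier in this hunk converts the CAGF field back to MHz via intel_gpu_freq(), and the unit handling is easy to get wrong. Here is a worked plain-C sketch of the simple conversion cases, taking GT_FREQUENCY_MULTIPLIER and GEN9_FREQ_SCALER to be 50 and 3 (i.e. 50 MHz and ~16.67 MHz steps, as the comments in gen6_rps_init() describe); the VLV/CHV GPLL-based conversions are left out.

	#include <stdio.h>

	#define GT_FREQUENCY_MULTIPLIER	50	/* pre-gen9: one step = 50 MHz */
	#define GEN9_FREQ_SCALER	3	/* gen9+: one step = 50/3 ~ 16.67 MHz */

	/* gen6..gen8 style: opcode <-> MHz in 50 MHz units, rounded to nearest */
	static int gen6_gpu_freq(int val)    { return val * GT_FREQUENCY_MULTIPLIER; }
	static int gen6_freq_opcode(int mhz) { return (mhz + GT_FREQUENCY_MULTIPLIER / 2) / GT_FREQUENCY_MULTIPLIER; }

	/* gen9+ style: same multiplier, but the hardware unit is a third of it */
	static int gen9_gpu_freq(int val)
	{
		return (val * GT_FREQUENCY_MULTIPLIER + GEN9_FREQ_SCALER / 2) / GEN9_FREQ_SCALER;
	}

	int main(void)
	{
		/* e.g. an RP0 fuse value of 22 on a gen8 part -> 1100 MHz */
		printf("gen8: opcode 22 -> %d MHz\n", gen6_gpu_freq(22));
		printf("gen8: 1100 MHz -> opcode %d\n", gen6_freq_opcode(1100));

		/* the same fuse value is scaled by 3 on gen9, still 1100 MHz */
		printf("gen9: opcode %d -> %d MHz\n", 22 * GEN9_FREQ_SCALER,
		       gen9_gpu_freq(22 * GEN9_FREQ_SCALER));
		return 0;
	}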
+ */ +unsigned long i915_read_mch_val(void) +{ + struct drm_i915_private *i915; + unsigned long chipset_val = 0; + unsigned long graphics_val = 0; + intel_wakeref_t wakeref; + + i915 = mchdev_get(); + if (!i915) + return 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + struct intel_ips *ips = &i915->gt.rps.ips; + + spin_lock_irq(&mchdev_lock); + chipset_val = __ips_chipset_val(ips); + graphics_val = __ips_gfx_val(ips); + spin_unlock_irq(&mchdev_lock); + } + + drm_dev_put(&i915->drm); + return chipset_val + graphics_val; +} +EXPORT_SYMBOL_GPL(i915_read_mch_val); + +/** + * i915_gpu_raise - raise GPU frequency limit + * + * Raise the limit; IPS indicates we have thermal headroom. + */ +bool i915_gpu_raise(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + if (rps->max_freq_softlimit < rps->max_freq) + rps->max_freq_softlimit++; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_raise); + +/** + * i915_gpu_lower - lower GPU frequency limit + * + * IPS indicates we're close to a thermal limit, so throttle back the GPU + * frequency maximum. + */ +bool i915_gpu_lower(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + if (rps->max_freq_softlimit > rps->min_freq) + rps->max_freq_softlimit--; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_lower); + +/** + * i915_gpu_busy - indicate GPU business to IPS + * + * Tell the IPS driver whether or not the GPU is busy. + */ +bool i915_gpu_busy(void) +{ + struct drm_i915_private *i915; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + ret = i915->gt.awake; + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_busy); + +/** + * i915_gpu_turbo_disable - disable graphics turbo + * + * Disable graphics turbo by resetting the max frequency and setting the + * current frequency to the default. 
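The EXPORT_SYMBOL_GPL hooks in this block exist solely for intel_ips.ko. A hedged sketch of how such a consumer could couple to them without a hard module dependency, mirroring the symbol_get() dance used for ips_link_to_i915_driver above; the consumer-side function name here is hypothetical, only i915_read_mch_val() comes from the patch.

	#include <linux/module.h>
	#include <linux/kernel.h>

	/* Exported by i915 (see i915_read_mch_val() above). */
	extern unsigned long i915_read_mch_val(void);

	static void example_query_gpu_headroom(void)
	{
		unsigned long (*read_mch)(void);

		/*
		 * symbol_get() only succeeds if i915 is loaded, avoiding a hard
		 * module dependency in either direction, just like the
		 * ips_ping_for_i915_load() helper does from the i915 side.
		 */
		read_mch = symbol_get(i915_read_mch_val);
		if (!read_mch)
			return;

		pr_info("i915 energy estimate: %lu\n", read_mch());

		symbol_put(i915_read_mch_val);
	}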
+ */ +bool i915_gpu_turbo_disable(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + rps->max_freq_softlimit = rps->min_freq; + ret = gen5_rps_set(&i915->gt.rps, rps->min_freq); + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h new file mode 100644 index 000000000000..dfa98194f3b2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RPS_H +#define INTEL_RPS_H + +#include "intel_rps_types.h" + +struct i915_request; + +void intel_rps_init_early(struct intel_rps *rps); +void intel_rps_init(struct intel_rps *rps); + +void intel_rps_driver_register(struct intel_rps *rps); +void intel_rps_driver_unregister(struct intel_rps *rps); + +void intel_rps_enable(struct intel_rps *rps); +void intel_rps_disable(struct intel_rps *rps); + +void intel_rps_park(struct intel_rps *rps); +void intel_rps_unpark(struct intel_rps *rps); +void intel_rps_boost(struct i915_request *rq); + +int intel_rps_set(struct intel_rps *rps, u8 val); +void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); + +int intel_gpu_freq(struct intel_rps *rps, int val); +int intel_freq_opcode(struct intel_rps *rps, int val); +u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); +u32 intel_rps_read_actual_frequency(struct intel_rps *rps); + +void gen5_rps_irq_handler(struct intel_rps *rps); +void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); +void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); + +#endif /* INTEL_RPS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h new file mode 100644 index 000000000000..c2e279154bd5 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -0,0 +1,93 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RPS_TYPES_H +#define INTEL_RPS_TYPES_H + +#include <linux/atomic.h> +#include <linux/ktime.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +struct intel_ips { + u64 last_count1; + unsigned long last_time1; + unsigned long chipset_power; + u64 last_count2; + u64 last_time2; + unsigned long gfx_power; + u8 corr; + + int c, m; +}; + +struct intel_rps_ei { + ktime_t ktime; + u32 render_c0; + u32 media_c0; +}; + +struct intel_rps { + struct mutex lock; /* protects enabling and the worker */ + + /* + * work, interrupts_enabled and pm_iir are protected by + * dev_priv->irq_lock + */ + struct work_struct work; + bool enabled; + bool active; + u32 pm_iir; + + /* PM interrupt bits that should never be masked */ + u32 pm_intrmsk_mbz; + u32 pm_events; + + /* Frequencies are stored in potentially platform dependent multiples. + * In other words, *_freq needs to be multiplied by X to be interesting. + * Soft limits are those which are used for the dynamic reclocking done + * by the driver (raise frequencies under heavy loads, and lower for + * lighter loads). Hard limits are those imposed by the hardware. + * + * A distinction is made for overclocking, which is never enabled by + * default, and is considered to be above the hard limit if it's + * possible at all. 
+ */ + u8 cur_freq; /* Current frequency (cached, may not == HW) */ + u8 last_freq; /* Last SWREQ frequency */ + u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */ + u8 max_freq_softlimit; /* Max frequency permitted by the driver */ + u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ + u8 min_freq; /* AKA RPn. Minimum frequency */ + u8 boost_freq; /* Frequency to request when wait boosting */ + u8 idle_freq; /* Frequency to request when we are idle */ + u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ + u8 rp1_freq; /* "less than" RP0 power/freqency */ + u8 rp0_freq; /* Non-overclocked max frequency. */ + u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ + + int last_adj; + + struct { + struct mutex mutex; + + enum { LOW_POWER, BETWEEN, HIGH_POWER } mode; + unsigned int interactive; + + u8 up_threshold; /* Current %busy required to uplock */ + u8 down_threshold; /* Current %busy required to downclock */ + } power; + + atomic_t num_waiters; + atomic_t boosts; + + /* manual wa residency calculations */ + struct intel_rps_ei ei; + struct intel_ips ips; +}; + +#endif /* INTEL_RPS_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index a0756f006f5f..74f793423231 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -8,6 +8,19 @@ #include "intel_lrc_reg.h" #include "intel_sseu.h" +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, + u8 max_subslices, u8 max_eus_per_subslice) +{ + sseu->max_slices = max_slices; + sseu->max_subslices = max_subslices; + sseu->max_eus_per_subslice = max_eus_per_subslice; + + sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); + GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE); + sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); + GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE); +} + unsigned int intel_sseu_subslice_total(const struct sseu_dev_info *sseu) { @@ -19,10 +32,32 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) return total; } +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +{ + int i, offset = slice * sseu->ss_stride; + u32 mask = 0; + + GEM_BUG_ON(slice >= sseu->max_slices); + + for (i = 0; i < sseu->ss_stride; i++) + mask |= (u32)sseu->subslice_mask[offset + i] << + i * BITS_PER_BYTE; + + return mask; +} + +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, + u32 ss_mask) +{ + int offset = slice * sseu->ss_stride; + + memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride); +} + unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) { - return hweight8(sseu->subslice_mask[slice]); + return hweight32(intel_sseu_get_subslices(sseu, slice)); } u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, @@ -49,7 +84,7 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, * cases which disable slices for functional, apart for performance * reasons. So in this case we select a known stable subset. 
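intel_sseu_get_subslices()/intel_sseu_set_subslices() in the intel_sseu.c hunk above flatten a per-slice u32 mask into a byte array with a per-slice stride. A plain-C sketch of the same packing, with GEN_SSEU_STRIDE and the i915 types replaced by stdint equivalents; like the driver's memcpy, the store assumes a little-endian host.

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>

	#define BITS_PER_BYTE	8
	#define MAX_SLICES	6
	#define MAX_SUBSLICES	8
	/* DIV_ROUND_UP(MAX_SUBSLICES, BITS_PER_BYTE): one byte per slice here */
	#define SS_STRIDE	((MAX_SUBSLICES + BITS_PER_BYTE - 1) / BITS_PER_BYTE)

	static uint8_t subslice_mask[MAX_SLICES * SS_STRIDE];

	/* copy the low SS_STRIDE bytes, as intel_sseu_set_subslices() does */
	static void set_subslices(int slice, uint32_t ss_mask)
	{
		memcpy(&subslice_mask[slice * SS_STRIDE], &ss_mask, SS_STRIDE);
	}

	/* rebuild the u32 byte by byte, as intel_sseu_get_subslices() does */
	static uint32_t get_subslices(int slice)
	{
		uint32_t mask = 0;
		int i;

		for (i = 0; i < SS_STRIDE; i++)
			mask |= (uint32_t)subslice_mask[slice * SS_STRIDE + i] <<
				(i * BITS_PER_BYTE);

		return mask;
	}

	int main(void)
	{
		set_subslices(1, 0x3f);	/* e.g. 6 subslices enabled on slice 1 */
		printf("slice 1 subslices: %#x (%d enabled)\n",
		       get_subslices(1), __builtin_popcount(get_subslices(1)));
		return 0;
	}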
*/ - if (!i915->perf.oa.exclusive_stream) { + if (!i915->perf.exclusive_stream) { ctx_sseu = *req_sseu; } else { ctx_sseu = intel_sseu_from_device_info(sseu); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index b50d0401a4e2..d1d225204f09 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -10,15 +10,21 @@ #include <linux/types.h> #include <linux/kernel.h> +#include "i915_gem.h" + struct drm_i915_private; #define GEN_MAX_SLICES (6) /* CNL upper bound */ #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) +#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) +#define GEN_MAX_EUS (16) /* TGL upper bound */ +#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask[GEN_MAX_SLICES]; + u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -33,11 +39,8 @@ struct sseu_dev_info { u8 max_subslices; u8 max_eus_per_subslice; - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. - */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; + u8 ss_stride; + u8 eu_stride; }; /* @@ -63,12 +66,34 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu) return value; } +static inline bool +intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice, + int subslice) +{ + u8 mask; + int ss_idx = subslice / BITS_PER_BYTE; + + GEM_BUG_ON(ss_idx >= sseu->ss_stride); + + mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx]; + + return mask & BIT(subslice % BITS_PER_BYTE); +} + +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, + u8 max_subslices, u8 max_eus_per_subslice); + unsigned int intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); + +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, + u32 ss_mask); + u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, const struct intel_sseu *req_sseu); diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index c311ce9c6f9d..87716529cd2f 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -8,34 +8,27 @@ #include "i915_active.h" #include "i915_syncmap.h" -#include "i915_timeline.h" +#include "intel_gt.h" +#include "intel_ring.h" +#include "intel_timeline.h" #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) -struct i915_timeline_hwsp { - struct i915_gt_timelines *gt; +#define CACHELINE_BITS 6 +#define CACHELINE_FREE CACHELINE_BITS + +struct intel_timeline_hwsp { + struct intel_gt *gt; + struct intel_gt_timelines *gt_timelines; struct list_head free_link; struct i915_vma *vma; u64 free_bitmap; }; -struct i915_timeline_cacheline { - struct i915_active active; - struct i915_timeline_hwsp *hwsp; - void *vaddr; -#define CACHELINE_BITS 6 -#define CACHELINE_FREE CACHELINE_BITS -}; - -static inline struct drm_i915_private * -hwsp_to_i915(struct i915_timeline_hwsp *hwsp) -{ - return container_of(hwsp->gt, struct 
drm_i915_private, gt.timelines); -} - -static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) +static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -45,7 +38,7 @@ static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) i915_gem_object_put(obj); @@ -53,11 +46,10 @@ static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) } static struct i915_vma * -hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) +hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline) { - struct drm_i915_private *i915 = timeline->i915; - struct i915_gt_timelines *gt = &i915->gt.timelines; - struct i915_timeline_hwsp *hwsp; + struct intel_gt_timelines *gt = &timeline->gt->timelines; + struct intel_timeline_hwsp *hwsp; BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); @@ -75,16 +67,17 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) if (!hwsp) return ERR_PTR(-ENOMEM); - vma = __hwsp_alloc(i915); + vma = __hwsp_alloc(timeline->gt); if (IS_ERR(vma)) { kfree(hwsp); return vma; } vma->private = hwsp; + hwsp->gt = timeline->gt; hwsp->vma = vma; hwsp->free_bitmap = ~0ull; - hwsp->gt = gt; + hwsp->gt_timelines = gt; spin_lock_irq(>->hwsp_lock); list_add(&hwsp->free_link, >->hwsp_free_list); @@ -102,9 +95,9 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) return hwsp->vma; } -static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline) +static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline) { - struct i915_gt_timelines *gt = hwsp->gt; + struct intel_gt_timelines *gt = hwsp->gt_timelines; unsigned long flags; spin_lock_irqsave(>->hwsp_lock, flags); @@ -126,7 +119,7 @@ static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline) spin_unlock_irqrestore(>->hwsp_lock, flags); } -static void __idle_cacheline_free(struct i915_timeline_cacheline *cl) +static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) { GEM_BUG_ON(!i915_active_is_idle(&cl->active)); @@ -135,12 +128,13 @@ static void __idle_cacheline_free(struct i915_timeline_cacheline *cl) __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); i915_active_fini(&cl->active); - kfree(cl); + kfree_rcu(cl, rcu); } +__i915_active_call static void __cacheline_retire(struct i915_active *active) { - struct i915_timeline_cacheline *cl = + struct intel_timeline_cacheline *cl = container_of(active, typeof(*cl), active); i915_vma_unpin(cl->hwsp->vma); @@ -148,10 +142,19 @@ static void __cacheline_retire(struct i915_active *active) __idle_cacheline_free(cl); } -static struct i915_timeline_cacheline * -cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline) +static int __cacheline_active(struct i915_active *active) { - struct i915_timeline_cacheline *cl; + struct intel_timeline_cacheline *cl = + container_of(active, typeof(*cl), active); + + __i915_vma_pin(cl->hwsp->vma); + return 0; +} + +static struct intel_timeline_cacheline * +cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) +{ + struct intel_timeline_cacheline *cl; void *vaddr; GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS)); @@ -170,24 +173,24 @@ cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int 
cacheline) cl->hwsp = hwsp; cl->vaddr = page_pack_bits(vaddr, cacheline); - i915_active_init(hwsp_to_i915(hwsp), &cl->active, __cacheline_retire); + i915_active_init(&cl->active, __cacheline_active, __cacheline_retire); return cl; } -static void cacheline_acquire(struct i915_timeline_cacheline *cl) +static void cacheline_acquire(struct intel_timeline_cacheline *cl) { - if (cl && i915_active_acquire(&cl->active)) - __i915_vma_pin(cl->hwsp->vma); + if (cl) + i915_active_acquire(&cl->active); } -static void cacheline_release(struct i915_timeline_cacheline *cl) +static void cacheline_release(struct intel_timeline_cacheline *cl) { if (cl) i915_active_release(&cl->active); } -static void cacheline_free(struct i915_timeline_cacheline *cl) +static void cacheline_free(struct intel_timeline_cacheline *cl) { GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); @@ -196,29 +199,22 @@ static void cacheline_free(struct i915_timeline_cacheline *cl) __idle_cacheline_free(cl); } -int i915_timeline_init(struct drm_i915_private *i915, - struct i915_timeline *timeline, - struct i915_vma *hwsp) +int intel_timeline_init(struct intel_timeline *timeline, + struct intel_gt *gt, + struct i915_vma *hwsp) { void *vaddr; - /* - * Ideally we want a set of engines on a single leaf as we expect - * to mostly be tracking synchronisation between engines. It is not - * a huge issue if this is not the case, but we may want to mitigate - * any page crossing penalties if they become an issue. - * - * Called during early_init before we know how many engines there are. - */ - BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + kref_init(&timeline->kref); + atomic_set(&timeline->pin_count, 0); + + timeline->gt = gt; - timeline->i915 = i915; - timeline->pin_count = 0; timeline->has_initial_breadcrumb = !hwsp; timeline->hwsp_cacheline = NULL; if (!hwsp) { - struct i915_timeline_cacheline *cl; + struct intel_timeline_cacheline *cl; unsigned int cacheline; hwsp = hwsp_alloc(timeline, &cacheline); @@ -253,7 +249,7 @@ int i915_timeline_init(struct drm_i915_private *i915, mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->last_request); + INIT_ACTIVE_FENCE(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); @@ -261,72 +257,22 @@ int i915_timeline_init(struct drm_i915_private *i915, return 0; } -void i915_timelines_init(struct drm_i915_private *i915) +void intel_gt_init_timelines(struct intel_gt *gt) { - struct i915_gt_timelines *gt = &i915->gt.timelines; - - mutex_init(>->mutex); - INIT_LIST_HEAD(>->active_list); + struct intel_gt_timelines *timelines = >->timelines; - spin_lock_init(>->hwsp_lock); - INIT_LIST_HEAD(>->hwsp_free_list); + spin_lock_init(&timelines->lock); + INIT_LIST_HEAD(&timelines->active_list); - /* via i915_gem_wait_for_idle() */ - i915_gem_shrinker_taints_mutex(i915, >->mutex); + spin_lock_init(&timelines->hwsp_lock); + INIT_LIST_HEAD(&timelines->hwsp_free_list); } -static void timeline_add_to_active(struct i915_timeline *tl) +void intel_timeline_fini(struct intel_timeline *timeline) { - struct i915_gt_timelines *gt = &tl->i915->gt.timelines; - - mutex_lock(>->mutex); - list_add(&tl->link, >->active_list); - mutex_unlock(>->mutex); -} - -static void timeline_remove_from_active(struct i915_timeline *tl) -{ - struct i915_gt_timelines *gt = &tl->i915->gt.timelines; - - mutex_lock(>->mutex); - list_del(&tl->link); - mutex_unlock(>->mutex); -} - -/** - * i915_timelines_park - called when the driver idles - * @i915: the 
drm_i915_private device - * - * When the driver is completely idle, we know that all of our sync points - * have been signaled and our tracking is then entirely redundant. Any request - * to wait upon an older sync point will be completed instantly as we know - * the fence is signaled and therefore we will not even look them up in the - * sync point map. - */ -void i915_timelines_park(struct drm_i915_private *i915) -{ - struct i915_gt_timelines *gt = &i915->gt.timelines; - struct i915_timeline *timeline; - - mutex_lock(>->mutex); - list_for_each_entry(timeline, >->active_list, link) { - /* - * All known fences are completed so we can scrap - * the current sync point tracking and start afresh, - * any attempt to wait upon a previous sync point - * will be skipped as the fence was signaled. - */ - i915_syncmap_free(&timeline->sync); - } - mutex_unlock(>->mutex); -} - -void i915_timeline_fini(struct i915_timeline *timeline) -{ - GEM_BUG_ON(timeline->pin_count); + GEM_BUG_ON(atomic_read(&timeline->pin_count)); GEM_BUG_ON(!list_empty(&timeline->requests)); - - i915_syncmap_free(&timeline->sync); + GEM_BUG_ON(timeline->retire); if (timeline->hwsp_cacheline) cacheline_free(timeline->hwsp_cacheline); @@ -336,73 +282,125 @@ void i915_timeline_fini(struct i915_timeline *timeline) i915_vma_put(timeline->hwsp_ggtt); } -struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, - struct i915_vma *global_hwsp) +struct intel_timeline * +intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) { - struct i915_timeline *timeline; + struct intel_timeline *timeline; int err; timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); if (!timeline) return ERR_PTR(-ENOMEM); - err = i915_timeline_init(i915, timeline, global_hwsp); + err = intel_timeline_init(timeline, gt, global_hwsp); if (err) { kfree(timeline); return ERR_PTR(err); } - kref_init(&timeline->kref); - return timeline; } -int i915_timeline_pin(struct i915_timeline *tl) +int intel_timeline_pin(struct intel_timeline *tl) { int err; - if (tl->pin_count++) + if (atomic_add_unless(&tl->pin_count, 1, 0)) return 0; - GEM_BUG_ON(!tl->pin_count); err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) - goto unpin; + return err; tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + offset_in_page(tl->hwsp_offset); cacheline_acquire(tl->hwsp_cacheline); - timeline_add_to_active(tl); + if (atomic_fetch_inc(&tl->pin_count)) { + cacheline_release(tl->hwsp_cacheline); + __i915_vma_unpin(tl->hwsp_ggtt); + } return 0; +} -unpin: - tl->pin_count = 0; - return err; +void intel_timeline_enter(struct intel_timeline *tl) +{ + struct intel_gt_timelines *timelines = &tl->gt->timelines; + + /* + * Pretend we are serialised by the timeline->mutex. + * + * While generally true, there are a few exceptions to the rule + * for the engine->kernel_context being used to manage power + * transitions. As the engine_park may be called from under any + * timeline, it uses the power mutex as a global serialisation + * lock to prevent any other request entering its timeline. + * + * The rule is generally tl->mutex, otherwise engine->wakeref.mutex. + * + * However, intel_gt_retire_request() does not know which engine + * it is retiring along and so cannot partake in the engine-pm + * barrier, and there we use the tl->active_count as a means to + * pin the timeline in the active_list while the locks are dropped. + * Ergo, as that is outside of the engine-pm barrier, we need to + * use atomic to manipulate tl->active_count. 
+ */ + lockdep_assert_held(&tl->mutex); + + if (atomic_add_unless(&tl->active_count, 1, 0)) + return; + + spin_lock(&timelines->lock); + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); + spin_unlock(&timelines->lock); } -static u32 timeline_advance(struct i915_timeline *tl) +void intel_timeline_exit(struct intel_timeline *tl) { - GEM_BUG_ON(!tl->pin_count); + struct intel_gt_timelines *timelines = &tl->gt->timelines; + + /* See intel_timeline_enter() */ + lockdep_assert_held(&tl->mutex); + + GEM_BUG_ON(!atomic_read(&tl->active_count)); + if (atomic_add_unless(&tl->active_count, -1, 1)) + return; + + spin_lock(&timelines->lock); + if (atomic_dec_and_test(&tl->active_count)) + list_del(&tl->link); + spin_unlock(&timelines->lock); + + /* + * Since this timeline is idle, all bariers upon which we were waiting + * must also be complete and so we can discard the last used barriers + * without loss of information. + */ + i915_syncmap_free(&tl->sync); +} + +static u32 timeline_advance(struct intel_timeline *tl) +{ + GEM_BUG_ON(!atomic_read(&tl->pin_count)); GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb); return tl->seqno += 1 + tl->has_initial_breadcrumb; } -static void timeline_rollback(struct i915_timeline *tl) +static void timeline_rollback(struct intel_timeline *tl) { tl->seqno -= 1 + tl->has_initial_breadcrumb; } static noinline int -__i915_timeline_get_seqno(struct i915_timeline *tl, - struct i915_request *rq, - u32 *seqno) +__intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno) { - struct i915_timeline_cacheline *cl; + struct intel_timeline_cacheline *cl; unsigned int cacheline; struct i915_vma *vma; void *vaddr; @@ -452,8 +450,7 @@ __i915_timeline_get_seqno(struct i915_timeline *tl, * free it after the current request is retired, which ensures that * all writes into the cacheline from previous requests are complete. 
*/ - err = i915_active_ref(&tl->hwsp_cacheline->active, - tl->fence_context, rq); + err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence); if (err) goto err_cacheline; @@ -488,92 +485,89 @@ err_rollback: return err; } -int i915_timeline_get_seqno(struct i915_timeline *tl, - struct i915_request *rq, - u32 *seqno) +int intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno) { *seqno = timeline_advance(tl); /* Replace the HWSP on wraparound for HW semaphores */ if (unlikely(!*seqno && tl->hwsp_cacheline)) - return __i915_timeline_get_seqno(tl, rq, seqno); + return __intel_timeline_get_seqno(tl, rq, seqno); return 0; } -static int cacheline_ref(struct i915_timeline_cacheline *cl, +static int cacheline_ref(struct intel_timeline_cacheline *cl, struct i915_request *rq) { - return i915_active_ref(&cl->active, rq->fence.context, rq); + return i915_active_add_request(&cl->active, rq); } -int i915_timeline_read_hwsp(struct i915_request *from, - struct i915_request *to, - u32 *hwsp) +int intel_timeline_read_hwsp(struct i915_request *from, + struct i915_request *to, + u32 *hwsp) { - struct i915_timeline_cacheline *cl = from->hwsp_cacheline; - struct i915_timeline *tl = from->timeline; + struct intel_timeline_cacheline *cl; int err; - GEM_BUG_ON(to->timeline == tl); - - mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); - err = i915_request_completed(from); - if (!err) - err = cacheline_ref(cl, to); - if (!err) { - if (likely(cl == tl->hwsp_cacheline)) { - *hwsp = tl->hwsp_offset; - } else { /* across a seqno wrap, recover the original offset */ - *hwsp = i915_ggtt_offset(cl->hwsp->vma) + - ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * - CACHELINE_BYTES; - } - } - mutex_unlock(&tl->mutex); + GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline)); + + rcu_read_lock(); + cl = rcu_dereference(from->hwsp_cacheline); + if (unlikely(!i915_active_acquire_if_busy(&cl->active))) + goto unlock; /* seqno wrapped and completed! */ + if (unlikely(i915_request_completed(from))) + goto release; + rcu_read_unlock(); + + err = cacheline_ref(cl, to); + if (err) + goto out; + *hwsp = i915_ggtt_offset(cl->hwsp->vma) + + ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES; + +out: + i915_active_release(&cl->active); return err; + +release: + i915_active_release(&cl->active); +unlock: + rcu_read_unlock(); + return 1; } -void i915_timeline_unpin(struct i915_timeline *tl) +void intel_timeline_unpin(struct intel_timeline *tl) { - GEM_BUG_ON(!tl->pin_count); - if (--tl->pin_count) + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + if (!atomic_dec_and_test(&tl->pin_count)) return; - timeline_remove_from_active(tl); cacheline_release(tl->hwsp_cacheline); - /* - * Since this timeline is idle, all bariers upon which we were waiting - * must also be complete and so we can discard the last used barriers - * without loss of information. 
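The syncmap being discarded exists so that redundant waits can be skipped; a rough sketch of how the helpers declared in the new intel_timeline.h (further down) are used for that, with emit_wait_for() as a hypothetical stand-in for the real await path:

static int example_await(struct intel_timeline *tl, struct dma_fence *fence)
{
        /* Illustrative sketch only, not part of the patch. */
        int err;

        /* A later (or equal) sync point was already waited upon: skip. */
        if (intel_timeline_sync_is_later(tl, fence))
                return 0;

        err = emit_wait_for(fence); /* hypothetical await path */
        if (err)
                return err;

        /* Record the most recent sync point seen per fence context. */
        return intel_timeline_sync_set(tl, fence);
}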
- */ - i915_syncmap_free(&tl->sync); - __i915_vma_unpin(tl->hwsp_ggtt); } -void __i915_timeline_free(struct kref *kref) +void __intel_timeline_free(struct kref *kref) { - struct i915_timeline *timeline = + struct intel_timeline *timeline = container_of(kref, typeof(*timeline), kref); - i915_timeline_fini(timeline); - kfree(timeline); + intel_timeline_fini(timeline); + kfree_rcu(timeline, rcu); } -void i915_timelines_fini(struct drm_i915_private *i915) +void intel_gt_fini_timelines(struct intel_gt *gt) { - struct i915_gt_timelines *gt = &i915->gt.timelines; - - GEM_BUG_ON(!list_empty(>->active_list)); - GEM_BUG_ON(!list_empty(>->hwsp_free_list)); + struct intel_gt_timelines *timelines = >->timelines; - mutex_destroy(>->mutex); + GEM_BUG_ON(!list_empty(&timelines->active_list)); + GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_timeline.c" -#include "selftests/i915_timeline.c" +#include "gt/selftests/mock_timeline.c" +#include "gt/selftest_timeline.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h new file mode 100644 index 000000000000..f5b7eade3809 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -0,0 +1,94 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef I915_TIMELINE_H +#define I915_TIMELINE_H + +#include <linux/lockdep.h> + +#include "i915_active.h" +#include "i915_syncmap.h" +#include "gt/intel_timeline_types.h" + +int intel_timeline_init(struct intel_timeline *tl, + struct intel_gt *gt, + struct i915_vma *hwsp); +void intel_timeline_fini(struct intel_timeline *tl); + +struct intel_timeline * +intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); + +static inline struct intel_timeline * +intel_timeline_get(struct intel_timeline *timeline) +{ + kref_get(&timeline->kref); + return timeline; +} + +void __intel_timeline_free(struct kref *kref); +static inline void intel_timeline_put(struct intel_timeline *timeline) +{ + kref_put(&timeline->kref, __intel_timeline_free); +} + +static inline int __intel_timeline_sync_set(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_set(&tl->sync, context, seqno); +} + +static inline int intel_timeline_sync_set(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno); +} + +static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_is_later(&tl->sync, context, seqno); +} + +static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); +} + +int intel_timeline_pin(struct intel_timeline *tl); +void intel_timeline_enter(struct intel_timeline *tl); +int intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno); +void intel_timeline_exit(struct intel_timeline *tl); +void intel_timeline_unpin(struct intel_timeline *tl); + +int intel_timeline_read_hwsp(struct i915_request *from, + struct i915_request *until, + u32 *hwsp_offset); + +void intel_gt_init_timelines(struct intel_gt *gt); +void intel_gt_fini_timelines(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h new file mode 100644 index 000000000000..02181c5020db --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -0,0 +1,100 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __I915_TIMELINE_TYPES_H__ +#define __I915_TIMELINE_TYPES_H__ + +#include <linux/list.h> +#include <linux/kref.h> +#include <linux/mutex.h> +#include <linux/rcupdate.h> +#include <linux/types.h> + +#include "i915_active_types.h" + +struct i915_vma; +struct i915_syncmap; +struct intel_gt; +struct intel_timeline_hwsp; + +struct intel_timeline { + u64 fence_context; + u32 seqno; + + struct mutex mutex; /* protects the flow of requests */ + + /* + * pin_count and active_count track essentially the same thing: + * How many requests are in flight or may be under construction. + * + * We need two distinct counters so that we can assign different + * lifetimes to the events for different use-cases. For example, + * we want to permanently keep the timeline pinned for the kernel + * context so that we can issue requests at any time without having + * to acquire space in the GGTT. However, we want to keep tracking + * the activity (to be able to detect when we become idle) along that + * permanently pinned timeline and so end up requiring two counters. 
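A rough sketch of the intended call order for the renamed API declared above (create, pin before allocating seqnos, unpin, put); error handling is trimmed and the request is assumed to exist already:

static int example_timeline_use(struct intel_gt *gt, struct i915_request *rq)
{
        /* Illustrative sketch only, not part of the patch. */
        struct intel_timeline *tl;
        u32 seqno;
        int err;

        tl = intel_timeline_create(gt, NULL); /* NULL: carve out a HWSP slot */
        if (IS_ERR(tl))
                return PTR_ERR(tl);

        err = intel_timeline_pin(tl);
        if (err)
                goto out_put;

        /* Seqnos may only be allocated while the timeline is pinned. */
        err = intel_timeline_get_seqno(tl, rq, &seqno);

        intel_timeline_unpin(tl);
out_put:
        intel_timeline_put(tl);
        return err;
}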
+ * + * Note that the active_count is protected by the intel_timeline.mutex, + * but the pin_count is protected by a combination of serialisation + * from the intel_context caller plus internal atomicity. + */ + atomic_t pin_count; + atomic_t active_count; + + const u32 *hwsp_seqno; + struct i915_vma *hwsp_ggtt; + u32 hwsp_offset; + + struct intel_timeline_cacheline *hwsp_cacheline; + + bool has_initial_breadcrumb; + + /** + * List of breadcrumbs associated with GPU requests currently + * outstanding. + */ + struct list_head requests; + + /* + * Contains an RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_active_fence_get(), or manage the RCU + * protection themselves (cf the i915_active_fence API). + */ + struct i915_active_fence last_request; + + /** A chain of completed timelines ready for early retirement. */ + struct intel_timeline *retire; + + /** + * We track the most recent seqno that we wait on in every context so + * that we only have to emit a new await and dependency on a more + * recent sync point. As the contexts may be executed out-of-order, we + * have to track each individually and can not rely on an absolute + * global_seqno. When we know that all tracked fences are completed + * (i.e. when the driver is idle), we know that the syncmap is + * redundant and we can discard it without loss of generality. + */ + struct i915_syncmap *sync; + + struct list_head link; + struct intel_gt *gt; + + struct kref kref; + struct rcu_head rcu; +}; + +struct intel_timeline_cacheline { + struct i915_active active; + + struct intel_timeline_hwsp *hwsp; + void *vaddr; + + struct rcu_head rcu; +}; + +#endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 98dfb086320f..4e292d4bf7b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -6,6 +6,9 @@ #include "i915_drv.h" #include "intel_context.h" +#include "intel_engine_pm.h" +#include "intel_gt.h" +#include "intel_ring.h" #include "intel_workarounds.h" /** @@ -49,9 +52,10 @@ * - Public functions to init or apply the given workaround type. 
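A short sketch of what building such a list looks like with the reworked helpers below; EXAMPLE_REG and EXAMPLE_BIT are made-up placeholders:

static void example_build_wa_list(struct intel_engine_cs *engine)
{
        /* Illustrative sketch only, not part of the patch. */
        struct i915_wa_list *wal = &engine->wa_list;

        /* Lists are now tagged with a type name and the engine name. */
        wa_init_start(wal, "engine", engine->name);

        /* Recorded with .read == EXAMPLE_BIT, so it is verified later. */
        wa_write_or(wal, EXAMPLE_REG, EXAMPLE_BIT);

        wa_init_finish(wal);
}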
*/ -static void wa_init_start(struct i915_wa_list *wal, const char *name) +static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name) { wal->name = name; + wal->engine_name = engine_name; } #define WA_LIST_CHUNK (1 << 4) @@ -73,8 +77,8 @@ static void wa_init_finish(struct i915_wa_list *wal) if (!wal->count) return; - DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n", - wal->wa_count, wal->name); + DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n", + wal->wa_count, wal->name, wal->engine_name); } static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) @@ -143,21 +147,27 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } } -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) +static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val, u32 read_mask) { struct i915_wa wa = { .reg = reg, .mask = mask, .val = val, - .read = mask, + .read = read_mask, }; _wa_add(wal, &wa); } static void +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val) +{ + wa_add(wal, reg, mask, val, mask); +} + +static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); @@ -175,19 +185,6 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_write_masked_or(wal, reg, val, val); } -static void -ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = mask, - .val = val, - /* Bonkers HW, skip verifying */ - }; - - _wa_add(wal, &wa); -} - #define WA_SET_BIT_MASKED(addr, mask) \ wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) @@ -257,7 +254,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, /* WaDisableDopClockGating:bdw * - * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * Also see the related UCGTCL1 write in bdw_init_clock_gating() * to disable EUTC clock gating. 
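The new wa_add() above separates the value that is written from the bits that are verified, which fills the role of the removed ignore_wa_write_or(): passing a zero (or partial) read mask applies a workaround without checking it back, as the Tigerlake FF_MODE2 entry further down does. A hypothetical sketch, again with made-up register names:

static void example_unverified_wa(struct i915_wa_list *wal)
{
        /* Illustrative sketch only, not part of the patch. */

        /* Write EXAMPLE_BIT under EXAMPLE_MASK, but verify nothing back. */
        wa_add(wal, EXAMPLE_REG, EXAMPLE_MASK, EXAMPLE_BIT, 0);
}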
*/ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, @@ -308,11 +305,6 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, FLOW_CONTROL_ENABLE | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - if (!IS_COFFEELAKE(i915)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, @@ -536,12 +528,6 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | GEN8_ERRDETBCTRL); - /* WaDisableBankHangMode:icl */ - wa_write(wal, - GEN8_L3CNTLREG, - intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | - GEN8_ERRDETBCTRL); - /* Wa_1604370585:icl (pre-prod) * Formerly known as WaPushConstantDereferenceHoldDisable */ @@ -586,6 +572,29 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN11_SAMPLER_ENABLE_HEADLESS_MSG); } +static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + u32 val; + + /* Wa_1409142259:tgl */ + WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + + /* Wa_1604555607:tgl */ + val = intel_uncore_read(engine->uncore, FF_MODE2); + val &= ~FF_MODE2_TDS_TIMER_MASK; + val |= FF_MODE2_TDS_TIMER_128; + /* + * FIXME: FF_MODE2 register is not readable till TGL B0. We can + * enable verification of WA from the later steppings, which enables + * the read of FF_MODE2. + */ + wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val, + IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 : + FF_MODE2_TDS_TIMER_MASK); +} + static void __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal, @@ -596,9 +605,11 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) return; - wa_init_start(wal, name); + wa_init_start(wal, name, engine->name); - if (IS_GEN(i915, 11)) + if (IS_GEN(i915, 12)) + tgl_ctx_workarounds_init(engine, wal); + else if (IS_GEN(i915, 11)) icl_ctx_workarounds_init(engine, wal); else if (IS_CANNONLAKE(i915)) cnl_ctx_workarounds_init(engine, wal); @@ -766,7 +777,10 @@ static void wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) { const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - u32 mcr_slice_subslice_mask; + unsigned int slice, subslice; + u32 l3_en, mcr, mcr_mask; + + GEM_BUG_ON(INTEL_GEN(i915) < 10); /* * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl @@ -774,42 +788,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) * the case, we might need to program MCR select to a valid L3Bank * by default, to make sure we correctly read certain registers * later on (in the range 0xB100 - 0xB3FF). - * This might be incompatible with - * WaProgramMgsrForCorrectSliceSpecificMmioReads. - * Fortunately, this should not happen in production hardware, so - * we only assert that this is the case (instead of implementing - * something more complex that requires checking the range of every - * MMIO read). 
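A worked example, with made-up fuse values, of the common-index selection performed by the reworked wa_init_mcr() below:

/*
 * Illustrative example only, not part of the patch:
 *
 *   subslice mask of slice 0 :  0b0110   (subslices 1 and 2 present)
 *   l3_en (enabled L3 banks) :  0b1011   (bank 2 fused off)
 *
 *   l3_en & subslice mask    =  0b0010
 *   fls(0b0010) - 1          =  1
 *
 * so the MCR selector is steered to slice 0 / subslice 1, an index that
 * is valid for both subslice-specific and L3-bank-specific MMIO reads.
 * Only if the intersection were empty would the code fall back to any
 * enabled L3 bank and emit the warning.
 */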
- */ - if (INTEL_GEN(i915) >= 10 && - is_power_of_2(sseu->slice_mask)) { - /* - * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches - * enabled subslice, no need to redirect MCR packet - */ - u32 slice = fls(sseu->slice_mask); - u32 fuse3 = - intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3); - u8 ss_mask = sseu->subslice_mask[slice]; - - u8 enabled_mask = (ss_mask | ss_mask >> - GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK; - u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK; - - /* - * Production silicon should have matched L3Bank and - * subslice enabled - */ - WARN_ON((enabled_mask & disabled_mask) != enabled_mask); - } - - if (INTEL_GEN(i915) >= 11) - mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | - GEN11_MCR_SUBSLICE_MASK; - else - mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | - GEN8_MCR_SUBSLICE_MASK; - /* + * * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl * Before any MMIO read into slice/subslice specific registers, MCR * packet control register needs to be programmed to point to any @@ -819,11 +798,50 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) * are consistent across s/ss in almost all cases. In the rare * occasions, such as INSTDONE, where this value is dependent * on s/ss combo, the read should be done with read_subslice_reg. + * + * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both + * to which subslice, or to which L3 bank, the respective mmio reads + * will go, we have to find a common index which works for both + * accesses. + * + * Case where we cannot find a common index fortunately should not + * happen in production hardware, so we only emit a warning instead of + * implementing something more complex that requires checking the range + * of every MMIO read. */ - wa_write_masked_or(wal, - GEN8_MCR_SELECTOR, - mcr_slice_subslice_mask, - intel_calculate_mcr_s_ss_select(i915)); + + if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { + u32 l3_fuse = + intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & + GEN10_L3BANK_MASK; + + DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse); + l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse); + } else { + l3_en = ~0; + } + + slice = fls(sseu->slice_mask) - 1; + subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); + if (!subslice) { + DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n", + intel_sseu_get_subslices(sseu, slice), l3_en); + subslice = fls(l3_en); + WARN_ON(!subslice); + } + subslice--; + + if (INTEL_GEN(i915) >= 11) { + mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + } else { + mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + } + + DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr); + + wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); } static void @@ -897,12 +915,35 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); + + /* Wa_1607087056:icl */ + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); +} + +static void +tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* Wa_1409420604:tgl */ + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + wa_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE2, + CPSSUNIT_CLKGATE_DIS); + + /* Wa_1409180338:tgl */ + if (IS_TGL_REVID(i915, TGL_REVID_A0, 
TGL_REVID_A0)) + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); } static void gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) { - if (IS_GEN(i915, 11)) + if (IS_GEN(i915, 12)) + tgl_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 11)) icl_gt_workarounds_init(i915, wal); else if (IS_CANNONLAKE(i915)) cnl_gt_workarounds_init(i915, wal); @@ -926,7 +967,7 @@ void intel_gt_init_workarounds(struct drm_i915_private *i915) { struct i915_wa_list *wal = &i915->gt_wa_list; - wa_init_start(wal, "GT"); + wa_init_start(wal, "GT", "global"); gt_init_workarounds(i915, wal); wa_init_finish(wal); } @@ -990,9 +1031,9 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) spin_unlock_irqrestore(&uncore->lock, flags); } -void intel_gt_apply_workarounds(struct drm_i915_private *i915) +void intel_gt_apply_workarounds(struct intel_gt *gt) { - wa_list_apply(&i915->uncore, &i915->gt_wa_list); + wa_list_apply(gt->uncore, >->i915->gt_wa_list); } static bool wa_list_verify(struct intel_uncore *uncore, @@ -1011,10 +1052,23 @@ static bool wa_list_verify(struct intel_uncore *uncore, return ok; } -bool intel_gt_verify_workarounds(struct drm_i915_private *i915, - const char *from) +bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) { - return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from); + return wa_list_verify(gt->uncore, >->i915->gt_wa_list, from); +} + +static inline bool is_nonpriv_flags_valid(u32 flags) +{ + /* Check only valid flag bits are set */ + if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID) + return false; + + /* NB: Only 3 out of 4 enum values are valid for access field */ + if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == + RING_FORCE_TO_NONPRIV_ACCESS_INVALID) + return false; + + return true; } static void @@ -1027,6 +1081,9 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) return; + if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags))) + return; + wa.reg.reg |= flags; _wa_add(wal, &wa); } @@ -1034,7 +1091,7 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) static void whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) { - whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW); + whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); } static void gen9_whitelist_build(struct i915_wa_list *w) @@ -1047,6 +1104,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w) /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ whitelist_reg(w, GEN8_HDC_CHICKEN1); + + /* WaSendPushConstantsFromMMIO:skl,bxt */ + whitelist_reg(w, COMMON_SLICE_CHICKEN2); } static void skl_whitelist_build(struct intel_engine_cs *engine) @@ -1115,7 +1175,7 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine) * - PS_DEPTH_COUNT_UDW */ whitelist_reg_ext(w, PS_INVOCATION_COUNT, - RING_FORCE_TO_NONPRIV_RD | + RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4); } @@ -1155,22 +1215,46 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) * - PS_DEPTH_COUNT_UDW */ whitelist_reg_ext(w, PS_INVOCATION_COUNT, - RING_FORCE_TO_NONPRIV_RD | + RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4); break; case VIDEO_DECODE_CLASS: /* hucStatusRegOffset */ whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base), - RING_FORCE_TO_NONPRIV_RD); + RING_FORCE_TO_NONPRIV_ACCESS_RD); /* hucUKernelHdrInfoRegOffset */ whitelist_reg_ext(w, _MMIO(0x2014 + 
engine->mmio_base), - RING_FORCE_TO_NONPRIV_RD); + RING_FORCE_TO_NONPRIV_ACCESS_RD); /* hucStatus2RegOffset */ whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base), - RING_FORCE_TO_NONPRIV_RD); + RING_FORCE_TO_NONPRIV_ACCESS_RD); + break; + + default: break; + } +} + +static void tgl_whitelist_build(struct intel_engine_cs *engine) +{ + struct i915_wa_list *w = &engine->whitelist; + switch (engine->class) { + case RENDER_CLASS: + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl + * + * This covers 4 registers which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); + break; default: break; } @@ -1181,9 +1265,11 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; struct i915_wa_list *w = &engine->whitelist; - wa_init_start(w, "whitelist"); + wa_init_start(w, "whitelist", engine->name); - if (IS_GEN(i915, 11)) + if (IS_GEN(i915, 12)) + tgl_whitelist_build(engine); + else if (IS_GEN(i915, 11)) icl_whitelist_build(engine); else if (IS_CANNONLAKE(i915)) cnl_whitelist_build(engine); @@ -1233,6 +1319,34 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) { + /* Wa_1606700617:tgl */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + + /* Wa_1607138336:tgl */ + wa_write_or(wal, + GEN9_CTX_PREEMPT_REG, + GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); + + /* Wa_1607030317:tgl */ + /* Wa_1607186500:tgl */ + /* Wa_1607297627:tgl */ + wa_masked_en(wal, + GEN6_RC_SLEEP_PSMI_CONTROL, + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE); + + /* + * Wa_1606679103:tgl + * (see also Wa_1606682166:icl) + */ + wa_write_or(wal, + GEN7_SARCHKMD, + GEN7_DISABLE_SAMPLER_PREFETCH); + } + if (IS_GEN(i915, 11)) { /* This is not an Wa. 
Enable for better image quality */ wa_masked_en(wal, @@ -1240,10 +1354,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); /* WaPipelineFlushCoherentLines:icl */ - ignore_wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES, - GEN8_LQSC_FLUSH_COHERENT_LINES); + wa_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES); /* * Wa_1405543622:icl @@ -1270,10 +1383,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1405733216:icl * Formerly known as WaDisableCleanEvicts */ - ignore_wa_write_or(wal, - GEN8_L3SQCREG4, - GEN11_LQSC_CLEAN_EVICT_DISABLE, - GEN11_LQSC_CLEAN_EVICT_DISABLE); + wa_write_or(wal, + GEN8_L3SQCREG4, + GEN11_LQSC_CLEAN_EVICT_DISABLE); /* WaForwardProgressSoftReset:icl */ wa_write_or(wal, @@ -1292,6 +1404,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, GEN7_SARCHKMD, GEN7_DISABLE_SAMPLER_PREFETCH); + + /* Wa_1409178092:icl */ + wa_write_masked_or(wal, + GEN11_SCRATCH2, + GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, + 0); } if (IS_GEN_RANGE(i915, 9, 11)) { @@ -1360,7 +1478,7 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8)) return; - if (engine->id == RCS0) + if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else xcs_engine_wa_init(engine, wal); @@ -1370,10 +1488,10 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) { struct i915_wa_list *wal = &engine->wa_list; - if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8)) + if (INTEL_GEN(engine->i915) < 8) return; - wa_init_start(wal, engine->name); + wa_init_start(wal, "engine", engine->name); engine_init_workarounds(engine, wal); wa_init_finish(wal); } @@ -1416,26 +1534,50 @@ err_obj: return ERR_PTR(err); } +static bool mcr_range(struct drm_i915_private *i915, u32 offset) +{ + /* + * Registers in this range are affected by the MCR selector + * which only controls CPU initiated MMIO. Routing does not + * work for CS access so we cannot verify them on this path. 
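A small sketch isolating the filtering that both the SRM batch construction and the later result comparison below have to agree on:

static unsigned int example_count_verifiable(struct drm_i915_private *i915,
                                             const struct i915_wa_list *wal)
{
        /* Illustrative sketch only, not part of the patch. */
        const struct i915_wa *wa;
        unsigned int i, count = 0;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                /* MCR-steered registers cannot be read via the CS. */
                if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
                        count++;
        }

        return count;
}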
+ */ + if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff)) + return true; + + return false; +} + static int wa_list_srm(struct i915_request *rq, const struct i915_wa_list *wal, struct i915_vma *vma) { + struct drm_i915_private *i915 = rq->i915; + unsigned int i, count = 0; const struct i915_wa *wa; - unsigned int i; u32 srm, *cs; srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - if (INTEL_GEN(rq->i915) >= 8) + if (INTEL_GEN(i915) >= 8) srm++; - cs = intel_ring_begin(rq, 4 * wal->count); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg))) + count++; + } + + cs = intel_ring_begin(rq, 4 * count); if (IS_ERR(cs)) return PTR_ERR(cs); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + u32 offset = i915_mmio_reg_offset(wa->reg); + + if (mcr_range(i915, offset)) + continue; + *cs++ = srm; - *cs++ = i915_mmio_reg_offset(wa->reg); + *cs++ = offset; *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; *cs++ = 0; } @@ -1458,11 +1600,13 @@ static int engine_wa_list_verify(struct intel_context *ce, if (!wal->count) return 0; - vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count); + vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count); if (IS_ERR(vma)) return PTR_ERR(vma); + intel_engine_pm_get(ce->engine); rq = intel_context_create_request(ce); + intel_engine_pm_put(ce->engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_vma; @@ -1472,25 +1616,32 @@ static int engine_wa_list_verify(struct intel_context *ce, if (err) goto err_vma; + i915_request_get(rq); i915_request_add(rq); if (i915_request_wait(rq, 0, HZ / 5) < 0) { err = -ETIME; - goto err_vma; + goto err_rq; } results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); if (IS_ERR(results)) { err = PTR_ERR(results); - goto err_vma; + goto err_rq; } err = 0; - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg))) + continue; + if (!wa_verify(wa, results[i], wal->name, from)) err = -ENXIO; + } i915_gem_object_unpin_map(vma->obj); +err_rq: + i915_request_put(rq); err_vma: i915_vma_unpin(vma); i915_vma_put(vma); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h index 3761a6ee58bb..8c9c769c2204 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h @@ -14,6 +14,7 @@ struct drm_i915_private; struct i915_request; struct intel_engine_cs; +struct intel_gt; static inline void intel_wa_list_free(struct i915_wa_list *wal) { @@ -25,9 +26,8 @@ void intel_engine_init_ctx_wa(struct intel_engine_cs *engine); int intel_engine_emit_ctx_wa(struct i915_request *rq); void intel_gt_init_workarounds(struct drm_i915_private *i915); -void intel_gt_apply_workarounds(struct drm_i915_private *i915); -bool intel_gt_verify_workarounds(struct drm_i915_private *i915, - const char *from); +void intel_gt_apply_workarounds(struct intel_gt *gt); +bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from); void intel_engine_init_whitelist(struct intel_engine_cs *engine); void intel_engine_apply_whitelist(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index 42ac1fb99572..e27ab1b710b3 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -20,6 +20,7 @@ struct i915_wa { struct i915_wa_list { 
const char *name; + const char *engine_name; struct i915_wa *list; unsigned int count; unsigned int wa_count; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 486c6953dcb1..f2806381733f 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -23,62 +23,59 @@ */ #include "gem/i915_gem_context.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_engine_pool.h" #include "mock_engine.h" #include "selftests/mock_request.h" -struct mock_ring { - struct intel_ring base; - struct i915_timeline timeline; -}; - -static void mock_timeline_pin(struct i915_timeline *tl) +static void mock_timeline_pin(struct intel_timeline *tl) { - tl->pin_count++; + atomic_inc(&tl->pin_count); } -static void mock_timeline_unpin(struct i915_timeline *tl) +static void mock_timeline_unpin(struct intel_timeline *tl) { - GEM_BUG_ON(!tl->pin_count); - tl->pin_count--; + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + atomic_dec(&tl->pin_count); } static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; - struct mock_ring *ring; + struct intel_ring *ring; ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); if (!ring) return NULL; - if (i915_timeline_init(engine->i915, &ring->timeline, NULL)) { + kref_init(&ring->ref); + ring->size = sz; + ring->effective_size = sz; + ring->vaddr = (void *)(ring + 1); + atomic_set(&ring->pin_count, 1); + + ring->vma = i915_vma_alloc(); + if (!ring->vma) { kfree(ring); return NULL; } + i915_active_init(&ring->vma->active, NULL, NULL); - kref_init(&ring->base.ref); - ring->base.size = sz; - ring->base.effective_size = sz; - ring->base.vaddr = (void *)(ring + 1); - ring->base.timeline = &ring->timeline; - atomic_set(&ring->base.pin_count, 1); - - INIT_LIST_HEAD(&ring->base.request_list); - intel_ring_update_space(&ring->base); + intel_ring_update_space(ring); - return &ring->base; + return ring; } -static void mock_ring_free(struct intel_ring *base) +static void mock_ring_free(struct intel_ring *ring) { - struct mock_ring *ring = container_of(base, typeof(*ring), base); + i915_active_fini(&ring->vma->active); + i915_vma_free(ring->vma); - i915_timeline_fini(&ring->timeline); kfree(ring); } @@ -95,7 +92,7 @@ static void advance(struct i915_request *request) i915_request_mark_complete(request); GEM_BUG_ON(!i915_request_completed(request)); - intel_engine_queue_breadcrumbs(request->engine); + intel_engine_signal_breadcrumbs(request->engine); } static void hw_delay_complete(struct timer_list *t) @@ -130,7 +127,6 @@ static void hw_delay_complete(struct timer_list *t) static void mock_context_unpin(struct intel_context *ce) { - mock_timeline_unpin(ce->ring->timeline); } static void mock_context_destroy(struct kref *ref) @@ -139,37 +135,52 @@ static void mock_context_destroy(struct kref *ref) GEM_BUG_ON(intel_context_is_pinned(ce)); - if (ce->ring) + if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { mock_ring_free(ce->ring); + mock_timeline_unpin(ce->timeline); + } + intel_context_fini(ce); intel_context_free(ce); } -static int mock_context_pin(struct intel_context *ce) +static int mock_context_alloc(struct intel_context *ce) { - int ret; - - if (!ce->ring) { - ce->ring = mock_ring(ce->engine); - if (!ce->ring) - return -ENOMEM; + ce->ring = mock_ring(ce->engine); + if (!ce->ring) + return -ENOMEM; + + GEM_BUG_ON(ce->timeline); + ce->timeline = intel_timeline_create(ce->engine->gt, NULL); + if 
(IS_ERR(ce->timeline)) { + kfree(ce->engine); + return PTR_ERR(ce->timeline); } - ret = intel_context_active_acquire(ce, PIN_HIGH); - if (ret) - return ret; + mock_timeline_pin(ce->timeline); - mock_timeline_pin(ce->ring->timeline); return 0; } +static int mock_context_pin(struct intel_context *ce) +{ + return 0; +} + +static void mock_context_reset(struct intel_context *ce) +{ +} + static const struct intel_context_ops mock_context_ops = { + .alloc = mock_context_alloc, + .pin = mock_context_pin, .unpin = mock_context_unpin, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, + .reset = mock_context_reset, .destroy = mock_context_destroy, }; @@ -216,16 +227,12 @@ static void mock_reset_prepare(struct intel_engine_cs *engine) { } -static void mock_reset(struct intel_engine_cs *engine, bool stalled) +static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled) { GEM_BUG_ON(stalled); } -static void mock_reset_finish(struct intel_engine_cs *engine) -{ -} - -static void mock_cancel_requests(struct intel_engine_cs *engine) +static void mock_reset_cancel(struct intel_engine_cs *engine) { struct i915_request *request; unsigned long flags; @@ -243,6 +250,24 @@ static void mock_cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->active.lock, flags); } +static void mock_reset_finish(struct intel_engine_cs *engine) +{ +} + +static void mock_engine_release(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + + GEM_BUG_ON(timer_pending(&mock->hw_delay)); + + intel_context_unpin(engine->kernel_context); + intel_context_put(engine->kernel_context); + + intel_engine_fini_retire(engine); + intel_engine_fini_breadcrumbs(engine); +} + struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, const char *name, int id) @@ -250,6 +275,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, struct mock_engine *engine; GEM_BUG_ON(id >= I915_NUM_ENGINES); + GEM_BUG_ON(!i915->gt.uncore); engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); if (!engine) @@ -257,9 +283,13 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; + engine->base.gt = &i915->gt; + engine->base.uncore = i915->gt.uncore; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; engine->base.mask = BIT(id); + engine->base.legacy_idx = INVALID_ENGINE; + engine->base.instance = id; engine->base.status_page.addr = (void *)(engine + 1); engine->base.cops = &mock_context_ops; @@ -269,38 +299,41 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.submit_request = mock_submit_request; engine->base.reset.prepare = mock_reset_prepare; - engine->base.reset.reset = mock_reset; + engine->base.reset.rewind = mock_reset_rewind; + engine->base.reset.cancel = mock_reset_cancel; engine->base.reset.finish = mock_reset_finish; - engine->base.cancel_requests = mock_cancel_requests; + + engine->base.release = mock_engine_release; + + i915->gt.engine[id] = &engine->base; + i915->gt.engine_class[0][id] = &engine->base; /* fake hw queue */ spin_lock_init(&engine->hw_lock); timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); + intel_engine_add_user(&engine->base); + return &engine->base; } int mock_engine_init(struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = engine->i915; - int err; + struct intel_context *ce; 
intel_engine_init_active(engine, ENGINE_MOCK); intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); + intel_engine_init_retire(engine); + intel_engine_pool_init(&engine->pool); - engine->kernel_context = - i915_gem_context_get_engine(i915->kernel_context, engine->id); - if (IS_ERR(engine->kernel_context)) - goto err_breadcrumbs; - - err = intel_context_pin(engine->kernel_context); - intel_context_put(engine->kernel_context); - if (err) + ce = create_kernel_context(engine); + if (IS_ERR(ce)) goto err_breadcrumbs; + engine->kernel_context = ce; return 0; err_breadcrumbs: @@ -325,17 +358,3 @@ void mock_engine_flush(struct intel_engine_cs *engine) void mock_engine_reset(struct intel_engine_cs *engine) { } - -void mock_engine_free(struct intel_engine_cs *engine) -{ - struct mock_engine *mock = - container_of(engine, typeof(*mock), base); - - GEM_BUG_ON(timer_pending(&mock->hw_delay)); - - intel_context_unpin(engine->kernel_context); - - intel_engine_fini_breadcrumbs(engine); - - kfree(engine); -} diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c new file mode 100644 index 000000000000..e874dfaa5316 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -0,0 +1,443 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_selftest.h" +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" +#include "intel_gt.h" + +#include "gem/selftests/mock_context.h" +#include "selftests/igt_flush_test.h" +#include "selftests/mock_drm.h" + +static int request_sync(struct i915_request *rq) +{ + struct intel_timeline *tl = i915_request_timeline(rq); + long timeout; + int err = 0; + + intel_timeline_get(tl); + i915_request_get(rq); + + /* Opencode i915_request_add() so we can keep the timeline locked. */ + __i915_request_commit(rq); + __i915_request_queue(rq, NULL); + + timeout = i915_request_wait(rq, 0, HZ / 10); + if (timeout < 0) + err = timeout; + else + i915_request_retire_upto(rq); + + lockdep_unpin_lock(&tl->mutex, rq->cookie); + mutex_unlock(&tl->mutex); + + i915_request_put(rq); + intel_timeline_put(tl); + + return err; +} + +static int context_sync(struct intel_context *ce) +{ + struct intel_timeline *tl = ce->timeline; + int err = 0; + + mutex_lock(&tl->mutex); + do { + struct i915_request *rq; + long timeout; + + if (list_empty(&tl->requests)) + break; + + rq = list_last_entry(&tl->requests, typeof(*rq), link); + i915_request_get(rq); + + timeout = i915_request_wait(rq, 0, HZ / 10); + if (timeout < 0) + err = timeout; + else + i915_request_retire_upto(rq); + + i915_request_put(rq); + } while (!err); + mutex_unlock(&tl->mutex); + + return err; +} + +static int __live_context_size(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct i915_request *rq; + void *vaddr; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) + goto err; + + vaddr = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(engine->i915)); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + intel_context_unpin(ce); + goto err; + } + + /* + * Note that execlists also applies a redzone which it checks on + * context unpin when debugging. We are using the same location + * and same poison value so that our checks overlap. 
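The check itself boils down to poisoning the page past the advertised context size and making sure the hardware never writes into it; a condensed sketch, with 'state' standing in for the pinned context image:

static bool example_redzone_intact(const void *state, u32 context_size)
{
        /* Illustrative sketch only, not part of the patch. */
        const void *redzone = state + context_size - I915_GTT_PAGE_SIZE;

        /* The red-zone was pre-filled with POISON_INUSE before running. */
        return !memchr_inv(redzone, POISON_INUSE, I915_GTT_PAGE_SIZE);
}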
Despite the + * redundancy, we want to keep this little selftest so that we + * get coverage of any and all submission backends, and we can + * always extend this test to ensure we trick the HW into a + * compromising position wrt to the various sections that need + * to be written into the context state. + * + * TLDR; this overlaps with the execlists redzone. + */ + vaddr += engine->context_size - I915_GTT_PAGE_SIZE; + memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); + + rq = intel_context_create_request(ce); + intel_context_unpin(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + err = request_sync(rq); + if (err) + goto err_unpin; + + /* Force the context switch */ + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + err = request_sync(rq); + if (err) + goto err_unpin; + + if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) { + pr_err("%s context overwrote trailing red-zone!", engine->name); + err = -EINVAL; + } + +err_unpin: + i915_gem_object_unpin_map(ce->state->obj); +err: + intel_context_put(ce); + return err; +} + +static int live_context_size(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Check that our context sizes are correct by seeing if the + * HW tries to write past the end of one. + */ + + for_each_engine(engine, gt, id) { + struct { + struct drm_i915_gem_object *state; + void *pinned; + } saved; + + if (!engine->context_size) + continue; + + intel_engine_pm_get(engine); + + /* + * Hide the old default state -- we lie about the context size + * and get confused when the default state is smaller than + * expected. For our do nothing request, inheriting the + * active state is sufficient, we are only checking that we + * don't use more than we planned. + */ + saved.state = fetch_and_zero(&engine->default_state); + saved.pinned = fetch_and_zero(&engine->pinned_default_state); + + /* Overlaps with the execlists redzone */ + engine->context_size += I915_GTT_PAGE_SIZE; + + err = __live_context_size(engine); + + engine->context_size -= I915_GTT_PAGE_SIZE; + + engine->pinned_default_state = saved.pinned; + engine->default_state = saved.state; + + intel_engine_pm_put(engine); + + if (err) + break; + } + + return err; +} + +static int __live_active_context(struct intel_engine_cs *engine) +{ + unsigned long saved_heartbeat; + struct intel_context *ce; + int pass; + int err; + + /* + * We keep active contexts alive until after a subsequent context + * switch as the final write from the context-save will be after + * we retire the final request. We track when we unpin the context, + * under the presumption that the final pin is from the last request, + * and instead of immediately unpinning the context, we add a task + * to unpin the context from the next idle-barrier. + * + * This test makes sure that the context is kept alive until a + * subsequent idle-barrier (emitted when the engine wakeref hits 0 + * with no more outstanding requests). 
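A condensed sketch of the ordering the test depends on once its request has retired: the tracked context may only report idle after the kernel-context idle-barrier has been flushed and waited upon (context_sync() is the helper defined earlier in this selftest):

static int example_expect_idle_after_barrier(struct intel_engine_cs *engine,
                                             struct intel_context *ce)
{
        /* Illustrative sketch only, not part of the patch. */
        int err;

        /* Emit the pending idle-barriers on the kernel context ... */
        err = intel_engine_flush_barriers(engine);
        if (err)
                return err;

        /* ... and wait for them (this also lets the engine park). */
        err = context_sync(engine->kernel_context);
        if (err)
                return err;

        return i915_active_is_idle(&ce->active) ? 0 : -EINVAL;
}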
+ */ + + if (intel_engine_pm_is_awake(engine)) { + pr_err("%s is awake before starting %s!\n", + engine->name, __func__); + return -EINVAL; + } + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + saved_heartbeat = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + + for (pass = 0; pass <= 2; pass++) { + struct i915_request *rq; + + intel_engine_pm_get(engine); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_engine; + } + + err = request_sync(rq); + if (err) + goto out_engine; + + /* Context will be kept active until after an idle-barrier. */ + if (i915_active_is_idle(&ce->active)) { + pr_err("context is not active; expected idle-barrier (%s pass %d)\n", + engine->name, pass); + err = -EINVAL; + goto out_engine; + } + + if (!intel_engine_pm_is_awake(engine)) { + pr_err("%s is asleep before idle-barrier\n", + engine->name); + err = -EINVAL; + goto out_engine; + } + +out_engine: + intel_engine_pm_put(engine); + if (err) + goto err; + } + + /* Now make sure our idle-barriers are flushed */ + err = intel_engine_flush_barriers(engine); + if (err) + goto err; + + /* Wait for the barrier and in the process wait for engine to park */ + err = context_sync(engine->kernel_context); + if (err) + goto err; + + if (!i915_active_is_idle(&ce->active)) { + pr_err("context is still active!"); + err = -EINVAL; + } + + intel_engine_pm_flush(engine); + + if (intel_engine_pm_is_awake(engine)) { + struct drm_printer p = drm_debug_printer(__func__); + + intel_engine_dump(engine, &p, + "%s is still awake:%d after idle-barriers\n", + engine->name, + atomic_read(&engine->wakeref.count)); + GEM_TRACE_DUMP(); + + err = -EINVAL; + goto err; + } + +err: + engine->props.heartbeat_interval_ms = saved_heartbeat; + intel_context_put(ce); + return err; +} + +static int live_active_context(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + for_each_engine(engine, gt, id) { + err = __live_active_context(engine); + if (err) + break; + + err = igt_flush_test(gt->i915); + if (err) + break; + } + + return err; +} + +static int __remote_sync(struct intel_context *ce, struct intel_context *remote) +{ + struct i915_request *rq; + int err; + + err = intel_context_pin(remote); + if (err) + return err; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto unpin; + } + + err = intel_context_prepare_remote_request(remote, rq); + if (err) { + i915_request_add(rq); + goto unpin; + } + + err = request_sync(rq); + +unpin: + intel_context_unpin(remote); + return err; +} + +static int __live_remote_context(struct intel_engine_cs *engine) +{ + struct intel_context *local, *remote; + unsigned long saved_heartbeat; + int pass; + int err; + + /* + * Check that our idle barriers do not interfere with normal + * activity tracking. In particular, check that operating + * on the context image remotely (intel_context_prepare_remote_request), + * which inserts foreign fences into intel_context.active, does not + * clobber the idle-barrier. 
+ */ + + if (intel_engine_pm_is_awake(engine)) { + pr_err("%s is awake before starting %s!\n", + engine->name, __func__); + return -EINVAL; + } + + remote = intel_context_create(engine); + if (IS_ERR(remote)) + return PTR_ERR(remote); + + local = intel_context_create(engine); + if (IS_ERR(local)) { + err = PTR_ERR(local); + goto err_remote; + } + + saved_heartbeat = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + intel_engine_pm_get(engine); + + for (pass = 0; pass <= 2; pass++) { + err = __remote_sync(local, remote); + if (err) + break; + + err = __remote_sync(engine->kernel_context, remote); + if (err) + break; + + if (i915_active_is_idle(&remote->active)) { + pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n", + engine->name, pass); + err = -EINVAL; + break; + } + } + + intel_engine_pm_put(engine); + engine->props.heartbeat_interval_ms = saved_heartbeat; + + intel_context_put(local); +err_remote: + intel_context_put(remote); + return err; +} + +static int live_remote_context(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + for_each_engine(engine, gt, id) { + err = __live_remote_context(engine); + if (err) + break; + + err = igt_flush_test(gt->i915); + if (err) + break; + } + + return err; +} + +int intel_context_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_context_size), + SUBTEST(live_active_context), + SUBTEST(live_remote_context), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.c b/drivers/gpu/drm/i915/gt/selftest_engine.c new file mode 100644 index 000000000000..f65b118e261d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine.c @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2018 Intel Corporation + */ + +#include "i915_selftest.h" +#include "selftest_engine.h" + +int intel_engine_live_selftests(struct drm_i915_private *i915) +{ + static int (* const tests[])(struct intel_gt *) = { + live_engine_pm_selftests, + NULL, + }; + struct intel_gt *gt = &i915->gt; + typeof(*tests) *fn; + + for (fn = tests; *fn; fn++) { + int err; + + err = (*fn)(gt); + if (err) + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.h b/drivers/gpu/drm/i915/gt/selftest_engine.h new file mode 100644 index 000000000000..ab32d09ec5a1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef SELFTEST_ENGINE_H +#define SELFTEST_ENGINE_H + +struct intel_gt; + +int live_engine_pm_selftests(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index cfaa6b296835..f88e445a1cae 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -4,7 +4,365 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_selftest.h" +#include <linux/sort.h> + +#include "intel_gt_pm.h" +#include "intel_rps.h" + +#include "i915_selftest.h" +#include "selftests/igt_flush_test.h" + +#define COUNT 5 + +static int cmp_u32(const void *A, const void *B) +{ + const u32 *a = A, *b = B; + + return *a - *b; +} + +static void perf_begin(struct intel_gt *gt) +{ + intel_gt_pm_get(gt); + + /* 
Boost gpufreq to max [waitboost] and keep it fixed */ + atomic_inc(>->rps.num_waiters); + schedule_work(>->rps.work); + flush_work(>->rps.work); +} + +static int perf_end(struct intel_gt *gt) +{ + atomic_dec(>->rps.num_waiters); + intel_gt_pm_put(gt); + + return igt_flush_test(gt->i915); +} + +static int write_timestamp(struct i915_request *rq, int slot) +{ + u32 cmd; + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; + if (INTEL_GEN(rq->i915) >= 8) + cmd++; + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); + *cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32); + *cs++ = 0; + + intel_ring_advance(rq, cs); + + return 0; +} + +static struct i915_vma *create_empty_batch(struct intel_context *ce) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + + obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_put; + } + + cs[0] = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + + vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_unpin; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_unpin; + + i915_gem_object_unpin_map(obj); + return vma; + +err_unpin: + i915_gem_object_unpin_map(obj); +err_put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static u32 trifilter(u32 *a) +{ + u64 sum; + + sort(a, COUNT, sizeof(*a), cmp_u32, NULL); + + sum = mul_u32_u32(a[2], 2); + sum += a[1]; + sum += a[3]; + + return sum >> 2; +} + +static int perf_mi_bb_start(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + return 0; + + perf_begin(gt); + for_each_engine(engine, gt, id) { + struct intel_context *ce = engine->kernel_context; + struct i915_vma *batch; + u32 cycles[COUNT]; + int i; + + intel_engine_pm_get(engine); + + batch = create_empty_batch(ce); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + intel_engine_pm_put(engine); + break; + } + + err = i915_vma_sync(batch); + if (err) { + intel_engine_pm_put(engine); + i915_vma_put(batch); + break; + } + + for (i = 0; i < ARRAY_SIZE(cycles); i++) { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = write_timestamp(rq, 2); + if (err) + goto out; + + err = rq->engine->emit_bb_start(rq, + batch->node.start, 8, + 0); + if (err) + goto out; + + err = write_timestamp(rq, 3); + if (err) + goto out; + +out: + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + if (err) + break; + + cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2]; + } + i915_vma_put(batch); + intel_engine_pm_put(engine); + if (err) + break; + + pr_info("%s: MI_BB_START cycles: %u\n", + engine->name, trifilter(cycles)); + } + if (perf_end(gt)) + err = -EIO; + + return err; +} + +static struct i915_vma *create_nop_batch(struct intel_context *ce) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + + obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = 
PTR_ERR(cs); + goto err_put; + } + + memset(cs, 0, SZ_64K); + cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + + vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_unpin; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_unpin; + + i915_gem_object_unpin_map(obj); + return vma; + +err_unpin: + i915_gem_object_unpin_map(obj); +err_put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int perf_mi_noop(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + return 0; + + perf_begin(gt); + for_each_engine(engine, gt, id) { + struct intel_context *ce = engine->kernel_context; + struct i915_vma *base, *nop; + u32 cycles[COUNT]; + int i; + + intel_engine_pm_get(engine); + + base = create_empty_batch(ce); + if (IS_ERR(base)) { + err = PTR_ERR(base); + intel_engine_pm_put(engine); + break; + } + + err = i915_vma_sync(base); + if (err) { + i915_vma_put(base); + intel_engine_pm_put(engine); + break; + } + + nop = create_nop_batch(ce); + if (IS_ERR(nop)) { + err = PTR_ERR(nop); + i915_vma_put(base); + intel_engine_pm_put(engine); + break; + } + + err = i915_vma_sync(nop); + if (err) { + i915_vma_put(nop); + i915_vma_put(base); + intel_engine_pm_put(engine); + break; + } + + for (i = 0; i < ARRAY_SIZE(cycles); i++) { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = write_timestamp(rq, 2); + if (err) + goto out; + + err = rq->engine->emit_bb_start(rq, + base->node.start, 8, + 0); + if (err) + goto out; + + err = write_timestamp(rq, 3); + if (err) + goto out; + + err = rq->engine->emit_bb_start(rq, + nop->node.start, + nop->node.size, + 0); + if (err) + goto out; + + err = write_timestamp(rq, 4); + if (err) + goto out; + +out: + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + if (err) + break; + + cycles[i] = + (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) - + (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]); + } + i915_vma_put(nop); + i915_vma_put(base); + intel_engine_pm_put(engine); + if (err) + break; + + pr_info("%s: 16K MI_NOOP cycles: %u\n", + engine->name, trifilter(cycles)); + } + if (perf_end(gt)) + err = -EIO; + + return err; +} + +int intel_engine_cs_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(perf_mi_bb_start), + SUBTEST(perf_mi_noop), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} static int intel_mmio_bases_check(void *arg) { @@ -12,19 +370,18 @@ static int intel_mmio_bases_check(void *arg) for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { const struct engine_info *info = &intel_engines[i]; - char name[INTEL_ENGINE_CS_MAX_NAME]; u8 prev = U8_MAX; - __sprint_engine_name(name, info); - for (j = 0; j < MAX_MMIO_BASES; j++) { u8 gen = info->mmio_bases[j].gen; u32 base = info->mmio_bases[j].base; if (gen >= prev) { - pr_err("%s: %s: mmio base for gen %x " - "is before the one for gen %x\n", - __func__, name, prev, gen); + pr_err("%s(%s, class:%d, instance:%d): mmio base for gen %x is before the one for gen %x\n", + __func__, + intel_engine_class_repr(info->class), + info->class, info->instance, + prev, gen); return -EINVAL; } @@ -32,17 +389,22 @@ static int 
intel_mmio_bases_check(void *arg) break; if (!base) { - pr_err("%s: %s: invalid mmio base (%x) " - "for gen %x at entry %u\n", - __func__, name, base, gen, j); + pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for gen %x at entry %u\n", + __func__, + intel_engine_class_repr(info->class), + info->class, info->instance, + base, gen, j); return -EINVAL; } prev = gen; } - pr_info("%s: min gen supported for %s = %d\n", - __func__, name, prev); + pr_debug("%s: min gen supported for %s%d is %d\n", + __func__, + intel_engine_class_repr(info->class), + info->instance, + prev); } return 0; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c new file mode 100644 index 000000000000..43d4d589749f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -0,0 +1,374 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include <linux/sort.h> + +#include "i915_drv.h" + +#include "intel_gt_requests.h" +#include "i915_selftest.h" + +static int timeline_sync(struct intel_timeline *tl) +{ + struct dma_fence *fence; + long timeout; + + fence = i915_active_fence_get(&tl->last_request); + if (!fence) + return 0; + + timeout = dma_fence_wait_timeout(fence, true, HZ / 2); + dma_fence_put(fence); + if (timeout < 0) + return timeout; + + return 0; +} + +static int engine_sync_barrier(struct intel_engine_cs *engine) +{ + return timeline_sync(engine->kernel_context->timeline); +} + +struct pulse { + struct i915_active active; + struct kref kref; +}; + +static int pulse_active(struct i915_active *active) +{ + kref_get(&container_of(active, struct pulse, active)->kref); + return 0; +} + +static void pulse_free(struct kref *kref) +{ + kfree(container_of(kref, struct pulse, kref)); +} + +static void pulse_put(struct pulse *p) +{ + kref_put(&p->kref, pulse_free); +} + +static void pulse_retire(struct i915_active *active) +{ + pulse_put(container_of(active, struct pulse, active)); +} + +static struct pulse *pulse_create(void) +{ + struct pulse *p; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return p; + + kref_init(&p->kref); + i915_active_init(&p->active, pulse_active, pulse_retire); + + return p; +} + +static void pulse_unlock_wait(struct pulse *p) +{ + i915_active_unlock_wait(&p->active); +} + +static int __live_idle_pulse(struct intel_engine_cs *engine, + int (*fn)(struct intel_engine_cs *cs)) +{ + struct pulse *p; + int err; + + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + + p = pulse_create(); + if (!p) + return -ENOMEM; + + err = i915_active_acquire(&p->active); + if (err) + goto out; + + err = i915_active_acquire_preallocate_barrier(&p->active, engine); + if (err) { + i915_active_release(&p->active); + goto out; + } + + i915_active_acquire_barrier(&p->active); + i915_active_release(&p->active); + + GEM_BUG_ON(i915_active_is_idle(&p->active)); + GEM_BUG_ON(llist_empty(&engine->barrier_tasks)); + + err = fn(engine); + if (err) + goto out; + + GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); + + if (engine_sync_barrier(engine)) { + struct drm_printer m = drm_err_printer("pulse"); + + pr_err("%s: no heartbeat pulse?\n", engine->name); + intel_engine_dump(engine, &m, "%s", engine->name); + + err = -ETIME; + goto out; + } + + GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial); + + pulse_unlock_wait(p); /* synchronize with the retirement callback */ + + if (!i915_active_is_idle(&p->active)) { + struct drm_printer m = drm_err_printer("pulse"); + + pr_err("%s: 
heartbeat pulse did not flush idle tasks\n", + engine->name); + i915_active_print(&p->active, &m); + + err = -EINVAL; + goto out; + } + +out: + pulse_put(p); + return err; +} + +static int live_idle_flush(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that we can flush the idle barriers */ + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_idle_pulse(engine, intel_engine_flush_barriers); + intel_engine_pm_put(engine); + if (err) + break; + } + + return err; +} + +static int live_idle_pulse(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that heartbeat pulses flush the idle barriers */ + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_idle_pulse(engine, intel_engine_pulse); + intel_engine_pm_put(engine); + if (err && err != -ENODEV) + break; + + err = 0; + } + + return err; +} + +static int cmp_u32(const void *_a, const void *_b) +{ + const u32 *a = _a, *b = _b; + + return *a - *b; +} + +static int __live_heartbeat_fast(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct i915_request *rq; + ktime_t t0, t1; + u32 times[5]; + int err; + int i; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + intel_engine_pm_get(engine); + + err = intel_engine_set_heartbeat(engine, 1); + if (err) + goto err_pm; + + for (i = 0; i < ARRAY_SIZE(times); i++) { + /* Manufacture a tick */ + do { + while (READ_ONCE(engine->heartbeat.systole)) + flush_delayed_work(&engine->heartbeat.work); + + engine->serial++; /* quick, pretend we are not idle! */ + flush_delayed_work(&engine->heartbeat.work); + if (!delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat did not start\n", + engine->name); + err = -EINVAL; + goto err_pm; + } + + rcu_read_lock(); + rq = READ_ONCE(engine->heartbeat.systole); + if (rq) + rq = i915_request_get_rcu(rq); + rcu_read_unlock(); + } while (!rq); + + t0 = ktime_get(); + while (rq == READ_ONCE(engine->heartbeat.systole)) + yield(); /* work is on the local cpu! */ + t1 = ktime_get(); + + i915_request_put(rq); + times[i] = ktime_us_delta(t1, t0); + } + + sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); + + pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", + engine->name, + times[ARRAY_SIZE(times) / 2], + times[0], + times[ARRAY_SIZE(times) - 1]); + + /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */ + if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) { + pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", + engine->name, + times[ARRAY_SIZE(times) / 2], + jiffies_to_usecs(6)); + err = -EINVAL; + } + + intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); +err_pm: + intel_engine_pm_put(engine); + intel_context_put(ce); + return err; +} + +static int live_heartbeat_fast(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that the heartbeat ticks at the desired rate. 
*/ + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + return 0; + + for_each_engine(engine, gt, id) { + err = __live_heartbeat_fast(engine); + if (err) + break; + } + + return err; +} + +static int __live_heartbeat_off(struct intel_engine_cs *engine) +{ + int err; + + intel_engine_pm_get(engine); + + engine->serial++; + flush_delayed_work(&engine->heartbeat.work); + if (!delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat not running\n", + engine->name); + err = -EINVAL; + goto err_pm; + } + + err = intel_engine_set_heartbeat(engine, 0); + if (err) + goto err_pm; + + engine->serial++; + flush_delayed_work(&engine->heartbeat.work); + if (delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat still running\n", + engine->name); + err = -EINVAL; + goto err_beat; + } + + if (READ_ONCE(engine->heartbeat.systole)) { + pr_err("%s: heartbeat still allocated\n", + engine->name); + err = -EINVAL; + goto err_beat; + } + +err_beat: + intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); +err_pm: + intel_engine_pm_put(engine); + return err; +} + +static int live_heartbeat_off(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that we can turn off heartbeat and not interrupt VIP */ + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + return 0; + + for_each_engine(engine, gt, id) { + if (!intel_engine_has_preemption(engine)) + continue; + + err = __live_heartbeat_off(engine); + if (err) + break; + } + + return err; +} + +int intel_heartbeat_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_idle_flush), + SUBTEST(live_idle_pulse), + SUBTEST(live_heartbeat_fast), + SUBTEST(live_heartbeat_off), + }; + int saved_hangcheck; + int err; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + saved_hangcheck = i915_modparams.enable_hangcheck; + i915_modparams.enable_hangcheck = INT_MAX; + + err = intel_gt_live_subtests(tests, &i915->gt); + + i915_modparams.enable_hangcheck = saved_hangcheck; + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c new file mode 100644 index 000000000000..cbf6b0735272 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2018 Intel Corporation + */ + +#include "i915_selftest.h" +#include "selftest_engine.h" +#include "selftests/igt_atomic.h" + +static int live_engine_pm(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Check we can call intel_engine_pm_put from any context. No + * failures are reported directly, but if we mess up lockdep should + * tell us. + */ + if (intel_gt_pm_wait_for_idle(gt)) { + pr_err("Unable to flush GT pm before test\n"); + return -EBUSY; + } + + GEM_BUG_ON(intel_gt_pm_is_awake(gt)); + for_each_engine(engine, gt, id) { + const typeof(*igt_atomic_phases) *p; + + for (p = igt_atomic_phases; p->name; p++) { + /* + * Acquisition is always synchronous, except if we + * know that the engine is already awake, in which + * case we should use intel_engine_pm_get_if_awake() + * to atomically grab the wakeref. + * + * In practice, + * intel_engine_pm_get(); + * intel_engine_pm_put(); + * occurs in one thread, while simultaneously + * intel_engine_pm_get_if_awake(); + * intel_engine_pm_put(); + * occurs from atomic context in another. 
+ */ + GEM_BUG_ON(intel_engine_pm_is_awake(engine)); + intel_engine_pm_get(engine); + + p->critical_section_begin(); + if (!intel_engine_pm_get_if_awake(engine)) + pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n", + engine->name, p->name); + else + intel_engine_pm_put_async(engine); + intel_engine_pm_put_async(engine); + p->critical_section_end(); + + intel_engine_pm_flush(engine); + + if (intel_engine_pm_is_awake(engine)) { + pr_err("%s is still awake after flushing pm\n", + engine->name); + return -EINVAL; + } + + /* gt wakeref is async (deferred to workqueue) */ + if (intel_gt_pm_wait_for_idle(gt)) { + pr_err("GT failed to idle\n"); + return -EINVAL; + } + } + } + + return 0; +} + +int live_engine_pm_selftests(struct intel_gt *gt) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_engine_pm), + }; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c new file mode 100644 index 000000000000..09ff8e4f88af --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -0,0 +1,79 @@ + +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "selftest_llc.h" +#include "selftest_rc6.h" + +static int live_gt_resume(void *arg) +{ + struct intel_gt *gt = arg; + IGT_TIMEOUT(end_time); + int err; + + /* Do several suspend/resume cycles to check we don't explode! */ + do { + intel_gt_suspend_prepare(gt); + intel_gt_suspend_late(gt); + + if (gt->rc6.enabled) { + pr_err("rc6 still enabled after suspend!\n"); + intel_gt_set_wedged_on_init(gt); + err = -EINVAL; + break; + } + + err = intel_gt_resume(gt); + if (err) + break; + + if (gt->rc6.supported && !gt->rc6.enabled) { + pr_err("rc6 not enabled upon resume!\n"); + intel_gt_set_wedged_on_init(gt); + err = -EINVAL; + break; + } + + err = st_llc_verify(>->llc); + if (err) { + pr_err("llc state not restored upon resume!\n"); + intel_gt_set_wedged_on_init(gt); + break; + } + } while (!__igt_timeout(end_time, NULL)); + + return err; +} + +int intel_gt_pm_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_rc6_manual), + SUBTEST(live_gt_resume), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} + +int intel_gt_pm_late_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + /* + * These tests may leave the system in an undesirable state. + * They are intended to be run last in CI and the system + * rebooted afterwards. 
+ */ + SUBTEST(live_rc6_ctx_wa), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 1ee4c923044f..3e5e6c86e843 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -25,13 +25,15 @@ #include <linux/kthread.h> #include "gem/i915_gem_context.h" + +#include "intel_gt.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" -#include "selftests/igt_wedge_me.h" #include "selftests/igt_atomic.h" #include "selftests/mock_drm.h" @@ -42,7 +44,7 @@ #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ struct hang { - struct drm_i915_private *i915; + struct intel_gt *gt; struct drm_i915_gem_object *hws; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; @@ -50,27 +52,27 @@ struct hang { u32 *batch; }; -static int hang_init(struct hang *h, struct drm_i915_private *i915) +static int hang_init(struct hang *h, struct intel_gt *gt) { void *vaddr; int err; memset(h, 0, sizeof(*h)); - h->i915 = i915; + h->gt = gt; - h->ctx = kernel_context(i915); + h->ctx = kernel_context(gt->i915); if (IS_ERR(h->ctx)) return PTR_ERR(h->ctx); GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx)); - h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + h->hws = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(h->hws)) { err = PTR_ERR(h->hws); goto err_ctx; } - h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + h->obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(h->obj)) { err = PTR_ERR(h->obj); goto err_hws; @@ -85,7 +87,7 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915) h->seqno = memset(vaddr, 0xff, PAGE_SIZE); vaddr = i915_gem_object_pin_map(h->obj, - i915_coherent_map_type(i915)); + i915_coherent_map_type(gt->i915)); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_unpin_hws; @@ -118,7 +120,10 @@ static int move_to_active(struct i915_vma *vma, int err; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, flags); + err = i915_request_await_object(rq, vma->obj, + flags & EXEC_OBJECT_WRITE); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, flags); i915_vma_unlock(vma); return err; @@ -127,47 +132,52 @@ static int move_to_active(struct i915_vma *vma, static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = h->i915; - struct i915_address_space *vm = h->ctx->vm ?: &i915->ggtt.vm; + struct intel_gt *gt = h->gt; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx); + struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; unsigned int flags; + void *vaddr; u32 *batch; int err; - if (i915_gem_object_is_active(h->obj)) { - struct drm_i915_gem_object *obj; - void *vaddr; - - obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) { + i915_vm_put(vm); + return ERR_CAST(obj); + } - vaddr = i915_gem_object_pin_map(obj, - i915_coherent_map_type(h->i915)); - if (IS_ERR(vaddr)) { - i915_gem_object_put(obj); - return ERR_CAST(vaddr); - } + vaddr = i915_gem_object_pin_map(obj, 
i915_coherent_map_type(gt->i915)); + if (IS_ERR(vaddr)) { + i915_gem_object_put(obj); + i915_vm_put(vm); + return ERR_CAST(vaddr); + } - i915_gem_object_unpin_map(h->obj); - i915_gem_object_put(h->obj); + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); - h->obj = obj; - h->batch = vaddr; - } + h->obj = obj; + h->batch = vaddr; vma = i915_vma_instance(h->obj, vm, NULL); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { + i915_vm_put(vm); return ERR_CAST(vma); + } hws = i915_vma_instance(h->hws, vm, NULL); - if (IS_ERR(hws)) + if (IS_ERR(hws)) { + i915_vm_put(vm); return ERR_CAST(hws); + } err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) + if (err) { + i915_vm_put(vm); return ERR_PTR(err); + } err = i915_vma_pin(hws, 0, 0, PIN_USER); if (err) @@ -188,7 +198,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) goto cancel_rq; batch = h->batch; - if (INTEL_GEN(i915) >= 8) { + if (INTEL_GEN(gt->i915) >= 8) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = upper_32_bits(hws_address(hws, rq)); @@ -202,7 +212,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; *batch++ = lower_32_bits(vma->node.start); *batch++ = upper_32_bits(vma->node.start); - } else if (INTEL_GEN(i915) >= 6) { + } else if (INTEL_GEN(gt->i915) >= 6) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); @@ -215,7 +225,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = lower_32_bits(vma->node.start); - } else if (INTEL_GEN(i915) >= 4) { + } else if (INTEL_GEN(gt->i915) >= 4) { *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); @@ -242,7 +252,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = lower_32_bits(vma->node.start); } *batch++ = MI_BATCH_BUFFER_END; /* not reached */ - i915_gem_chipset_flush(h->i915); + intel_gt_chipset_flush(engine->gt); if (rq->engine->emit_init_breadcrumb) { err = rq->engine->emit_init_breadcrumb(rq); @@ -251,7 +261,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) } flags = 0; - if (INTEL_GEN(vm->i915) <= 5) + if (INTEL_GEN(gt->i915) <= 5) flags |= I915_DISPATCH_SECURE; err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); @@ -265,6 +275,7 @@ unpin_hws: i915_vma_unpin(hws); unpin_vma: i915_vma_unpin(vma); + i915_vm_put(vm); return err ? 
ERR_PTR(err) : rq; } @@ -276,7 +287,7 @@ static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(h->i915); + intel_gt_chipset_flush(h->gt); i915_gem_object_unpin_map(h->obj); i915_gem_object_put(h->obj); @@ -286,7 +297,7 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - igt_flush_test(h->i915, I915_WAIT_LOCKED); + igt_flush_test(h->gt->i915); } static bool wait_until_running(struct hang *h, struct i915_request *rq) @@ -299,9 +310,27 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq) 1000)); } +static void engine_heartbeat_disable(struct intel_engine_cs *engine, + unsigned long *saved) +{ + *saved = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} + static int igt_hang_sanitycheck(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_request *rq; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -310,13 +339,12 @@ static int igt_hang_sanitycheck(void *arg) /* Basic check that we can execute our hanging batch */ - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + err = hang_init(&h, gt); if (err) - goto unlock; + return err; - for_each_engine(engine, i915, id) { - struct igt_wedge_me w; + for_each_engine(engine, gt, id) { + struct intel_wedge_me w; long timeout; if (!intel_engine_can_store_dword(engine)) @@ -333,15 +361,15 @@ static int igt_hang_sanitycheck(void *arg) i915_request_get(rq); *h.batch = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); + intel_gt_chipset_flush(engine->gt); i915_request_add(rq); timeout = 0; - igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) + intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) timeout = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) timeout = -EIO; i915_request_put(rq); @@ -356,8 +384,6 @@ static int igt_hang_sanitycheck(void *arg) fini: hang_fini(&h); -unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -368,43 +394,33 @@ static bool wait_for_idle(struct intel_engine_cs *engine) static int igt_reset_nop(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; unsigned int reset_count, count; enum intel_engine_id id; - intel_wakeref_t wakeref; - struct drm_file *file; IGT_TIMEOUT(end_time); int err = 0; /* Check that we can reset during non-user portions of requests */ - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } - - i915_gem_context_clear_bannable(ctx); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - reset_count = i915_reset_count(&i915->gpu_error); + reset_count = i915_reset_count(global); count = 0; do { - mutex_lock(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { + struct intel_context *ce; int i; + ce = intel_context_create(engine); + if (IS_ERR(ce)) { 
+ err = PTR_ERR(ce); + break; + } + for (i = 0; i < 16; i++) { struct i915_request *rq; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -412,83 +428,65 @@ static int igt_reset_nop(void *arg) i915_request_add(rq); } + + intel_context_put(ce); } - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_lock(i915); - i915_reset(i915, ALL_ENGINES, NULL); - igt_global_reset_unlock(i915); - if (i915_reset_failed(i915)) { + igt_global_reset_lock(gt); + intel_gt_reset(gt, ALL_ENGINES, NULL); + igt_global_reset_unlock(gt); + + if (intel_gt_is_wedged(gt)) { err = -EIO; break; } - if (i915_reset_count(&i915->gpu_error) != - reset_count + ++count) { + if (i915_reset_count(global) != reset_count + ++count) { pr_err("Full GPU reset not recorded!\n"); err = -EINVAL; break; } - err = igt_flush_test(i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - -out: - mock_file_free(i915, file); - if (i915_reset_failed(i915)) + if (igt_flush_test(gt->i915)) err = -EIO; return err; } static int igt_reset_nop_engine(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; enum intel_engine_id id; - intel_wakeref_t wakeref; - struct drm_file *file; - int err = 0; /* Check that we can engine-reset during non-user portions */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } - - i915_gem_context_clear_bannable(ctx); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - for_each_engine(engine, i915, id) { - unsigned int reset_count, reset_engine_count; - unsigned int count; + for_each_engine(engine, gt, id) { + unsigned int reset_count, reset_engine_count, count; + struct intel_context *ce; + unsigned long heartbeat; IGT_TIMEOUT(end_time); + int err; - reset_count = i915_reset_count(&i915->gpu_error); - reset_engine_count = i915_reset_engine_count(&i915->gpu_error, - engine); + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + reset_count = i915_reset_count(global); + reset_engine_count = i915_reset_engine_count(global, engine); count = 0; - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + engine_heartbeat_disable(engine, &heartbeat); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { int i; @@ -499,11 +497,10 @@ static int igt_reset_nop_engine(void *arg) break; } - mutex_lock(&i915->drm.struct_mutex); for (i = 0; i < 16; i++) { struct i915_request *rq; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -511,21 +508,19 @@ static int igt_reset_nop_engine(void *arg) i915_request_add(rq); } - mutex_unlock(&i915->drm.struct_mutex); - - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); if (err) { pr_err("i915_reset_engine failed\n"); break; } - if (i915_reset_count(&i915->gpu_error) != reset_count) { + if 
(i915_reset_count(global) != reset_count) { pr_err("Full GPU reset recorded! (engine reset expected)\n"); err = -EINVAL; break; } - if (i915_reset_engine_count(&i915->gpu_error, engine) != + if (i915_reset_engine_count(global, engine) != reset_engine_count + ++count) { pr_err("%s engine reset not recorded!\n", engine->name); @@ -533,31 +528,24 @@ static int igt_reset_nop_engine(void *arg) break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - pr_info("%s(%s): %d resets\n", __func__, engine->name, count); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + engine_heartbeat_enable(engine, heartbeat); - if (err) - break; + pr_info("%s(%s): %d resets\n", __func__, engine->name, count); - err = igt_flush_test(i915, 0); + intel_context_put(ce); + if (igt_flush_test(gt->i915)) + err = -EIO; if (err) - break; + return err; } - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); -out: - mock_file_free(i915, file); - if (i915_reset_failed(i915)) - err = -EIO; - return err; + return 0; } -static int __igt_reset_engine(struct drm_i915_private *i915, bool active) +static int __igt_reset_engine(struct intel_gt *gt, bool active) { + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; struct hang h; @@ -565,19 +553,18 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) /* Check that we can issue an engine reset on an idle engine (no-op) */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; if (active) { - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - mutex_unlock(&i915->drm.struct_mutex); + err = hang_init(&h, gt); if (err) return err; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; + unsigned long heartbeat; IGT_TIMEOUT(end_time); if (active && !intel_engine_can_store_dword(engine)) @@ -590,30 +577,26 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - reset_count = i915_reset_count(&i915->gpu_error); - reset_engine_count = i915_reset_engine_count(&i915->gpu_error, - engine); + reset_count = i915_reset_count(global); + reset_engine_count = i915_reset_engine_count(global, engine); - intel_engine_pm_get(engine); - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + engine_heartbeat_disable(engine, &heartbeat); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { if (active) { struct i915_request *rq; - mutex_lock(&i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(&i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); @@ -628,19 +611,19 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) i915_request_put(rq); } - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); if (err) { pr_err("i915_reset_engine failed\n"); break; } - if (i915_reset_count(&i915->gpu_error) != reset_count) { + if (i915_reset_count(global) != reset_count) { pr_err("Full 
GPU reset recorded! (engine reset expected)\n"); err = -EINVAL; break; } - if (i915_reset_engine_count(&i915->gpu_error, engine) != + if (i915_reset_engine_count(global, engine) != ++reset_engine_count) { pr_err("%s engine reset not recorded!\n", engine->name); @@ -648,25 +631,22 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - intel_engine_pm_put(engine); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + engine_heartbeat_enable(engine, heartbeat); if (err) break; - err = igt_flush_test(i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) err = -EIO; - if (active) { - mutex_lock(&i915->drm.struct_mutex); + if (active) hang_fini(&h); - mutex_unlock(&i915->drm.struct_mutex); - } return err; } @@ -707,7 +687,7 @@ static int active_request_put(struct i915_request *rq) rq->fence.seqno); GEM_TRACE_DUMP(); - i915_gem_set_wedged(rq->i915); + intel_gt_set_wedged(rq->engine->gt); err = -EIO; } @@ -722,47 +702,42 @@ static int active_engine(void *data) struct active_engine *arg = data; struct intel_engine_cs *engine = arg->engine; struct i915_request *rq[8] = {}; - struct i915_gem_context *ctx[ARRAY_SIZE(rq)]; - struct drm_file *file; - unsigned long count = 0; + struct intel_context *ce[ARRAY_SIZE(rq)]; + unsigned long count; int err = 0; - file = mock_file(engine->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - for (count = 0; count < ARRAY_SIZE(ctx); count++) { - mutex_lock(&engine->i915->drm.struct_mutex); - ctx[count] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); - if (IS_ERR(ctx[count])) { - err = PTR_ERR(ctx[count]); + for (count = 0; count < ARRAY_SIZE(ce); count++) { + ce[count] = intel_context_create(engine); + if (IS_ERR(ce[count])) { + err = PTR_ERR(ce[count]); while (--count) - i915_gem_context_put(ctx[count]); - goto err_file; + intel_context_put(ce[count]); + return err; } } + count = 0; while (!kthread_should_stop()) { unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); struct i915_request *old = rq[idx]; struct i915_request *new; - mutex_lock(&engine->i915->drm.struct_mutex); - new = igt_request_alloc(ctx[idx], engine); + new = intel_context_create_request(ce[idx]); if (IS_ERR(new)) { - mutex_unlock(&engine->i915->drm.struct_mutex); err = PTR_ERR(new); break; } - if (arg->flags & TEST_PRIORITY) - ctx[idx]->sched.priority = - i915_prandom_u32_max_state(512, &prng); - rq[idx] = i915_request_get(new); i915_request_add(new); - mutex_unlock(&engine->i915->drm.struct_mutex); + + if (engine->schedule && arg->flags & TEST_PRIORITY) { + struct i915_sched_attr attr = { + .priority = + i915_prandom_u32_max_state(512, &prng), + }; + engine->schedule(rq[idx], &attr); + } err = active_request_put(old); if (err) @@ -777,17 +752,18 @@ static int active_engine(void *data) /* Keep the first error */ if (!err) err = err__; + + intel_context_put(ce[count]); } -err_file: - mock_file_free(engine->i915, file); return err; } -static int __igt_reset_engines(struct drm_i915_private *i915, +static int __igt_reset_engines(struct intel_gt *gt, const char *test_name, unsigned int flags) { + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine, *other; enum intel_engine_id id, tmp; struct hang h; @@ -797,13 +773,11 @@ static int __igt_reset_engines(struct drm_i915_private *i915, * with any other engine. 
*/ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - mutex_unlock(&i915->drm.struct_mutex); + err = hang_init(&h, gt); if (err) return err; @@ -811,10 +785,11 @@ static int __igt_reset_engines(struct drm_i915_private *i915, h.ctx->sched.priority = 1024; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct active_engine threads[I915_NUM_ENGINES] = {}; - unsigned long global = i915_reset_count(&i915->gpu_error); + unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; + unsigned long heartbeat; IGT_TIMEOUT(end_time); if (flags & TEST_ACTIVE && @@ -829,12 +804,11 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } memset(threads, 0, sizeof(threads)); - for_each_engine(other, i915, tmp) { + for_each_engine(other, gt, tmp) { struct task_struct *tsk; threads[tmp].resets = - i915_reset_engine_count(&i915->gpu_error, - other); + i915_reset_engine_count(global, other); if (!(flags & TEST_OTHERS)) continue; @@ -856,26 +830,25 @@ static int __igt_reset_engines(struct drm_i915_private *i915, get_task_struct(tsk); } - intel_engine_pm_get(engine); - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + yield(); /* start all threads before we begin */ + + engine_heartbeat_disable(engine, &heartbeat); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(&i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); @@ -888,7 +861,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } } - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); if (err) { pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", engine->name, test_name, err); @@ -900,7 +873,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, if (rq) { if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("i915_reset_engine(%s:%s):" " failed to complete request after reset\n", @@ -910,7 +883,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, i915_request_put(rq); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; break; } @@ -920,7 +893,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("i915_reset_engine(%s:%s):" " failed to idle after reset\n", @@ -932,12 +905,13 @@ static int __igt_reset_engines(struct drm_i915_private *i915, break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - intel_engine_pm_put(engine); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + engine_heartbeat_enable(engine, heartbeat); + pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); - reported = 
i915_reset_engine_count(&i915->gpu_error, engine); + reported = i915_reset_engine_count(global, engine); reported -= threads[engine->id].resets; if (reported != count) { pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", @@ -947,7 +921,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } unwind: - for_each_engine(other, i915, tmp) { + for_each_engine(other, gt, tmp) { int ret; if (!threads[tmp].task) @@ -962,22 +936,21 @@ unwind: } put_task_struct(threads[tmp].task); - if (other != engine && + if (other->uabi_class != engine->uabi_class && threads[tmp].resets != - i915_reset_engine_count(&i915->gpu_error, other)) { + i915_reset_engine_count(global, other)) { pr_err("Innocent engine %s was reset (count=%ld)\n", other->name, - i915_reset_engine_count(&i915->gpu_error, - other) - + i915_reset_engine_count(global, other) - threads[tmp].resets); if (!err) err = -EINVAL; } } - if (global != i915_reset_count(&i915->gpu_error)) { + if (device != i915_reset_count(global)) { pr_err("Global reset (count=%ld)!\n", - i915_reset_count(&i915->gpu_error) - global); + i915_reset_count(global) - device); if (!err) err = -EINVAL; } @@ -985,21 +958,16 @@ unwind: if (err) break; - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); + err = igt_flush_test(gt->i915); if (err) break; } - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) err = -EIO; - if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); + if (flags & TEST_ACTIVE) hang_fini(&h); - mutex_unlock(&i915->drm.struct_mutex); - } return err; } @@ -1024,13 +992,13 @@ static int igt_reset_engines(void *arg) }, { } }; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; typeof(*phases) *p; int err; for (p = phases; p->name; p++) { if (p->flags & TEST_PRIORITY) { - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) continue; } @@ -1042,38 +1010,38 @@ static int igt_reset_engines(void *arg) return 0; } -static u32 fake_hangcheck(struct drm_i915_private *i915, - intel_engine_mask_t mask) +static u32 fake_hangcheck(struct intel_gt *gt, intel_engine_mask_t mask) { - u32 count = i915_reset_count(&i915->gpu_error); + u32 count = i915_reset_count(>->i915->gpu_error); - i915_reset(i915, mask, NULL); + intel_gt_reset(gt, mask, NULL); return count; } static int igt_reset_wait(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct i915_request *rq; unsigned int reset_count; struct hang h; long timeout; int err; - if (!intel_engine_can_store_dword(i915->engine[RCS0])) + if (!engine || !intel_engine_can_store_dword(engine)) return 0; /* Check that we detect a stuck waiter and issue a reset */ - igt_global_reset_lock(i915); + igt_global_reset_lock(gt); - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + err = hang_init(&h, gt); if (err) goto unlock; - rq = hang_create_request(&h, i915->engine[RCS0]); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto fini; @@ -1083,19 +1051,19 @@ static int igt_reset_wait(void *arg) i915_request_add(rq); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, 
hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto out_rq; } - reset_count = fake_hangcheck(i915, ALL_ENGINES); + reset_count = fake_hangcheck(gt, ALL_ENGINES); timeout = i915_request_wait(rq, 0, 10); if (timeout < 0) { @@ -1105,7 +1073,7 @@ static int igt_reset_wait(void *arg) goto out_rq; } - if (i915_reset_count(&i915->gpu_error) == reset_count) { + if (i915_reset_count(global) == reset_count) { pr_err("No GPU reset recorded!\n"); err = -EINVAL; goto out_rq; @@ -1116,10 +1084,9 @@ out_rq: fini: hang_fini(&h); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + igt_global_reset_unlock(gt); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; return err; @@ -1134,15 +1101,14 @@ static int evict_vma(void *data) { struct evict_vma *arg = data; struct i915_address_space *vm = arg->vma->vm; - struct drm_i915_private *i915 = vm->i915; struct drm_mm_node evict = arg->vma->node; int err; complete(&arg->completion); - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&vm->mutex); err = i915_gem_evict_for_node(vm, &evict, 0); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&vm->mutex); return err; } @@ -1150,57 +1116,62 @@ static int evict_vma(void *data) static int evict_fence(void *data) { struct evict_vma *arg = data; - struct drm_i915_private *i915 = arg->vma->vm->i915; int err; complete(&arg->completion); - mutex_lock(&i915->drm.struct_mutex); - /* Mark the fence register as dirty to force the mmio update. */ err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512); if (err) { pr_err("Invalid Y-tiling settings; err:%d\n", err); - goto out_unlock; + return err; + } + + err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE); + if (err) { + pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err); + return err; } err = i915_vma_pin_fence(arg->vma); + i915_vma_unpin(arg->vma); if (err) { pr_err("Unable to pin Y-tiled fence; err:%d\n", err); - goto out_unlock; + return err; } i915_vma_unpin_fence(arg->vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - return err; + return 0; } -static int __igt_reset_evict_vma(struct drm_i915_private *i915, +static int __igt_reset_evict_vma(struct intel_gt *gt, struct i915_address_space *vm, int (*fn)(void *), unsigned int flags) { + struct intel_engine_cs *engine = gt->engine[RCS0]; struct drm_i915_gem_object *obj; struct task_struct *tsk = NULL; struct i915_request *rq; struct evict_vma arg; struct hang h; + unsigned int pin_flags; int err; - if (!intel_engine_can_store_dword(i915->engine[RCS0])) + if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE) + return 0; + + if (!engine || !intel_engine_can_store_dword(engine)) return 0; /* Check that we can recover an unbind stuck on a hanging request */ - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + err = hang_init(&h, gt); if (err) - goto unlock; + return err; - obj = i915_gem_object_create_internal(i915, SZ_1M); + obj = i915_gem_object_create_internal(gt->i915, SZ_1M); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto fini; @@ -1220,16 +1191,18 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, goto out_obj; } - rq = hang_create_request(&h, i915->engine[RCS0]); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_obj; } - err = i915_vma_pin(arg.vma, 0, 0, - i915_vma_is_ggtt(arg.vma) ? 
- PIN_GLOBAL | PIN_MAPPABLE : - PIN_USER); + pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER; + + if (flags & EXEC_OBJECT_NEEDS_FENCE) + pin_flags |= PIN_MAPPABLE; + + err = i915_vma_pin(arg.vma, 0, 0, pin_flags); if (err) { i915_request_add(rq); goto out_obj; @@ -1246,7 +1219,10 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, } i915_vma_lock(arg.vma); - err = i915_vma_move_to_active(arg.vma, rq, flags); + err = i915_request_await_object(rq, arg.vma->obj, + flags & EXEC_OBJECT_WRITE); + if (err == 0) + err = i915_vma_move_to_active(arg.vma, rq, flags); i915_vma_unlock(arg.vma); if (flags & EXEC_OBJECT_NEEDS_FENCE) @@ -1258,16 +1234,14 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, if (err) goto out_rq; - mutex_unlock(&i915->drm.struct_mutex); - if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); goto out_reset; } @@ -1284,41 +1258,37 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, wait_for_completion(&arg.completion); if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("igt/evict_vma kthread did not wait\n"); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); goto out_reset; } out_reset: - igt_global_reset_lock(i915); - fake_hangcheck(rq->i915, rq->engine->mask); - igt_global_reset_unlock(i915); + igt_global_reset_lock(gt); + fake_hangcheck(gt, rq->engine->mask); + igt_global_reset_unlock(gt); if (tsk) { - struct igt_wedge_me w; + struct intel_wedge_me w; /* The reset, even indirectly, should take less than 10ms. 
*/ - igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) + intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) err = kthread_stop(tsk); put_task_struct(tsk); } - mutex_lock(&i915->drm.struct_mutex); out_rq: i915_request_put(rq); out_obj: i915_gem_object_put(obj); fini: hang_fini(&h); -unlock: - mutex_unlock(&i915->drm.struct_mutex); - - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; return err; @@ -1326,56 +1296,48 @@ unlock: static int igt_reset_evict_ggtt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; - return __igt_reset_evict_vma(i915, &i915->ggtt.vm, + return __igt_reset_evict_vma(gt, >->ggtt->vm, evict_vma, EXEC_OBJECT_WRITE); } static int igt_reset_evict_ppgtt(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx; - struct drm_file *file; + struct intel_gt *gt = arg; + struct i915_ppgtt *ppgtt; int err; - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); + /* aliasing == global gtt locking, covered above */ + if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL) + return 0; - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } + ppgtt = i915_ppgtt_create(gt); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); - err = 0; - if (ctx->vm) /* aliasing == global gtt locking, covered above */ - err = __igt_reset_evict_vma(i915, ctx->vm, - evict_vma, EXEC_OBJECT_WRITE); + err = __igt_reset_evict_vma(gt, &ppgtt->vm, + evict_vma, EXEC_OBJECT_WRITE); + i915_vm_put(&ppgtt->vm); -out: - mock_file_free(i915, file); return err; } static int igt_reset_evict_fence(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; - return __igt_reset_evict_vma(i915, &i915->ggtt.vm, + return __igt_reset_evict_vma(gt, >->ggtt->vm, evict_fence, EXEC_OBJECT_NEEDS_FENCE); } -static int wait_for_others(struct drm_i915_private *i915, +static int wait_for_others(struct intel_gt *gt, struct intel_engine_cs *exclude) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (engine == exclude) continue; @@ -1388,7 +1350,8 @@ static int wait_for_others(struct drm_i915_private *i915, static int igt_reset_queue(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; struct hang h; @@ -1396,14 +1359,13 @@ static int igt_reset_queue(void *arg) /* Check that we replay pending requests following a hang */ - igt_global_reset_lock(i915); + igt_global_reset_lock(gt); - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + err = hang_init(&h, gt); if (err) goto unlock; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; @@ -1444,7 +1406,7 @@ static int igt_reset_queue(void *arg) * (hangcheck), or we focus on resetting just one * engine and so avoid repeatedly resetting innocents. 
*/ - err = wait_for_others(i915, engine); + err = wait_for_others(gt, engine); if (err) { pr_err("%s(%s): Failed to idle other inactive engines after device reset\n", __func__, engine->name); @@ -1452,12 +1414,12 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); goto fini; } if (!wait_until_running(&h, prev)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s(%s): Failed to start request %llx, at %x\n", __func__, engine->name, @@ -1468,13 +1430,13 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto fini; } - reset_count = fake_hangcheck(i915, BIT(id)); + reset_count = fake_hangcheck(gt, BIT(id)); if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1494,7 +1456,7 @@ static int igt_reset_queue(void *arg) goto fini; } - if (i915_reset_count(&i915->gpu_error) == reset_count) { + if (i915_reset_count(global) == reset_count) { pr_err("No GPU reset recorded!\n"); i915_request_put(rq); i915_request_put(prev); @@ -1509,11 +1471,11 @@ static int igt_reset_queue(void *arg) pr_info("%s: Completed %d resets\n", engine->name, count); *h.batch = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); + intel_gt_chipset_flush(engine->gt); i915_request_put(prev); - err = igt_flush_test(i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } @@ -1521,10 +1483,9 @@ static int igt_reset_queue(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + igt_global_reset_unlock(gt); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; return err; @@ -1532,26 +1493,25 @@ unlock: static int igt_handle_error(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct hang h; struct i915_request *rq; - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; int err; /* Check that we can issue a global GPU and engine reset */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; if (!engine || !intel_engine_can_store_dword(engine)) return 0; - mutex_lock(&i915->drm.struct_mutex); - - err = hang_init(&h, i915); + err = hang_init(&h, gt); if (err) - goto err_unlock; + return err; rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { @@ -1563,28 +1523,24 @@ static int igt_handle_error(void *arg) i915_request_add(rq); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_request; } - mutex_unlock(&i915->drm.struct_mutex); - /* Temporarily disable error capture */ - error = xchg(&i915->gpu_error.first_error, (void *)-1); + error = xchg(&global->first_error, (void *)-1); - i915_handle_error(i915, engine->mask, 0, NULL); + intel_gt_handle_error(gt, engine->mask, 0, NULL); - xchg(&i915->gpu_error.first_error, error); - - 
mutex_lock(&i915->drm.struct_mutex); + xchg(&global->first_error, error); if (rq->fence.error != -EIO) { pr_err("Guilty request not identified!\n"); @@ -1596,8 +1552,6 @@ err_request: i915_request_put(rq); err_fini: hang_fini(&h); -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1611,10 +1565,10 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", engine->name, mode, p->name); - tasklet_disable_nosync(t); + tasklet_disable(t); p->critical_section_begin(); - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); p->critical_section_end(); tasklet_enable(t); @@ -1629,7 +1583,6 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, static int igt_atomic_reset_engine(struct intel_engine_cs *engine, const struct igt_atomic_section *p) { - struct drm_i915_private *i915 = engine->i915; struct i915_request *rq; struct hang h; int err; @@ -1638,7 +1591,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, if (err) return err; - err = hang_init(&h, i915); + err = hang_init(&h, engine->gt); if (err) return err; @@ -1657,16 +1610,16 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, pr_err("%s(%s): Failed to start request %llx, at %x\n", __func__, engine->name, rq->fence.seqno, hws_seqno(&h, rq)); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(engine->gt); err = -EIO; } if (err == 0) { - struct igt_wedge_me w; + struct intel_wedge_me w; - igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/) + intel_wedge_on_timeout(&w, engine->gt, HZ / 20 /* 50ms */) i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(engine->gt)) err = -EIO; } @@ -1678,30 +1631,29 @@ out: static int igt_reset_engines_atomic(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; int err = 0; /* Check that the engines resets are usable from atomic context */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - igt_global_reset_lock(i915); - mutex_lock(&i915->drm.struct_mutex); + igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ - if (!igt_force_reset(i915)) + if (!igt_force_reset(gt)) goto unlock; for (p = igt_atomic_phases; p->name; p++) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { err = igt_atomic_reset_engine(engine, p); if (err) goto out; @@ -1710,11 +1662,9 @@ static int igt_reset_engines_atomic(void *arg) out: /* As we poke around the guts, do a full reset before continuing. 
*/ - igt_force_reset(i915); - + igt_force_reset(gt); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + igt_global_reset_unlock(gt); return err; } @@ -1736,28 +1686,21 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_reset_evict_fence), SUBTEST(igt_handle_error), }; + struct intel_gt *gt = &i915->gt; intel_wakeref_t wakeref; - bool saved_hangcheck; int err; - if (!intel_has_gpu_reset(i915)) + if (!intel_has_gpu_reset(gt)) return 0; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; /* we're long past hope of a successful reset */ - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); - drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */ - - err = i915_subtests(tests, i915); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); + err = intel_gt_live_subtests(tests, gt); - i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c new file mode 100644 index 000000000000..fd3770e48ac7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_pm.h" /* intel_gpu_freq() */ +#include "selftest_llc.h" +#include "intel_rps.h" + +static int gen6_verify_ring_freq(struct intel_llc *llc) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct ia_constants consts; + intel_wakeref_t wakeref; + unsigned int gpu_freq; + int err = 0; + + wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm); + + if (!get_ia_constants(llc, &consts)) { + err = -ENODEV; + goto out_rpm; + } + + for (gpu_freq = consts.min_gpu_freq; + gpu_freq <= consts.max_gpu_freq; + gpu_freq++) { + struct intel_rps *rps = &llc_to_gt(llc)->rps; + + unsigned int ia_freq, ring_freq, found; + u32 val; + + calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq); + + val = gpu_freq; + if (sandybridge_pcode_read(i915, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &val, NULL)) { + pr_err("Failed to read freq table[%d], range [%d, %d]\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq); + err = -ENXIO; + break; + } + + found = (val >> 0) & 0xff; + if (found != ia_freq) { + pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, + intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + found, ia_freq); + err = -EINVAL; + break; + } + + found = (val >> 8) & 0xff; + if (found != ring_freq) { + pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, + intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? 
GEN9_FREQ_SCALER : 1)), + found, ring_freq); + err = -EINVAL; + break; + } + } + +out_rpm: + intel_runtime_pm_put(llc_to_gt(llc)->uncore->rpm, wakeref); + return err; +} + +int st_llc_verify(struct intel_llc *llc) +{ + int err = 0; + + if (HAS_LLC(llc_to_gt(llc)->i915)) + err = gen6_verify_ring_freq(llc); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.h b/drivers/gpu/drm/i915/gt/selftest_llc.h new file mode 100644 index 000000000000..873f896e72f2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_llc.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef SELFTEST_LLC_H +#define SELFTEST_LLC_H + +struct intel_llc; + +int st_llc_verify(struct intel_llc *llc); + +#endif /* SELFTEST_LLC_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 401e8b539297..65718ca2326e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -19,75 +20,776 @@ #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) +#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */ + +static struct i915_vma *create_scratch(struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + +static void engine_heartbeat_disable(struct intel_engine_cs *engine, + unsigned long *saved) +{ + *saved = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} + static int live_sanitycheck(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; enum intel_engine_id id; struct igt_spinner spin; - intel_wakeref_t wakeref; - int err = -ENOMEM; + int err = 0; - if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin, i915)) - goto err_unlock; - - ctx = kernel_context(i915); - if (!ctx) - goto err_spin; + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { + struct intel_context *ce; struct i915_request *rq; - rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP); + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_ctx; + goto out_ctx; } i915_request_add(rq); if (!igt_wait_for_spinner(&spin, rq)) { GEM_TRACE("spinner failed 
to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; - goto err_ctx; + goto out_ctx; } igt_spinner_end(&spin); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(gt->i915)) { err = -EIO; - goto err_ctx; + goto out_ctx; } + +out_ctx: + intel_context_put(ce); + if (err) + break; } + igt_spinner_fini(&spin); + return err; +} + +static int live_unlite_restore(struct intel_gt *gt, int prio) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = -ENOMEM; + + /* + * Check that we can correctly context switch between 2 instances + * on the same engine from the same parent context. + */ + + if (igt_spinner_init(&spin, gt)) + return err; + err = 0; -err_ctx: - kernel_context_close(ctx); -err_spin: + for_each_engine(engine, gt, id) { + struct intel_context *ce[2] = {}; + struct i915_request *rq[2]; + struct igt_live_test t; + unsigned long saved; + int n; + + if (prio && !intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + engine_heartbeat_disable(engine, &saved); + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + /* + * Setup the pair of contexts such that if we + * lite-restore using the RING_TAIL from ce[1] it + * will execute garbage from ce[0]->ring. + */ + memset(tmp->ring->vaddr, + POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ + tmp->ring->vma->size); + + ce[n] = tmp; + } + GEM_BUG_ON(!ce[1]->ring->size); + intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); + __execlists_update_reg_state(ce[1], engine); + + rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + goto err_ce; + } + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); + + if (!igt_wait_for_spinner(&spin, rq[0])) { + i915_request_put(rq[0]); + goto err_ce; + } + + rq[1] = i915_request_create(ce[1]); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + i915_request_put(rq[0]); + goto err_ce; + } + + if (!prio) { + /* + * Ensure we do the switch to ce[1] on completion. + * + * rq[0] is already submitted, so this should reduce + * to a no-op (a wait on a request on the same engine + * uses the submit fence, not the completion fence), + * but it will install a dependency on rq[1] for rq[0] + * that will prevent the pair being reordered by + * timeslicing. 
+ */ + i915_request_await_dma_fence(rq[1], &rq[0]->fence); + } + + i915_request_get(rq[1]); + i915_request_add(rq[1]); + GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); + i915_request_put(rq[0]); + + if (prio) { + struct i915_sched_attr attr = { + .priority = prio, + }; + + /* Alternatively preempt the spinner with ce[1] */ + engine->schedule(rq[1], &attr); + } + + /* And switch back to ce[0] for good measure */ + rq[0] = i915_request_create(ce[0]); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + i915_request_put(rq[1]); + goto err_ce; + } + + i915_request_await_dma_fence(rq[0], &rq[1]->fence); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + +err_ce: + tasklet_kill(&engine->execlists.tasklet); /* flush submission */ + igt_spinner_end(&spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + + engine_heartbeat_enable(engine, saved); + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + +static int live_unlite_switch(void *arg) +{ + return live_unlite_restore(arg, 0); +} + +static int live_unlite_preempt(void *arg) +{ + return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); +} + +static int live_hold_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * In order to support offline error capture for fast preempt reset, + * we need to decouple the guilty request and ensure that it and its + * descendents are not executed while the capture is in progress. + */ + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + unsigned long heartbeat; + struct i915_request *rq; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + engine_heartbeat_disable(engine, &heartbeat); + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out; + } + + /* We have our request executing, now remove it and reset */ + + if (test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)) { + intel_gt_set_wedged(gt); + err = -EBUSY; + goto out; + } + tasklet_disable(&engine->execlists.tasklet); + + engine->execlists.tasklet.func(engine->execlists.tasklet.data); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + i915_request_get(rq); + execlists_hold(engine, rq); + GEM_BUG_ON(!i915_request_on_hold(rq)); + + intel_engine_reset(engine, NULL); + GEM_BUG_ON(rq->fence.error != -EIO); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, + >->reset.flags); + + /* Check that we do not resubmit the held request */ + if (!i915_request_wait(rq, 0, HZ / 5)) { + pr_err("%s: on hold request completed!\n", + engine->name); + i915_request_put(rq); + err = -EIO; + goto out; + } + GEM_BUG_ON(!i915_request_on_hold(rq)); + + /* But is resubmitted on release */ + execlists_unhold(engine, rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("%s: held request did not complete!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -ETIME; + } + i915_request_put(rq); + 
+out: + engine_heartbeat_enable(engine, heartbeat); + intel_context_put(ce); + if (err) + break; + } + igt_spinner_fini(&spin); -err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int +emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 10); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma) + 4 * idx; + *cs++ = 0; + + if (idx > 0) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); + *cs++ = 0; + *cs++ = 1; + } else { + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + intel_ring_advance(rq, cs); + return 0; +} + +static struct i915_request * +semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) +{ + struct intel_context *ce; + struct i915_request *rq; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + goto out_ce; + + err = 0; + if (rq->engine->emit_init_breadcrumb) + err = rq->engine->emit_init_breadcrumb(rq); + if (err == 0) + err = emit_semaphore_chain(rq, vma, idx); + if (err == 0) + i915_request_get(rq); + i915_request_add(rq); + if (err) + rq = ERR_PTR(err); + +out_ce: + intel_context_put(ce); + return rq; +} + +static int +release_queue(struct intel_engine_cs *engine, + struct i915_vma *vma, + int idx, int prio) +{ + struct i915_sched_attr attr = { + .priority = prio, + }; + struct i915_request *rq; + u32 *cs; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); + *cs++ = 0; + *cs++ = 1; + + intel_ring_advance(rq, cs); + + i915_request_get(rq); + i915_request_add(rq); + + local_bh_disable(); + engine->schedule(rq, &attr); + local_bh_enable(); /* kick tasklet */ + + i915_request_put(rq); + + return 0; +} + +static int +slice_semaphore_queue(struct intel_engine_cs *outer, + struct i915_vma *vma, + int count) +{ + struct intel_engine_cs *engine; + struct i915_request *head; + enum intel_engine_id id; + int err, i, n = 0; + + head = semaphore_queue(outer, vma, n++); + if (IS_ERR(head)) + return PTR_ERR(head); + + for_each_engine(engine, outer->gt, id) { + for (i = 0; i < count; i++) { + struct i915_request *rq; + + rq = semaphore_queue(engine, vma, n++); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_put(rq); + } + } + + err = release_queue(outer, vma, n, INT_MAX); + if (err) + goto out; + + if (i915_request_wait(head, 0, + 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { + pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", + count, n); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(outer->gt); + err = -EIO; + } + +out: + i915_request_put(head); + return err; +} + +static int live_timeslice_preempt(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + void *vaddr; + int err = 0; + int count; + + /* + * If a 
request takes too long, we would like to give other users + * a fair go on the GPU. In particular, users may create batches + * that wait upon external input, where that input may even be + * supplied by another GPU job. To avoid blocking forever, we + * need to preempt the current task and replace it with another + * ready task. + */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + for_each_prime_number_from(count, 1, 16) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) { + unsigned long saved; + + if (!intel_engine_has_preemption(engine)) + continue; + + memset(vaddr, 0, PAGE_SIZE); + + engine_heartbeat_disable(engine, &saved); + err = slice_semaphore_queue(engine, vma, count); + engine_heartbeat_enable(engine, saved); + if (err) + goto err_pin; + + if (igt_flush_test(gt->i915)) { + err = -EIO; + goto err_pin; + } + } + } + +err_pin: + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); + return err; +} + +static struct i915_request *nop_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int wait_for_submit(struct intel_engine_cs *engine, + struct i915_request *rq, + unsigned long timeout) +{ + timeout += jiffies; + do { + cond_resched(); + intel_engine_flush_submission(engine); + if (i915_request_is_active(rq)) + return 0; + } while (time_before(jiffies, timeout)); + + return -ETIME; +} + +static long timeslice_threshold(const struct intel_engine_cs *engine) +{ + return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; +} + +static int live_timeslice_queue(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct i915_vma *vma; + void *vaddr; + int err = 0; + + /* + * Make sure that even if ELSP[0] and ELSP[1] are filled with + * timeslicing between them disabled, we *do* enable timeslicing + * if the queue demands it. (Normally, we do not submit if + * ELSP[1] is already occupied, so must rely on timeslicing to + * eject ELSP[0] in favour of the queue.) 
+ */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + for_each_engine(engine, gt, id) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + }; + struct i915_request *rq, *nop; + unsigned long saved; + + if (!intel_engine_has_preemption(engine)) + continue; + + engine_heartbeat_disable(engine, &saved); + memset(vaddr, 0, PAGE_SIZE); + + /* ELSP[0]: semaphore wait */ + rq = semaphore_queue(engine, vma, 0); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_heartbeat; + } + engine->schedule(rq, &attr); + err = wait_for_submit(engine, rq, HZ / 2); + if (err) { + pr_err("%s: Timed out trying to submit semaphores\n", + engine->name); + goto err_rq; + } + + /* ELSP[1]: nop request */ + nop = nop_request(engine); + if (IS_ERR(nop)) { + err = PTR_ERR(nop); + goto err_rq; + } + err = wait_for_submit(engine, nop, HZ / 2); + i915_request_put(nop); + if (err) { + pr_err("%s: Timed out trying to submit nop\n", + engine->name); + goto err_rq; + } + + GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + /* Queue: semaphore signal, matching priority as semaphore */ + err = release_queue(engine, vma, 1, effective_prio(rq)); + if (err) + goto err_rq; + + intel_engine_flush_submission(engine); + if (!READ_ONCE(engine->execlists.timer.expires) && + !i915_request_completed(rq)) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + GEM_TRACE_DUMP(); + + memset(vaddr, 0xff, PAGE_SIZE); + err = -EINVAL; + } + + /* Timeslice every jiffy, so within 2 we should signal */ + if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + pr_err("%s: Failed to timeslice into queue\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + memset(vaddr, 0xff, PAGE_SIZE); + err = -EIO; + } +err_rq: + i915_request_put(rq); +err_heartbeat: + engine_heartbeat_enable(engine, saved); + if (err) + break; + } + + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); return err; } static int live_busywait_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct intel_engine_cs *engine; struct drm_i915_gem_object *obj; struct i915_vma *vma; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; u32 *map; @@ -96,22 +798,19 @@ static int live_busywait_preempt(void *arg) * preempt the busywaits used to synchronise between rings. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) - goto err_unlock; + return -ENOMEM; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_ctx_lo; @@ -123,7 +822,7 @@ static int live_busywait_preempt(void *arg) goto err_obj; } - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_map; @@ -133,15 +832,18 @@ static int live_busywait_preempt(void *arg) if (err) goto err_map; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *lo, *hi; struct igt_live_test t; u32 *cs; + if (!intel_engine_has_preemption(engine)) + continue; + if (!intel_engine_can_store_dword(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_vma; } @@ -184,15 +886,19 @@ static int live_busywait_preempt(void *arg) *cs++ = 0; intel_ring_advance(lo, cs); + + i915_request_get(lo); i915_request_add(lo); if (wait_for(READ_ONCE(*map), 10)) { + i915_request_put(lo); err = -ETIMEDOUT; goto err_vma; } /* Low priority request should be busywaiting now */ if (i915_request_wait(lo, 0, 1) != -ETIME) { + i915_request_put(lo); pr_err("%s: Busywaiting request did not!\n", engine->name); err = -EIO; @@ -202,6 +908,7 @@ static int live_busywait_preempt(void *arg) hi = igt_request_alloc(ctx_hi, engine); if (IS_ERR(hi)) { err = PTR_ERR(hi); + i915_request_put(lo); goto err_vma; } @@ -209,6 +916,7 @@ static int live_busywait_preempt(void *arg) if (IS_ERR(cs)) { err = PTR_ERR(cs); i915_request_add(hi); + i915_request_put(lo); goto err_vma; } @@ -221,7 +929,7 @@ static int live_busywait_preempt(void *arg) i915_request_add(hi); if (i915_request_wait(lo, 0, HZ / 5) < 0) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to preempt semaphore busywait!\n", engine->name); @@ -229,11 +937,13 @@ static int live_busywait_preempt(void *arg) intel_engine_dump(engine, &p, "%s\n", engine->name); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + i915_request_put(lo); + intel_gt_set_wedged(gt); err = -EIO; goto err_vma; } GEM_BUG_ON(READ_ONCE(*map)); + i915_request_put(lo); if (igt_live_test_end(&t)) { err = -EIO; @@ -252,65 +962,74 @@ err_ctx_lo: kernel_context_close(ctx_lo); err_ctx_hi: kernel_context_close(ctx_hi); -err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } +static struct i915_request * +spinner_create_request(struct igt_spinner *spin, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 arb) +{ + struct intel_context *ce; + struct i915_request *rq; + + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + rq = igt_spinner_create_request(spin, ce, arb); + intel_context_put(ce); + return rq; +} + static int live_preempt(void 
*arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) pr_err("Logical preemption supported, but not exposed\n"); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin_hi, i915)) - goto err_unlock; + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; - if (igt_spinner_init(&spin_lo, i915)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct igt_live_test t; struct i915_request *rq; if (!intel_engine_has_preemption(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_ctx_lo; } - rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_ctx_lo; @@ -320,13 +1039,13 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } - rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) { igt_spinner_end(&spin_lo); err = PTR_ERR(rq); @@ -337,7 +1056,7 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -360,58 +1079,53 @@ err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } static int live_late_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; struct i915_sched_attr attr = {}; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin_hi, i915)) - goto err_unlock; + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; - if (igt_spinner_init(&spin_lo, i915)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; - 
ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; - for_each_engine(engine, i915, id) { + /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */ + ctx_lo->sched.priority = I915_USER_PRIORITY(1); + + for_each_engine(engine, gt, id) { struct igt_live_test t; struct i915_request *rq; if (!intel_engine_has_preemption(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_ctx_lo; } - rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_ctx_lo; @@ -423,8 +1137,8 @@ static int live_late_preempt(void *arg) goto err_wedged; } - rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, - MI_NOOP); + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_NOOP); if (IS_ERR(rq)) { igt_spinner_end(&spin_lo); err = PTR_ERR(rq); @@ -464,16 +1178,12 @@ err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -483,14 +1193,13 @@ struct preempt_client { struct i915_gem_context *ctx; }; -static int preempt_client_init(struct drm_i915_private *i915, - struct preempt_client *c) +static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) { - c->ctx = kernel_context(i915); + c->ctx = kernel_context(gt->i915); if (!c->ctx) return -ENOMEM; - if (igt_spinner_init(&c->spin, i915)) + if (igt_spinner_init(&c->spin, gt)) goto err_ctx; return 0; @@ -506,16 +1215,435 @@ static void preempt_client_fini(struct preempt_client *c) kernel_context_close(c->ctx); } +static int live_nopreempt(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct preempt_client a, b; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * Verify that we can disable preemption for an individual request + * that may be being observed and not want to be interrupted. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + if (preempt_client_init(gt, &a)) + return -ENOMEM; + if (preempt_client_init(gt, &b)) + goto err_client_a; + b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); + + for_each_engine(engine, gt, id) { + struct i915_request *rq_a, *rq_b; + + if (!intel_engine_has_preemption(engine)) + continue; + + engine->execlists.preempt_hang.count = 0; + + rq_a = spinner_create_request(&a.spin, + a.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq_a)) { + err = PTR_ERR(rq_a); + goto err_client_b; + } + + /* Low priority client, but unpreemptable! */ + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); + + i915_request_add(rq_a); + if (!igt_wait_for_spinner(&a.spin, rq_a)) { + pr_err("First client failed to start\n"); + goto err_wedged; + } + + rq_b = spinner_create_request(&b.spin, + b.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq_b)) { + err = PTR_ERR(rq_b); + goto err_client_b; + } + + i915_request_add(rq_b); + + /* B is much more important than A! (But A is unpreemptable.) 
*/ + GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); + + /* Wait long enough for preemption and timeslicing */ + if (igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client started too early!\n"); + goto err_wedged; + } + + igt_spinner_end(&a.spin); + + if (!igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client failed to start\n"); + goto err_wedged; + } + + igt_spinner_end(&b.spin); + + if (engine->execlists.preempt_hang.count) { + pr_err("Preemption recorded x%d; should have been suppressed!\n", + engine->execlists.preempt_hang.count); + err = -EINVAL; + goto err_wedged; + } + + if (igt_flush_test(gt->i915)) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&b); +err_client_a: + preempt_client_fini(&a); + return err; + +err_wedged: + igt_spinner_end(&b.spin); + igt_spinner_end(&a.spin); + intel_gt_set_wedged(gt); + err = -EIO; + goto err_client_b; +} + +struct live_preempt_cancel { + struct intel_engine_cs *engine; + struct preempt_client a, b; +}; + +static int __cancel_active0(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP0 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + clear_bit(CONTEXT_BANNED, &rq->context->flags); + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + intel_context_set_banned(rq->context); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_active1(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[2] = {}; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP1 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* no preemption */ + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + rq[1] = spinner_create_request(&arg->b.spin, + arg->b.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + intel_context_set_banned(rq[1]->context); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + igt_spinner_end(&arg->a.spin); + if (i915_request_wait(rq[1], 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq[0]->fence.error != 0) { + pr_err("Normal inflight0 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != -EIO) { + pr_err("Cancelled inflight1 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + 
i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_queued(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[3] = {}; + struct igt_live_test t; + int err; + + /* Full ELSP and one in the wings */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + rq[2] = spinner_create_request(&arg->b.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[2])) { + err = PTR_ERR(rq[2]); + goto out; + } + + i915_request_get(rq[2]); + err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); + i915_request_add(rq[2]); + if (err) + goto out; + + intel_context_set_banned(rq[2]->context); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq[0]->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != 0) { + pr_err("Normal inflight1 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[2]->fence.error != -EIO) { + pr_err("Cancelled queued request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[2]); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_hostile(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + int err; + + /* Preempt cancel non-preemptible spinner in ELSP0 */ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return 0; + + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* preemption disabled */ + if (IS_ERR(rq)) + return PTR_ERR(rq); + + clear_bit(CONTEXT_BANNED, &rq->context->flags); + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + intel_context_set_banned(rq->context); + err = intel_engine_pulse(arg->engine); /* force reset */ + if (err) + goto out; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq); + if (igt_flush_test(arg->engine->i915)) + err = -EIO; + return err; +} + +static int live_preempt_cancel(void *arg) +{ + struct intel_gt *gt = arg; + struct live_preempt_cancel data; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * To cancel an inflight context, we need to first remove it from the + * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 
+ */ + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + if (preempt_client_init(gt, &data.a)) + return -ENOMEM; + if (preempt_client_init(gt, &data.b)) + goto err_client_a; + + for_each_engine(data.engine, gt, id) { + if (!intel_engine_has_preemption(data.engine)) + continue; + + err = __cancel_active0(&data); + if (err) + goto err_wedged; + + err = __cancel_active1(&data); + if (err) + goto err_wedged; + + err = __cancel_queued(&data); + if (err) + goto err_wedged; + + err = __cancel_hostile(&data); + if (err) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&data.b); +err_client_a: + preempt_client_fini(&data.a); + return err; + +err_wedged: + GEM_TRACE_DUMP(); + igt_spinner_end(&data.b.spin); + igt_spinner_end(&data.a.spin); + intel_gt_set_wedged(gt); + goto err_client_b; +} + static int live_suppress_self_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct i915_sched_attr attr = { .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) }; struct preempt_client a, b; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -525,49 +1653,58 @@ static int live_suppress_self_preempt(void *arg) * completion event. */ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; /* presume black blox */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + if (intel_vgpu_active(gt->i915)) + return 0; /* GVT forces single port & request submission */ - if (preempt_client_init(i915, &a)) - goto err_unlock; - if (preempt_client_init(i915, &b)) + if (preempt_client_init(gt, &a)) + return -ENOMEM; + if (preempt_client_init(gt, &b)) goto err_client_a; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq_a, *rq_b; int depth; if (!intel_engine_has_preemption(engine)) continue; + if (igt_flush_test(gt->i915)) + goto err_wedged; + + intel_engine_pm_get(engine); engine->execlists.preempt_hang.count = 0; - rq_a = igt_spinner_create_request(&a.spin, - a.ctx, engine, - MI_NOOP); + rq_a = spinner_create_request(&a.spin, + a.ctx, engine, + MI_NOOP); if (IS_ERR(rq_a)) { err = PTR_ERR(rq_a); + intel_engine_pm_put(engine); goto err_client_b; } i915_request_add(rq_a); if (!igt_wait_for_spinner(&a.spin, rq_a)) { pr_err("First client failed to start\n"); + intel_engine_pm_put(engine); goto err_wedged; } + /* Keep postponing the timer to avoid premature slicing */ + mod_timer(&engine->execlists.timer, jiffies + HZ); for (depth = 0; depth < 8; depth++) { - rq_b = igt_spinner_create_request(&b.spin, - b.ctx, engine, - MI_NOOP); + rq_b = spinner_create_request(&b.spin, + b.ctx, engine, + MI_NOOP); if (IS_ERR(rq_b)) { err = PTR_ERR(rq_b); + intel_engine_pm_put(engine); goto err_client_b; } i915_request_add(rq_b); @@ -578,6 +1715,7 @@ static int live_suppress_self_preempt(void *arg) if (!igt_wait_for_spinner(&b.spin, rq_b)) { pr_err("Second client failed to start\n"); + intel_engine_pm_put(engine); goto err_wedged; } @@ -587,14 +1725,17 @@ static int live_suppress_self_preempt(void *arg) igt_spinner_end(&a.spin); if (engine->execlists.preempt_hang.count) { - pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n", + pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", + engine->name, engine->execlists.preempt_hang.count, depth); + 
intel_engine_pm_put(engine); err = -EINVAL; goto err_client_b; } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + intel_engine_pm_put(engine); + if (igt_flush_test(gt->i915)) goto err_wedged; } @@ -603,17 +1744,12 @@ err_client_b: preempt_client_fini(&b); err_client_a: preempt_client_fini(&a); -err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&b.spin); igt_spinner_end(&a.spin); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_b; } @@ -632,9 +1768,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine) if (!rq) return NULL; - INIT_LIST_HEAD(&rq->active_list); rq->engine = engine; + spin_lock_init(&rq->lock); + INIT_LIST_HEAD(&rq->fence.cb_list); + rq->fence.lock = &rq->lock; + rq->fence.ops = &i915_fence_ops; + i915_sched_node_init(&rq->sched); /* mark this request as permanently incomplete */ @@ -646,6 +1786,10 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine) i915_sw_fence_init(&rq->submit, dummy_notify); set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + spin_lock_init(&rq->lock); + rq->fence.lock = &rq->lock; + INIT_LIST_HEAD(&rq->fence.cb_list); + return rq; } @@ -665,11 +1809,11 @@ static void dummy_request_free(struct i915_request *dummy) static int live_suppress_wait_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct preempt_client client[4]; + struct i915_request *rq[ARRAY_SIZE(client)] = {}; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; int i; @@ -679,22 +1823,19 @@ static int live_suppress_wait_preempt(void *arg) * not needlessly generate preempt-to-idle cycles. 
*/ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (preempt_client_init(i915, &client[0])) /* ELSP[0] */ - goto err_unlock; - if (preempt_client_init(i915, &client[1])) /* ELSP[1] */ + if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ + return -ENOMEM; + if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ goto err_client_0; - if (preempt_client_init(i915, &client[2])) /* head of queue */ + if (preempt_client_init(gt, &client[2])) /* head of queue */ goto err_client_1; - if (preempt_client_init(i915, &client[3])) /* bystander */ + if (preempt_client_init(gt, &client[3])) /* bystander */ goto err_client_2; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { int depth; if (!intel_engine_has_preemption(engine)) @@ -704,7 +1845,6 @@ static int live_suppress_wait_preempt(void *arg) continue; for (depth = 0; depth < ARRAY_SIZE(client); depth++) { - struct i915_request *rq[ARRAY_SIZE(client)]; struct i915_request *dummy; engine->execlists.preempt_hang.count = 0; @@ -714,18 +1854,22 @@ static int live_suppress_wait_preempt(void *arg) goto err_client_3; for (i = 0; i < ARRAY_SIZE(client); i++) { - rq[i] = igt_spinner_create_request(&client[i].spin, - client[i].ctx, engine, - MI_NOOP); - if (IS_ERR(rq[i])) { - err = PTR_ERR(rq[i]); + struct i915_request *this; + + this = spinner_create_request(&client[i].spin, + client[i].ctx, engine, + MI_NOOP); + if (IS_ERR(this)) { + err = PTR_ERR(this); goto err_wedged; } /* Disable NEWCLIENT promotion */ - __i915_active_request_set(&rq[i]->timeline->last_request, - dummy); - i915_request_add(rq[i]); + __i915_active_fence_set(&i915_request_timeline(this)->last_request, + &dummy->fence); + + rq[i] = i915_request_get(this); + i915_request_add(this); } dummy_request_free(dummy); @@ -746,10 +1890,13 @@ static int live_suppress_wait_preempt(void *arg) goto err_wedged; } - for (i = 0; i < ARRAY_SIZE(client); i++) + for (i = 0; i < ARRAY_SIZE(client); i++) { igt_spinner_end(&client[i].spin); + i915_request_put(rq[i]); + rq[i] = NULL; + } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) goto err_wedged; if (engine->execlists.preempt_hang.count) { @@ -772,28 +1919,24 @@ err_client_1: preempt_client_fini(&client[1]); err_client_0: preempt_client_fini(&client[0]); -err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: - for (i = 0; i < ARRAY_SIZE(client); i++) + for (i = 0; i < ARRAY_SIZE(client); i++) { igt_spinner_end(&client[i].spin); - i915_gem_set_wedged(i915); + i915_request_put(rq[i]); + } + intel_gt_set_wedged(gt); err = -EIO; goto err_client_3; } static int live_chain_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct preempt_client hi, lo; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -802,19 +1945,16 @@ static int live_chain_preempt(void *arg) * the previously submitted spinner in B. 
*/ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (preempt_client_init(i915, &hi)) - goto err_unlock; + if (preempt_client_init(gt, &hi)) + return -ENOMEM; - if (preempt_client_init(i915, &lo)) + if (preempt_client_init(gt, &lo)) goto err_client_hi; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_sched_attr attr = { .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), }; @@ -825,11 +1965,13 @@ static int live_chain_preempt(void *arg) if (!intel_engine_has_preemption(engine)) continue; - rq = igt_spinner_create_request(&lo.spin, - lo.ctx, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&lo.spin, + lo.ctx, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) goto err_wedged; + + i915_request_get(rq); i915_request_add(rq); ring_size = rq->wa_tail - rq->head; @@ -842,27 +1984,29 @@ static int live_chain_preempt(void *arg) igt_spinner_end(&lo.spin); if (i915_request_wait(rq, 0, HZ / 2) < 0) { pr_err("Timed out waiting to flush %s\n", engine->name); + i915_request_put(rq); goto err_wedged; } + i915_request_put(rq); - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_wedged; } for_each_prime_number_from(count, 1, ring_size) { - rq = igt_spinner_create_request(&hi.spin, - hi.ctx, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&hi.spin, + hi.ctx, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) goto err_wedged; i915_request_add(rq); if (!igt_wait_for_spinner(&hi.spin, rq)) goto err_wedged; - rq = igt_spinner_create_request(&lo.spin, - lo.ctx, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&lo.spin, + lo.ctx, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) goto err_wedged; i915_request_add(rq); @@ -877,36 +2021,46 @@ static int live_chain_preempt(void *arg) rq = igt_request_alloc(hi.ctx, engine); if (IS_ERR(rq)) goto err_wedged; + + i915_request_get(rq); i915_request_add(rq); engine->schedule(rq, &attr); igt_spinner_end(&hi.spin); if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("Failed to preempt over chain of %d\n", count); intel_engine_dump(engine, &p, "%s\n", engine->name); + i915_request_put(rq); goto err_wedged; } igt_spinner_end(&lo.spin); + i915_request_put(rq); rq = igt_request_alloc(lo.ctx, engine); if (IS_ERR(rq)) goto err_wedged; + + i915_request_get(rq); i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("Failed to flush low priority chain of %d requests\n", count); intel_engine_dump(engine, &p, "%s\n", engine->name); + + i915_request_put(rq); goto err_wedged; } + i915_request_put(rq); } if (igt_live_test_end(&t)) { @@ -920,66 +2074,252 @@ err_client_lo: preempt_client_fini(&lo); err_client_hi: preempt_client_fini(&hi); -err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&hi.spin); igt_spinner_end(&lo.spin); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_lo; } +static int create_gang(struct intel_engine_cs *engine, + struct i915_request **prev) +{ + struct drm_i915_gem_object *obj; + struct intel_context 
*ce; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cs; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_ce; + } + + vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_obj; + + cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cs)) + goto err_obj; + + /* Semaphore target: spin until zero */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = lower_32_bits(vma->node.start); + *cs++ = upper_32_bits(vma->node.start); + + if (*prev) { + u64 offset = (*prev)->batch->node.start; + + /* Terminate the spinner in the next lower priority batch. */ + *cs++ = MI_STORE_DWORD_IMM_GEN4; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = 0; + } + + *cs++ = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + goto err_obj; + + rq->batch = vma; + i915_request_get(rq); + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(vma); + i915_request_add(rq); + if (err) + goto err_rq; + + i915_gem_object_put(obj); + intel_context_put(ce); + + rq->client_link.next = &(*prev)->client_link; + *prev = rq; + return 0; + +err_rq: + i915_request_put(rq); +err_obj: + i915_gem_object_put(obj); +err_ce: + intel_context_put(ce); + return err; +} + +static int live_preempt_gang(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + /* + * Build as long a chain of preempters as we can, with each + * request higher priority than the last. Once we are ready, we release + * the last batch which then precolates down the chain, each releasing + * the next oldest in turn. The intent is to simply push as hard as we + * can with the number of preemptions, trying to exceed narrow HW + * limits. At a minimum, we insist that we can sort all the user + * high priority levels into execution order. + */ + + for_each_engine(engine, gt, id) { + struct i915_request *rq = NULL; + struct igt_live_test t; + IGT_TIMEOUT(end_time); + int prio = 0; + int err = 0; + u32 *cs; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) + return -EIO; + + do { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(prio++), + }; + + err = create_gang(engine, &rq); + if (err) + break; + + /* Submit each spinner at increasing priority */ + engine->schedule(rq, &attr); + + if (prio <= I915_PRIORITY_MAX) + continue; + + if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) + break; + + if (__igt_timeout(end_time, NULL)) + break; + } while (1); + pr_debug("%s: Preempt chain of %d requests\n", + engine->name, prio); + + /* + * Such that the last spinner is the highest priority and + * should execute first. When that spinner completes, + * it will terminate the next lowest spinner until there + * are no more spinners and the gang is complete. 
+ */ + cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); + if (!IS_ERR(cs)) { + *cs = 0; + i915_gem_object_unpin_map(rq->batch->obj); + } else { + err = PTR_ERR(cs); + intel_gt_set_wedged(gt); + } + + while (rq) { /* wait for each rq from highest to lowest prio */ + struct i915_request *n = + list_next_entry(rq, client_link); + + if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(engine->i915->drm.dev); + + pr_err("Failed to flush chain of %d requests, at %d\n", + prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -ETIME; + } + + i915_request_put(rq); + rq = n; + } + + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + static int live_preempt_hang(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin_hi, i915)) - goto err_unlock; + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; - if (igt_spinner_init(&spin_lo, i915)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq; if (!intel_engine_has_preemption(engine)) continue; - rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_ctx_lo; @@ -989,13 +2329,13 @@ static int live_preempt_hang(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } - rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) { igt_spinner_end(&spin_lo); err = PTR_ERR(rq); @@ -1011,28 +2351,28 @@ static int live_preempt_hang(void *arg) HZ / 10)) { pr_err("Preemption did not occur within timeout!"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - i915_reset_engine(engine, NULL); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); + intel_engine_reset(engine, NULL); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); engine->execlists.preempt_hang.inject_hang = false; if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } igt_spinner_end(&spin_hi); 
igt_spinner_end(&spin_lo); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(gt->i915)) { err = -EIO; goto err_ctx_lo; } @@ -1047,10 +2387,105 @@ err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_preempt_timeout(void *arg) +{ + struct intel_gt *gt = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct igt_spinner spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * Check that we force preemption to occur by cancelling the previous + * context if it refuses to yield the GPU. + */ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return 0; + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin_lo, gt)) + return -ENOMEM; + + ctx_hi = kernel_context(gt->i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + + ctx_lo = kernel_context(gt->i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + + for_each_engine(engine, gt, id) { + unsigned long saved_timeout; + struct i915_request *rq; + + if (!intel_engine_has_preemption(engine)) + continue; + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_NOOP); /* preemption disabled */ + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_lo, rq)) { + intel_gt_set_wedged(gt); + err = -EIO; + goto err_ctx_lo; + } + + rq = igt_request_alloc(ctx_hi, engine); + if (IS_ERR(rq)) { + igt_spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + /* Flush the previous CS ack before changing timeouts */ + while (READ_ONCE(engine->execlists.pending[0])) + cpu_relax(); + + saved_timeout = engine->props.preempt_timeout_ms; + engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ + + i915_request_get(rq); + i915_request_add(rq); + + intel_engine_flush_submission(engine); + engine->props.preempt_timeout_ms = saved_timeout; + + if (i915_request_wait(rq, 0, HZ / 10) < 0) { + intel_gt_set_wedged(gt); + i915_request_put(rq); + err = -ETIME; + goto err_ctx_lo; + } + + igt_spinner_end(&spin_lo); + i915_request_put(rq); + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + igt_spinner_fini(&spin_lo); return err; } @@ -1065,7 +2500,7 @@ static int random_priority(struct rnd_state *rnd) } struct preempt_smoke { - struct drm_i915_private *i915; + struct intel_gt *gt; struct i915_gem_context **contexts; struct intel_engine_cs *engine; struct drm_i915_gem_object *batch; @@ -1089,7 +2524,11 @@ static int smoke_submit(struct preempt_smoke *smoke, int err = 0; if (batch) { - vma = i915_vma_instance(batch, ctx->vm, NULL); + struct i915_address_space *vm; + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(batch, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -1108,11 +2547,13 @@ static int smoke_submit(struct preempt_smoke *smoke, if (vma) { i915_vma_lock(vma); - err = rq->engine->emit_bb_start(rq, - vma->node.start, - PAGE_SIZE, 0); + err = i915_request_await_object(rq, vma->obj, false); if (!err) err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = 
rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); i915_vma_unlock(vma); } @@ -1136,11 +2577,9 @@ static int smoke_crescendo_thread(void *arg) struct i915_gem_context *ctx = smoke_context(smoke); int err; - mutex_lock(&smoke->i915->drm.struct_mutex); err = smoke_submit(smoke, ctx, count % I915_PRIORITY_MAX, smoke->batch); - mutex_unlock(&smoke->i915->drm.struct_mutex); if (err) return err; @@ -1161,9 +2600,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) unsigned long count; int err = 0; - mutex_unlock(&smoke->i915->drm.struct_mutex); - - for_each_engine(engine, smoke->i915, id) { + for_each_engine(engine, smoke->gt, id) { arg[id] = *smoke; arg[id].engine = engine; if (!(flags & BATCH)) @@ -1179,8 +2616,10 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) get_task_struct(tsk[id]); } + yield(); /* start all threads before we kthread_stop() */ + count = 0; - for_each_engine(engine, smoke->i915, id) { + for_each_engine(engine, smoke->gt, id) { int status; if (IS_ERR_OR_NULL(tsk[id])) @@ -1195,11 +2634,9 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) put_task_struct(tsk[id]); } - mutex_lock(&smoke->i915->drm.struct_mutex); - pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); + RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); return 0; } @@ -1211,7 +2648,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) count = 0; do { - for_each_engine(smoke->engine, smoke->i915, id) { + for_each_engine(smoke->engine, smoke->gt, id) { struct i915_gem_context *ctx = smoke_context(smoke); int err; @@ -1227,25 +2664,24 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); + RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); return 0; } static int live_preempt_smoke(void *arg) { struct preempt_smoke smoke = { - .i915 = arg, + .gt = arg, .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), .ncontext = 1024, }; const unsigned int phase[] = { 0, BATCH }; - intel_wakeref_t wakeref; struct igt_live_test t; int err = -ENOMEM; u32 *cs; int n; - if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) return 0; smoke.contexts = kmalloc_array(smoke.ncontext, @@ -1254,13 +2690,11 @@ static int live_preempt_smoke(void *arg) if (!smoke.contexts) return -ENOMEM; - mutex_lock(&smoke.i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm); - - smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE); + smoke.batch = + i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); if (IS_ERR(smoke.batch)) { err = PTR_ERR(smoke.batch); - goto err_unlock; + goto err_free; } cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); @@ -1274,13 +2708,13 @@ static int live_preempt_smoke(void *arg) i915_gem_object_flush_map(smoke.batch); i915_gem_object_unpin_map(smoke.batch); - if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) { + if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { err = -EIO; goto err_batch; } for (n = 0; n < smoke.ncontext; n++) { - smoke.contexts[n] = kernel_context(smoke.i915); + smoke.contexts[n] = kernel_context(smoke.gt->i915); if (!smoke.contexts[n]) goto err_ctx; } @@ 
-1307,15 +2741,13 @@ err_ctx: err_batch: i915_gem_object_put(smoke.batch); -err_unlock: - intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref); - mutex_unlock(&smoke.i915->drm.struct_mutex); +err_free: kfree(smoke.contexts); return err; } -static int nop_virtual_engine(struct drm_i915_private *i915, +static int nop_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling, unsigned int nctx, @@ -1323,28 +2755,18 @@ static int nop_virtual_engine(struct drm_i915_private *i915, #define CHAIN BIT(0) { IGT_TIMEOUT(end_time); - struct i915_request *request[16]; - struct i915_gem_context *ctx[16]; + struct i915_request *request[16] = {}; struct intel_context *ve[16]; unsigned long n, prime, nc; struct igt_live_test t; ktime_t times[2] = {}; int err; - GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx)); + GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); for (n = 0; n < nctx; n++) { - ctx[n] = kernel_context(i915); - if (!ctx[n]) { - err = -ENOMEM; - nctx = n; - goto out; - } - - ve[n] = intel_execlists_create_virtual(ctx[n], - siblings, nsibling); + ve[n] = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve[n])) { - kernel_context_close(ctx[n]); err = PTR_ERR(ve[n]); nctx = n; goto out; @@ -1353,13 +2775,12 @@ static int nop_virtual_engine(struct drm_i915_private *i915, err = intel_context_pin(ve[n]); if (err) { intel_context_put(ve[n]); - kernel_context_close(ctx[n]); nctx = n; goto out; } } - err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name); + err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); if (err) goto out; @@ -1369,27 +2790,35 @@ static int nop_virtual_engine(struct drm_i915_private *i915, if (flags & CHAIN) { for (nc = 0; nc < nctx; nc++) { for (n = 0; n < prime; n++) { - request[nc] = - i915_request_create(ve[nc]); - if (IS_ERR(request[nc])) { - err = PTR_ERR(request[nc]); + struct i915_request *rq; + + rq = i915_request_create(ve[nc]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); goto out; } - i915_request_add(request[nc]); + if (request[nc]) + i915_request_put(request[nc]); + request[nc] = i915_request_get(rq); + i915_request_add(rq); } } } else { for (n = 0; n < prime; n++) { for (nc = 0; nc < nctx; nc++) { - request[nc] = - i915_request_create(ve[nc]); - if (IS_ERR(request[nc])) { - err = PTR_ERR(request[nc]); + struct i915_request *rq; + + rq = i915_request_create(ve[nc]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); goto out; } - i915_request_add(request[nc]); + if (request[nc]) + i915_request_put(request[nc]); + request[nc] = i915_request_get(rq); + i915_request_add(rq); } } } @@ -1406,7 +2835,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, request[nc]->fence.context, request[nc]->fence.seqno); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); break; } } @@ -1415,6 +2844,11 @@ static int nop_virtual_engine(struct drm_i915_private *i915, if (prime == 1) times[0] = times[1]; + for (nc = 0; nc < nctx; nc++) { + i915_request_put(request[nc]); + request[nc] = NULL; + } + if (__igt_timeout(end_time, NULL)) break; } @@ -1428,37 +2862,35 @@ static int nop_virtual_engine(struct drm_i915_private *i915, prime, div64_u64(ktime_to_ns(times[1]), prime)); out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (nc = 0; nc < nctx; nc++) { + i915_request_put(request[nc]); intel_context_unpin(ve[nc]); intel_context_put(ve[nc]); - kernel_context_close(ctx[nc]); } return err; } static int live_virtual_engine(void *arg) { - struct drm_i915_private 
*i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int class, inst; - int err = -ENODEV; + int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) { - err = nop_virtual_engine(i915, &engine, 1, 1, 0); + for_each_engine(engine, gt, id) { + err = nop_virtual_engine(gt, &engine, 1, 1, 0); if (err) { pr_err("Failed to wrap engine %s: err=%d\n", engine->name, err); - goto out_unlock; + return err; } } @@ -1467,37 +2899,34 @@ static int live_virtual_engine(void *arg) nsibling = 0; for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!i915->engine_class[class][inst]) + if (!gt->engine_class[class][inst]) continue; - siblings[nsibling++] = i915->engine_class[class][inst]; + siblings[nsibling++] = gt->engine_class[class][inst]; } if (nsibling < 2) continue; for (n = 1; n <= nsibling + 1; n++) { - err = nop_virtual_engine(i915, siblings, nsibling, + err = nop_virtual_engine(gt, siblings, nsibling, n, 0); if (err) - goto out_unlock; + return err; } - err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN); + err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); if (err) - goto out_unlock; + return err; } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } -static int mask_virtual_engine(struct drm_i915_private *i915, +static int mask_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling) { struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; - struct i915_gem_context *ctx; struct intel_context *ve; struct igt_live_test t; unsigned int n; @@ -1508,11 +2937,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, * restrict it to our desired engine within the virtual engine. 
*/ - ctx = kernel_context(i915); - if (!ctx) - return -ENOMEM; - - ve = intel_execlists_create_virtual(ctx, siblings, nsibling); + ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_close; @@ -1522,7 +2947,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, if (err) goto out_put; - err = igt_live_test_begin(&t, i915, __func__, ve->engine->name); + err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); if (err) goto out_unpin; @@ -1553,7 +2978,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, request[n]->fence.context, request[n]->fence.seqno); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto out; } @@ -1568,11 +2993,8 @@ static int mask_virtual_engine(struct drm_i915_private *i915, } err = igt_live_test_end(&t); - if (err) - goto out; - out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < nsibling; n++) @@ -1583,46 +3005,183 @@ out_unpin: out_put: intel_context_put(ve); out_close: - kernel_context_close(ctx); return err; } static int live_virtual_mask(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; unsigned int class, inst; - int err = 0; + int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { unsigned int nsibling; nsibling = 0; for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!i915->engine_class[class][inst]) + if (!gt->engine_class[class][inst]) break; - siblings[nsibling++] = i915->engine_class[class][inst]; + siblings[nsibling++] = gt->engine_class[class][inst]; } if (nsibling < 2) continue; - err = mask_virtual_engine(i915, siblings, nsibling); + err = mask_virtual_engine(gt, siblings, nsibling); if (err) - goto out_unlock; + return err; } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); + return 0; +} + +static int preserved_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + struct i915_request *last = NULL; + struct intel_context *ve; + struct i915_vma *scratch; + struct igt_live_test t; + unsigned int n; + int err = 0; + u32 *cs; + + scratch = create_scratch(siblings[0]->gt); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + ve = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_scratch; + } + + err = intel_context_pin(ve); + if (err) + goto out_put; + + err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); + if (err) + goto out_unpin; + + for (n = 0; n < NUM_GPR_DW; n++) { + struct intel_engine_cs *engine = siblings[n % nsibling]; + struct i915_request *rq; + + rq = i915_request_create(ve); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_end; + } + + i915_request_put(last); + last = i915_request_get(rq); + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto out_end; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); + *cs++ = n + 1; + + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + /* Restrict this request to run on a particular engine */ + rq->execution_mask = engine->mask; + 
i915_request_add(rq); + } + + if (i915_request_wait(last, 0, HZ / 5) < 0) { + err = -ETIME; + goto out_end; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_end; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n] != n) { + pr_err("Incorrect value[%d] found for GPR[%d]\n", + cs[n], n); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +out_end: + if (igt_live_test_end(&t)) + err = -EIO; + i915_request_put(last); +out_unpin: + intel_context_unpin(ve); +out_put: + intel_context_put(ve); +out_scratch: + i915_vma_unpin_and_release(&scratch, 0); return err; } -static int bond_virtual_engine(struct drm_i915_private *i915, +static int live_virtual_preserved(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class, inst; + + /* + * Check that the context image retains non-privileged (user) registers + * from one engine to the next. For this we check that the CS_GPR + * are preserved. + */ + + if (USES_GUC_SUBMISSION(gt->i915)) + return 0; + + /* As we use CS_GPR we cannot run before they existed on all engines. */ + if (INTEL_GEN(gt->i915) < 9) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, err; + + nsibling = 0; + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!gt->engine_class[class][inst]) + continue; + + siblings[nsibling++] = gt->engine_class[class][inst]; + } + if (nsibling < 2) + continue; + + err = preserved_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + +static int bond_virtual_engine(struct intel_gt *gt, unsigned int class, struct intel_engine_cs **siblings, unsigned int nsibling, @@ -1630,21 +3189,59 @@ static int bond_virtual_engine(struct drm_i915_private *i915, #define BOND_SCHEDULE BIT(0) { struct intel_engine_cs *master; - struct i915_gem_context *ctx; struct i915_request *rq[16]; enum intel_engine_id id; + struct igt_spinner spin; unsigned long n; int err; + /* + * A set of bonded requests is intended to be run concurrently + * across a number of engines. We use one request per-engine + * and a magic fence to schedule each of the bonded requests + * at the same time. A consequence of our current scheduler is that + * we only move requests to the HW ready queue when the request + * becomes ready, that is when all of its prerequisite fences have + * been signaled. As one of those fences is the master submit fence, + * there is a delay on all secondary fences as the HW may be + * currently busy. Equally, as all the requests are independent, + * they may have other fences that delay individual request + * submission to HW. Ergo, we do not guarantee that all requests are + * immediately submitted to HW at the same time, just that if the + * rules are abided by, they are ready at the same time as the + * first is submitted. Userspace can embed semaphores in its batch + * to ensure parallel execution of its phases as it requires. + * Though naturally it gets requested that perhaps the scheduler should + * take care of parallel execution, even across preemption events on + * different HW. (The proper answer is of course "lalalala".) + * + * With the submit-fence, we have identified three possible phases + * of synchronisation depending on the master fence: queued (not + * ready), executing, and signaled. The first two are quite simple + * and checked below. However, the signaled master fence handling is + * contentious. 
Currently we do not distinguish between a signaled + * fence and an expired fence, as once signaled it does not convey + * any information about the previous execution. It may even be freed + * and hence checking later it may not exist at all. Ergo we currently + * do not apply the bonding constraint for an already signaled fence, + * as our expectation is that it should not constrain the secondaries + * and is outside of the scope of the bonded request API (i.e. all + * userspace requests are meant to be running in parallel). As + * it imposes no constraint, and is effectively a no-op, we do not + * check below as normal execution flows are checked extensively above. + * + * XXX Is the degenerate handling of signaled submit fences the + * expected behaviour for userpace? + */ + GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); - ctx = kernel_context(i915); - if (!ctx) + if (igt_spinner_init(&spin, gt)) return -ENOMEM; err = 0; rq[0] = ERR_PTR(-ENOMEM); - for_each_engine(master, i915, id) { + for_each_engine(master, gt, id) { struct i915_sw_fence fence = {}; if (master->class == class) @@ -1652,7 +3249,9 @@ static int bond_virtual_engine(struct drm_i915_private *i915, memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); - rq[0] = igt_request_alloc(ctx, master); + rq[0] = igt_spinner_create_request(&spin, + master->kernel_context, + MI_NOOP); if (IS_ERR(rq[0])) { err = PTR_ERR(rq[0]); goto out; @@ -1665,16 +3264,21 @@ static int bond_virtual_engine(struct drm_i915_private *i915, &fence, GFP_KERNEL); } + i915_request_add(rq[0]); if (err < 0) goto out; + if (!(flags & BOND_SCHEDULE) && + !igt_wait_for_spinner(&spin, rq[0])) { + err = -EIO; + goto out; + } + for (n = 0; n < nsibling; n++) { struct intel_context *ve; - ve = intel_execlists_create_virtual(ctx, - siblings, - nsibling); + ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); onstack_fence_fini(&fence); @@ -1716,6 +3320,8 @@ static int bond_virtual_engine(struct drm_i915_private *i915, } } onstack_fence_fini(&fence); + intel_engine_flush_submission(master); + igt_spinner_end(&spin); if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { pr_err("Master request did not execute (on %s)!\n", @@ -1749,10 +3355,10 @@ static int bond_virtual_engine(struct drm_i915_private *i915, out: for (n = 0; !IS_ERR(rq[n]); n++) i915_request_put(rq[n]); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; - kernel_context_close(ctx); + igt_spinner_fini(&spin); return err; } @@ -1766,70 +3372,725 @@ static int live_virtual_bond(void *arg) { "schedule", BOND_SCHEDULE }, { }, }; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; unsigned int class, inst; - int err = 0; + int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { const struct phase *p; int nsibling; nsibling = 0; for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!i915->engine_class[class][inst]) + if (!gt->engine_class[class][inst]) break; GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); - siblings[nsibling++] = i915->engine_class[class][inst]; + siblings[nsibling++] = gt->engine_class[class][inst]; } if (nsibling < 2) continue; for (p = phases; p->name; p++) { - err = bond_virtual_engine(i915, + err = bond_virtual_engine(gt, class, siblings, nsibling, p->flags); if (err) { pr_err("%s(%s): failed class=%d, nsibling=%d, 
err=%d\n", __func__, p->name, class, nsibling, err); - goto out_unlock; + return err; } } } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); + return 0; +} + +static int reset_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + struct intel_engine_cs *engine; + struct intel_context *ve; + unsigned long *heartbeat; + struct igt_spinner spin; + struct i915_request *rq; + unsigned int n; + int err = 0; + + /* + * In order to support offline error capture for fast preempt reset, + * we need to decouple the guilty request and ensure that it and its + * descendents are not executed while the capture is in progress. + */ + + heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); + if (!heartbeat) + return -ENOMEM; + + if (igt_spinner_init(&spin, gt)) { + err = -ENOMEM; + goto out_free; + } + + ve = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_spin; + } + + for (n = 0; n < nsibling; n++) + engine_heartbeat_disable(siblings[n], &heartbeat[n]); + + rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_heartbeat; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out_heartbeat; + } + + engine = rq->engine; + GEM_BUG_ON(engine == ve->engine); + + /* Take ownership of the reset and tasklet */ + if (test_and_set_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags)) { + intel_gt_set_wedged(gt); + err = -EBUSY; + goto out_heartbeat; + } + tasklet_disable(&engine->execlists.tasklet); + + engine->execlists.tasklet.func(engine->execlists.tasklet.data); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + /* Fake a preemption event; failed of course */ + spin_lock_irq(&engine->active.lock); + __unwind_incomplete_requests(engine); + spin_unlock_irq(&engine->active.lock); + GEM_BUG_ON(rq->engine != ve->engine); + + /* Reset the engine while keeping our active request on hold */ + execlists_hold(engine, rq); + GEM_BUG_ON(!i915_request_on_hold(rq)); + + intel_engine_reset(engine, NULL); + GEM_BUG_ON(rq->fence.error != -EIO); + + /* Release our grasp on the engine, letting CS flow again */ + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); + + /* Check that we do not resubmit the held request */ + i915_request_get(rq); + if (!i915_request_wait(rq, 0, HZ / 5)) { + pr_err("%s: on hold request completed!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -EIO; + goto out_rq; + } + GEM_BUG_ON(!i915_request_on_hold(rq)); + + /* But is resubmitted on release */ + execlists_unhold(engine, rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("%s: held request did not complete!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -ETIME; + } + +out_rq: + i915_request_put(rq); +out_heartbeat: + for (n = 0; n < nsibling; n++) + engine_heartbeat_enable(siblings[n], heartbeat[n]); + + intel_context_put(ve); +out_spin: + igt_spinner_fini(&spin); +out_free: + kfree(heartbeat); return err; } +static int live_virtual_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class, inst; + + /* + * Check that we handle a reset event within a virtual engine. + * Only the physical engine is reset, but we have to check the flow + * of the virtual requests around the reset, and make sure it is not + * forgotten. 
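+	 *
+	 * In outline, reset_virtual_engine() above does (sketch only):
+	 *
+	 *    submit a spinner on the virtual engine
+	 *    execlists_hold() the guilty request on its physical engine
+	 *    intel_engine_reset() that engine
+	 *    check the held request is not resubmitted (must not complete)
+	 *    execlists_unhold() and check the request then completes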
+ */ + + if (USES_GUC_SUBMISSION(gt->i915)) + return 0; + + if (!intel_has_reset_engine(gt)) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, err; + + nsibling = 0; + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!gt->engine_class[class][inst]) + continue; + + siblings[nsibling++] = gt->engine_class[class][inst]; + } + if (nsibling < 2) + continue; + + err = reset_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + int intel_execlists_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_sanitycheck), + SUBTEST(live_unlite_switch), + SUBTEST(live_unlite_preempt), + SUBTEST(live_hold_reset), + SUBTEST(live_timeslice_preempt), + SUBTEST(live_timeslice_queue), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), + SUBTEST(live_nopreempt), + SUBTEST(live_preempt_cancel), SUBTEST(live_suppress_self_preempt), SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), + SUBTEST(live_preempt_gang), SUBTEST(live_preempt_hang), + SUBTEST(live_preempt_timeout), SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), + SUBTEST(live_virtual_preserved), SUBTEST(live_virtual_bond), + SUBTEST(live_virtual_reset), }; if (!HAS_EXECLISTS(i915)) return 0; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} + +static void hexdump(const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + pr_info("*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + pr_info("[%04zx] %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + +static int live_lrc_layout(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 *lrc; + int err; + + /* + * Check the registers offsets we use to create the initial reg state + * match the layout saved by HW. 
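+	 *
+	 * Both images are expected to be a stream of the form (a sketch of
+	 * the layout parsed below):
+	 *
+	 *    MI_LOAD_REGISTER_IMM(N)
+	 *    { register offset, register value } x N
+	 *    ...
+	 *    MI_BATCH_BUFFER_END
+	 *
+	 * where the command headers and register offsets must match, while
+	 * the register values themselves are expected to differ and are
+	 * skipped.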
+ */ + + lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!lrc) + return -ENOMEM; + + err = 0; + for_each_engine(engine, gt, id) { + u32 *hw; + int dw; + + if (!engine->default_state) + continue; + + hw = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + + execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), + engine->kernel_context, + engine, + engine->kernel_context->ring, + true); + + dw = 0; + do { + u32 lri = hw[dw]; + + if (lri == 0) { + dw++; + continue; + } + + if (lrc[dw] == 0) { + pr_debug("%s: skipped instruction %x at dword %d\n", + engine->name, lri, dw); + dw++; + continue; + } + + if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + pr_err("%s: Expected LRI command at dword %d, found %08x\n", + engine->name, dw, lri); + err = -EINVAL; + break; + } + + if (lrc[dw] != lri) { + pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", + engine->name, dw, lri, lrc[dw]); + err = -EINVAL; + break; + } + + lri &= 0x7f; + lri++; + dw++; + + while (lri) { + if (hw[dw] != lrc[dw]) { + pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", + engine->name, dw, hw[dw], lrc[dw]); + err = -EINVAL; + break; + } + + /* + * Skip over the actual register value as we + * expect that to differ. + */ + dw += 2; + lri -= 2; + } + } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + if (err) { + pr_info("%s: HW register image:\n", engine->name); + hexdump(hw, PAGE_SIZE); + + pr_info("%s: SW register image:\n", engine->name); + hexdump(lrc, PAGE_SIZE); + } + + i915_gem_object_unpin_map(engine->default_state); + if (err) + break; + } + + kfree(lrc); + return err; +} + +static int find_offset(const u32 *lri, u32 offset) +{ + int i; + + for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) + if (lri[i] == offset) + return i; + + return -1; +} + +static int live_lrc_fixed(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Check the assumed register offsets match the actual locations in + * the context image. 
+ */ + + for_each_engine(engine, gt, id) { + const struct { + u32 reg; + u32 offset; + const char *name; + } tbl[] = { + { + i915_mmio_reg_offset(RING_START(engine->mmio_base)), + CTX_RING_START - 1, + "RING_START" + }, + { + i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), + CTX_RING_CTL - 1, + "RING_CTL" + }, + { + i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), + CTX_RING_HEAD - 1, + "RING_HEAD" + }, + { + i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), + CTX_RING_TAIL - 1, + "RING_TAIL" + }, + { + i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), + lrc_ring_mi_mode(engine), + "RING_MI_MODE" + }, + { + i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), + CTX_BB_STATE - 1, + "BB_STATE" + }, + { }, + }, *t; + u32 *hw; + + if (!engine->default_state) + continue; + + hw = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + + for (t = tbl; t->name; t++) { + int dw = find_offset(hw, t->reg); + + if (dw != t->offset) { + pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", + engine->name, + t->name, + t->reg, + dw, + t->offset); + err = -EINVAL; + } + } + + i915_gem_object_unpin_map(engine->default_state); + } + + return err; +} + +static int __live_lrc_state(struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + enum { + RING_START_IDX = 0, + RING_TAIL_IDX, + MAX_IDX + }; + u32 expected[MAX_IDX]; + u32 *cs; + int err; + int n; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) + goto err_put; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + cs = intel_ring_begin(rq, 4 * MAX_IDX); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_unpin; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); + *cs++ = 0; + + expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); + *cs++ = 0; + + i915_request_get(rq); + i915_request_add(rq); + + intel_engine_flush_submission(engine); + expected[RING_TAIL_IDX] = ce->ring->tail; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < MAX_IDX; n++) { + if (cs[n] != expected[n]) { + pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", + engine->name, n, cs[n], expected[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_unpin: + intel_context_unpin(ce); +err_put: + intel_context_put(ce); + return err; +} + +static int live_lrc_state(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check the live register state matches what we expect for this + * intel_context. 
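+	 *
+	 * In outline, __live_lrc_state() above does (sketch only):
+	 *
+	 *    SRM(RING_START) and SRM(RING_TAIL) into a scratch bo from
+	 *    within the request, then compare the results against
+	 *    i915_ggtt_offset(ce->ring->vma) and ce->ring->tail sampled
+	 *    after submission.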
+ */ + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + for_each_engine(engine, gt, id) { + err = __live_lrc_state(engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); + return err; +} + +static int gpr_make_dirty(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + u32 *cs; + int n; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = CS_GPR(engine, n); + *cs++ = STACK_MAGIC; + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + i915_request_add(rq); + + return 0; +} + +static int __live_gpr_clear(struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + u32 *cs; + int err; + int n; + + if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) + return 0; /* GPR only on rcs0 for gen8 */ + + err = gpr_make_dirty(engine); + if (err) + return err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_put; + } + + cs = intel_ring_begin(rq, 4 * NUM_GPR_DW); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_put; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n]) { + pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", + engine->name, + n / 2, n & 1 ? "udw" : "ldw", + cs[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_put: + intel_context_put(ce); + return err; +} + +static int live_gpr_clear(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check that GPR registers are cleared in new contexts as we need + * to avoid leaking any information from previous contexts. 
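+	 *
+	 * In outline (a sketch of the flow in __live_gpr_clear() above):
+	 *
+	 *    gpr_make_dirty(): MI_LOAD_REGISTER_IMM(CS_GPR[n], STACK_MAGIC)
+	 *                      from the kernel context
+	 *    new context:      MI_STORE_REGISTER_MEM(CS_GPR[n]) -> scratch
+	 *    expect:           scratch[n] == 0 for all n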
+ */ + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + for_each_engine(engine, gt, id) { + err = __live_gpr_clear(engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); + return err; +} + +int intel_lrc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_lrc_layout), + SUBTEST(live_lrc_fixed), + SUBTEST(live_lrc_state), + SUBTEST(live_gpr_clear), + }; + + if (!HAS_LOGICAL_RING_CONTEXTS(i915)) return 0; - return i915_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c new file mode 100644 index 000000000000..de1f83100fb6 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -0,0 +1,419 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "gt/intel_engine_pm.h" +#include "i915_selftest.h" + +#include "gem/selftests/mock_context.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_spinner.h" + +struct live_mocs { + struct drm_i915_mocs_table table; + struct i915_vma *scratch; + void *vaddr; +}; + +static int request_add_sync(struct i915_request *rq, int err) +{ + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) +{ + int err = 0; + + i915_request_get(rq); + i915_request_add(rq); + if (spin && !igt_wait_for_spinner(spin, rq)) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static struct i915_vma *create_scratch(struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + +static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt) +{ + int err; + + if (!get_mocs_settings(gt->i915, &arg->table)) + return -EINVAL; + + arg->scratch = create_scratch(gt); + if (IS_ERR(arg->scratch)) + return PTR_ERR(arg->scratch); + + arg->vaddr = i915_gem_object_pin_map(arg->scratch->obj, I915_MAP_WB); + if (IS_ERR(arg->vaddr)) { + err = PTR_ERR(arg->vaddr); + goto err_scratch; + } + + return 0; + +err_scratch: + i915_vma_unpin_and_release(&arg->scratch, 0); + return err; +} + +static void live_mocs_fini(struct live_mocs *arg) +{ + i915_vma_unpin_and_release(&arg->scratch, I915_VMA_RELEASE_MAP); +} + +static int read_regs(struct i915_request *rq, + u32 addr, unsigned int count, + uint32_t *offset) +{ + unsigned int i; + u32 *cs; + + GEM_BUG_ON(!IS_ALIGNED(*offset, sizeof(u32))); + + cs = intel_ring_begin(rq, 4 * count); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + for (i = 0; i < count; i++) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = addr; + *cs++ = *offset; + *cs++ = 0; + + addr += sizeof(u32); + *offset += sizeof(u32); + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static int read_mocs_table(struct i915_request *rq, + const struct drm_i915_mocs_table *table, + uint32_t 
*offset) +{ + u32 addr; + + if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915)) + addr = global_mocs_offset(); + else + addr = mocs_offset(rq->engine); + + return read_regs(rq, addr, table->n_entries, offset); +} + +static int read_l3cc_table(struct i915_request *rq, + const struct drm_i915_mocs_table *table, + uint32_t *offset) +{ + u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); + + return read_regs(rq, addr, (table->n_entries + 1) / 2, offset); +} + +static int check_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table, + uint32_t **vaddr) +{ + unsigned int i; + u32 expect; + + for_each_mocs(expect, table, i) { + if (**vaddr != expect) { + pr_err("%s: Invalid MOCS[%d] entry, found %08x, expected %08x\n", + engine->name, i, **vaddr, expect); + return -EINVAL; + } + ++*vaddr; + } + + return 0; +} + +static bool mcr_range(struct drm_i915_private *i915, u32 offset) +{ + /* + * Registers in this range are affected by the MCR selector + * which only controls CPU initiated MMIO. Routing does not + * work for CS access so we cannot verify them on this path. + */ + return INTEL_GEN(i915) >= 8 && offset >= 0xb000 && offset <= 0xb4ff; +} + +static int check_l3cc_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table, + uint32_t **vaddr) +{ + /* Can we read the MCR range 0xb00 directly? See intel_workarounds! */ + u32 reg = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); + unsigned int i; + u32 expect; + + for_each_l3cc(expect, table, i) { + if (!mcr_range(engine->i915, reg) && **vaddr != expect) { + pr_err("%s: Invalid L3CC[%d] entry, found %08x, expected %08x\n", + engine->name, i, **vaddr, expect); + return -EINVAL; + } + ++*vaddr; + reg += 4; + } + + return 0; +} + +static int check_mocs_engine(struct live_mocs *arg, + struct intel_context *ce) +{ + struct i915_vma *vma = arg->scratch; + struct i915_request *rq; + u32 offset; + u32 *vaddr; + int err; + + memset32(arg->vaddr, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (!err) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + + /* Read the mocs tables back using SRM */ + offset = i915_ggtt_offset(vma); + if (!err) + err = read_mocs_table(rq, &arg->table, &offset); + if (!err && ce->engine->class == RENDER_CLASS) + err = read_l3cc_table(rq, &arg->table, &offset); + offset -= i915_ggtt_offset(vma); + GEM_BUG_ON(offset > PAGE_SIZE); + + err = request_add_sync(rq, err); + if (err) + return err; + + /* Compare the results against the expected tables */ + vaddr = arg->vaddr; + if (!err) + err = check_mocs_table(ce->engine, &arg->table, &vaddr); + if (!err && ce->engine->class == RENDER_CLASS) + err = check_l3cc_table(ce->engine, &arg->table, &vaddr); + if (err) + return err; + + GEM_BUG_ON(arg->vaddr + offset != vaddr); + return 0; +} + +static int live_mocs_kernel(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct live_mocs mocs; + int err; + + /* Basic check the system is configured with the expected mocs table */ + + err = live_mocs_init(&mocs, gt); + if (err) + return err; + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = check_mocs_engine(&mocs, engine->kernel_context); + intel_engine_pm_put(engine); + if (err) + break; + } + + live_mocs_fini(&mocs); + return err; +} + +static int live_mocs_clean(void *arg) +{ + struct 
intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct live_mocs mocs; + int err; + + /* Every new context should see the same mocs table */ + + err = live_mocs_init(&mocs, gt); + if (err) + return err; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + err = check_mocs_engine(&mocs, ce); + intel_context_put(ce); + if (err) + break; + } + + live_mocs_fini(&mocs); + return err; +} + +static int active_engine_reset(struct intel_context *ce, + const char *reason) +{ + struct igt_spinner spin; + struct i915_request *rq; + int err; + + err = igt_spinner_init(&spin, ce->engine->gt); + if (err) + return err; + + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); + if (IS_ERR(rq)) { + igt_spinner_fini(&spin); + return PTR_ERR(rq); + } + + err = request_add_spin(rq, &spin); + if (err == 0) + err = intel_engine_reset(ce->engine, reason); + + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + + return err; +} + +static int __live_mocs_reset(struct live_mocs *mocs, + struct intel_context *ce) +{ + int err; + + err = intel_engine_reset(ce->engine, "mocs"); + if (err) + return err; + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + + err = active_engine_reset(ce, "mocs"); + if (err) + return err; + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + + intel_gt_reset(ce->engine->gt, ce->engine->mask, "mocs"); + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + + return 0; +} + +static int live_mocs_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct live_mocs mocs; + int err = 0; + + /* Check the mocs setup is retained over per-engine and global resets */ + + if (!intel_has_reset_engine(gt)) + return 0; + + err = live_mocs_init(&mocs, gt); + if (err) + return err; + + igt_global_reset_lock(gt); + for_each_engine(engine, gt, id) { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + intel_engine_pm_get(engine); + err = __live_mocs_reset(&mocs, ce); + intel_engine_pm_put(engine); + + intel_context_put(ce); + if (err) + break; + } + igt_global_reset_unlock(gt); + + live_mocs_fini(&mocs); + return err; +} + +int intel_mocs_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_mocs_kernel), + SUBTEST(live_mocs_clean), + SUBTEST(live_mocs_reset), + }; + struct drm_i915_mocs_table table; + + if (!get_mocs_settings(i915, &table)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c new file mode 100644 index 000000000000..8cc55a0e9e06 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -0,0 +1,203 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_context.h" +#include "intel_engine_pm.h" +#include "intel_gt_requests.h" +#include "intel_ring.h" +#include "selftest_rc6.h" + +#include "selftests/i915_random.h" + +int live_rc6_manual(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_rc6 *rc6 = >->rc6; + intel_wakeref_t wakeref; + u64 res[2]; + int err = 0; + + /* + * Our claim is that we can "encourage" the GPU to enter rc6 at will. + * Let's try it! 
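+	 *
+	 * The rough expectation checked below (sketch only):
+	 *
+	 *    __intel_rc6_disable(); msleep();  -> residency must not advance
+	 *    intel_rc6_park(); msleep();       -> residency must advance
+	 *
+	 * with intel_rc6_unpark() restoring the original state afterwards.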
+ */ + + if (!rc6->enabled) + return 0; + + /* bsw/byt use a PCU and decouple RC6 from our manual control */ + if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) + return 0; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + /* Force RC6 off for starters */ + __intel_rc6_disable(rc6); + msleep(1); /* wakeup is not immediate, takes about 100us on icl */ + + res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + msleep(250); + res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + if ((res[1] - res[0]) >> 10) { + pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n", + (res[1] - res[0]) >> 10); + err = -EINVAL; + goto out_unlock; + } + + /* Manually enter RC6 */ + intel_rc6_park(rc6); + + res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + msleep(100); + res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + + if (res[1] == res[0]) { + pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n", + intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE), + intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL)); + err = -EINVAL; + } + + /* Restore what should have been the original state! */ + intel_rc6_unpark(rc6); + +out_unlock: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + return err; +} + +static const u32 *__live_rc6_ctx(struct intel_context *ce) +{ + struct i915_request *rq; + const u32 *result; + u32 cmd; + u32 *cs; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return ERR_CAST(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return cs; + } + + cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; + if (INTEL_GEN(rq->i915) >= 8) + cmd++; + + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO); + *cs++ = ce->timeline->hwsp_offset + 8; + *cs++ = 0; + intel_ring_advance(rq, cs); + + result = rq->hwsp_seqno + 2; + i915_request_add(rq); + + return result; +} + +static struct intel_engine_cs ** +randomised_engines(struct intel_gt *gt, + struct rnd_state *prng, + unsigned int *count) +{ + struct intel_engine_cs *engine, **engines; + enum intel_engine_id id; + int n; + + n = 0; + for_each_engine(engine, gt, id) + n++; + if (!n) + return NULL; + + engines = kmalloc_array(n, sizeof(*engines), GFP_KERNEL); + if (!engines) + return NULL; + + n = 0; + for_each_engine(engine, gt, id) + engines[n++] = engine; + + i915_prandom_shuffle(engines, sizeof(*engines), n, prng); + + *count = n; + return engines; +} + +int live_rc6_ctx_wa(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs **engines; + unsigned int n, count; + I915_RND_STATE(prng); + int err = 0; + + /* A read of CTX_INFO upsets rc6. Poke the bear! 
*/ + if (INTEL_GEN(gt->i915) < 8) + return 0; + + engines = randomised_engines(gt, &prng, &count); + if (!engines) + return 0; + + for (n = 0; n < count; n++) { + struct intel_engine_cs *engine = engines[n]; + int pass; + + for (pass = 0; pass < 2; pass++) { + struct intel_context *ce; + unsigned int resets = + i915_reset_engine_count(>->i915->gpu_error, + engine); + const u32 *res; + + /* Use a sacrifical context */ + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + intel_engine_pm_get(engine); + res = __live_rc6_ctx(ce); + intel_engine_pm_put(engine); + intel_context_put(ce); + if (IS_ERR(res)) { + err = PTR_ERR(res); + goto out; + } + + if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out; + } + + intel_gt_pm_wait_for_idle(gt); + pr_debug("%s: CTX_INFO=%0x\n", + engine->name, READ_ONCE(*res)); + + if (resets != + i915_reset_engine_count(>->i915->gpu_error, + engine)) { + pr_err("%s: GPU reset required\n", + engine->name); + add_taint_for_CI(TAINT_WARN); + err = -EIO; + goto out; + } + } + } + +out: + kfree(engines); + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.h b/drivers/gpu/drm/i915/gt/selftest_rc6.h new file mode 100644 index 000000000000..762fd442d7b2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.h @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef SELFTEST_RC6_H +#define SELFTEST_RC6_H + +int live_rc6_ctx_wa(void *arg); +int live_rc6_manual(void *arg); + +#endif /* SELFTEST_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index b5c590c9ccba..6ad6aca315f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -9,26 +9,29 @@ static int igt_global_reset(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; unsigned int reset_count; + intel_wakeref_t wakeref; int err = 0; /* Check that we can issue a global GPU reset */ - igt_global_reset_lock(i915); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reset_count = i915_reset_count(&i915->gpu_error); + reset_count = i915_reset_count(>->i915->gpu_error); - i915_reset(i915, ALL_ENGINES, NULL); + intel_gt_reset(gt, ALL_ENGINES, NULL); - if (i915_reset_count(&i915->gpu_error) == reset_count) { + if (i915_reset_count(>->i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); err = -EINVAL; } - igt_global_reset_unlock(i915); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) err = -EIO; return err; @@ -36,64 +39,123 @@ static int igt_global_reset(void *arg) static int igt_wedged_reset(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; intel_wakeref_t wakeref; /* Check that we can recover a wedged device with a GPU reset */ - igt_global_reset_lock(i915); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); - GEM_BUG_ON(!i915_reset_failed(i915)); - i915_reset(i915, ALL_ENGINES, NULL); + GEM_BUG_ON(!intel_gt_is_wedged(gt)); + intel_gt_reset(gt, ALL_ENGINES, NULL); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(i915); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); 
- return i915_reset_failed(i915) ? -EIO : 0; + return intel_gt_is_wedged(gt) ? -EIO : 0; } static int igt_atomic_reset(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; int err = 0; /* Check that the resets are usable from atomic context */ - igt_global_reset_lock(i915); - mutex_lock(&i915->drm.struct_mutex); + intel_gt_pm_get(gt); + igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ - if (!igt_force_reset(i915)) + if (!igt_force_reset(gt)) goto unlock; for (p = igt_atomic_phases; p->name; p++) { intel_engine_mask_t awake; - GEM_TRACE("intel_gpu_reset under %s\n", p->name); + GEM_TRACE("__intel_gt_reset under %s\n", p->name); - awake = reset_prepare(i915); + awake = reset_prepare(gt); p->critical_section_begin(); - reset_prepare(i915); - err = intel_gpu_reset(i915, ALL_ENGINES); + + err = __intel_gt_reset(gt, ALL_ENGINES); + p->critical_section_end(); - reset_finish(i915, awake); + reset_finish(gt, awake); if (err) { - pr_err("intel_gpu_reset failed under %s\n", p->name); + pr_err("__intel_gt_reset failed under %s\n", p->name); break; } } /* As we poke around the guts, do a full reset before continuing. */ - igt_force_reset(i915); + igt_force_reset(gt); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + igt_global_reset_unlock(gt); + intel_gt_pm_put(gt); + + return err; +} + +static int igt_atomic_engine_reset(void *arg) +{ + struct intel_gt *gt = arg; + const typeof(*igt_atomic_phases) *p; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that the resets are usable from atomic context */ + + if (!intel_has_reset_engine(gt)) + return 0; + + if (USES_GUC_SUBMISSION(gt->i915)) + return 0; + + intel_gt_pm_get(gt); + igt_global_reset_lock(gt); + + /* Flush any requests before we get started and check basics */ + if (!igt_force_reset(gt)) + goto out_unlock; + + for_each_engine(engine, gt, id) { + tasklet_disable(&engine->execlists.tasklet); + intel_engine_pm_get(engine); + + for (p = igt_atomic_phases; p->name; p++) { + GEM_TRACE("intel_engine_reset(%s) under %s\n", + engine->name, p->name); + + p->critical_section_begin(); + err = intel_engine_reset(engine, NULL); + p->critical_section_end(); + + if (err) { + pr_err("intel_engine_reset(%s) failed under %s\n", + engine->name, p->name); + break; + } + } + + intel_engine_pm_put(engine); + tasklet_enable(&engine->execlists.tasklet); + if (err) + break; + } + + /* As we poke around the guts, do a full reset before continuing. 
*/ + igt_force_reset(gt); + +out_unlock: + igt_global_reset_unlock(gt); + intel_gt_pm_put(gt); return err; } @@ -104,18 +166,15 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_global_reset), /* attempt to recover GPU first */ SUBTEST(igt_wedged_reset), SUBTEST(igt_atomic_reset), + SUBTEST(igt_atomic_engine_reset), }; - intel_wakeref_t wakeref; - int err = 0; + struct intel_gt *gt = &i915->gt; - if (!intel_has_gpu_reset(i915)) + if (!intel_has_gpu_reset(gt)) return 0; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; /* we're long past hope of a successful reset */ - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = i915_subtests(tests, i915); - - return err; + return intel_gt_live_subtests(tests, gt); } diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 76d3977f1d4b..e2d78cc22fb4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -6,16 +6,19 @@ #include <linux/prime_numbers.h> -#include "gem/i915_gem_pm.h" +#include "intel_engine_pm.h" +#include "intel_gt.h" +#include "intel_gt_requests.h" +#include "intel_ring.h" -#include "i915_random.h" -#include "i915_selftest.h" +#include "../selftests/i915_random.h" +#include "../i915_selftest.h" -#include "igt_flush_test.h" -#include "mock_gem_device.h" -#include "mock_timeline.h" +#include "../selftests/igt_flush_test.h" +#include "../selftests/mock_gem_device.h" +#include "selftests/mock_timeline.h" -static struct page *hwsp_page(struct i915_timeline *tl) +static struct page *hwsp_page(struct intel_timeline *tl) { struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; @@ -23,7 +26,7 @@ static struct page *hwsp_page(struct i915_timeline *tl) return sg_page(obj->mm.pages->sgl); } -static unsigned long hwsp_cacheline(struct i915_timeline *tl) +static unsigned long hwsp_cacheline(struct intel_timeline *tl) { unsigned long address = (unsigned long)page_address(hwsp_page(tl)); @@ -33,9 +36,9 @@ static unsigned long hwsp_cacheline(struct i915_timeline *tl) #define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) struct mock_hwsp_freelist { - struct drm_i915_private *i915; + struct intel_gt *gt; struct radix_tree_root cachelines; - struct i915_timeline **history; + struct intel_timeline **history; unsigned long count, max; struct rnd_state prng; }; @@ -46,12 +49,12 @@ enum { static void __mock_hwsp_record(struct mock_hwsp_freelist *state, unsigned int idx, - struct i915_timeline *tl) + struct intel_timeline *tl) { tl = xchg(&state->history[idx], tl); if (tl) { radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); - i915_timeline_put(tl); + intel_timeline_put(tl); } } @@ -59,14 +62,14 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, unsigned int count, unsigned int flags) { - struct i915_timeline *tl; + struct intel_timeline *tl; unsigned int idx; while (count--) { unsigned long cacheline; int err; - tl = i915_timeline_create(state->i915, NULL); + tl = intel_timeline_create(state->gt, NULL); if (IS_ERR(tl)) return PTR_ERR(tl); @@ -77,7 +80,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", cacheline); } - i915_timeline_put(tl); + intel_timeline_put(tl); return err; } @@ -104,6 +107,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, static int mock_hwsp_freelist(void *arg) { struct mock_hwsp_freelist state; + struct drm_i915_private *i915; 
const struct { const char *name; unsigned int flags; @@ -115,12 +119,14 @@ static int mock_hwsp_freelist(void *arg) unsigned int na; int err = 0; + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); - state.i915 = mock_gem_device(); - if (!state.i915) - return -ENOMEM; + state.gt = &i915->gt; /* * Create a bunch of timelines and check that their HWSP do not overlap. @@ -135,7 +141,6 @@ static int mock_hwsp_freelist(void *arg) goto err_put; } - mutex_lock(&state.i915->drm.struct_mutex); for (p = phases; p->name; p++) { pr_debug("%s(%s)\n", __func__, p->name); for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { @@ -148,10 +153,9 @@ static int mock_hwsp_freelist(void *arg) out: for (na = 0; na < state.max; na++) __mock_hwsp_record(&state, na, NULL); - mutex_unlock(&state.i915->drm.struct_mutex); kfree(state.history); err_put: - drm_dev_put(&state.i915->drm); + drm_dev_put(&i915->drm); return err; } @@ -162,21 +166,21 @@ struct __igt_sync { bool set; }; -static int __igt_sync(struct i915_timeline *tl, +static int __igt_sync(struct intel_timeline *tl, u64 ctx, const struct __igt_sync *p, const char *name) { int ret; - if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", name, p->name, ctx, p->seqno, yesno(p->expected)); return -EINVAL; } if (p->set) { - ret = __i915_timeline_sync_set(tl, ctx, p->seqno); + ret = __intel_timeline_sync_set(tl, ctx, p->seqno); if (ret) return ret; } @@ -204,7 +208,7 @@ static int igt_sync(void *arg) { "unwrap", UINT_MAX, true, false }, {}, }, *p; - struct i915_timeline tl; + struct intel_timeline tl; int order, offset; int ret = -ENODEV; @@ -248,7 +252,7 @@ static unsigned int random_engine(struct rnd_state *rnd) static int bench_sync(void *arg) { struct rnd_state prng; - struct i915_timeline tl; + struct intel_timeline tl; unsigned long end_time, count; u64 prng32_1M; ktime_t kt; @@ -286,7 +290,7 @@ static int bench_sync(void *arg) do { u64 id = i915_prandom_u64_state(&prng); - __i915_timeline_sync_set(&tl, id, 0); + __intel_timeline_sync_set(&tl, id, 0); count++; } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); @@ -301,7 +305,7 @@ static int bench_sync(void *arg) while (end_time--) { u64 id = i915_prandom_u64_state(&prng); - if (!__i915_timeline_sync_is_later(&tl, id, 0)) { + if (!__intel_timeline_sync_is_later(&tl, id, 0)) { mock_timeline_fini(&tl); pr_err("Lookup of %llu failed\n", id); return -EINVAL; @@ -322,7 +326,7 @@ static int bench_sync(void *arg) kt = ktime_get(); end_time = jiffies + HZ/10; do { - __i915_timeline_sync_set(&tl, count++, 0); + __intel_timeline_sync_set(&tl, count++, 0); } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); pr_info("%s: %lu in-order insertions, %lluns/insert\n", @@ -332,7 +336,7 @@ static int bench_sync(void *arg) end_time = count; kt = ktime_get(); while (end_time--) { - if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) { + if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) { pr_err("Lookup of %lu failed\n", end_time); mock_timeline_fini(&tl); return -EINVAL; @@ -356,8 +360,8 @@ static int bench_sync(void *arg) u32 id = random_engine(&prng); u32 seqno = prandom_u32_state(&prng); - if (!__i915_timeline_sync_is_later(&tl, id, seqno)) - __i915_timeline_sync_set(&tl, id, seqno); + 
if (!__intel_timeline_sync_is_later(&tl, id, seqno)) + __intel_timeline_sync_set(&tl, id, seqno); count++; } while (!time_after(jiffies, end_time)); @@ -385,8 +389,8 @@ static int bench_sync(void *arg) */ u64 id = (u64)(count & mask) << order; - __i915_timeline_sync_is_later(&tl, id, 0); - __i915_timeline_sync_set(&tl, id, 0); + __intel_timeline_sync_is_later(&tl, id, 0); + __intel_timeline_sync_set(&tl, id, 0); count++; } while (!time_after(jiffies, end_time)); @@ -401,7 +405,7 @@ static int bench_sync(void *arg) return 0; } -int i915_timeline_mock_selftests(void) +int intel_timeline_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(mock_hwsp_freelist), @@ -443,49 +447,51 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) } static struct i915_request * -tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) +tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) { struct i915_request *rq; int err; - lockdep_assert_held(&tl->i915->drm.struct_mutex); /* lazy rq refs */ - - err = i915_timeline_pin(tl); + err = intel_timeline_pin(tl); if (err) { rq = ERR_PTR(err); goto out; } - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) goto out_unpin; + i915_request_get(rq); + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); i915_request_add(rq); - if (err) + if (err) { + i915_request_put(rq); rq = ERR_PTR(err); + } out_unpin: - i915_timeline_unpin(tl); + intel_timeline_unpin(tl); out: if (IS_ERR(rq)) pr_err("Failed to write to timeline!\n"); return rq; } -static struct i915_timeline * -checked_i915_timeline_create(struct drm_i915_private *i915) +static struct intel_timeline * +checked_intel_timeline_create(struct intel_gt *gt) { - struct i915_timeline *tl; + struct intel_timeline *tl; - tl = i915_timeline_create(i915, NULL); + tl = intel_timeline_create(gt, NULL); if (IS_ERR(tl)) return tl; if (*tl->hwsp_seqno != tl->seqno) { pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", *tl->hwsp_seqno, tl->seqno); - i915_timeline_put(tl); + intel_timeline_put(tl); return ERR_PTR(-EINVAL); } @@ -495,11 +501,10 @@ checked_i915_timeline_create(struct drm_i915_private *i915) static int live_hwsp_engine(void *arg) { #define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; - struct i915_timeline **timelines; + struct intel_gt *gt = arg; + struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count, n; int err = 0; @@ -514,55 +519,54 @@ static int live_hwsp_engine(void *arg) if (!timelines) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); + for (n = 0; n < NUM_TIMELINES; n++) { - struct i915_timeline *tl; + struct intel_timeline *tl; struct i915_request *rq; - tl = checked_i915_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); - goto out; + break; } rq = tl_write(tl, engine, count); if (IS_ERR(rq)) { - i915_timeline_put(tl); + intel_timeline_put(tl); err = PTR_ERR(rq); - goto out; + break; } timelines[count++] = tl; + i915_request_put(rq); } + + intel_engine_pm_put(engine); + if (err) + break; } -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if 
(igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < count; n++) { - struct i915_timeline *tl = timelines[n]; + struct intel_timeline *tl = timelines[n]; if (!err && *tl->hwsp_seqno != n) { pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", n, *tl->hwsp_seqno); err = -EINVAL; } - i915_timeline_put(tl); + intel_timeline_put(tl); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kvfree(timelines); - return err; #undef NUM_TIMELINES } @@ -570,11 +574,10 @@ out: static int live_hwsp_alternate(void *arg) { #define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; - struct i915_timeline **timelines; + struct intel_gt *gt = arg; + struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count, n; int err = 0; @@ -590,66 +593,62 @@ static int live_hwsp_alternate(void *arg) if (!timelines) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for (n = 0; n < NUM_TIMELINES; n++) { - for_each_engine(engine, i915, id) { - struct i915_timeline *tl; + for_each_engine(engine, gt, id) { + struct intel_timeline *tl; struct i915_request *rq; if (!intel_engine_can_store_dword(engine)) continue; - tl = checked_i915_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { + intel_engine_pm_put(engine); err = PTR_ERR(tl); goto out; } + intel_engine_pm_get(engine); rq = tl_write(tl, engine, count); + intel_engine_pm_put(engine); if (IS_ERR(rq)) { - i915_timeline_put(tl); + intel_timeline_put(tl); err = PTR_ERR(rq); goto out; } timelines[count++] = tl; + i915_request_put(rq); } } out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < count; n++) { - struct i915_timeline *tl = timelines[n]; + struct intel_timeline *tl = timelines[n]; if (!err && *tl->hwsp_seqno != n) { pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", n, *tl->hwsp_seqno); err = -EINVAL; } - i915_timeline_put(tl); + intel_timeline_put(tl); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kvfree(timelines); - return err; #undef NUM_TIMELINES } static int live_hwsp_wrap(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_timeline *tl; + struct intel_timeline *tl; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = 0; /* @@ -657,22 +656,18 @@ static int live_hwsp_wrap(void *arg) * foreign GPU references. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + tl = intel_timeline_create(gt, NULL); + if (IS_ERR(tl)) + return PTR_ERR(tl); - tl = i915_timeline_create(i915, NULL); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out_rpm; - } if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) goto out_free; - err = i915_timeline_pin(tl); + err = intel_timeline_pin(tl); if (err) goto out_free; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { const u32 *hwsp_seqno[2]; struct i915_request *rq; u32 seqno[2]; @@ -680,7 +675,7 @@ static int live_hwsp_wrap(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out; @@ -688,7 +683,9 @@ static int live_hwsp_wrap(void *arg) tl->seqno = -4u; - err = i915_timeline_get_seqno(tl, rq, &seqno[0]); + mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); + err = intel_timeline_get_seqno(tl, rq, &seqno[0]); + mutex_unlock(&tl->mutex); if (err) { i915_request_add(rq); goto out; @@ -703,7 +700,9 @@ static int live_hwsp_wrap(void *arg) } hwsp_seqno[0] = tl->hwsp_seqno; - err = i915_timeline_get_seqno(tl, rq, &seqno[1]); + mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); + err = intel_timeline_get_seqno(tl, rq, &seqno[1]); + mutex_unlock(&tl->mutex); if (err) { i915_request_add(rq); goto out; @@ -738,29 +737,24 @@ static int live_hwsp_wrap(void *arg) goto out; } - i915_retire_requests(i915); /* recycle HWSP */ + intel_gt_retire_requests(gt); /* recycle HWSP */ } out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; - i915_timeline_unpin(tl); + intel_timeline_unpin(tl); out_free: - i915_timeline_put(tl); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - + intel_timeline_put(tl); return err; } static int live_hwsp_recycle(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count; int err = 0; @@ -770,38 +764,38 @@ static int live_hwsp_recycle(void *arg) * want to confuse ourselves or the GPU. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { IGT_TIMEOUT(end_time); if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); + do { - struct i915_timeline *tl; + struct intel_timeline *tl; struct i915_request *rq; - tl = checked_i915_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); - goto out; + break; } rq = tl_write(tl, engine, count); if (IS_ERR(rq)) { - i915_timeline_put(tl); + intel_timeline_put(tl); err = PTR_ERR(rq); - goto out; + break; } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Wait for timeline writes timed out!\n"); - i915_timeline_put(tl); + i915_request_put(rq); + intel_timeline_put(tl); err = -EIO; - goto out; + break; } if (*tl->hwsp_seqno != count) { @@ -810,26 +804,23 @@ static int live_hwsp_recycle(void *arg) err = -EINVAL; } - i915_timeline_put(tl); + i915_request_put(rq); + intel_timeline_put(tl); count++; if (err) - goto out; - - i915_timelines_park(i915); /* Encourage recycling! 
*/ + break; } while (!__igt_timeout(end_time, NULL)); - } -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + intel_engine_pm_put(engine); + if (err) + break; + } return err; } -int i915_timeline_live_selftests(struct drm_i915_private *i915) +int intel_timeline_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_hwsp_recycle), @@ -838,8 +829,8 @@ int i915_timeline_live_selftests(struct drm_i915_private *i915) SUBTEST(live_hwsp_wrap), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 44becd9538be..ac1921854cbf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -5,13 +5,14 @@ */ #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" #include "i915_selftest.h" #include "intel_reset.h" #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" -#include "selftests/igt_wedge_me.h" #include "selftests/mock_drm.h" #include "gem/selftests/igt_gem_utils.h" @@ -24,53 +25,70 @@ static const struct wo_register { { INTEL_GEMINILAKE, 0x731c } }; -#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 8) struct wa_lists { struct i915_wa_list gt_wa_list; struct { - char name[REF_NAME_MAX]; struct i915_wa_list wa_list; struct i915_wa_list ctx_wa_list; } engine[I915_NUM_ENGINES]; }; +static int request_add_sync(struct i915_request *rq, int err) +{ + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + + return err; +} + +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) +{ + int err = 0; + + i915_request_get(rq); + i915_request_add(rq); + if (spin && !igt_wait_for_spinner(spin, rq)) + err = -ETIMEDOUT; + i915_request_put(rq); + + return err; +} + static void -reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) +reference_lists_init(struct intel_gt *gt, struct wa_lists *lists) { struct intel_engine_cs *engine; enum intel_engine_id id; memset(lists, 0, sizeof(*lists)); - wa_init_start(&lists->gt_wa_list, "GT_REF"); - gt_init_workarounds(i915, &lists->gt_wa_list); + wa_init_start(&lists->gt_wa_list, "GT_REF", "global"); + gt_init_workarounds(gt->i915, &lists->gt_wa_list); wa_init_finish(&lists->gt_wa_list); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_wa_list *wal = &lists->engine[id].wa_list; - char *name = lists->engine[id].name; - - snprintf(name, REF_NAME_MAX, "%s_REF", engine->name); - wa_init_start(wal, name); + wa_init_start(wal, "REF", engine->name); engine_init_workarounds(engine, wal); wa_init_finish(wal); - snprintf(name, REF_NAME_MAX, "%s_CTX_REF", engine->name); - __intel_engine_init_ctx_wa(engine, &lists->engine[id].ctx_wa_list, - name); + "CTX_REF"); } } static void -reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists) +reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) intel_wa_list_free(&lists->engine[id].wa_list); 
intel_wa_list_free(&lists->gt_wa_list); @@ -102,7 +120,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) i915_gem_object_flush_map(result); i915_gem_object_unpin_map(result); - vma = i915_vma_instance(result, &engine->i915->ggtt.vm, NULL); + vma = i915_vma_instance(result, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -119,7 +137,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) } i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto err_req; @@ -184,7 +204,7 @@ static int check_whitelist(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *results; - struct igt_wedge_me wedge; + struct intel_wedge_me wedge; u32 *vaddr; int err; int i; @@ -195,10 +215,10 @@ static int check_whitelist(struct i915_gem_context *ctx, err = 0; i915_gem_object_lock(results); - igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */ + intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); i915_gem_object_unlock(results); - if (i915_terminally_wedged(ctx->i915)) + if (intel_gt_is_wedged(engine->gt)) err = -EIO; if (err) goto out_put; @@ -231,35 +251,29 @@ out_put: static int do_device_reset(struct intel_engine_cs *engine) { - i915_reset(engine->i915, engine->mask, "live_workarounds"); + intel_gt_reset(engine->gt, engine->mask, "live_workarounds"); return 0; } static int do_engine_reset(struct intel_engine_cs *engine) { - return i915_reset_engine(engine, "live_workarounds"); + return intel_engine_reset(engine, "live_workarounds"); } static int switch_to_scratch_context(struct intel_engine_cs *engine, struct igt_spinner *spin) { - struct i915_gem_context *ctx; + struct intel_context *ce; struct i915_request *rq; - intel_wakeref_t wakeref; int err = 0; - ctx = kernel_context(engine->i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - GEM_BUG_ON(i915_gem_context_is_bannable(ctx)); - - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref) - rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP); + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); - kernel_context_close(ctx); + rq = igt_spinner_create_request(spin, ce, MI_NOOP); + intel_context_put(ce); if (IS_ERR(rq)) { spin = NULL; @@ -267,13 +281,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine, goto err; } - i915_request_add(rq); - - if (spin && !igt_wait_for_spinner(spin, rq)) { - pr_err("Spinner failed to start\n"); - err = -ETIMEDOUT; - } - + err = request_add_spin(rq, spin); err: if (err && spin) igt_spinner_end(spin); @@ -286,79 +294,82 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, const char *name) { struct drm_i915_private *i915 = engine->i915; - struct i915_gem_context *ctx; + struct i915_gem_context *ctx, *tmp; struct igt_spinner spin; intel_wakeref_t wakeref; int err; - pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n", - engine->whitelist.count, name); - - err = igt_spinner_init(&spin, i915); - if (err) - return err; + pr_info("Checking %d whitelisted registers on %s (RING_NONPRIV) [%s]\n", + engine->whitelist.count, engine->name, name); ctx = kernel_context(i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); + err = 
igt_spinner_init(&spin, engine->gt); + if (err) + goto out_ctx; + err = check_whitelist(ctx, engine); if (err) { pr_err("Invalid whitelist *before* %s reset!\n", name); - goto out; + goto out_spin; } err = switch_to_scratch_context(engine, &spin); if (err) - goto out; + goto out_spin; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(engine->uncore->rpm, wakeref) err = reset(engine); igt_spinner_end(&spin); - igt_spinner_fini(&spin); if (err) { pr_err("%s reset failed\n", name); - goto out; + goto out_spin; } err = check_whitelist(ctx, engine); if (err) { pr_err("Whitelist not preserved in context across %s reset!\n", name); - goto out; + goto out_spin; } + tmp = kernel_context(i915); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto out_spin; + } kernel_context_close(ctx); - - ctx = kernel_context(i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); + ctx = tmp; err = check_whitelist(ctx, engine); if (err) { pr_err("Invalid whitelist *after* %s reset in fresh context!\n", name); - goto out; + goto out_spin; } -out: +out_spin: + igt_spinner_fini(&spin); +out_ctx: kernel_context_close(ctx); return err; } -static struct i915_vma *create_batch(struct i915_gem_context *ctx) +static struct i915_vma *create_batch(struct i915_address_space *vm) { struct drm_i915_gem_object *obj; struct i915_vma *vma; int err; - obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, 16 * PAGE_SIZE); if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -393,6 +404,10 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg) enum intel_platform platform = INTEL_INFO(engine->i915)->platform; int i; + if ((reg & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == + RING_FORCE_TO_NONPRIV_ACCESS_WR) + return true; + for (i = 0; i < ARRAY_SIZE(wo_registers); i++) { if (wo_registers[i].platform == platform && wo_registers[i].reg == reg) @@ -404,7 +419,8 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg) static bool ro_register(u32 reg) { - if (reg & RING_FORCE_TO_NONPRIV_RD) + if ((reg & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == + RING_FORCE_TO_NONPRIV_ACCESS_RD) return true; return false; @@ -425,8 +441,7 @@ static int whitelist_writable_count(struct intel_engine_cs *engine) return count; } -static int check_dirty_whitelist(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static int check_dirty_whitelist(struct intel_context *ce) { const u32 values[] = { 0x00000000, @@ -454,16 +469,17 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, 0xffff00ff, 0xffffffff, }; + struct intel_engine_cs *engine = ce->engine; struct i915_vma *scratch; struct i915_vma *batch; int err = 0, i, v; u32 *cs, *results; - scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1); + scratch = create_scratch(ce->vm, 2 * ARRAY_SIZE(values) + 1); if (IS_ERR(scratch)) return PTR_ERR(scratch); - batch = create_batch(ctx); + batch = create_batch(ce->vm); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto out_scratch; @@ -476,16 +492,19 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, u32 srm, lrm, rsvd; u32 expect; int idx; + bool ro_reg; if (wo_register(engine, reg)) continue; - if (ro_register(reg)) - continue; + ro_reg = ro_register(reg); + + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; srm = MI_STORE_REGISTER_MEM; lrm = MI_LOAD_REGISTER_MEM; - if 
(INTEL_GEN(ctx->i915) >= 8) + if (INTEL_GEN(engine->i915) >= 8) lrm++, srm++; pr_debug("%s: Writing garbage to %x\n", @@ -542,9 +561,9 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, i915_gem_object_flush_map(batch->obj); i915_gem_object_unpin_map(batch->obj); - i915_gem_chipset_flush(ctx->i915); + intel_gt_chipset_flush(engine->gt); - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -556,6 +575,14 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, goto err_request; } + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto err_request; + err = engine->emit_bb_start(rq, batch->node.start, PAGE_SIZE, 0); @@ -563,15 +590,11 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, goto err_request; err_request: - i915_request_add(rq); - if (err) - goto out_batch; - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = request_add_sync(rq, err); + if (err) { pr_err("%s: Futzing %x timedout; cancelling test\n", engine->name, reg); - i915_gem_set_wedged(ctx->i915); - err = -EIO; + intel_gt_set_wedged(engine->gt); goto out_batch; } @@ -582,24 +605,35 @@ err_request: } GEM_BUG_ON(values[ARRAY_SIZE(values) - 1] != 0xffffffff); - rsvd = results[ARRAY_SIZE(values)]; /* detect write masking */ - if (!rsvd) { - pr_err("%s: Unable to write to whitelisted register %x\n", - engine->name, reg); - err = -EINVAL; - goto out_unpin; + if (!ro_reg) { + /* detect write masking */ + rsvd = results[ARRAY_SIZE(values)]; + if (!rsvd) { + pr_err("%s: Unable to write to whitelisted register %x\n", + engine->name, reg); + err = -EINVAL; + goto out_unpin; + } } expect = results[0]; idx = 1; for (v = 0; v < ARRAY_SIZE(values); v++) { - expect = reg_write(expect, values[v], rsvd); + if (ro_reg) + expect = results[0]; + else + expect = reg_write(expect, values[v], rsvd); + if (results[idx] != expect) err++; idx++; } for (v = 0; v < ARRAY_SIZE(values); v++) { - expect = reg_write(expect, ~values[v], rsvd); + if (ro_reg) + expect = results[0]; + else + expect = reg_write(expect, ~values[v], rsvd); + if (results[idx] != expect) err++; idx++; @@ -608,15 +642,22 @@ err_request: pr_err("%s: %d mismatch between values written to whitelisted register [%x], and values read back!\n", engine->name, err, reg); - pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n", - engine->name, reg, results[0], rsvd); + if (ro_reg) + pr_info("%s: Whitelisted read-only register: %x, original value %08x\n", + engine->name, reg, results[0]); + else + pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n", + engine->name, reg, results[0], rsvd); expect = results[0]; idx = 1; for (v = 0; v < ARRAY_SIZE(values); v++) { u32 w = values[v]; - expect = reg_write(expect, w, rsvd); + if (ro_reg) + expect = results[0]; + else + expect = reg_write(expect, w, rsvd); pr_info("Wrote %08x, read %08x, expect %08x\n", w, results[idx], expect); idx++; @@ -624,7 +665,10 @@ err_request: for (v = 0; v < ARRAY_SIZE(values); v++) { u32 w = ~values[v]; - expect = reg_write(expect, w, rsvd); + if (ro_reg) + expect = results[0]; + else + expect = reg_write(expect, w, rsvd); pr_info("Wrote %08x, read %08x, expect %08x\n", w, results[idx], expect); idx++; @@ -638,7 +682,7 @@ out_unpin: break; } - if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED)) + if (igt_flush_test(engine->i915)) err = 
-EIO; out_batch: i915_vma_unpin_and_release(&batch, 0); @@ -649,84 +693,68 @@ out_scratch: static int live_dirty_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; enum intel_engine_id id; - intel_wakeref_t wakeref; - struct drm_file *file; - int err = 0; /* Can the user write to the whitelisted registers? */ - if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */ + if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */ return 0; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - mutex_unlock(&i915->drm.struct_mutex); - file = mock_file(i915); - mutex_lock(&i915->drm.struct_mutex); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto out_rpm; - } - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_file; - } + for_each_engine(engine, gt, id) { + struct intel_context *ce; + int err; - for_each_engine(engine, i915, id) { if (engine->whitelist.count == 0) continue; - err = check_dirty_whitelist(ctx, engine); + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = check_dirty_whitelist(ce); + intel_context_put(ce); if (err) - goto out_file; + return err; } -out_file: - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); - mutex_lock(&i915->drm.struct_mutex); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - return err; + return 0; } static int live_reset_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; int err = 0; /* If we reset the gpu, we should not lose the RING_NONPRIV */ + igt_global_reset_lock(gt); - if (!engine || engine->whitelist.count == 0) - return 0; - - igt_global_reset_lock(i915); + for_each_engine(engine, gt, id) { + if (engine->whitelist.count == 0) + continue; - if (intel_has_reset_engine(i915)) { - err = check_whitelist_across_reset(engine, - do_engine_reset, - "engine"); - if (err) - goto out; - } + if (intel_has_reset_engine(gt)) { + err = check_whitelist_across_reset(engine, + do_engine_reset, + "engine"); + if (err) + goto out; + } - if (intel_has_gpu_reset(i915)) { - err = check_whitelist_across_reset(engine, - do_device_reset, - "device"); - if (err) - goto out; + if (intel_has_gpu_reset(gt)) { + err = check_whitelist_across_reset(engine, + do_device_reset, + "device"); + if (err) + goto out; + } } out: - igt_global_reset_unlock(i915); + igt_global_reset_unlock(gt); return err; } @@ -742,6 +770,14 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, if (IS_ERR(rq)) return PTR_ERR(rq); + i915_vma_lock(results); + err = i915_request_await_object(rq, results->obj, true); + if (err == 0) + err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(results); + if (err) + goto err_req; + srm = MI_STORE_REGISTER_MEM; if (INTEL_GEN(ctx->i915) >= 8) srm++; @@ -756,8 +792,8 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, u64 offset = results->node.start + sizeof(u32) * i; u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); - /* Clear RD only and WR only flags */ - reg &= ~(RING_FORCE_TO_NONPRIV_RD | RING_FORCE_TO_NONPRIV_WR); + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; *cs++ = srm; *cs++ = reg; @@ -767,23 +803,21 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, 
intel_ring_advance(rq, cs); err_req: - i915_request_add(rq); - - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -EIO; - - return err; + return request_add_sync(rq, err); } static int scrub_whitelisted_registers(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *batch; int i, err = 0; u32 *cs; - batch = create_batch(ctx); + vm = i915_gem_context_get_vm_rcu(ctx); + batch = create_batch(vm); + i915_vm_put(vm); if (IS_ERR(batch)) return PTR_ERR(batch); @@ -800,13 +834,16 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, if (ro_register(reg)) continue; + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; + *cs++ = reg; *cs++ = 0xffffffff; } *cs++ = MI_BATCH_BUFFER_END; i915_gem_object_flush_map(batch->obj); - i915_gem_chipset_flush(ctx->i915); + intel_gt_chipset_flush(engine->gt); rq = igt_request_alloc(ctx, engine); if (IS_ERR(rq)) { @@ -820,13 +857,19 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, goto err_request; } + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto err_request; + /* Perform the writes from an unprivileged "user" batch */ err = engine->emit_bb_start(rq, batch->node.start, 0, 0); err_request: - i915_request_add(rq); - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -EIO; + err = request_add_sync(rq, err); err_unpin: i915_gem_object_unpin_map(batch->obj); @@ -927,7 +970,8 @@ check_whitelisted_registers(struct intel_engine_cs *engine, for (i = 0; i < engine->whitelist.count; i++) { const struct i915_wa *wa = &engine->whitelist.list[i]; - if (i915_mmio_reg_offset(wa->reg) & RING_FORCE_TO_NONPRIV_RD) + if (i915_mmio_reg_offset(wa->reg) & + RING_FORCE_TO_NONPRIV_ACCESS_RD) continue; if (!fn(engine, a[i], b[i], wa->reg)) @@ -942,7 +986,7 @@ err_a: static int live_isolated_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct { struct i915_gem_context *ctx; struct i915_vma *scratch[2]; @@ -956,40 +1000,46 @@ static int live_isolated_whitelist(void *arg) * invisible to a second context. 
*/ - if (!intel_engines_has_context_isolation(i915)) - return 0; - - if (!i915->kernel_context->vm) + if (!intel_engines_has_context_isolation(gt->i915)) return 0; for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_address_space *vm; struct i915_gem_context *c; - c = kernel_context(i915); + c = kernel_context(gt->i915); if (IS_ERR(c)) { err = PTR_ERR(c); goto err; } - client[i].scratch[0] = create_scratch(c->vm, 1024); + vm = i915_gem_context_get_vm_rcu(c); + + client[i].scratch[0] = create_scratch(vm, 1024); if (IS_ERR(client[i].scratch[0])) { err = PTR_ERR(client[i].scratch[0]); + i915_vm_put(vm); kernel_context_close(c); goto err; } - client[i].scratch[1] = create_scratch(c->vm, 1024); + client[i].scratch[1] = create_scratch(vm, 1024); if (IS_ERR(client[i].scratch[1])) { err = PTR_ERR(client[i].scratch[1]); i915_vma_unpin_and_release(&client[i].scratch[0], 0); + i915_vm_put(vm); kernel_context_close(c); goto err; } client[i].ctx = c; + i915_vm_put(vm); } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { + if (!engine->kernel_context->vm) + continue; + if (!whitelist_writable_count(engine)) continue; @@ -1043,7 +1093,7 @@ err: kernel_context_close(client[i].ctx); } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; return err; @@ -1060,7 +1110,7 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str); - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + for_each_gem_engine(ce, i915_gem_context_engines(ctx), it) { enum intel_engine_id id = ce->engine->id; ok &= engine_wa_list_verify(ce, @@ -1071,7 +1121,6 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, &lists->engine[id].ctx_wa_list, str) == 0; } - i915_gem_context_unlock_engines(ctx); return ok; } @@ -1079,39 +1128,42 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, static int live_gpu_reset_workarounds(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx; intel_wakeref_t wakeref; struct wa_lists lists; bool ok; - if (!intel_has_gpu_reset(i915)) + if (!intel_has_gpu_reset(gt)) return 0; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); + i915_gem_context_lock_engines(ctx); + pr_info("Verifying after GPU reset...\n"); - igt_global_reset_lock(i915); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(i915, &lists); + reference_lists_init(gt, &lists); ok = verify_wa_lists(ctx, &lists, "before reset"); if (!ok) goto out; - i915_reset(i915, ALL_ENGINES, "live_workarounds"); + intel_gt_reset(gt, ALL_ENGINES, "live_workarounds"); ok = verify_wa_lists(ctx, &lists, "after reset"); out: + i915_gem_context_unlock_engines(ctx); kernel_context_close(ctx); - reference_lists_fini(i915, &lists); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(i915); + reference_lists_fini(gt, &lists); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); return ok ? 
0 : -ESRCH; } @@ -1119,29 +1171,30 @@ out: static int live_engine_reset_workarounds(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; + struct intel_gt *gt = arg; + struct i915_gem_engines_iter it; struct i915_gem_context *ctx; + struct intel_context *ce; struct igt_spinner spin; - enum intel_engine_id id; struct i915_request *rq; intel_wakeref_t wakeref; struct wa_lists lists; int ret = 0; - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); - igt_global_reset_lock(i915); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(i915, &lists); + reference_lists_init(gt, &lists); - for_each_engine(engine, i915, id) { + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + struct intel_engine_cs *engine = ce->engine; bool ok; pr_info("Verifying after %s reset...\n", engine->name); @@ -1152,7 +1205,7 @@ live_engine_reset_workarounds(void *arg) goto err; } - i915_reset_engine(engine, "live_workarounds"); + intel_engine_reset(engine, "live_workarounds"); ok = verify_wa_lists(ctx, &lists, "after idle reset"); if (!ok) { @@ -1160,27 +1213,25 @@ live_engine_reset_workarounds(void *arg) goto err; } - ret = igt_spinner_init(&spin, i915); + ret = igt_spinner_init(&spin, engine->gt); if (ret) goto err; - rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP); + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); if (IS_ERR(rq)) { ret = PTR_ERR(rq); igt_spinner_fini(&spin); goto err; } - i915_request_add(rq); - - if (!igt_wait_for_spinner(&spin, rq)) { + ret = request_add_spin(rq, &spin); + if (ret) { pr_err("Spinner failed to start\n"); igt_spinner_fini(&spin); - ret = -ETIMEDOUT; goto err; } - i915_reset_engine(engine, "live_workarounds"); + intel_engine_reset(engine, "live_workarounds"); igt_spinner_end(&spin); igt_spinner_fini(&spin); @@ -1191,14 +1242,14 @@ live_engine_reset_workarounds(void *arg) goto err; } } - err: - reference_lists_fini(i915, &lists); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(i915); + i915_gem_context_unlock_engines(ctx); + reference_lists_fini(gt, &lists); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); kernel_context_close(ctx); - igt_flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(gt->i915); return ret; } @@ -1212,14 +1263,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) SUBTEST(live_gpu_reset_workarounds), SUBTEST(live_engine_reset_workarounds), }; - int err; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); - mutex_unlock(&i915->drm.struct_mutex); - - return err; + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c index 65b52be23d42..aeb1d1f616e8 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -4,18 +4,18 @@ * Copyright © 2017-2018 Intel Corporation */ -#include "../i915_timeline.h" +#include "../intel_timeline.h" #include "mock_timeline.h" -void mock_timeline_init(struct i915_timeline *timeline, u64 context) +void mock_timeline_init(struct intel_timeline *timeline, u64 context) { - timeline->i915 = NULL; + 
timeline->gt = NULL; timeline->fence_context = context; mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->last_request); + INIT_ACTIVE_FENCE(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); @@ -23,7 +23,7 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) INIT_LIST_HEAD(&timeline->link); } -void mock_timeline_fini(struct i915_timeline *timeline) +void mock_timeline_fini(struct intel_timeline *timeline) { i915_syncmap_free(&timeline->sync); } diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h new file mode 100644 index 000000000000..d2bcc3df6183 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h @@ -0,0 +1,17 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#ifndef __MOCK_TIMELINE__ +#define __MOCK_TIMELINE__ + +#include <linux/types.h> + +struct intel_timeline; + +void mock_timeline_init(struct intel_timeline *timeline, u64 context); +void mock_timeline_fini(struct intel_timeline *timeline); + +#endif /* !__MOCK_TIMELINE__ */ diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index c40a6efdd33a..5d00a3b2d914 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -1,44 +1,48 @@ +// SPDX-License-Identifier: MIT /* - * Copyright © 2014-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * + * Copyright © 2014-2019 Intel Corporation */ +#include "gt/intel_gt.h" +#include "gt/intel_gt_irq.h" +#include "gt/intel_gt_pm_irq.h" #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" -static void gen8_guc_raise_irq(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); -} +/** + * DOC: GuC + * + * The GuC is a microcontroller inside the GT HW, introduced in gen9. The GuC is + * designed to offload some of the functionality usually performed by the host + * driver; currently the main operations it can take care of are: + * + * - Authentication of the HuC, which is required to fully enable HuC usage. + * - Low latency graphics context scheduling (a.k.a. GuC submission). + * - GT Power management. + * + * The enable_guc module parameter can be used to select which of those + * operations to enable within GuC. 
Note that not all the operations are + * supported on all gen9+ platforms. + * + * Enabling the GuC is not mandatory and therefore the firmware is only loaded + * if at least one of the operations is selected. However, not loading the GuC + * might result in the loss of some features that do require the GuC (currently + * just the HuC, but more are expected to land in the future). + */ -static void gen11_guc_raise_irq(struct intel_guc *guc) +void intel_guc_notify(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_gt *gt = guc_to_gt(guc); - I915_WRITE(GEN11_GUC_HOST_INTERRUPT, 0); + /* + * On Gen11+, the value written to the register is passes as a payload + * to the FW. However, the FW currently treats all values the same way + * (H2G interrupt), so we can just write the value that the HW expects + * on older gens. + */ + intel_uncore_write(gt->uncore, guc->notify_reg, GUC_SEND_TRIGGER); } static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) @@ -52,11 +56,11 @@ static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) void intel_guc_init_send_regs(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_gt *gt = guc_to_gt(guc); enum forcewake_domains fw_domains = 0; unsigned int i; - if (INTEL_GEN(dev_priv) >= 11) { + if (INTEL_GEN(gt->i915) >= 11) { guc->send_regs.base = i915_mmio_reg_offset(GEN11_SOFT_SCRATCH(0)); guc->send_regs.count = GEN11_SOFT_SCRATCH_COUNT; @@ -67,206 +71,122 @@ void intel_guc_init_send_regs(struct intel_guc *guc) } for (i = 0; i < guc->send_regs.count; i++) { - fw_domains |= intel_uncore_forcewake_for_reg(&dev_priv->uncore, + fw_domains |= intel_uncore_forcewake_for_reg(gt->uncore, guc_send_reg(guc, i), FW_REG_READ | FW_REG_WRITE); } guc->send_regs.fw_domains = fw_domains; } -void intel_guc_init_early(struct intel_guc *guc) +static void gen9_reset_guc_interrupts(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_i915(guc); + struct intel_gt *gt = guc_to_gt(guc); - intel_guc_fw_init_early(guc); - intel_guc_ct_init_early(&guc->ct); - intel_guc_log_init_early(&guc->log); + assert_rpm_wakelock_held(>->i915->runtime_pm); - mutex_init(&guc->send_mutex); - spin_lock_init(&guc->irq_lock); - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; - if (INTEL_GEN(i915) >= 11) { - guc->notify = gen11_guc_raise_irq; - guc->interrupts.reset = gen11_reset_guc_interrupts; - guc->interrupts.enable = gen11_enable_guc_interrupts; - guc->interrupts.disable = gen11_disable_guc_interrupts; - } else { - guc->notify = gen8_guc_raise_irq; - guc->interrupts.reset = gen9_reset_guc_interrupts; - guc->interrupts.enable = gen9_enable_guc_interrupts; - guc->interrupts.disable = gen9_disable_guc_interrupts; - } + spin_lock_irq(>->irq_lock); + gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); + spin_unlock_irq(>->irq_lock); } -static int guc_init_wq(struct intel_guc *guc) +static void gen9_enable_guc_interrupts(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_gt *gt = guc_to_gt(guc); - /* - * GuC log buffer flush work item has to do register access to - * send the ack to GuC and this work item, if not synced before - * suspend, can potentially get executed after the GFX device is - * suspended. 
- * By marking the WQ as freezable, we don't have to bother about - * flushing of this work item from the suspend hooks, the pending - * work item if any will be either executed before the suspend - * or scheduled later on resume. This way the handling of work - * item can be kept same between system suspend & rpm suspend. - */ - guc->log.relay.flush_wq = - alloc_ordered_workqueue("i915-guc_log", - WQ_HIGHPRI | WQ_FREEZABLE); - if (!guc->log.relay.flush_wq) { - DRM_ERROR("Couldn't allocate workqueue for GuC log\n"); - return -ENOMEM; - } + assert_rpm_wakelock_held(>->i915->runtime_pm); - /* - * Even though both sending GuC action, and adding a new workitem to - * GuC workqueue are serialized (each with its own locking), since - * we're using mutliple engines, it's possible that we're going to - * issue a preempt request with two (or more - each for different - * engine) workitems in GuC queue. In this situation, GuC may submit - * all of them, which will make us very confused. - * Our preemption contexts may even already be complete - before we - * even had the chance to sent the preempt action to GuC!. Rather - * than introducing yet another lock, we can just use ordered workqueue - * to make sure we're always sending a single preemption request with a - * single workitem. - */ - if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && - USES_GUC_SUBMISSION(dev_priv)) { - guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt", - WQ_HIGHPRI); - if (!guc->preempt_wq) { - destroy_workqueue(guc->log.relay.flush_wq); - DRM_ERROR("Couldn't allocate workqueue for GuC " - "preemption\n"); - return -ENOMEM; - } + spin_lock_irq(>->irq_lock); + if (!guc->interrupts.enabled) { + WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) & + gt->pm_guc_events); + guc->interrupts.enabled = true; + gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); } - - return 0; + spin_unlock_irq(>->irq_lock); } -static void guc_fini_wq(struct intel_guc *guc) +static void gen9_disable_guc_interrupts(struct intel_guc *guc) { - struct workqueue_struct *wq; - - wq = fetch_and_zero(&guc->preempt_wq); - if (wq) - destroy_workqueue(wq); - - wq = fetch_and_zero(&guc->log.relay.flush_wq); - if (wq) - destroy_workqueue(wq); -} + struct intel_gt *gt = guc_to_gt(guc); -int intel_guc_init_misc(struct intel_guc *guc) -{ - struct drm_i915_private *i915 = guc_to_i915(guc); - int ret; + assert_rpm_wakelock_held(>->i915->runtime_pm); - ret = guc_init_wq(guc); - if (ret) - return ret; + spin_lock_irq(>->irq_lock); + guc->interrupts.enabled = false; - intel_uc_fw_fetch(i915, &guc->fw); + gen6_gt_pm_disable_irq(gt, gt->pm_guc_events); - return 0; -} + spin_unlock_irq(>->irq_lock); + intel_synchronize_irq(gt->i915); -void intel_guc_fini_misc(struct intel_guc *guc) -{ - intel_uc_fw_cleanup_fetch(&guc->fw); - guc_fini_wq(guc); + gen9_reset_guc_interrupts(guc); } -static int guc_shared_data_create(struct intel_guc *guc) +static void gen11_reset_guc_interrupts(struct intel_guc *guc) { - struct i915_vma *vma; - void *vaddr; - - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); - } + struct intel_gt *gt = guc_to_gt(guc); - guc->shared_data = vma; - guc->shared_data_vaddr = vaddr; - - return 0; + spin_lock_irq(>->irq_lock); + gen11_gt_reset_one_iir(gt, 0, GEN11_GUC); + spin_unlock_irq(>->irq_lock); } -static void guc_shared_data_destroy(struct intel_guc *guc) +static void 
gen11_enable_guc_interrupts(struct intel_guc *guc) { - i915_vma_unpin_and_release(&guc->shared_data, I915_VMA_RELEASE_MAP); + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(>->irq_lock); + if (!guc->interrupts.enabled) { + u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); + + WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); + intel_uncore_write(gt->uncore, + GEN11_GUC_SG_INTR_ENABLE, events); + intel_uncore_write(gt->uncore, + GEN11_GUC_SG_INTR_MASK, ~events); + guc->interrupts.enabled = true; + } + spin_unlock_irq(>->irq_lock); } -int intel_guc_init(struct intel_guc *guc) +static void gen11_disable_guc_interrupts(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - int ret; - - ret = intel_uc_fw_init(&guc->fw); - if (ret) - goto err_fetch; - - ret = guc_shared_data_create(guc); - if (ret) - goto err_fw; - GEM_BUG_ON(!guc->shared_data); - - ret = intel_guc_log_create(&guc->log); - if (ret) - goto err_shared; - - ret = intel_guc_ads_create(guc); - if (ret) - goto err_log; - GEM_BUG_ON(!guc->ads_vma); + struct intel_gt *gt = guc_to_gt(guc); - ret = intel_guc_ct_init(&guc->ct); - if (ret) - goto err_ads; + spin_lock_irq(>->irq_lock); + guc->interrupts.enabled = false; - /* We need to notify the guc whenever we change the GGTT */ - i915_ggtt_enable_guc(dev_priv); + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0); + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0); - return 0; + spin_unlock_irq(>->irq_lock); + intel_synchronize_irq(gt->i915); -err_ads: - intel_guc_ads_destroy(guc); -err_log: - intel_guc_log_destroy(&guc->log); -err_shared: - guc_shared_data_destroy(guc); -err_fw: - intel_uc_fw_fini(&guc->fw); -err_fetch: - intel_uc_fw_cleanup_fetch(&guc->fw); - return ret; + gen11_reset_guc_interrupts(guc); } -void intel_guc_fini(struct intel_guc *guc) +void intel_guc_init_early(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; - i915_ggtt_disable_guc(dev_priv); - - intel_guc_ct_fini(&guc->ct); + intel_guc_fw_init_early(guc); + intel_guc_ct_init_early(&guc->ct); + intel_guc_log_init_early(&guc->log); + intel_guc_submission_init_early(guc); - intel_guc_ads_destroy(guc); - intel_guc_log_destroy(&guc->log); - guc_shared_data_destroy(guc); - intel_uc_fw_fini(&guc->fw); - intel_uc_fw_cleanup_fetch(&guc->fw); + mutex_init(&guc->send_mutex); + spin_lock_init(&guc->irq_lock); + if (INTEL_GEN(i915) >= 11) { + guc->notify_reg = GEN11_GUC_HOST_INTERRUPT; + guc->interrupts.reset = gen11_reset_guc_interrupts; + guc->interrupts.enable = gen11_enable_guc_interrupts; + guc->interrupts.disable = gen11_disable_guc_interrupts; + } else { + guc->notify_reg = GUC_SEND_INTERRUPT; + guc->interrupts.reset = gen9_reset_guc_interrupts; + guc->interrupts.enable = gen9_enable_guc_interrupts; + guc->interrupts.disable = gen9_disable_guc_interrupts; + } } static u32 guc_ctl_debug_flags(struct intel_guc *guc) @@ -287,7 +207,7 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc) { u32 flags = 0; - if (!USES_GUC_SUBMISSION(guc_to_i915(guc))) + if (!intel_guc_is_submission_supported(guc)) flags |= GUC_CTL_DISABLE_SCHEDULER; return flags; @@ -297,7 +217,7 @@ static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc) { u32 flags = 0; - if (USES_GUC_SUBMISSION(guc_to_i915(guc))) { + if (intel_guc_is_submission_supported(guc)) { u32 ctxnum, base; base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool); @@ -364,13 +284,12 @@ static u32 guc_ctl_ads_flags(struct 
intel_guc *guc) * transfer. These parameters are read by the firmware on startup * and cannot be changed thereafter. */ -void intel_guc_init_params(struct intel_guc *guc) +static void guc_init_params(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - u32 params[GUC_CTL_MAX_DWORDS]; + u32 *params = guc->params; int i; - memset(params, 0, sizeof(params)); + BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); params[GUC_CTL_CTXINFO] = guc_ctl_ctxinfo_flags(guc); params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); @@ -380,32 +299,107 @@ void intel_guc_init_params(struct intel_guc *guc) for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) DRM_DEBUG_DRIVER("param[%2d] = %#x\n", i, params[i]); +} + +/* + * Initialise the GuC parameter block before starting the firmware + * transfer. These parameters are read by the firmware on startup + * and cannot be changed thereafter. + */ +void intel_guc_write_params(struct intel_guc *guc) +{ + struct intel_uncore *uncore = guc_to_gt(guc)->uncore; + int i; /* * All SOFT_SCRATCH registers are in FORCEWAKE_BLITTER domain and * they are power context saved so it's ok to release forcewake * when we are done here and take it again at xfer time. */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_BLITTER); + intel_uncore_forcewake_get(uncore, FORCEWAKE_BLITTER); - I915_WRITE(SOFT_SCRATCH(0), 0); + intel_uncore_write(uncore, SOFT_SCRATCH(0), 0); for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - I915_WRITE(SOFT_SCRATCH(1 + i), params[i]); + intel_uncore_write(uncore, SOFT_SCRATCH(1 + i), guc->params[i]); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_BLITTER); + intel_uncore_forcewake_put(uncore, FORCEWAKE_BLITTER); } -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size) +int intel_guc_init(struct intel_guc *guc) { - WARN(1, "Unexpected send: action=%#x\n", *action); - return -ENODEV; + struct intel_gt *gt = guc_to_gt(guc); + int ret; + + ret = intel_uc_fw_init(&guc->fw); + if (ret) + goto err_fetch; + + ret = intel_guc_log_create(&guc->log); + if (ret) + goto err_fw; + + ret = intel_guc_ads_create(guc); + if (ret) + goto err_log; + GEM_BUG_ON(!guc->ads_vma); + + ret = intel_guc_ct_init(&guc->ct); + if (ret) + goto err_ads; + + if (intel_guc_is_submission_supported(guc)) { + /* + * This is stuff we need to have available at fw load time + * if we are planning to enable submission later + */ + ret = intel_guc_submission_init(guc); + if (ret) + goto err_ct; + } + + /* now that everything is perma-pinned, initialize the parameters */ + guc_init_params(guc); + + /* We need to notify the guc whenever we change the GGTT */ + i915_ggtt_enable_guc(gt->ggtt); + + return 0; + +err_ct: + intel_guc_ct_fini(&guc->ct); +err_ads: + intel_guc_ads_destroy(guc); +err_log: + intel_guc_log_destroy(&guc->log); +err_fw: + intel_uc_fw_fini(&guc->fw); +err_fetch: + intel_uc_fw_cleanup_fetch(&guc->fw); + DRM_DEV_DEBUG_DRIVER(gt->i915->drm.dev, "failed with %d\n", ret); + return ret; } -void intel_guc_to_host_event_handler_nop(struct intel_guc *guc) +void intel_guc_fini(struct intel_guc *guc) { - WARN(1, "Unexpected event: no suitable handler\n"); + struct intel_gt *gt = guc_to_gt(guc); + + if (!intel_uc_fw_is_available(&guc->fw)) + return; + + i915_ggtt_disable_guc(gt->ggtt); + + if (intel_guc_is_submission_supported(guc)) + intel_guc_submission_fini(guc); + + intel_guc_ct_fini(&guc->ct); + + intel_guc_ads_destroy(guc); + intel_guc_log_destroy(&guc->log); + 
intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); + + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED); } /* @@ -414,8 +408,7 @@ void intel_guc_to_host_event_handler_nop(struct intel_guc *guc) int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_uncore *uncore = guc_to_gt(guc)->uncore; u32 status; int i; int ret; @@ -464,7 +457,8 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, int count = min(response_buf_size, guc->send_regs.count - 1); for (i = 0; i < count; i++) - response_buf[i] = I915_READ(guc_send_reg(guc, i + 1)); + response_buf[i] = intel_uncore_read(uncore, + guc_send_reg(guc, i + 1)); } /* Use data from the GuC response as our return value */ @@ -497,7 +491,7 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, int intel_guc_sample_forcewake(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; u32 action[2]; action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; @@ -538,7 +532,7 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) */ int intel_guc_suspend(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_uncore *uncore = guc_to_gt(guc)->uncore; int ret; u32 status; u32 action[] = { @@ -547,6 +541,13 @@ int intel_guc_suspend(struct intel_guc *guc) }; /* + * If GuC communication is enabled but submission is not supported, + * we do not need to suspend the GuC. + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + + /* * The ENTER_S_STATE action queues the save/restore operation in GuC FW * and then returns, so waiting on the H2G is not enough to guarantee * GuC is done. When all the processing is done, GuC writes @@ -556,13 +557,14 @@ int intel_guc_suspend(struct intel_guc *guc) * in progress so we need to take care of that ourselves as well. */ - I915_WRITE(SOFT_SCRATCH(14), INTEL_GUC_SLEEP_STATE_INVALID_MASK); + intel_uncore_write(uncore, SOFT_SCRATCH(14), + INTEL_GUC_SLEEP_STATE_INVALID_MASK); ret = intel_guc_send(guc, action, ARRAY_SIZE(action)); if (ret) return ret; - ret = __intel_wait_for_register(&dev_priv->uncore, SOFT_SCRATCH(14), + ret = __intel_wait_for_register(uncore, SOFT_SCRATCH(14), INTEL_GUC_SLEEP_STATE_INVALID_MASK, 0, 0, 10, &status); if (ret) @@ -586,19 +588,9 @@ int intel_guc_suspend(struct intel_guc *guc) int intel_guc_reset_engine(struct intel_guc *guc, struct intel_engine_cs *engine) { - u32 data[7]; - - GEM_BUG_ON(!guc->execbuf_client); - - data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET; - data[1] = engine->guc_id; - data[2] = 0; - data[3] = 0; - data[4] = 0; - data[5] = guc->execbuf_client->stage_id; - data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); + /* XXX: to be implemented with submission interface rework */ - return intel_guc_send(guc, data, ARRAY_SIZE(data)); + return -ENODEV; } /** @@ -612,13 +604,27 @@ int intel_guc_resume(struct intel_guc *guc) GUC_POWER_D0, }; + /* + * If GuC communication is enabled but submission is not supported, + * we do not need to resume the GuC but we do need to enable the + * GuC communication on resume (above). 
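 *
 * Editor's note (illustrative, not part of the patch): intel_guc_suspend()
 * above carries the same guard, so on platforms where GuC is loaded but
 * submission is not in use, both suspend and resume skip the
 * ENTER_S_STATE handshake and its resume counterpart entirely.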
+ */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } /** - * DOC: GuC Address Space + * DOC: GuC Memory Management * - * The layout of GuC address space is shown below: + * GuC can't allocate any memory for its own usage, so all the allocations must + * be handled by the host driver. GuC accesses the memory via the GGTT, with the + * exception of the top and bottom parts of the 4GB address space, which are + * instead re-mapped by the GuC HW to memory location of the FW itself (WOPCM) + * or other parts of the HW. The driver must take care not to place objects that + * the GuC is going to access in these reserved ranges. The layout of the GuC + * address space is shown below: * * :: * @@ -658,17 +664,17 @@ int intel_guc_resume(struct intel_guc *guc) */ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_gt *gt = guc_to_gt(guc); struct drm_i915_gem_object *obj; struct i915_vma *vma; u64 flags; int ret; - obj = i915_gem_object_create_shmem(dev_priv, size); + obj = i915_gem_object_create_shmem(gt->i915, size); if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_instance(obj, &dev_priv->ggtt.vm, NULL); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) goto err; @@ -679,9 +685,43 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) goto err; } - return vma; + return i915_vma_make_unshrinkable(vma); err: i915_gem_object_put(obj); return vma; } + +/** + * intel_guc_allocate_and_map_vma() - Allocate and map VMA for GuC usage + * @guc: the guc + * @size: size of area to allocate (both virtual space and memory) + * @out_vma: return variable for the allocated vma pointer + * @out_vaddr: return variable for the obj mapping + * + * This wrapper calls intel_guc_allocate_vma() and then maps the allocated + * object with I915_MAP_WB. + * + * Return: 0 if successful, a negative errno code otherwise. + */ +int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, + struct i915_vma **out_vma, void **out_vaddr) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, size); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + *out_vma = vma; + *out_vaddr = vaddr; + + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 08c906abdfa2..910d49590068 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -1,25 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* - * Copyright © 2014-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * + * Copyright © 2014-2019 Intel Corporation */ #ifndef _INTEL_GUC_H_ @@ -35,51 +16,46 @@ #include "i915_utils.h" #include "i915_vma.h" -struct guc_preempt_work { - struct work_struct work; - struct intel_engine_cs *engine; -}; +struct __guc_ads_blob; /* * Top level structure of GuC. It handles firmware loading and manages client - * pool and doorbells. intel_guc owns a intel_guc_client to replace the legacy - * ExecList submission. + * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList + * submission. */ struct intel_guc { struct intel_uc_fw fw; struct intel_guc_log log; struct intel_guc_ct ct; - /* Log snapshot if GuC errors during load */ - struct drm_i915_gem_object *load_err_log; - /* intel_guc_recv interrupt related state */ spinlock_t irq_lock; unsigned int msg_enabled_mask; struct { bool enabled; - void (*reset)(struct drm_i915_private *i915); - void (*enable)(struct drm_i915_private *i915); - void (*disable)(struct drm_i915_private *i915); + void (*reset)(struct intel_guc *guc); + void (*enable)(struct intel_guc *guc); + void (*disable)(struct intel_guc *guc); } interrupts; + bool submission_supported; + struct i915_vma *ads_vma; + struct __guc_ads_blob *ads_blob; + struct i915_vma *stage_desc_pool; void *stage_desc_pool_vaddr; - struct ida stage_ids; - struct i915_vma *shared_data; - void *shared_data_vaddr; - struct intel_guc_client *execbuf_client; - struct intel_guc_client *preempt_client; + struct i915_vma *workqueue; + void *workqueue_vaddr; + spinlock_t wq_lock; - struct guc_preempt_work preempt_work[I915_NUM_ENGINES]; - struct workqueue_struct *preempt_wq; + struct i915_vma *proc_desc; + void *proc_desc_vaddr; - DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); - /* Cyclic counter mod pagesize */ - u32 db_cacheline; + /* Control params for fw initialization */ + u32 params[GUC_CTL_MAX_DWORDS]; /* GuC's FW specific registers used in MMIO send */ struct { @@ -88,41 +64,33 @@ struct intel_guc { enum forcewake_domains fw_domains; } send_regs; - /* To serialize the intel_guc_send actions */ - struct mutex send_mutex; - - /* GuC's FW specific send function */ - int (*send)(struct intel_guc *guc, const u32 *data, u32 len, - u32 *response_buf, u32 response_buf_size); + /* register used to send interrupts to the GuC FW */ + i915_reg_t notify_reg; - /* GuC's FW specific event handler function */ - void (*handler)(struct intel_guc *guc); + /* Store msg (e.g. 
log flush) that we see while CTBs are disabled */ + u32 mmio_msg; - /* GuC's FW specific notify function */ - void (*notify)(struct intel_guc *guc); + /* To serialize the intel_guc_send actions */ + struct mutex send_mutex; }; static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { - return guc->send(guc, action, len, NULL, 0); + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); } static inline int intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size) { - return guc->send(guc, action, len, response_buf, response_buf_size); -} - -static inline void intel_guc_notify(struct intel_guc *guc) -{ - guc->notify(guc); + return intel_guc_ct_send(&guc->ct, action, len, + response_buf, response_buf_size); } static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) { - guc->handler(guc); + intel_guc_ct_event_handler(&guc->ct); } /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ @@ -154,17 +122,12 @@ static inline u32 intel_guc_ggtt_offset(struct intel_guc *guc, void intel_guc_init_early(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); -void intel_guc_init_params(struct intel_guc *guc); -int intel_guc_init_misc(struct intel_guc *guc); +void intel_guc_write_params(struct intel_guc *guc); int intel_guc_init(struct intel_guc *guc); void intel_guc_fini(struct intel_guc *guc); -void intel_guc_fini_misc(struct intel_guc *guc); -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size); +void intel_guc_notify(struct intel_guc *guc); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size); -void intel_guc_to_host_event_handler(struct intel_guc *guc); -void intel_guc_to_host_event_handler_nop(struct intel_guc *guc); int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, const u32 *payload, u32 len); int intel_guc_sample_forcewake(struct intel_guc *guc); @@ -172,18 +135,37 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); int intel_guc_suspend(struct intel_guc *guc); int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); +int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, + struct i915_vma **out_vma, void **out_vaddr); -static inline bool intel_guc_is_loaded(struct intel_guc *guc) +static inline bool intel_guc_is_supported(struct intel_guc *guc) { - return intel_uc_fw_is_loaded(&guc->fw); + return intel_uc_fw_is_supported(&guc->fw); +} + +static inline bool intel_guc_is_enabled(struct intel_guc *guc) +{ + return intel_uc_fw_is_enabled(&guc->fw); +} + +static inline bool intel_guc_is_running(struct intel_guc *guc) +{ + return intel_uc_fw_is_running(&guc->fw); } static inline int intel_guc_sanitize(struct intel_guc *guc) { intel_uc_fw_sanitize(&guc->fw); + guc->mmio_msg = 0; + return 0; } +static inline bool intel_guc_is_submission_supported(struct intel_guc *guc) +{ + return guc->submission_supported; +} + static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask) { spin_lock_irq(&guc->irq_lock); diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index ecb69fc94218..101728006ae9 100644 --- a/drivers/gpu/drm/i915/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -1,27 +1,9 @@ +// SPDX-License-Identifier: MIT /* - * Copyright © 2014-2017 Intel Corporation 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * + * Copyright © 2014-2019 Intel Corporation */ +#include "gt/intel_gt.h" #include "intel_guc_ads.h" #include "intel_uc.h" #include "i915_drv.h" @@ -83,18 +65,14 @@ struct __guc_ads_blob { u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE]; } __packed; -static int __guc_ads_init(struct intel_guc *guc) +static void __guc_ads_init(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct __guc_ads_blob *blob; + struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; + struct __guc_ads_blob *blob = guc->ads_blob; const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; u32 base; u8 engine_class; - blob = i915_gem_object_pin_map(guc->ads_vma->obj, I915_MAP_WB); - if (IS_ERR(blob)) - return PTR_ERR(blob); - /* GuC scheduling policies */ guc_policies_init(&blob->policies); @@ -115,7 +93,8 @@ static int __guc_ads_init(struct intel_guc *guc) */ blob->ads.golden_context_lrca[engine_class] = 0; blob->ads.eng_state_size[engine_class] = - intel_engine_context_size(dev_priv, engine_class) - + intel_engine_context_size(guc_to_gt(guc), + engine_class) - skipped_size; } @@ -144,9 +123,7 @@ static int __guc_ads_init(struct intel_guc *guc) blob->ads.gt_system_info = base + ptr_offset(blob, system_info); blob->ads.clients_info = base + ptr_offset(blob, clients_info); - i915_gem_object_unpin_map(guc->ads_vma->obj); - - return 0; + i915_gem_object_flush_map(guc->ads_vma->obj); } /** @@ -159,31 +136,24 @@ static int __guc_ads_init(struct intel_guc *guc) int intel_guc_ads_create(struct intel_guc *guc) { const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob)); - struct i915_vma *vma; int ret; GEM_BUG_ON(guc->ads_vma); - vma = intel_guc_allocate_vma(guc, size); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - guc->ads_vma = vma; + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma, + (void **)&guc->ads_blob); - ret = __guc_ads_init(guc); if (ret) - goto err_vma; + return ret; - return 0; + __guc_ads_init(guc); -err_vma: - i915_vma_unpin_and_release(&guc->ads_vma, 0); - return ret; + return 0; } void intel_guc_ads_destroy(struct intel_guc *guc) { - i915_vma_unpin_and_release(&guc->ads_vma, 0); + i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP); } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h new file mode 100644 index 000000000000..b00d3ae1113a --- /dev/null +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_ADS_H_ +#define _INTEL_GUC_ADS_H_ + +struct intel_guc; + +int intel_guc_ads_create(struct intel_guc *guc); +void intel_guc_ads_destroy(struct intel_guc *guc); +void intel_guc_ads_reset(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 3921809f812b..c6f971a049f9 100644 --- a/drivers/gpu/drm/i915/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1,24 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * Copyright © 2016-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
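Editor's aside — an illustrative sketch, not part of the patch: as the GuC Memory Management DOC above notes, the GuC cannot allocate memory for itself, so every GuC-visible object is created by the driver and pinned in the GGTT. The ADS rework above and the CT code below both use the new intel_guc_allocate_and_map_vma() helper for this. The pattern, with hypothetical example_* names and assuming the usual i915 headers, is roughly:

	struct example_blob {
		struct i915_vma *vma;	/* GGTT-pinned backing object */
		void *vaddr;		/* WB CPU mapping of that object */
	};

	static int example_blob_create(struct intel_guc *guc,
				       struct example_blob *b)
	{
		/* one call allocates, pins into the GGTT and maps with I915_MAP_WB */
		return intel_guc_allocate_and_map_vma(guc, PAGE_SIZE,
						      &b->vma, &b->vaddr);
	}

	static void example_blob_destroy(struct example_blob *b)
	{
		/* unmaps, unpins and releases the object in a single step */
		i915_vma_unpin_and_release(&b->vma, I915_VMA_RELEASE_MAP);
	}

Releasing with I915_VMA_RELEASE_MAP drops the CPU mapping together with the pin, which is why the separate unpin_map calls are no longer needed in the converted code.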
+ * Copyright © 2016-2019 Intel Corporation */ #include "i915_drv.h" @@ -55,13 +37,10 @@ static void ct_incoming_request_worker_func(struct work_struct *w); */ void intel_guc_ct_init_early(struct intel_guc_ct *ct) { - /* we're using static channel owners */ - ct->host_channel.owner = CTB_OWNER_HOST; - - spin_lock_init(&ct->lock); - INIT_LIST_HEAD(&ct->pending_requests); - INIT_LIST_HEAD(&ct->incoming_requests); - INIT_WORK(&ct->worker, ct_incoming_request_worker_func); + spin_lock_init(&ct->requests.lock); + INIT_LIST_HEAD(&ct->requests.pending); + INIT_LIST_HEAD(&ct->requests.incoming); + INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); } static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) @@ -82,14 +61,13 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type) } static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, - u32 cmds_addr, u32 size, u32 owner) + u32 cmds_addr, u32 size) { - CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n", - desc, cmds_addr, size, owner); + CT_DEBUG_DRIVER("CT: init addr=%#x size=%u\n", cmds_addr, size); memset(desc, 0, sizeof(*desc)); desc->addr = cmds_addr; desc->size = size; - desc->owner = owner; + desc->owner = CTB_OWNER_HOST; } static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc) @@ -122,12 +100,11 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc, } static int guc_action_deregister_ct_buffer(struct intel_guc *guc, - u32 owner, u32 type) { u32 action[] = { INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER, - owner, + CTB_OWNER_HOST, type }; int err; @@ -135,20 +112,27 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc, /* Can't use generic send(), CT deregistration must go over MMIO */ err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); if (err) - DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n", - guc_ct_buffer_type_to_str(type), owner, err); + DRM_ERROR("CT: deregister %s buffer failed; err=%d\n", + guc_ct_buffer_type_to_str(type), err); return err; } -static int ctch_init(struct intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_init - Init buffer-based communication + * @ct: pointer to CT struct + * + * Allocate memory required for buffer-based communication. + * + * Return: 0 on success, a negative errno code on failure. + */ +int intel_guc_ct_init(struct intel_guc_ct *ct) { - struct i915_vma *vma; + struct intel_guc *guc = ct_to_guc(ct); void *blob; int err; int i; - GEM_BUG_ON(ctch->vma); + GEM_BUG_ON(ct->vma); /* We allocate 1 page to hold both descriptors and both buffers. * ___________..................... @@ -172,71 +156,65 @@ static int ctch_init(struct intel_guc *guc, * other code will need updating as well. 
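 *
 * Editor's note (worked example, assuming a 4 KiB page; not part of the
 * patch): the pointer arithmetic below places the SEND descriptor at
 * +0x000, the RECV descriptor at +0x400, the SEND commands at +0x800 and
 * the RECV commands at +0xC00, with each command buffer spanning
 * PAGE_SIZE/4 = 1 KiB.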
*/ - /* allocate vma */ - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_out; + err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob); + if (err) { + DRM_ERROR("CT: channel allocation failed; err=%d\n", err); + return err; } - ctch->vma = vma; - /* map first page */ - blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(blob)) { - err = PTR_ERR(blob); - goto err_vma; - } CT_DEBUG_DRIVER("CT: vma base=%#x\n", - intel_guc_ggtt_offset(guc, ctch->vma)); + intel_guc_ggtt_offset(guc, ct->vma)); /* store pointers to desc and cmds */ - for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { - GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); - ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i; - ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2; + for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) { + GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); + ct->ctbs[i].desc = blob + PAGE_SIZE/4 * i; + ct->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2; } return 0; - -err_vma: - i915_vma_unpin_and_release(&ctch->vma, 0); -err_out: - CT_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n", - ctch->owner, err); - return err; } -static void ctch_fini(struct intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_fini - Fini buffer-based communication + * @ct: pointer to CT struct + * + * Deallocate memory required for buffer-based communication. + */ +void intel_guc_ct_fini(struct intel_guc_ct *ct) { - GEM_BUG_ON(ctch->enabled); + GEM_BUG_ON(ct->enabled); - i915_vma_unpin_and_release(&ctch->vma, I915_VMA_RELEASE_MAP); + i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP); } -static int ctch_enable(struct intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_enable - Enable buffer based command transport. + * @ct: pointer to CT struct + * + * Return: 0 on success, a negative errno code on failure. + */ +int intel_guc_ct_enable(struct intel_guc_ct *ct) { + struct intel_guc *guc = ct_to_guc(ct); u32 base; int err; int i; - GEM_BUG_ON(!ctch->vma); - - GEM_BUG_ON(ctch->enabled); + GEM_BUG_ON(ct->enabled); /* vma should be already allocated and map'ed */ - base = intel_guc_ggtt_offset(guc, ctch->vma); + GEM_BUG_ON(!ct->vma); + base = intel_guc_ggtt_offset(guc, ct->vma); /* (re)initialize descriptors * cmds buffers are in the second half of the blob page */ - for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { + for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) { GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); - guc_ct_buffer_desc_init(ctch->ctbs[i].desc, + guc_ct_buffer_desc_init(ct->ctbs[i].desc, base + PAGE_SIZE/4 * i + PAGE_SIZE/2, - PAGE_SIZE/4, - ctch->owner); + PAGE_SIZE/4); } /* register buffers, starting wirh RECV buffer @@ -254,38 +232,42 @@ static int ctch_enable(struct intel_guc *guc, if (unlikely(err)) goto err_deregister; - ctch->enabled = true; + ct->enabled = true; return 0; err_deregister: guc_action_deregister_ct_buffer(guc, - ctch->owner, INTEL_GUC_CT_BUFFER_TYPE_RECV); err_out: - DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err); + DRM_ERROR("CT: can't open channel; err=%d\n", err); return err; } -static void ctch_disable(struct intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_disable - Disable buffer based command transport. 
+ * @ct: pointer to CT struct + */ +void intel_guc_ct_disable(struct intel_guc_ct *ct) { - GEM_BUG_ON(!ctch->enabled); + struct intel_guc *guc = ct_to_guc(ct); - ctch->enabled = false; + GEM_BUG_ON(!ct->enabled); - guc_action_deregister_ct_buffer(guc, - ctch->owner, - INTEL_GUC_CT_BUFFER_TYPE_SEND); - guc_action_deregister_ct_buffer(guc, - ctch->owner, - INTEL_GUC_CT_BUFFER_TYPE_RECV); + ct->enabled = false; + + if (intel_guc_is_running(guc)) { + guc_action_deregister_ct_buffer(guc, + INTEL_GUC_CT_BUFFER_TYPE_SEND); + guc_action_deregister_ct_buffer(guc, + INTEL_GUC_CT_BUFFER_TYPE_RECV); + } } -static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch) +static u32 ct_get_next_fence(struct intel_guc_ct *ct) { /* For now it's trivial */ - return ++ctch->next_fence; + return ++ct->requests.next_fence; } /** @@ -458,35 +440,34 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) return err; } -static int ctch_send(struct intel_guc_ct *ct, - struct intel_guc_ct_channel *ctch, - const u32 *action, - u32 len, - u32 *response_buf, - u32 response_buf_size, - u32 *status) +static int ct_send(struct intel_guc_ct *ct, + const u32 *action, + u32 len, + u32 *response_buf, + u32 response_buf_size, + u32 *status) { - struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND]; + struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND]; struct guc_ct_buffer_desc *desc = ctb->desc; struct ct_request request; unsigned long flags; u32 fence; int err; - GEM_BUG_ON(!ctch->enabled); + GEM_BUG_ON(!ct->enabled); GEM_BUG_ON(!len); GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); GEM_BUG_ON(!response_buf && response_buf_size); - fence = ctch_get_next_fence(ctch); + fence = ct_get_next_fence(ct); request.fence = fence; request.status = 0; request.response_len = response_buf_size; request.response_buf = response_buf; - spin_lock_irqsave(&ct->lock, flags); - list_add_tail(&request.link, &ct->pending_requests); - spin_unlock_irqrestore(&ct->lock, flags); + spin_lock_irqsave(&ct->requests.lock, flags); + list_add_tail(&request.link, &ct->requests.pending); + spin_unlock_irqrestore(&ct->requests.lock, flags); err = ctb_write(ctb, action, len, fence, !!response_buf); if (unlikely(err)) @@ -519,9 +500,9 @@ static int ctch_send(struct intel_guc_ct *ct, } unlink: - spin_lock_irqsave(&ct->lock, flags); + spin_lock_irqsave(&ct->requests.lock, flags); list_del(&request.link); - spin_unlock_irqrestore(&ct->lock, flags); + spin_unlock_irqrestore(&ct->requests.lock, flags); return err; } @@ -529,18 +510,21 @@ unlink: /* * Command Transport (CT) buffer based GuC send function. 
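 *
 * Editor's note (illustrative, not part of the patch): fire-and-forget
 * callers reach this path through intel_guc_send(), which passes a NULL
 * response buffer, while callers that expect data back use
 * intel_guc_send_and_receive() with a u32 response array; both now funnel
 * into intel_guc_ct_send() instead of the old guc->send vtable.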
*/ -static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size) +int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) { - struct intel_guc_ct *ct = &guc->ct; - struct intel_guc_ct_channel *ctch = &ct->host_channel; + struct intel_guc *guc = ct_to_guc(ct); u32 status = ~0; /* undefined */ int ret; + if (unlikely(!ct->enabled)) { + WARN(1, "Unexpected send: action=%#x\n", *action); + return -ENODEV; + } + mutex_lock(&guc->send_mutex); - ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size, - &status); + ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n", action[0], ret, status); @@ -671,8 +655,8 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status); - spin_lock(&ct->lock); - list_for_each_entry(req, &ct->pending_requests, link) { + spin_lock(&ct->requests.lock); + list_for_each_entry(req, &ct->requests.pending, link) { if (unlikely(fence != req->fence)) { CT_DEBUG_DRIVER("CT: request %u awaits response\n", req->fence); @@ -690,7 +674,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) found = true; break; } - spin_unlock(&ct->lock); + spin_unlock(&ct->requests.lock); if (!found) DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg); @@ -728,13 +712,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct) u32 *payload; bool done; - spin_lock_irqsave(&ct->lock, flags); - request = list_first_entry_or_null(&ct->incoming_requests, + spin_lock_irqsave(&ct->requests.lock, flags); + request = list_first_entry_or_null(&ct->requests.incoming, struct ct_incoming_request, link); if (request) list_del(&request->link); - done = !!list_empty(&ct->incoming_requests); - spin_unlock_irqrestore(&ct->lock, flags); + done = !!list_empty(&ct->requests.incoming); + spin_unlock_irqrestore(&ct->requests.lock, flags); if (!request) return true; @@ -752,12 +736,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct) static void ct_incoming_request_worker_func(struct work_struct *w) { - struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, worker); + struct intel_guc_ct *ct = + container_of(w, struct intel_guc_ct, requests.worker); bool done; done = ct_process_incoming_requests(ct); if (!done) - queue_work(system_unbound_wq, &ct->worker); + queue_work(system_unbound_wq, &ct->requests.worker); } /** @@ -795,23 +780,28 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg) } memcpy(request->msg, msg, 4 * msglen); - spin_lock_irqsave(&ct->lock, flags); - list_add_tail(&request->link, &ct->incoming_requests); - spin_unlock_irqrestore(&ct->lock, flags); + spin_lock_irqsave(&ct->requests.lock, flags); + list_add_tail(&request->link, &ct->requests.incoming); + spin_unlock_irqrestore(&ct->requests.lock, flags); - queue_work(system_unbound_wq, &ct->worker); + queue_work(system_unbound_wq, &ct->requests.worker); return 0; } -static void ct_process_host_channel(struct intel_guc_ct *ct) +/* + * When we're communicating with the GuC over CT, GuC uses events + * to notify us about new messages being posted on the RECV buffer. 
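 *
 * Editor's note (illustrative, not part of the patch): with the
 * guc->handler vtable gone, intel_guc_to_host_event_handler() in
 * intel_guc.h forwards straight to intel_guc_ct_event_handler(), so every
 * GuC-to-host event for an enabled CT channel ends up here.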
+ */ +void intel_guc_ct_event_handler(struct intel_guc_ct *ct) { - struct intel_guc_ct_channel *ctch = &ct->host_channel; - struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_RECV]; + struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV]; u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */ int err = 0; - if (!ctch->enabled) + if (unlikely(!ct->enabled)) { + WARN(1, "Unexpected GuC event received while CT disabled!\n"); return; + } do { err = ctb_read(ctb, msg); @@ -830,100 +820,3 @@ static void ct_process_host_channel(struct intel_guc_ct *ct) } } -/* - * When we're communicating with the GuC over CT, GuC uses events - * to notify us about new messages being posted on the RECV buffer. - */ -static void intel_guc_to_host_event_handler_ct(struct intel_guc *guc) -{ - struct intel_guc_ct *ct = &guc->ct; - - ct_process_host_channel(ct); -} - -/** - * intel_guc_ct_init - Init CT communication - * @ct: pointer to CT struct - * - * Allocate memory required for communication via - * the CT channel. - * - * Return: 0 on success, a negative errno code on failure. - */ -int intel_guc_ct_init(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - int err; - - err = ctch_init(guc, ctch); - if (unlikely(err)) { - DRM_ERROR("CT: can't open channel %d; err=%d\n", - ctch->owner, err); - return err; - } - - GEM_BUG_ON(!ctch->vma); - return 0; -} - -/** - * intel_guc_ct_fini - Fini CT communication - * @ct: pointer to CT struct - * - * Deallocate memory required for communication via - * the CT channel. - */ -void intel_guc_ct_fini(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - - ctch_fini(guc, ctch); -} - -/** - * intel_guc_ct_enable - Enable buffer based command transport. - * @ct: pointer to CT struct - * - * Return: 0 on success, a negative errno code on failure. - */ -int intel_guc_ct_enable(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - int err; - - if (ctch->enabled) - return 0; - - err = ctch_enable(guc, ctch); - if (unlikely(err)) - return err; - - /* Switch into cmd transport buffer based send() */ - guc->send = intel_guc_send_ct; - guc->handler = intel_guc_to_host_event_handler_ct; - DRM_INFO("CT: %s\n", enableddisabled(true)); - return 0; -} - -/** - * intel_guc_ct_disable - Disable buffer based command transport. - * @ct: pointer to CT struct - */ -void intel_guc_ct_disable(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - - if (!ctch->enabled) - return; - - ctch_disable(guc, ctch); - - /* Disable send */ - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; - DRM_INFO("CT: %s\n", enableddisabled(false)); -} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h new file mode 100644 index 000000000000..3e7fe237cfa5 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2016-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_CT_H_ +#define _INTEL_GUC_CT_H_ + +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +#include "intel_guc_fwif.h" + +struct i915_vma; +struct intel_guc; + +/** + * DOC: Command Transport (CT). + * + * Buffer based command transport is a replacement for MMIO based mechanism. 
+ * It can be used to perform both host-2-guc and guc-to-host communication. + */ + +/** Represents single command transport buffer. + * + * A single command transport buffer consists of two parts, the header + * record (command transport buffer descriptor) and the actual buffer which + * holds the commands. + * + * @desc: pointer to the buffer descriptor + * @cmds: pointer to the commands buffer + */ +struct intel_guc_ct_buffer { + struct guc_ct_buffer_desc *desc; + u32 *cmds; +}; + + +/** Top-level structure for Command Transport related data + * + * Includes a pair of CT buffers for bi-directional communication and tracking + * for the H2G and G2H requests sent and received through the buffers. + */ +struct intel_guc_ct { + struct i915_vma *vma; + bool enabled; + + /* buffers for sending(0) and receiving(1) commands */ + struct intel_guc_ct_buffer ctbs[2]; + + struct { + u32 next_fence; /* fence to be used with next request to send */ + + spinlock_t lock; /* protects pending requests list */ + struct list_head pending; /* requests waiting for response */ + + struct list_head incoming; /* incoming requests */ + struct work_struct worker; /* handler for incoming requests */ + } requests; +}; + +void intel_guc_ct_init_early(struct intel_guc_ct *ct); +int intel_guc_ct_init(struct intel_guc_ct *ct); +void intel_guc_ct_fini(struct intel_guc_ct *ct); +int intel_guc_ct_enable(struct intel_guc_ct *ct); +void intel_guc_ct_disable(struct intel_guc_ct *ct); + +static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct) +{ + return ct->enabled; +} + +int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size); +void intel_guc_ct_event_handler(struct intel_guc_ct *ct); + +#endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c new file mode 100644 index 000000000000..3a1c47d600ea --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014-2019 Intel Corporation + * + * Authors: + * Vinit Azad <vinit.azad@intel.com> + * Ben Widawsky <ben@bwidawsk.net> + * Dave Gordon <david.s.gordon@intel.com> + * Alex Dai <yu.dai@intel.com> + */ + +#include "gt/intel_gt.h" +#include "intel_guc_fw.h" +#include "i915_drv.h" + +/** + * intel_guc_fw_init_early() - initializes GuC firmware struct + * @guc: intel_guc struct + * + * On platforms with GuC selects firmware for uploading + */ +void intel_guc_fw_init_early(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + + intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, HAS_GT_UC(i915), + INTEL_INFO(i915)->platform, INTEL_REVID(i915)); +} + +static void guc_prepare_xfer(struct intel_uncore *uncore) +{ + u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES | + GUC_ENABLE_READ_CACHE_LOGIC | + GUC_ENABLE_MIA_CACHING | + GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | + GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | + GUC_ENABLE_MIA_CLOCK_GATING; + + /* Must program this register before loading the ucode with DMA */ + intel_uncore_write(uncore, GUC_SHIM_CONTROL, shim_flags); + + if (IS_GEN9_LP(uncore->i915)) + intel_uncore_write(uncore, GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); + else + intel_uncore_write(uncore, GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE); + + if (IS_GEN(uncore->i915, 9)) { + /* DOP Clock Gating Enable for GuC clocks */ + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + 0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE); + + /* allows for 5us (in 10ns units) 
before GT can go to RC6 */ + intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF); + } +} + +/* Copy RSA signature from the fw image to HW for verification */ +static void guc_xfer_rsa(struct intel_uc_fw *guc_fw, + struct intel_uncore *uncore) +{ + u32 rsa[UOS_RSA_SCRATCH_COUNT]; + size_t copied; + int i; + + copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa)); + GEM_BUG_ON(copied < sizeof(rsa)); + + for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) + intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]); +} + +/* + * Read the GuC status register (GUC_STATUS) and store it in the + * specified location; then return a boolean indicating whether + * the value matches either of two values representing completion + * of the GuC boot process. + * + * This is used for polling the GuC status in a wait_for() + * loop below. + */ +static inline bool guc_ready(struct intel_uncore *uncore, u32 *status) +{ + u32 val = intel_uncore_read(uncore, GUC_STATUS); + u32 uk_val = val & GS_UKERNEL_MASK; + + *status = val; + return (uk_val == GS_UKERNEL_READY) || + ((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE)); +} + +static int guc_wait_ucode(struct intel_uncore *uncore) +{ + u32 status; + int ret; + + /* + * Wait for the GuC to start up. + * NB: Docs recommend not using the interrupt for completion. + * Measurements indicate this should take no more than 20ms, so a + * timeout here indicates that the GuC has failed and is unusable. + * (Higher levels of the driver may decide to reset the GuC and + * attempt the ucode load again if this happens.) + */ + ret = wait_for(guc_ready(uncore, &status), 100); + DRM_DEBUG_DRIVER("GuC status %#x\n", status); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + DRM_ERROR("GuC firmware signature verification failed\n"); + ret = -ENOEXEC; + } + + if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) { + DRM_ERROR("GuC firmware exception. EIP: %#x\n", + intel_uncore_read(uncore, SOFT_SCRATCH(13))); + ret = -ENXIO; + } + + return ret; +} + +/** + * intel_guc_fw_upload() - load GuC uCode to device + * @guc: intel_guc structure + * + * Called from intel_uc_init_hw() during driver load, resume from sleep and + * after a GPU reset. + * + * The firmware image should have already been fetched into memory, so only + * check that fetch succeeded, and then transfer the image to the h/w. + * + * Return: non-zero code on error + */ +int intel_guc_fw_upload(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_uncore *uncore = gt->uncore; + int ret; + + guc_prepare_xfer(uncore); + + /* + * Note that GuC needs the CSS header plus uKernel code to be copied + * by the DMA engine in one operation, whereas the RSA signature is + * loaded via MMIO. + */ + guc_xfer_rsa(&guc->fw, uncore); + + /* + * Current uCode expects the code to be loaded at 8k; locations below + * this are used for the stack. 
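 *
 * (Editor's note, not part of the patch: the 0x2000 passed to
 * intel_uc_fw_upload() below is that 8 KiB offset expressed in bytes.)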
+ */ + ret = intel_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE); + if (ret) + goto out; + + ret = guc_wait_ucode(uncore); + if (ret) + goto out; + + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_RUNNING); + return 0; + +out: + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_FAIL); + return ret; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h new file mode 100644 index 000000000000..b5ab639d7259 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2017-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_FW_H_ +#define _INTEL_GUC_FW_H_ + +struct intel_guc; + +void intel_guc_fw_init_early(struct intel_guc *guc); +int intel_guc_fw_upload(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index f55f3bc8524d..a6b733c146c9 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -1,28 +1,15 @@ +/* SPDX-License-Identifier: MIT */ /* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
+ * Copyright © 2014-2019 Intel Corporation */ + #ifndef _INTEL_GUC_FWIF_H #define _INTEL_GUC_FWIF_H +#include <linux/bits.h> +#include <linux/compiler.h> +#include <linux/types.h> + #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 @@ -39,17 +26,11 @@ #define GUC_VIDEO_ENGINE2 4 #define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1) -/* - * XXX: Beware that Gen9 firmware 32.x uses wrong definition for - * GUC_MAX_INSTANCES_PER_CLASS (1) but this is harmless for us now - * as we are not enabling GuC submission mode where this will be used - */ #define GUC_MAX_ENGINE_CLASSES 5 -#define GUC_MAX_INSTANCES_PER_CLASS 4 +#define GUC_MAX_INSTANCES_PER_CLASS 16 #define GUC_DOORBELL_INVALID 256 -#define GUC_DB_SIZE (PAGE_SIZE) #define GUC_WQ_SIZE (PAGE_SIZE * 2) /* Work queue item header definitions */ @@ -122,76 +103,6 @@ #define GUC_CTL_MAX_DWORDS (SOFT_SCRATCH_COUNT - 2) /* [1..14] */ -/** - * DOC: GuC Firmware Layout - * - * The GuC firmware layout looks like this: - * - * +-------------------------------+ - * | uc_css_header | - * | | - * | contains major/minor version | - * +-------------------------------+ - * | uCode | - * +-------------------------------+ - * | RSA signature | - * +-------------------------------+ - * | modulus key | - * +-------------------------------+ - * | exponent val | - * +-------------------------------+ - * - * The firmware may or may not have modulus key and exponent data. The header, - * uCode and RSA signature are must-have components that will be used by driver. - * Length of each components, which is all in dwords, can be found in header. - * In the case that modulus and exponent are not present in fw, a.k.a truncated - * image, the length value still appears in header. - * - * Driver will do some basic fw size validation based on the following rules: - * - * 1. Header, uCode and RSA are must-have components. - * 2. All firmware components, if they present, are in the sequence illustrated - * in the layout table above. - * 3. Length info of each component can be found in header, in dwords. - * 4. Modulus and exponent key are not required by driver. They may not appear - * in fw. So driver will load a truncated firmware in this case. - * - * HuC firmware layout is same as GuC firmware. - * Only HuC version information is saved in a different way. - */ - -struct uc_css_header { - u32 module_type; - /* header_size includes all non-uCode bits, including css_header, rsa - * key, modulus key and exponent data. */ - u32 header_size_dw; - u32 header_version; - u32 module_id; - u32 module_vendor; - u32 date; -#define CSS_DATE_DAY (0xFF << 0) -#define CSS_DATE_MONTH (0xFF << 8) -#define CSS_DATE_YEAR (0xFFFF << 16) - u32 size_dw; /* uCode plus header_size_dw */ - u32 key_size_dw; - u32 modulus_size_dw; - u32 exponent_size_dw; - u32 time; -#define CSS_TIME_HOUR (0xFF << 0) -#define CSS_DATE_MIN (0xFF << 8) -#define CSS_DATE_SEC (0xFFFF << 16) - char username[8]; - char buildnumber[12]; - u32 sw_version; -#define CSS_SW_VERSION_GUC_MAJOR (0xFF << 16) -#define CSS_SW_VERSION_GUC_MINOR (0xFF << 8) -#define CSS_SW_VERSION_GUC_PATCH (0xFF << 0) -#define CSS_SW_VERSION_HUC_MAJOR (0xFFFF << 16) -#define CSS_SW_VERSION_HUC_MINOR (0xFFFF << 0) - u32 reserved[14]; - u32 header_info; -} __packed; - /* Work item for submitting workloads into work queue of GuC. 
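 *
 * Editor's note (illustrative, not part of the patch): as the submission
 * DOC further below describes, only WQ_TYPE_INORDER items are emitted,
 * with the ring tail pointer and an ELSP context descriptor dword packed
 * into each entry.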
*/ struct guc_wq_item { u32 header; @@ -636,6 +547,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, + INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40, INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, @@ -644,7 +556,6 @@ enum intel_guc_action { INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, - INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index e3b83ecb90b5..caed0d57e704 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -1,31 +1,14 @@ +// SPDX-License-Identifier: MIT /* - * Copyright © 2014-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * + * Copyright © 2014-2019 Intel Corporation */ #include <linux/debugfs.h> -#include "intel_guc_log.h" +#include "gt/intel_gt.h" #include "i915_drv.h" +#include "i915_memcpy.h" +#include "intel_guc_log.h" static void guc_log_capture_logs(struct intel_guc_log *log); @@ -209,7 +192,7 @@ static bool guc_check_log_buf_overflow(struct intel_guc_log *log, log->stats[type].sampled_overflow += 16; } - dev_notice_ratelimited(guc_to_i915(log_to_guc(log))->drm.dev, + dev_notice_ratelimited(guc_to_gt(log_to_guc(log))->i915->drm.dev, "GuC log buffer overflow\n"); } @@ -243,7 +226,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) mutex_lock(&log->relay.lock); - if (WARN_ON(!intel_guc_log_relay_enabled(log))) + if (WARN_ON(!intel_guc_log_relay_created(log))) goto out_unlock; /* Get the pointer to shared GuC log buffer */ @@ -378,17 +361,19 @@ void intel_guc_log_init_early(struct intel_guc_log *log) { mutex_init(&log->relay.lock); INIT_WORK(&log->relay.flush_work, capture_logs_work); + log->relay.started = false; } static int guc_log_relay_create(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; struct rchan *guc_log_relay_chan; size_t n_subbufs, subbuf_size; int ret; lockdep_assert_held(&log->relay.lock); + GEM_BUG_ON(!log->vma); /* Keep the size of sub buffers same as shared log buffer */ subbuf_size = log->vma->size; @@ -429,7 +414,7 @@ static void guc_log_relay_destroy(struct intel_guc_log *log) static void guc_log_capture_logs(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; intel_wakeref_t wakeref; guc_read_update_log_buffer(log); @@ -442,6 +427,29 @@ static void guc_log_capture_logs(struct intel_guc_log *log) guc_action_flush_log_complete(guc); } +static u32 __get_default_log_level(struct intel_guc_log *log) +{ + /* A negative value means "use platform/config default" */ + if (i915_modparams.guc_log_level < 0) { + return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || + IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ? + GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_NON_VERBOSE; + } + + if (i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX) { + DRM_WARN("Incompatible option detected: %s=%d, %s!\n", + "guc_log_level", i915_modparams.guc_log_level, + "verbosity too high"); + return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || + IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ? 
+ GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_DISABLED; + } + + GEM_BUG_ON(i915_modparams.guc_log_level < GUC_LOG_LEVEL_DISABLED); + GEM_BUG_ON(i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX); + return i915_modparams.guc_log_level; +} + int intel_guc_log_create(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); @@ -481,7 +489,11 @@ int intel_guc_log_create(struct intel_guc_log *log) log->vma = vma; - log->level = i915_modparams.guc_log_level; + log->level = __get_default_log_level(log); + DRM_DEBUG_DRIVER("guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n", + log->level, enableddisabled(log->level), + yesno(GUC_LOG_LEVEL_IS_VERBOSE(log->level)), + GUC_LOG_LEVEL_TO_VERBOSITY(log->level)); return 0; @@ -498,7 +510,7 @@ void intel_guc_log_destroy(struct intel_guc_log *log) int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; intel_wakeref_t wakeref; int ret = 0; @@ -535,7 +547,7 @@ out_unlock: return ret; } -bool intel_guc_log_relay_enabled(const struct intel_guc_log *log) +bool intel_guc_log_relay_created(const struct intel_guc_log *log) { return log->relay.buf_addr; } @@ -544,9 +556,12 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) { int ret; + if (!log->vma) + return -ENODEV; + mutex_lock(&log->relay.lock); - if (intel_guc_log_relay_enabled(log)) { + if (intel_guc_log_relay_created(log)) { ret = -EEXIST; goto out_unlock; } @@ -571,6 +586,21 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) mutex_unlock(&log->relay.lock); + return 0; + +out_relay: + guc_log_relay_destroy(log); +out_unlock: + mutex_unlock(&log->relay.lock); + + return ret; +} + +int intel_guc_log_relay_start(struct intel_guc_log *log) +{ + if (log->relay.started) + return -EEXIST; + guc_log_enable_flush_events(log); /* @@ -578,49 +608,61 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) * the flush notification. This means that we need to unconditionally * flush on relay enabling, since GuC only notifies us once. */ - queue_work(log->relay.flush_wq, &log->relay.flush_work); - - return 0; + queue_work(system_highpri_wq, &log->relay.flush_work); -out_relay: - guc_log_relay_destroy(log); -out_unlock: - mutex_unlock(&log->relay.lock); + log->relay.started = true; - return ret; + return 0; } void intel_guc_log_relay_flush(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_i915(guc); intel_wakeref_t wakeref; + if (!log->relay.started) + return; + /* * Before initiating the forceful flush, wait for any pending/ongoing * flush to complete otherwise forceful flush may not actually happen. */ flush_work(&log->relay.flush_work); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref) guc_action_flush_log(guc); /* GuC would have updated log buffer by now, so capture it */ guc_log_capture_logs(log); } -void intel_guc_log_relay_close(struct intel_guc_log *log) +/* + * Stops the relay log. Called from intel_guc_log_relay_close(), so no + * possibility of race with start/flush since relay_write cannot race + * relay_close. 
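 *
 * Editor's note (illustrative, not part of the patch): the relay lifecycle
 * is now split into created and started states, so the expected consumer
 * sequence is intel_guc_log_relay_open(), intel_guc_log_relay_start(),
 * any number of intel_guc_log_relay_flush() calls, and finally
 * intel_guc_log_relay_close(), which stops and tears the channel down.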
+ */ +static void guc_log_relay_stop(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_i915(guc); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + + if (!log->relay.started) + return; guc_log_disable_flush_events(log); - synchronize_irq(i915->drm.irq); + intel_synchronize_irq(i915); flush_work(&log->relay.flush_work); + log->relay.started = false; +} + +void intel_guc_log_relay_close(struct intel_guc_log *log) +{ + guc_log_relay_stop(log); + mutex_lock(&log->relay.lock); - GEM_BUG_ON(!intel_guc_log_relay_enabled(log)); + GEM_BUG_ON(!intel_guc_log_relay_created(log)); guc_log_unmap(log); guc_log_relay_destroy(log); mutex_unlock(&log->relay.lock); @@ -628,5 +670,5 @@ void intel_guc_log_relay_close(struct intel_guc_log *log) void intel_guc_log_handle_flush_event(struct intel_guc_log *log) { - queue_work(log->relay.flush_wq, &log->relay.flush_work); + queue_work(system_highpri_wq, &log->relay.flush_work); } diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 7bc763f10c03..c252c022c5fc 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -1,25 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* - * Copyright © 2014-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * + * Copyright © 2014-2019 Intel Corporation */ #ifndef _INTEL_GUC_LOG_H_ @@ -66,7 +47,7 @@ struct intel_guc_log { struct i915_vma *vma; struct { void *buf_addr; - struct workqueue_struct *flush_wq; + bool started; struct work_struct flush_work; struct rchan *channel; struct mutex lock; @@ -85,8 +66,9 @@ int intel_guc_log_create(struct intel_guc_log *log); void intel_guc_log_destroy(struct intel_guc_log *log); int intel_guc_log_set_level(struct intel_guc_log *log, u32 level); -bool intel_guc_log_relay_enabled(const struct intel_guc_log *log); +bool intel_guc_log_relay_created(const struct intel_guc_log *log); int intel_guc_log_relay_open(struct intel_guc_log *log); +int intel_guc_log_relay_start(struct intel_guc_log *log); void intel_guc_log_relay_flush(struct intel_guc_log *log); void intel_guc_log_relay_close(struct intel_guc_log *log); diff --git a/drivers/gpu/drm/i915/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index a214f8b71929..1949346e714e 100644 --- a/drivers/gpu/drm/i915/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -1,29 +1,16 @@ +/* SPDX-License-Identifier: MIT */ /* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * + * Copyright © 2014-2019 Intel Corporation */ + #ifndef _INTEL_GUC_REG_H_ #define _INTEL_GUC_REG_H_ +#include <linux/compiler.h> +#include <linux/types.h> + +#include "i915_reg.h" + /* Definitions of GuC H/W registers, bits, etc */ #define GUC_STATUS _MMIO(0xc000) @@ -37,6 +24,7 @@ #define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) #define GS_UKERNEL_LAPIC_DONE (0x30 << GS_UKERNEL_SHIFT) #define GS_UKERNEL_DPC_ERROR (0x60 << GS_UKERNEL_SHIFT) +#define GS_UKERNEL_EXCEPTION (0x70 << GS_UKERNEL_SHIFT) #define GS_UKERNEL_READY (0xF0 << GS_UKERNEL_SHIFT) #define GS_MIA_SHIFT 16 #define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) @@ -95,6 +83,9 @@ #define GEN8_GTCR _MMIO(0x4274) #define GEN8_GTCR_INVALIDATE (1<<0) +#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) +#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) + #define GUC_ARAT_C6DIS _MMIO(0xA178) #define GUC_SHIM_CONTROL _MMIO(0xc064) @@ -135,21 +126,21 @@ struct guc_doorbell_info { #define GUC_PM_P24C_IER _MMIO(0xC55C) /* GuC Interrupt Vector */ -#define GEN11_GUC_INTR_GUC2HOST (1 << 15) -#define GEN11_GUC_INTR_EXEC_ERROR (1 << 14) -#define GEN11_GUC_INTR_DISPLAY_EVENT (1 << 13) -#define GEN11_GUC_INTR_SEM_SIG (1 << 12) -#define GEN11_GUC_INTR_IOMMU2GUC (1 << 11) -#define GEN11_GUC_INTR_DOORBELL_RANG (1 << 10) -#define GEN11_GUC_INTR_DMA_DONE (1 << 9) -#define GEN11_GUC_INTR_FATAL_ERROR (1 << 8) -#define GEN11_GUC_INTR_NOTIF_ERROR (1 << 7) -#define GEN11_GUC_INTR_SW_INT_6 (1 << 6) -#define GEN11_GUC_INTR_SW_INT_5 (1 << 5) -#define GEN11_GUC_INTR_SW_INT_4 (1 << 4) -#define GEN11_GUC_INTR_SW_INT_3 (1 << 3) -#define GEN11_GUC_INTR_SW_INT_2 (1 << 2) -#define GEN11_GUC_INTR_SW_INT_1 (1 << 1) -#define GEN11_GUC_INTR_SW_INT_0 (1 << 0) +#define GUC_INTR_GUC2HOST BIT(15) +#define GUC_INTR_EXEC_ERROR BIT(14) +#define GUC_INTR_DISPLAY_EVENT BIT(13) +#define GUC_INTR_SEM_SIG BIT(12) +#define GUC_INTR_IOMMU2GUC BIT(11) +#define GUC_INTR_DOORBELL_RANG BIT(10) +#define GUC_INTR_DMA_DONE BIT(9) +#define GUC_INTR_FATAL_ERROR BIT(8) +#define GUC_INTR_NOTIF_ERROR BIT(7) +#define GUC_INTR_SW_INT_6 BIT(6) +#define GUC_INTR_SW_INT_5 BIT(5) +#define GUC_INTR_SW_INT_4 BIT(4) +#define GUC_INTR_SW_INT_3 BIT(3) +#define GUC_INTR_SW_INT_2 BIT(2) +#define GUC_INTR_SW_INT_1 BIT(1) +#define GUC_INTR_SW_INT_0 BIT(0) #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c new file mode 100644 index 000000000000..9e42324fdecd --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -0,0 +1,682 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014 Intel Corporation + */ + +#include <linux/circ_buf.h> + +#include "gem/i915_gem_context.h" +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" +#include "gt/intel_lrc_reg.h" +#include "gt/intel_ring.h" + +#include "intel_guc_submission.h" + +#include "i915_drv.h" +#include "i915_trace.h" + +/** + * DOC: GuC-based command submission + * + * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC + * firmware is moving to an updated submission interface and we plan to + * turn submission back on when that lands. The below documentation (and related + * code) matches the old submission model and will be updated as part of the + * upgrade to the new flow. + * + * GuC stage descriptor: + * During initialization, the driver allocates a static pool of 1024 such + * descriptors, and shares them with the GuC. Currently, we only use one + * descriptor. 
This stage descriptor lets the GuC know about the workqueue and + * process descriptor. Theoretically, it also lets the GuC know about our HW + * contexts (context ID, etc...), but we actually employ a kind of submission + * where the GuC uses the LRCA sent via the work item instead. This is called + * a "proxy" submission. + * + * The Scratch registers: + * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes + * a value to the action register (SOFT_SCRATCH_0) along with any data. It then + * triggers an interrupt on the GuC via another register write (0xC4C8). + * Firmware writes a success/fail code back to the action register after + * processes the request. The kernel driver polls waiting for this update and + * then proceeds. + * + * Work Items: + * There are several types of work items that the host may place into a + * workqueue, each with its own requirements and limitations. Currently only + * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which + * represents in-order queue. The kernel driver packs ring tail pointer and an + * ELSP context descriptor dword into Work Item. + * See guc_add_request() + * + */ + +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) +{ + struct guc_stage_desc *base = guc->stage_desc_pool_vaddr; + + return &base[id]; +} + +static int guc_workqueue_create(struct intel_guc *guc) +{ + return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue, + &guc->workqueue_vaddr); +} + +static void guc_workqueue_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP); +} + +/* + * Initialise the process descriptor shared with the GuC firmware. + */ +static int guc_proc_desc_create(struct intel_guc *guc) +{ + const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc)); + + return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc, + &guc->proc_desc_vaddr); +} + +static void guc_proc_desc_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP); +} + +static void guc_proc_desc_init(struct intel_guc *guc) +{ + struct guc_process_desc *desc; + + desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc)); + + /* + * XXX: pDoorbell and WQVBaseAddress are pointers in process address + * space for ring3 clients (set them as in mmap_ioctl) or kernel + * space for kernel clients (map on demand instead? May make debug + * easier to have it mapped). + */ + desc->wq_base_addr = 0; + desc->db_base_addr = 0; + + desc->wq_size_bytes = GUC_WQ_SIZE; + desc->wq_status = WQ_STATUS_ACTIVE; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; +} + +static void guc_proc_desc_fini(struct intel_guc *guc) +{ + memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc)); +} + +static int guc_stage_desc_pool_create(struct intel_guc *guc) +{ + u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * + GUC_MAX_STAGE_DESCRIPTORS); + + return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, + &guc->stage_desc_pool_vaddr); +} + +static void guc_stage_desc_pool_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); +} + +/* + * Initialise/clear the stage descriptor shared with the GuC firmware. 
+ * + * This descriptor tells the GuC where (in GGTT space) to find the important + * data structures related to work submission (process descriptor, write queue, + * etc). + */ +static void guc_stage_desc_init(struct intel_guc *guc) +{ + struct guc_stage_desc *desc; + + /* we only use 1 stage desc, so hardcode it to 0 */ + desc = __get_stage_desc(guc, 0); + memset(desc, 0, sizeof(*desc)); + + desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | + GUC_STAGE_DESC_ATTR_KERNEL; + + desc->stage_id = 0; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; + + desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc); + desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue); + desc->wq_size = GUC_WQ_SIZE; +} + +static void guc_stage_desc_fini(struct intel_guc *guc) +{ + struct guc_stage_desc *desc; + + desc = __get_stage_desc(guc, 0); + memset(desc, 0, sizeof(*desc)); +} + +/* Construct a Work Item and append it to the GuC's Work Queue */ +static void guc_wq_item_append(struct intel_guc *guc, + u32 target_engine, u32 context_desc, + u32 ring_tail, u32 fence_id) +{ + /* wqi_len is in DWords, and does not include the one-word header */ + const size_t wqi_size = sizeof(struct guc_wq_item); + const u32 wqi_len = wqi_size / sizeof(u32) - 1; + struct guc_process_desc *desc = guc->proc_desc_vaddr; + struct guc_wq_item *wqi; + u32 wq_off; + + lockdep_assert_held(&guc->wq_lock); + + /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we + * should not have the case where structure wqi is across page, neither + * wrapped to the beginning. This simplifies the implementation below. + * + * XXX: if not the case, we need save data to a temp wqi and copy it to + * workqueue buffer dw by dw. + */ + BUILD_BUG_ON(wqi_size != 16); + + /* We expect the WQ to be active if we're appending items to it */ + GEM_BUG_ON(desc->wq_status != WQ_STATUS_ACTIVE); + + /* Free space is guaranteed. */ + wq_off = READ_ONCE(desc->tail); + GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head), + GUC_WQ_SIZE) < wqi_size); + GEM_BUG_ON(wq_off & (wqi_size - 1)); + + wqi = guc->workqueue_vaddr + wq_off; + + /* Now fill in the 4-word work queue item */ + wqi->header = WQ_TYPE_INORDER | + (wqi_len << WQ_LEN_SHIFT) | + (target_engine << WQ_TARGET_SHIFT) | + WQ_NO_WCFLUSH_WAIT; + wqi->context_desc = context_desc; + wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; + GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); + wqi->fence_id = fence_id; + + /* Make the update visible to GuC */ + WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1)); +} + +static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + u32 ctx_desc = lower_32_bits(rq->context->lrc_desc); + u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); + + guc_wq_item_append(guc, engine->guc_id, ctx_desc, + ring_tail, rq->fence.seqno); +} + +/* + * When we're doing submissions using regular execlists backend, writing to + * ELSP from CPU side is enough to make sure that writes to ringbuffer pages + * pinned in mappable aperture portion of GGTT are visible to command streamer. + * Writes done by GuC on our behalf are not guaranteeing such ordering, + * therefore, to ensure the flush, we're issuing a POSTING READ. 
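
/*
 * A stand-alone sketch (not driver code) of the circular-buffer arithmetic
 * behind guc_wq_item_append() above: free space is computed from the
 * firmware-owned head and the host-owned tail, and the tail is only
 * published after the item has been written. WQ_SIZE and WQI_SIZE are
 * illustrative stand-ins for GUC_WQ_SIZE and the 4-dword work item.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define WQ_SIZE   4096u         /* stand-in for GUC_WQ_SIZE */
#define WQI_SIZE  16u           /* one 4-dword work item */

struct wq {
        uint32_t head;          /* consumed by the firmware */
        uint32_t tail;          /* produced by the host */
        uint8_t buf[WQ_SIZE];
};

/* Free bytes in the ring, always keeping one byte unused (cf. CIRC_SPACE()). */
static uint32_t wq_space(const struct wq *wq)
{
        return (wq->head - wq->tail - 1u) & (WQ_SIZE - 1u);
}

static int wq_append(struct wq *wq, const void *item)
{
        if (wq_space(wq) < WQI_SIZE)
                return -1;      /* no room, caller must back off */

        memcpy(&wq->buf[wq->tail], item, WQI_SIZE);
        wq->tail = (wq->tail + WQI_SIZE) & (WQ_SIZE - 1u); /* publish */
        return 0;
}

int main(void)
{
        struct wq wq = { .head = 0, .tail = 0 };
        uint32_t item[4] = { 0x12345678u, 0, 0, 0 };
        int ret = wq_append(&wq, item);

        printf("append: %d, tail now %u\n", ret, wq.tail);
        return 0;
}
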
+ */ +static void flush_ggtt_writes(struct i915_vma *vma) +{ + if (i915_vma_is_map_and_fenceable(vma)) + intel_uncore_posting_read_fw(vma->vm->gt->uncore, + GUC_STATUS); +} + +static void guc_submit(struct intel_engine_cs *engine, + struct i915_request **out, + struct i915_request **end) +{ + struct intel_guc *guc = &engine->gt->uc.guc; + + spin_lock(&guc->wq_lock); + + do { + struct i915_request *rq = *out++; + + flush_ggtt_writes(rq->ring->vma); + guc_add_request(guc, rq); + } while (out != end); + + spin_unlock(&guc->wq_lock); +} + +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->sched.attr.priority | __NO_PREEMPTION; +} + +static struct i915_request *schedule_in(struct i915_request *rq, int idx) +{ + trace_i915_request_in(rq, idx); + + /* + * Currently we are not tracking the rq->context being inflight + * (ce->inflight = rq->engine). It is only used by the execlists + * backend at the moment, a similar counting strategy would be + * required if we generalise the inflight tracking. + */ + + __intel_gt_pm_get(rq->engine->gt); + return i915_request_get(rq); +} + +static void schedule_out(struct i915_request *rq) +{ + trace_i915_request_out(rq); + + intel_gt_pm_put_async(rq->engine->gt); + i915_request_put(rq); +} + +static void __guc_dequeue(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request **first = execlists->inflight; + struct i915_request ** const last_port = first + execlists->port_mask; + struct i915_request *last = first[0]; + struct i915_request **port; + bool submit = false; + struct rb_node *rb; + + lockdep_assert_held(&engine->active.lock); + + if (last) { + if (*++first) + return; + + last = NULL; + } + + /* + * We write directly into the execlists->inflight queue and don't use + * the execlists->pending queue, as we don't have a distinct switch + * event. + */ + port = first; + while ((rb = rb_first_cached(&execlists->queue))) { + struct i915_priolist *p = to_priolist(rb); + struct i915_request *rq, *rn; + int i; + + priolist_for_each_request_consume(rq, rn, p, i) { + if (last && rq->context != last->context) { + if (port == last_port) + goto done; + + *port = schedule_in(last, + port - execlists->inflight); + port++; + } + + list_del_init(&rq->sched.link); + __i915_request_submit(rq); + submit = true; + last = rq; + } + + rb_erase_cached(&p->node, &execlists->queue); + i915_priolist_free(p); + } +done: + execlists->queue_priority_hint = + rb ? 
to_priolist(rb)->priority : INT_MIN; + if (submit) { + *port = schedule_in(last, port - execlists->inflight); + *++port = NULL; + guc_submit(engine, first, port); + } + execlists->active = execlists->inflight; +} + +static void guc_submission_tasklet(unsigned long data) +{ + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request **port, *rq; + unsigned long flags; + + spin_lock_irqsave(&engine->active.lock, flags); + + for (port = execlists->inflight; (rq = *port); port++) { + if (!i915_request_completed(rq)) + break; + + schedule_out(rq); + } + if (port != execlists->inflight) { + int idx = port - execlists->inflight; + int rem = ARRAY_SIZE(execlists->inflight) - idx; + memmove(execlists->inflight, port, rem * sizeof(*port)); + } + + __guc_dequeue(engine); + + spin_unlock_irqrestore(&engine->active.lock, flags); +} + +static void guc_reset_prepare(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + ENGINE_TRACE(engine, "\n"); + + /* + * Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish(). If a request + * is completed by one engine, it may then queue a request + * to a second via its execlists->tasklet *just* as we are + * calling engine->init_hw() and also writing the ELSP. + * Turning off the execlists->tasklet until the reset is over + * prevents the race. + */ + __tasklet_disable_sync_once(&execlists->tasklet); +} + +static void +cancel_port_requests(struct intel_engine_execlists * const execlists) +{ + struct i915_request * const *port, *rq; + + /* Note we are only using the inflight and not the pending queue */ + + for (port = execlists->active; (rq = *port); port++) + schedule_out(rq); + execlists->active = + memset(execlists->inflight, 0, sizeof(execlists->inflight)); +} + +static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq; + unsigned long flags; + + spin_lock_irqsave(&engine->active.lock, flags); + + cancel_port_requests(execlists); + + /* Push back any incomplete requests for replay after the reset. */ + rq = execlists_unwind_incomplete_requests(execlists); + if (!rq) + goto out_unlock; + + if (!i915_request_started(rq)) + stalled = false; + + __i915_request_reset(rq, stalled); + intel_lr_context_reset(engine, rq->context, rq->head, stalled); + +out_unlock: + spin_unlock_irqrestore(&engine->active.lock, flags); +} + +static void guc_reset_cancel(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq, *rn; + struct rb_node *rb; + unsigned long flags; + + ENGINE_TRACE(engine, "\n"); + + /* + * Before we call engine->cancel_requests(), we should have exclusive + * access to the submission state. This is arranged for us by the + * caller disabling the interrupt generation, the tasklet and other + * threads that may then access the same state, giving us a free hand + * to reset state. However, we still need to let lockdep be aware that + * we know this state may be accessed in hardirq context, so we + * disable the irq around this manipulation and we want to keep + * the spinlock focused on its duties and not accidentally conflate + * coverage to the submission's irq state. 
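
/*
 * A stand-alone sketch (not driver code) of the inflight-port compaction done
 * by guc_submission_tasklet() above: completed entries at the front of the
 * array are retired and the remainder is shifted down with memmove(). The
 * request list is modelled here as a zero-terminated array of ints, with
 * positive values standing in for completed requests.
 */
#include <stdio.h>
#include <string.h>

#define NUM_PORTS 4

static int inflight[NUM_PORTS + 1] = { 10, 20, -30, 0, 0 };

static int is_completed(int rq) { return rq > 0; }

static void compact(void)
{
        int *port = inflight;

        while (*port && is_completed(*port))
                port++;                         /* cf. schedule_out() */

        if (port != inflight) {
                int idx = port - inflight;
                int rem = NUM_PORTS + 1 - idx;

                memmove(inflight, port, rem * sizeof(*port));
        }
}

int main(void)
{
        compact();
        printf("head of inflight after compaction: %d\n", inflight[0]);
        return 0;
}
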
(Similarly, although we + * shouldn't need to disable irq around the manipulation of the + * submission's irq state, we also wish to remind ourselves that + * it is irq state.) + */ + spin_lock_irqsave(&engine->active.lock, flags); + + /* Cancel the requests on the HW and clear the ELSP tracker. */ + cancel_port_requests(execlists); + + /* Mark all executing requests as skipped. */ + list_for_each_entry(rq, &engine->active.requests, sched.link) { + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); + + i915_request_mark_complete(rq); + } + + /* Flush the queued requests to the timeline list (for retiring). */ + while ((rb = rb_first_cached(&execlists->queue))) { + struct i915_priolist *p = to_priolist(rb); + int i; + + priolist_for_each_request_consume(rq, rn, p, i) { + list_del_init(&rq->sched.link); + __i915_request_submit(rq); + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); + } + + rb_erase_cached(&p->node, &execlists->queue); + i915_priolist_free(p); + } + + /* Remaining _unready_ requests will be nop'ed when submitted */ + + execlists->queue_priority_hint = INT_MIN; + execlists->queue = RB_ROOT_CACHED; + + spin_unlock_irqrestore(&engine->active.lock, flags); +} + +static void guc_reset_finish(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + if (__tasklet_enable(&execlists->tasklet)) + /* And kick in case we missed a new request submission. */ + tasklet_hi_schedule(&execlists->tasklet); + + ENGINE_TRACE(engine, "depth->%d\n", + atomic_read(&execlists->tasklet.count)); +} + +/* + * Everything below here is concerned with setup & teardown, and is + * therefore not part of the somewhat time-critical batch-submission + * path of guc_submit() above. + */ + +/* + * Set up the memory resources to be shared with the GuC (via the GGTT) + * at firmware loading time. + */ +int intel_guc_submission_init(struct intel_guc *guc) +{ + int ret; + + if (guc->stage_desc_pool) + return 0; + + ret = guc_stage_desc_pool_create(guc); + if (ret) + return ret; + /* + * Keep static analysers happy, let them know that we allocated the + * vma after testing that it didn't exist earlier. 
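
/*
 * intel_guc_submission_init() unwinds a partial setup with goto labels in
 * reverse order of construction (see err_workqueue/err_pool below). A
 * stand-alone sketch of that idiom, with plain malloc()/free() pairs standing
 * in for the pool, workqueue and process-descriptor allocations:
 */
#include <stdlib.h>

static void *pool, *workqueue, *proc_desc;

static int submission_init_sketch(void)
{
        pool = malloc(64);
        if (!pool)
                return -1;              /* nothing to unwind yet */

        workqueue = malloc(64);
        if (!workqueue)
                goto err_pool;

        proc_desc = malloc(64);
        if (!proc_desc)
                goto err_workqueue;

        return 0;

err_workqueue:
        free(workqueue);
err_pool:
        free(pool);
        return -1;
}

int main(void)
{
        return submission_init_sketch() ? 1 : 0;
}
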
+ */ + GEM_BUG_ON(!guc->stage_desc_pool); + + ret = guc_workqueue_create(guc); + if (ret) + goto err_pool; + + ret = guc_proc_desc_create(guc); + if (ret) + goto err_workqueue; + + spin_lock_init(&guc->wq_lock); + + return 0; + +err_workqueue: + guc_workqueue_destroy(guc); +err_pool: + guc_stage_desc_pool_destroy(guc); + return ret; +} + +void intel_guc_submission_fini(struct intel_guc *guc) +{ + if (guc->stage_desc_pool) { + guc_proc_desc_destroy(guc); + guc_workqueue_destroy(guc); + guc_stage_desc_pool_destroy(guc); + } +} + +static void guc_interrupts_capture(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT; + u32 dmask = irqs << 16 | irqs; + + GEM_BUG_ON(INTEL_GEN(gt->i915) < 11); + + /* Don't handle the ctx switch interrupt in GuC submission mode */ + intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0); + intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0); +} + +static void guc_interrupts_release(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT; + u32 dmask = irqs << 16 | irqs; + + GEM_BUG_ON(INTEL_GEN(gt->i915) < 11); + + /* Handle ctx switch interrupts again */ + intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask); + intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask); +} + +static void guc_set_default_submission(struct intel_engine_cs *engine) +{ + /* + * We inherit a bunch of functions from execlists that we'd like + * to keep using: + * + * engine->submit_request = execlists_submit_request; + * engine->cancel_requests = execlists_cancel_requests; + * engine->schedule = execlists_schedule; + * + * But we need to override the actual submission backend in order + * to talk to the GuC. + */ + intel_execlists_set_default_submission(engine); + + engine->execlists.tasklet.func = guc_submission_tasklet; + + /* do not use execlists park/unpark */ + engine->park = engine->unpark = NULL; + + engine->reset.prepare = guc_reset_prepare; + engine->reset.rewind = guc_reset_rewind; + engine->reset.cancel = guc_reset_cancel; + engine->reset.finish = guc_reset_finish; + + engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; + engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; + + /* + * For the breadcrumb irq to work we need the interrupts to stay + * enabled. However, on all platforms on which we'll have support for + * GuC submission we don't allow disabling the interrupts at runtime, so + * we're always safe with the current flow. + */ + GEM_BUG_ON(engine->irq_enable || engine->irq_disable); +} + +void intel_guc_submission_enable(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * We're using GuC work items for submitting work through GuC. Since + * we're coalescing multiple requests from a single context into a + * single work item prior to assigning it to execlist_port, we can + * never have more work items than the total number of ports (for all + * engines). The GuC firmware is controlling the HEAD of work queue, + * and it is guaranteed that it will remove the work item from the + * queue before our request is completed. 
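
/*
 * The comment above argues that work items can never overflow the work queue;
 * the BUILD_BUG_ON just below encodes that at compile time. The same check as
 * a stand-alone static assertion, with illustrative numbers: only the 4-dword
 * item and the two-page queue come from the comments in this file, the port
 * and engine counts are stand-ins.
 */
#include <assert.h>

#define GUC_WQ_SIZE_EXAMPLE   (2 * 4096)  /* two pages */
#define WQ_ITEM_SIZE          16          /* 4 dwords */
#define PORTS_PER_ENGINE      2           /* illustrative */
#define NUM_ENGINES_EXAMPLE   8           /* illustrative */

static_assert(PORTS_PER_ENGINE * WQ_ITEM_SIZE * NUM_ENGINES_EXAMPLE <=
              GUC_WQ_SIZE_EXAMPLE,
              "work queue too small for worst-case submission");

int main(void) { return 0; }
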
+ */ + BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) * + sizeof(struct guc_wq_item) * + I915_NUM_ENGINES > GUC_WQ_SIZE); + + guc_proc_desc_init(guc); + guc_stage_desc_init(guc); + + /* Take over from manual control of ELSP (execlists) */ + guc_interrupts_capture(gt); + + for_each_engine(engine, gt, id) { + engine->set_default_submission = guc_set_default_submission; + engine->set_default_submission(engine); + } +} + +void intel_guc_submission_disable(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + GEM_BUG_ON(gt->awake); /* GT should be parked first */ + + /* Note: By the time we're here, GuC may have already been reset */ + + guc_interrupts_release(gt); + + guc_stage_desc_fini(guc); + guc_proc_desc_fini(guc); +} + +static bool __guc_submission_support(struct intel_guc *guc) +{ + /* XXX: GuC submission is unavailable for now */ + return false; + + if (!intel_guc_is_supported(guc)) + return false; + + return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION; +} + +void intel_guc_submission_init_early(struct intel_guc *guc) +{ + guc->submission_supported = __guc_submission_support(guc); +} + +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine) +{ + return engine->set_default_submission == guc_set_default_submission; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h new file mode 100644 index 000000000000..e402a2932592 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_SUBMISSION_H_ +#define _INTEL_GUC_SUBMISSION_H_ + +#include <linux/types.h> + +struct intel_guc; +struct intel_engine_cs; + +void intel_guc_submission_init_early(struct intel_guc *guc); +int intel_guc_submission_init(struct intel_guc *guc); +void intel_guc_submission_enable(struct intel_guc *guc); +void intel_guc_submission_disable(struct intel_guc *guc); +void intel_guc_submission_fini(struct intel_guc *guc); +int intel_guc_preempt_work_create(struct intel_guc *guc); +void intel_guc_preempt_work_destroy(struct intel_guc *guc); +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c new file mode 100644 index 000000000000..32a069841c14 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2016-2019 Intel Corporation + */ + +#include <linux/types.h> + +#include "gt/intel_gt.h" +#include "intel_huc.h" +#include "i915_drv.h" + +/** + * DOC: HuC + * + * The HuC is a dedicated microcontroller for usage in media HEVC (High + * Efficiency Video Coding) operations. Userspace can directly use the firmware + * capabilities by adding HuC specific commands to batch buffers. + * + * The kernel driver is only responsible for loading the HuC firmware and + * triggering its security authentication, which is performed by the GuC. For + * The GuC to correctly perform the authentication, the HuC binary must be + * loaded before the GuC one. Loading the HuC is optional; however, not using + * the HuC might negatively impact power usage and/or performance of media + * workloads, depending on the use-cases. + * + * See https://github.com/intel/media-driver for the latest details on HuC + * functionality. 
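
/*
 * The DOC comment above pins down an ordering constraint: the HuC image must
 * be loaded before the GuC image, and the GuC then authenticates the HuC. A
 * stand-alone sketch of that sequence, with stub functions standing in for
 * intel_huc_fw_upload(), intel_guc_fw_upload() and intel_huc_auth():
 */
#include <stdio.h>

static int huc_loaded, guc_running;

static int huc_fw_upload(void) { huc_loaded = 1; return 0; }
static int guc_fw_upload(void) { guc_running = 1; return 0; }

static int huc_auth(void)
{
        if (!huc_loaded || !guc_running)
                return -1;      /* the GuC performs the authentication */
        return 0;
}

int main(void)
{
        huc_fw_upload();        /* 1. HuC image goes in first */
        guc_fw_upload();        /* 2. then the GuC image */
        printf("huc auth: %d\n", huc_auth()); /* 3. GuC authenticates HuC */
        return 0;
}
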
+ */ + +/** + * DOC: HuC Memory Management + * + * Similarly to the GuC, the HuC can't do any memory allocations on its own, + * with the difference being that the allocations for HuC usage are handled by + * the userspace driver instead of the kernel one. The HuC accesses the memory + * via the PPGTT belonging to the context loaded on the VCS executing the + * HuC-specific commands. + */ + +void intel_huc_init_early(struct intel_huc *huc) +{ + struct drm_i915_private *i915 = huc_to_gt(huc)->i915; + + intel_huc_fw_init_early(huc); + + if (INTEL_GEN(i915) >= 11) { + huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO; + huc->status.mask = HUC_LOAD_SUCCESSFUL; + huc->status.value = HUC_LOAD_SUCCESSFUL; + } else { + huc->status.reg = HUC_STATUS2; + huc->status.mask = HUC_FW_VERIFIED; + huc->status.value = HUC_FW_VERIFIED; + } +} + +static int intel_huc_rsa_data_create(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + struct intel_guc *guc = >->uc.guc; + struct i915_vma *vma; + size_t copied; + void *vaddr; + int err; + + err = i915_inject_probe_error(gt->i915, -ENXIO); + if (err) + return err; + + /* + * HuC firmware will sit above GUC_GGTT_TOP and will not map + * through GTT. Unfortunately, this means GuC cannot perform + * the HuC auth. as the rsa offset now falls within the GuC + * inaccessible range. We resort to perma-pinning an additional + * vma within the accessible range that only contains the rsa + * signature. The GuC can use this extra pinning to perform + * the authentication since its GGTT offset will be GuC + * accessible. + */ + GEM_BUG_ON(huc->fw.rsa_size > PAGE_SIZE); + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size); + GEM_BUG_ON(copied < huc->fw.rsa_size); + + i915_gem_object_unpin_map(vma->obj); + + huc->rsa_data = vma; + + return 0; +} + +static void intel_huc_rsa_data_destroy(struct intel_huc *huc) +{ + i915_vma_unpin_and_release(&huc->rsa_data, 0); +} + +int intel_huc_init(struct intel_huc *huc) +{ + struct drm_i915_private *i915 = huc_to_gt(huc)->i915; + int err; + + err = intel_uc_fw_init(&huc->fw); + if (err) + goto out; + + /* + * HuC firmware image is outside GuC accessible range. + * Copy the RSA signature out of the image into + * a perma-pinned region set aside for it + */ + err = intel_huc_rsa_data_create(huc); + if (err) + goto out_fini; + + return 0; + +out_fini: + intel_uc_fw_fini(&huc->fw); +out: + intel_uc_fw_cleanup_fetch(&huc->fw); + DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "failed with %d\n", err); + return err; +} + +void intel_huc_fini(struct intel_huc *huc) +{ + if (!intel_uc_fw_is_available(&huc->fw)) + return; + + intel_huc_rsa_data_destroy(huc); + intel_uc_fw_fini(&huc->fw); +} + +/** + * intel_huc_auth() - Authenticate HuC uCode + * @huc: intel_huc structure + * + * Called after HuC and GuC firmware loading during intel_uc_init_hw(). + * + * This function invokes the GuC action to authenticate the HuC firmware, + * passing the offset of the RSA signature to intel_guc_auth_huc(). It then + * waits for up to 50ms for firmware verification ACK. 
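
/*
 * The authentication wait described above boils down to polling a status
 * register until (value & mask) == expected or a timeout expires, which is
 * what the __intel_wait_for_register() call below does. A stand-alone sketch
 * of that pattern; the register read is faked, the timeout is counted in poll
 * iterations rather than milliseconds, and the bit value is illustrative.
 */
#include <stdint.h>
#include <stdio.h>

#define FW_VERIFIED 0x1u        /* illustrative status bit */

static uint32_t fake_status;    /* stands in for the MMIO read */

static uint32_t read_status(void)
{
        static int polls;

        if (++polls == 3)       /* pretend the firmware finishes */
                fake_status |= FW_VERIFIED;
        return fake_status;
}

static int wait_for_register(uint32_t mask, uint32_t value, int timeout)
{
        while (timeout--) {
                if ((read_status() & mask) == value)
                        return 0;
        }
        return -1;              /* a negative errno in the driver */
}

int main(void)
{
        printf("auth: %d\n", wait_for_register(FW_VERIFIED, FW_VERIFIED, 10));
        return 0;
}
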
+ */ +int intel_huc_auth(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + struct intel_guc *guc = >->uc.guc; + int ret; + + GEM_BUG_ON(intel_huc_is_authenticated(huc)); + + if (!intel_uc_fw_is_loaded(&huc->fw)) + return -ENOEXEC; + + ret = i915_inject_probe_error(gt->i915, -ENXIO); + if (ret) + goto fail; + + ret = intel_guc_auth_huc(guc, + intel_guc_ggtt_offset(guc, huc->rsa_data)); + if (ret) { + DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); + goto fail; + } + + /* Check authentication status, it should be done by now */ + ret = __intel_wait_for_register(gt->uncore, + huc->status.reg, + huc->status.mask, + huc->status.value, + 2, 50, NULL); + if (ret) { + DRM_ERROR("HuC: Firmware not verified %d\n", ret); + goto fail; + } + + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); + return 0; + +fail: + i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret); + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_FAIL); + return ret; +} + +/** + * intel_huc_check_status() - check HuC status + * @huc: intel_huc structure + * + * This function reads status register to verify if HuC + * firmware was successfully loaded. + * + * Returns: 1 if HuC firmware is loaded and verified, + * 0 if HuC firmware is not loaded and -ENODEV if HuC + * is not present on this platform. + */ +int intel_huc_check_status(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + intel_wakeref_t wakeref; + u32 status = 0; + + if (!intel_huc_is_supported(huc)) + return -ENODEV; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + status = intel_uncore_read(gt->uncore, huc->status.reg); + + return (status & huc->status.mask) == huc->status.value; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h new file mode 100644 index 000000000000..644c059fe01d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_HUC_H_ +#define _INTEL_HUC_H_ + +#include "i915_reg.h" +#include "intel_uc_fw.h" +#include "intel_huc_fw.h" + +struct intel_huc { + /* Generic uC firmware management */ + struct intel_uc_fw fw; + + /* HuC-specific additions */ + struct i915_vma *rsa_data; + + struct { + i915_reg_t reg; + u32 mask; + u32 value; + } status; +}; + +void intel_huc_init_early(struct intel_huc *huc); +int intel_huc_init(struct intel_huc *huc); +void intel_huc_fini(struct intel_huc *huc); +int intel_huc_auth(struct intel_huc *huc); +int intel_huc_check_status(struct intel_huc *huc); + +static inline int intel_huc_sanitize(struct intel_huc *huc) +{ + intel_uc_fw_sanitize(&huc->fw); + return 0; +} + +static inline bool intel_huc_is_supported(struct intel_huc *huc) +{ + return intel_uc_fw_is_supported(&huc->fw); +} + +static inline bool intel_huc_is_enabled(struct intel_huc *huc) +{ + return intel_uc_fw_is_enabled(&huc->fw); +} + +static inline bool intel_huc_is_authenticated(struct intel_huc *huc) +{ + return intel_uc_fw_is_running(&huc->fw); +} + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c new file mode 100644 index 000000000000..eee193bf2cc4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#include "gt/intel_gt.h" +#include "intel_huc_fw.h" +#include "i915_drv.h" + +/** + * intel_huc_fw_init_early() - initializes HuC firmware 
struct + * @huc: intel_huc struct + * + * On platforms with HuC selects firmware for uploading + */ +void intel_huc_fw_init_early(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + struct intel_uc *uc = >->uc; + struct drm_i915_private *i915 = gt->i915; + + intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC, + intel_uc_uses_guc(uc), + INTEL_INFO(i915)->platform, INTEL_REVID(i915)); +} + +/** + * intel_huc_fw_upload() - load HuC uCode to device + * @huc: intel_huc structure + * + * Called from intel_uc_init_hw() during driver load, resume from sleep and + * after a GPU reset. Note that HuC must be loaded before GuC. + * + * The firmware image should have already been fetched into memory, so only + * check that fetch succeeded, and then transfer the image to the h/w. + * + * Return: non-zero code on error + */ +int intel_huc_fw_upload(struct intel_huc *huc) +{ + /* HW doesn't look at destination address for HuC, so set it to 0 */ + return intel_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL); +} diff --git a/drivers/gpu/drm/i915/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h index 8a00a0ebddc5..b791269ce923 100644 --- a/drivers/gpu/drm/i915/intel_huc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h @@ -1,7 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation + * Copyright © 2014-2019 Intel Corporation */ #ifndef _INTEL_HUC_FW_H_ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c new file mode 100644 index 000000000000..64934a876a50 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -0,0 +1,620 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2016-2019 Intel Corporation + */ + +#include "gt/intel_gt.h" +#include "gt/intel_reset.h" +#include "intel_guc.h" +#include "intel_guc_ads.h" +#include "intel_guc_submission.h" +#include "intel_uc.h" + +#include "i915_drv.h" + +static const struct intel_uc_ops uc_ops_off; +static const struct intel_uc_ops uc_ops_on; + +/* Reset GuC providing us with fresh state for both GuC and HuC. 
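
/*
 * __intel_uc_reset_hw() below resets the GuC and then sanity-checks the
 * status register for the "MIA core in reset" bit. A stand-alone sketch of
 * that reset-then-verify pattern; the reset and the register are faked and
 * the bit position is illustrative, not the real GS_MIA_IN_RESET encoding.
 */
#include <stdint.h>
#include <stdio.h>

#define MIA_IN_RESET (1u << 0)  /* illustrative bit */

static uint32_t guc_status;

static int do_reset(void)
{
        guc_status = MIA_IN_RESET;      /* hardware reports the reset state */
        return 0;
}

static int reset_and_verify(void)
{
        int ret = do_reset();

        if (ret)
                return ret;

        if (!(guc_status & MIA_IN_RESET))
                fprintf(stderr, "GuC status %#x: MIA core not in reset\n",
                        guc_status);
        return 0;
}

int main(void)
{
        int ret = reset_and_verify();

        printf("reset: %d, status %#x\n", ret, guc_status);
        return ret;
}
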
+ */ +static int __intel_uc_reset_hw(struct intel_uc *uc) +{ + struct intel_gt *gt = uc_to_gt(uc); + int ret; + u32 guc_status; + + ret = i915_inject_probe_error(gt->i915, -ENXIO); + if (ret) + return ret; + + ret = intel_reset_guc(gt); + if (ret) { + DRM_ERROR("Failed to reset GuC, ret = %d\n", ret); + return ret; + } + + guc_status = intel_uncore_read(gt->uncore, GUC_STATUS); + WARN(!(guc_status & GS_MIA_IN_RESET), + "GuC status: 0x%x, MIA core expected to be in reset\n", + guc_status); + + return ret; +} + +static void __confirm_options(struct intel_uc *uc) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + + DRM_DEV_DEBUG_DRIVER(i915->drm.dev, + "enable_guc=%d (guc:%s submission:%s huc:%s)\n", + i915_modparams.enable_guc, + yesno(intel_uc_uses_guc(uc)), + yesno(intel_uc_uses_guc_submission(uc)), + yesno(intel_uc_uses_huc(uc))); + + if (i915_modparams.enable_guc == -1) + return; + + if (i915_modparams.enable_guc == 0) { + GEM_BUG_ON(intel_uc_uses_guc(uc)); + GEM_BUG_ON(intel_uc_uses_guc_submission(uc)); + GEM_BUG_ON(intel_uc_uses_huc(uc)); + return; + } + + if (!intel_uc_supports_guc(uc)) + dev_info(i915->drm.dev, + "Incompatible option enable_guc=%d - %s\n", + i915_modparams.enable_guc, "GuC is not supported!"); + + if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC && + !intel_uc_supports_huc(uc)) + dev_info(i915->drm.dev, + "Incompatible option enable_guc=%d - %s\n", + i915_modparams.enable_guc, "HuC is not supported!"); + + if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION && + !intel_uc_supports_guc_submission(uc)) + dev_info(i915->drm.dev, + "Incompatible option enable_guc=%d - %s\n", + i915_modparams.enable_guc, "GuC submission is N/A"); + + if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION | + ENABLE_GUC_LOAD_HUC)) + dev_info(i915->drm.dev, + "Incompatible option enable_guc=%d - %s\n", + i915_modparams.enable_guc, "undocumented flag"); +} + +void intel_uc_init_early(struct intel_uc *uc) +{ + intel_guc_init_early(&uc->guc); + intel_huc_init_early(&uc->huc); + + __confirm_options(uc); + + if (intel_uc_uses_guc(uc)) + uc->ops = &uc_ops_on; + else + uc->ops = &uc_ops_off; +} + +void intel_uc_driver_late_release(struct intel_uc *uc) +{ +} + +/** + * intel_uc_init_mmio - setup uC MMIO access + * @uc: the intel_uc structure + * + * Setup minimal state necessary for MMIO accesses later in the + * initialization sequence. + */ +void intel_uc_init_mmio(struct intel_uc *uc) +{ + intel_guc_init_send_regs(&uc->guc); +} + +static void __uc_capture_load_err_log(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + if (guc->log.vma && !uc->load_err_log) + uc->load_err_log = i915_gem_object_get(guc->log.vma->obj); +} + +static void __uc_free_load_err_log(struct intel_uc *uc) +{ + struct drm_i915_gem_object *log = fetch_and_zero(&uc->load_err_log); + + if (log) + i915_gem_object_put(log); +} + +static inline bool guc_communication_enabled(struct intel_guc *guc) +{ + return intel_guc_ct_enabled(&guc->ct); +} + +/* + * Events triggered while CT buffers are disabled are logged in the SCRATCH_15 + * register using the same bits used in the CT message payload. Since our + * communication channel with guc is turned off at this point, we can save the + * message and handle it after we turn it back on. 
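
/*
 * A stand-alone sketch (not driver code) of the latch-and-replay scheme the
 * comment above describes: events that arrive while the CT channel is down
 * are accumulated in a scratch word and handled once the channel comes back
 * up. All of the GuC machinery is faked here; the scratch word stands in for
 * SOFT_SCRATCH(15).
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t scratch_msg;    /* stands in for SOFT_SCRATCH(15) */
static int ct_enabled;

static void firmware_event(uint32_t msg)
{
        if (!ct_enabled)
                scratch_msg |= msg;     /* latched while CT is down */
        else
                printf("handled via CT: %#x\n", msg);
}

static void handle_latched(void)
{
        if (scratch_msg) {
                printf("replaying latched msg: %#x\n", scratch_msg);
                scratch_msg = 0;        /* cf. guc_clear_mmio_msg() below */
        }
}

static void enable_communication(void)
{
        ct_enabled = 1;         /* 1. bring the CT channel up */
        handle_latched();       /* 2. replay anything we missed */
}

int main(void)
{
        firmware_event(0x2);    /* arrives before CT is enabled */
        enable_communication();
        firmware_event(0x4);    /* handled normally from now on */
        return 0;
}
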
+ */ +static void guc_clear_mmio_msg(struct intel_guc *guc) +{ + intel_uncore_write(guc_to_gt(guc)->uncore, SOFT_SCRATCH(15), 0); +} + +static void guc_get_mmio_msg(struct intel_guc *guc) +{ + u32 val; + + spin_lock_irq(&guc->irq_lock); + + val = intel_uncore_read(guc_to_gt(guc)->uncore, SOFT_SCRATCH(15)); + guc->mmio_msg |= val & guc->msg_enabled_mask; + + /* + * clear all events, including the ones we're not currently servicing, + * to make sure we don't try to process a stale message if we enable + * handling of more events later. + */ + guc_clear_mmio_msg(guc); + + spin_unlock_irq(&guc->irq_lock); +} + +static void guc_handle_mmio_msg(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + + /* we need communication to be enabled to reply to GuC */ + GEM_BUG_ON(!guc_communication_enabled(guc)); + + if (!guc->mmio_msg) + return; + + spin_lock_irq(&i915->irq_lock); + intel_guc_to_host_process_recv_msg(guc, &guc->mmio_msg, 1); + spin_unlock_irq(&i915->irq_lock); + + guc->mmio_msg = 0; +} + +static void guc_reset_interrupts(struct intel_guc *guc) +{ + guc->interrupts.reset(guc); +} + +static void guc_enable_interrupts(struct intel_guc *guc) +{ + guc->interrupts.enable(guc); +} + +static void guc_disable_interrupts(struct intel_guc *guc) +{ + guc->interrupts.disable(guc); +} + +static int guc_enable_communication(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int ret; + + GEM_BUG_ON(guc_communication_enabled(guc)); + + ret = i915_inject_probe_error(i915, -ENXIO); + if (ret) + return ret; + + ret = intel_guc_ct_enable(&guc->ct); + if (ret) + return ret; + + /* check for mmio messages received before/during the CT enable */ + guc_get_mmio_msg(guc); + guc_handle_mmio_msg(guc); + + guc_enable_interrupts(guc); + + /* check for CT messages received before we enabled interrupts */ + spin_lock_irq(&i915->irq_lock); + intel_guc_ct_event_handler(&guc->ct); + spin_unlock_irq(&i915->irq_lock); + + DRM_INFO("GuC communication enabled\n"); + + return 0; +} + +static void guc_disable_communication(struct intel_guc *guc) +{ + /* + * Events generated during or after CT disable are logged by guc in + * via mmio. Make sure the register is clear before disabling CT since + * all events we cared about have already been processed via CT. + */ + guc_clear_mmio_msg(guc); + + guc_disable_interrupts(guc); + + intel_guc_ct_disable(&guc->ct); + + /* + * Check for messages received during/after the CT disable. We do not + * expect any messages to have arrived via CT between the interrupt + * disable and the CT disable because GuC should've been idle until we + * triggered the CT disable protocol. 
+ */ + guc_get_mmio_msg(guc); + + DRM_INFO("GuC communication disabled\n"); +} + +static void __uc_fetch_firmwares(struct intel_uc *uc) +{ + int err; + + GEM_BUG_ON(!intel_uc_uses_guc(uc)); + + err = intel_uc_fw_fetch(&uc->guc.fw); + if (err) + return; + + if (intel_uc_uses_huc(uc)) + intel_uc_fw_fetch(&uc->huc.fw); +} + +static void __uc_cleanup_firmwares(struct intel_uc *uc) +{ + intel_uc_fw_cleanup_fetch(&uc->huc.fw); + intel_uc_fw_cleanup_fetch(&uc->guc.fw); +} + +static void __uc_init(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + struct intel_huc *huc = &uc->huc; + int ret; + + GEM_BUG_ON(!intel_uc_uses_guc(uc)); + + /* XXX: GuC submission is unavailable for now */ + GEM_BUG_ON(intel_uc_supports_guc_submission(uc)); + + ret = intel_guc_init(guc); + if (ret) { + intel_uc_fw_cleanup_fetch(&huc->fw); + return; + } + + if (intel_uc_uses_huc(uc)) + intel_huc_init(huc); +} + +static void __uc_fini(struct intel_uc *uc) +{ + intel_huc_fini(&uc->huc); + intel_guc_fini(&uc->guc); + + __uc_free_load_err_log(uc); +} + +static int __uc_sanitize(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + struct intel_huc *huc = &uc->huc; + + GEM_BUG_ON(!intel_uc_supports_guc(uc)); + + intel_huc_sanitize(huc); + intel_guc_sanitize(guc); + + return __intel_uc_reset_hw(uc); +} + +/* Initialize and verify the uC regs related to uC positioning in WOPCM */ +static int uc_init_wopcm(struct intel_uc *uc) +{ + struct intel_gt *gt = uc_to_gt(uc); + struct intel_uncore *uncore = gt->uncore; + u32 base = intel_wopcm_guc_base(>->i915->wopcm); + u32 size = intel_wopcm_guc_size(>->i915->wopcm); + u32 huc_agent = intel_uc_uses_huc(uc) ? HUC_LOADING_AGENT_GUC : 0; + u32 mask; + int err; + + if (unlikely(!base || !size)) { + i915_probe_error(gt->i915, "Unsuccessful WOPCM partitioning\n"); + return -E2BIG; + } + + GEM_BUG_ON(!intel_uc_supports_guc(uc)); + GEM_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK)); + GEM_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK); + GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); + GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); + + err = i915_inject_probe_error(gt->i915, -ENXIO); + if (err) + return err; + + mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED; + err = intel_uncore_write_and_verify(uncore, GUC_WOPCM_SIZE, size, mask, + size | GUC_WOPCM_SIZE_LOCKED); + if (err) + goto err_out; + + mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent; + err = intel_uncore_write_and_verify(uncore, DMA_GUC_WOPCM_OFFSET, + base | huc_agent, mask, + base | huc_agent | + GUC_WOPCM_OFFSET_VALID); + if (err) + goto err_out; + + return 0; + +err_out: + i915_probe_error(gt->i915, "Failed to init uC WOPCM registers!\n"); + i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET", + i915_mmio_reg_offset(DMA_GUC_WOPCM_OFFSET), + intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET)); + i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE", + i915_mmio_reg_offset(GUC_WOPCM_SIZE), + intel_uncore_read(uncore, GUC_WOPCM_SIZE)); + + return err; +} + +static bool uc_is_wopcm_locked(struct intel_uc *uc) +{ + struct intel_gt *gt = uc_to_gt(uc); + struct intel_uncore *uncore = gt->uncore; + + return (intel_uncore_read(uncore, GUC_WOPCM_SIZE) & GUC_WOPCM_SIZE_LOCKED) || + (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID); +} + +static int __uc_check_hw(struct intel_uc *uc) +{ + if (!intel_uc_supports_guc(uc)) + return 0; + + /* + * We can silently continue without GuC only if it was never enabled + * before on this system after reboot, otherwise we risk GPU hangs. 
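
/*
 * uc_init_wopcm() above programs the WOPCM registers through a
 * write-and-verify helper: write a value, read it back under a mask and fail
 * if the hardware did not accept it (for instance because the register is
 * already locked). A stand-alone sketch of that idiom against a fake
 * register; the values and masks are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t fake_reg;

static void reg_write(uint32_t val) { fake_reg = val; }
static uint32_t reg_read(void) { return fake_reg; }

static int write_and_verify(uint32_t val, uint32_t mask, uint32_t expected)
{
        reg_write(val);
        return (reg_read() & mask) != expected ? -1 : 0;
}

int main(void)
{
        /* accepted: the readback matches what we expect */
        printf("ok case:   %d\n", write_and_verify(0x40, 0xff, 0x40));
        /* rejected: we expected a "locked" bit that never appeared */
        printf("fail case: %d\n", write_and_verify(0x40, 0x1ff, 0x140));
        return 0;
}
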
+ * To check if GuC was loaded before we look at WOPCM registers. + */ + if (uc_is_wopcm_locked(uc)) + return -EIO; + + return 0; +} + +static int __uc_init_hw(struct intel_uc *uc) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + struct intel_guc *guc = &uc->guc; + struct intel_huc *huc = &uc->huc; + int ret, attempts; + + GEM_BUG_ON(!intel_uc_supports_guc(uc)); + GEM_BUG_ON(!intel_uc_uses_guc(uc)); + + if (!intel_uc_fw_is_available(&guc->fw)) { + ret = __uc_check_hw(uc) || + intel_uc_fw_is_overridden(&guc->fw) || + intel_uc_supports_guc_submission(uc) ? + intel_uc_fw_status_to_error(guc->fw.status) : 0; + goto err_out; + } + + ret = uc_init_wopcm(uc); + if (ret) + goto err_out; + + guc_reset_interrupts(guc); + + /* WaEnableuKernelHeaderValidFix:skl */ + /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ + if (IS_GEN(i915, 9)) + attempts = 3; + else + attempts = 1; + + while (attempts--) { + /* + * Always reset the GuC just before (re)loading, so + * that the state and timing are fairly predictable + */ + ret = __uc_sanitize(uc); + if (ret) + goto err_out; + + intel_huc_fw_upload(huc); + intel_guc_ads_reset(guc); + intel_guc_write_params(guc); + ret = intel_guc_fw_upload(guc); + if (ret == 0) + break; + + DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and " + "retry %d more time(s)\n", ret, attempts); + } + + /* Did we succeded or run out of retries? */ + if (ret) + goto err_log_capture; + + ret = guc_enable_communication(guc); + if (ret) + goto err_log_capture; + + intel_huc_auth(huc); + + ret = intel_guc_sample_forcewake(guc); + if (ret) + goto err_communication; + + if (intel_uc_supports_guc_submission(uc)) + intel_guc_submission_enable(guc); + + dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n", + intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, + guc->fw.major_ver_found, guc->fw.minor_ver_found, + "submission", + enableddisabled(intel_uc_supports_guc_submission(uc))); + + if (intel_uc_uses_huc(uc)) { + dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n", + intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), + huc->fw.path, + huc->fw.major_ver_found, huc->fw.minor_ver_found, + "authenticated", + yesno(intel_huc_is_authenticated(huc))); + } + + return 0; + + /* + * We've failed to load the firmware :( + */ +err_communication: + guc_disable_communication(guc); +err_log_capture: + __uc_capture_load_err_log(uc); +err_out: + __uc_sanitize(uc); + + if (!ret) { + dev_notice(i915->drm.dev, "GuC is uninitialized\n"); + /* We want to run without GuC submission */ + return 0; + } + + i915_probe_error(i915, "GuC initialization failed %d\n", ret); + + /* We want to keep KMS alive */ + return -EIO; +} + +static void __uc_fini_hw(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + if (!intel_guc_is_running(guc)) + return; + + if (intel_uc_supports_guc_submission(uc)) + intel_guc_submission_disable(guc); + + if (guc_communication_enabled(guc)) + guc_disable_communication(guc); + + __uc_sanitize(uc); +} + +/** + * intel_uc_reset_prepare - Prepare for reset + * @uc: the intel_uc structure + * + * Preparing for full gpu reset. 
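
/*
 * __uc_init_hw() above retries the GuC firmware load up to three times on
 * Gen9 parts (see the WaEnableGuCBootHashCheckNotSet comment), sanitizing the
 * GuC before every attempt. A stand-alone sketch of that retry loop; the
 * sanitize and upload steps are stubs and the first upload is made to fail
 * on purpose.
 */
#include <stdio.h>

static int uploads;

static int sanitize(void) { return 0; }         /* reset before each try */

static int upload_fw(void)
{
        return ++uploads < 2 ? -1 : 0;          /* fail the first attempt */
}

static int init_hw(int gen)
{
        int attempts = (gen == 9) ? 3 : 1;
        int ret = -1;

        while (attempts--) {
                ret = sanitize();
                if (ret)
                        return ret;

                ret = upload_fw();
                if (ret == 0)
                        break;

                printf("fw load failed, %d attempt(s) left\n", attempts);
        }

        return ret;
}

int main(void)
{
        int ret = init_hw(9);

        printf("init_hw: %d after %d upload(s)\n", ret, uploads);
        return 0;
}
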
+ */ +void intel_uc_reset_prepare(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + if (!intel_guc_is_running(guc)) + return; + + guc_disable_communication(guc); + __uc_sanitize(uc); +} + +void intel_uc_runtime_suspend(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + int err; + + if (!intel_guc_is_running(guc)) + return; + + err = intel_guc_suspend(guc); + if (err) + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + + guc_disable_communication(guc); +} + +void intel_uc_suspend(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + intel_wakeref_t wakeref; + + if (!intel_guc_is_running(guc)) + return; + + with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) + intel_uc_runtime_suspend(uc); +} + +static int __uc_resume(struct intel_uc *uc, bool enable_communication) +{ + struct intel_guc *guc = &uc->guc; + int err; + + if (!intel_guc_is_running(guc)) + return 0; + + /* Make sure we enable communication if and only if it's disabled */ + GEM_BUG_ON(enable_communication == guc_communication_enabled(guc)); + + if (enable_communication) + guc_enable_communication(guc); + + err = intel_guc_resume(guc); + if (err) { + DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err); + return err; + } + + return 0; +} + +int intel_uc_resume(struct intel_uc *uc) +{ + /* + * When coming out of S3/S4 we sanitize and re-init the HW, so + * communication is already re-enabled at this point. + */ + return __uc_resume(uc, false); +} + +int intel_uc_runtime_resume(struct intel_uc *uc) +{ + /* + * During runtime resume we don't sanitize, so we need to re-init + * communication as well. + */ + return __uc_resume(uc, true); +} + +static const struct intel_uc_ops uc_ops_off = { + .init_hw = __uc_check_hw, +}; + +static const struct intel_uc_ops uc_ops_on = { + .sanitize = __uc_sanitize, + + .init_fw = __uc_fetch_firmwares, + .fini_fw = __uc_cleanup_firmwares, + + .init = __uc_init, + .fini = __uc_fini, + + .init_hw = __uc_init_hw, + .fini_hw = __uc_fini_hw, +}; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h new file mode 100644 index 000000000000..49c913524686 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_UC_H_ +#define _INTEL_UC_H_ + +#include "intel_guc.h" +#include "intel_huc.h" +#include "i915_params.h" + +struct intel_uc; + +struct intel_uc_ops { + int (*sanitize)(struct intel_uc *uc); + void (*init_fw)(struct intel_uc *uc); + void (*fini_fw)(struct intel_uc *uc); + void (*init)(struct intel_uc *uc); + void (*fini)(struct intel_uc *uc); + int (*init_hw)(struct intel_uc *uc); + void (*fini_hw)(struct intel_uc *uc); +}; + +struct intel_uc { + struct intel_uc_ops const *ops; + struct intel_guc guc; + struct intel_huc huc; + + /* Snapshot of GuC log from last failed load */ + struct drm_i915_gem_object *load_err_log; +}; + +void intel_uc_init_early(struct intel_uc *uc); +void intel_uc_driver_late_release(struct intel_uc *uc); +void intel_uc_init_mmio(struct intel_uc *uc); +void intel_uc_reset_prepare(struct intel_uc *uc); +void intel_uc_suspend(struct intel_uc *uc); +void intel_uc_runtime_suspend(struct intel_uc *uc); +int intel_uc_resume(struct intel_uc *uc); +int intel_uc_runtime_resume(struct intel_uc *uc); + +static inline bool intel_uc_supports_guc(struct intel_uc *uc) +{ + return intel_guc_is_supported(&uc->guc); +} + +static inline bool intel_uc_uses_guc(struct intel_uc *uc) +{ + return 
intel_guc_is_enabled(&uc->guc); +} + +static inline bool intel_uc_supports_guc_submission(struct intel_uc *uc) +{ + return intel_guc_is_submission_supported(&uc->guc); +} + +static inline bool intel_uc_uses_guc_submission(struct intel_uc *uc) +{ + return intel_guc_is_submission_supported(&uc->guc); +} + +static inline bool intel_uc_supports_huc(struct intel_uc *uc) +{ + return intel_uc_supports_guc(uc); +} + +static inline bool intel_uc_uses_huc(struct intel_uc *uc) +{ + return intel_huc_is_enabled(&uc->huc); +} + +#define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ +static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ +{ \ + if (uc->ops->_OPS) \ + return uc->ops->_OPS(uc); \ + return _RET; \ +} +intel_uc_ops_function(sanitize, sanitize, int, 0); +intel_uc_ops_function(fetch_firmwares, init_fw, void, ); +intel_uc_ops_function(cleanup_firmwares, fini_fw, void, ); +intel_uc_ops_function(init, init, void, ); +intel_uc_ops_function(fini, fini, void, ); +intel_uc_ops_function(init_hw, init_hw, int, 0); +intel_uc_ops_function(fini_hw, fini_hw, void, ); +#undef intel_uc_ops_function + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c new file mode 100644 index 000000000000..8ee0a0c7f447 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -0,0 +1,604 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2016-2019 Intel Corporation + */ + +#include <linux/bitfield.h> +#include <linux/firmware.h> +#include <drm/drm_print.h> + +#include "intel_uc_fw.h" +#include "intel_uc_fw_abi.h" +#include "i915_drv.h" + +static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw) +{ + GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED); + if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) + return container_of(uc_fw, struct intel_gt, uc.guc.fw); + + GEM_BUG_ON(uc_fw->type != INTEL_UC_FW_TYPE_HUC); + return container_of(uc_fw, struct intel_gt, uc.huc.fw); +} + +#ifdef CONFIG_DRM_I915_DEBUG_GUC +void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_status status) +{ + uc_fw->__status = status; + DRM_DEV_DEBUG_DRIVER(__uc_fw_to_gt(uc_fw)->i915->drm.dev, + "%s firmware -> %s\n", + intel_uc_fw_type_repr(uc_fw->type), + status == INTEL_UC_FIRMWARE_SELECTED ? + uc_fw->path : intel_uc_fw_status_repr(status)); +} +#endif + +/* + * List of required GuC and HuC binaries per-platform. + * Must be ordered based on platform + revid, from newer to older. + * + * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas + * between 33.0 and 35.2 are only related to new additions to support new Gen12 + * features. + */ +#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ + fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 2, 0, 0)) + +#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ + "i915/" \ + __stringify(prefix_) name_ \ + __stringify(major_) "." \ + __stringify(minor_) "." 
\ + __stringify(patch_) ".bin" + +#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ + __MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_) + +#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \ + __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) + +/* All blobs need to be declared via MODULE_FIRMWARE() */ +#define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \ + MODULE_FIRMWARE(guc_); \ + MODULE_FIRMWARE(huc_); + +INTEL_UC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH, MAKE_HUC_FW_PATH) + +/* The below structs and macros are used to iterate across the list of blobs */ +struct __packed uc_fw_blob { + u8 major; + u8 minor; + const char *path; +}; + +#define UC_FW_BLOB(major_, minor_, path_) \ + { .major = major_, .minor = minor_, .path = path_ } + +#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \ + UC_FW_BLOB(major_, minor_, \ + MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_)) + +#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \ + UC_FW_BLOB(major_, minor_, \ + MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_)) + +struct __packed uc_fw_platform_requirement { + enum intel_platform p; + u8 rev; /* first platform rev using this FW */ + const struct uc_fw_blob blobs[INTEL_UC_FW_NUM_TYPES]; +}; + +#define MAKE_FW_LIST(platform_, revid_, guc_, huc_) \ +{ \ + .p = INTEL_##platform_, \ + .rev = revid_, \ + .blobs[INTEL_UC_FW_TYPE_GUC] = guc_, \ + .blobs[INTEL_UC_FW_TYPE_HUC] = huc_, \ +}, + +static void +__uc_fw_auto_select(struct intel_uc_fw *uc_fw, enum intel_platform p, u8 rev) +{ + static const struct uc_fw_platform_requirement fw_blobs[] = { + INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB) + }; + int i; + + for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) { + if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { + const struct uc_fw_blob *blob = + &fw_blobs[i].blobs[uc_fw->type]; + uc_fw->path = blob->path; + uc_fw->major_ver_wanted = blob->major; + uc_fw->minor_ver_wanted = blob->minor; + break; + } + } + + /* make sure the list is ordered as expected */ + if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) { + for (i = 1; i < ARRAY_SIZE(fw_blobs); i++) { + if (fw_blobs[i].p < fw_blobs[i - 1].p) + continue; + + if (fw_blobs[i].p == fw_blobs[i - 1].p && + fw_blobs[i].rev < fw_blobs[i - 1].rev) + continue; + + pr_err("invalid FW blob order: %s r%u comes before %s r%u\n", + intel_platform_name(fw_blobs[i - 1].p), + fw_blobs[i - 1].rev, + intel_platform_name(fw_blobs[i].p), + fw_blobs[i].rev); + + uc_fw->path = NULL; + } + } + + /* We don't want to enable GuC/HuC on pre-Gen11 by default */ + if (i915_modparams.enable_guc == -1 && p < INTEL_ICELAKE) + uc_fw->path = NULL; +} + +static const char *__override_guc_firmware_path(void) +{ + if (i915_modparams.enable_guc & (ENABLE_GUC_SUBMISSION | + ENABLE_GUC_LOAD_HUC)) + return i915_modparams.guc_firmware_path; + return ""; +} + +static const char *__override_huc_firmware_path(void) +{ + if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC) + return i915_modparams.huc_firmware_path; + return ""; +} + +static void __uc_fw_user_override(struct intel_uc_fw *uc_fw) +{ + const char *path = NULL; + + switch (uc_fw->type) { + case INTEL_UC_FW_TYPE_GUC: + path = __override_guc_firmware_path(); + break; + case INTEL_UC_FW_TYPE_HUC: + path = __override_huc_firmware_path(); + break; + } + + if (unlikely(path)) { + uc_fw->path = path; + uc_fw->user_overridden = true; + } +} + +/** + * intel_uc_fw_init_early - initialize the uC object and select the firmware + * @uc_fw: uC firmware 
+ * @type: type of uC + * @supported: is uC support possible + * @platform: platform identifier + * @rev: hardware revision + * + * Initialize the state of our uC object and relevant tracking and select the + * firmware to fetch and load. + */ +void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_type type, bool supported, + enum intel_platform platform, u8 rev) +{ + /* + * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status + * before we're looked at the HW caps to see if we have uc support + */ + BUILD_BUG_ON(INTEL_UC_FIRMWARE_UNINITIALIZED); + GEM_BUG_ON(uc_fw->status); + GEM_BUG_ON(uc_fw->path); + + uc_fw->type = type; + + if (supported) { + __uc_fw_auto_select(uc_fw, platform, rev); + __uc_fw_user_override(uc_fw); + } + + intel_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? + INTEL_UC_FIRMWARE_SELECTED : + INTEL_UC_FIRMWARE_DISABLED : + INTEL_UC_FIRMWARE_NOT_SUPPORTED); +} + +static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e) +{ + struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; + bool user = e == -EINVAL; + + if (i915_inject_probe_error(i915, e)) { + /* non-existing blob */ + uc_fw->path = "<invalid>"; + uc_fw->user_overridden = user; + } else if (i915_inject_probe_error(i915, e)) { + /* require next major version */ + uc_fw->major_ver_wanted += 1; + uc_fw->minor_ver_wanted = 0; + uc_fw->user_overridden = user; + } else if (i915_inject_probe_error(i915, e)) { + /* require next minor version */ + uc_fw->minor_ver_wanted += 1; + uc_fw->user_overridden = user; + } else if (uc_fw->major_ver_wanted && + i915_inject_probe_error(i915, e)) { + /* require prev major version */ + uc_fw->major_ver_wanted -= 1; + uc_fw->minor_ver_wanted = 0; + uc_fw->user_overridden = user; + } else if (uc_fw->minor_ver_wanted && + i915_inject_probe_error(i915, e)) { + /* require prev minor version - hey, this should work! */ + uc_fw->minor_ver_wanted -= 1; + uc_fw->user_overridden = user; + } else if (user && i915_inject_probe_error(i915, e)) { + /* officially unsupported platform */ + uc_fw->major_ver_wanted = 0; + uc_fw->minor_ver_wanted = 0; + uc_fw->user_overridden = true; + } +} + +/** + * intel_uc_fw_fetch - fetch uC firmware + * @uc_fw: uC firmware + * + * Fetch uC firmware into GEM obj. + * + * Return: 0 on success, a negative errno code on failure. 
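Before intel_uc_fw_fetch() below, it may help to see the selection step from __uc_fw_auto_select() in isolation. The following is a minimal, stand-alone sketch of the newest-first platform/revision walk; the platform enum values, table entries and firmware paths are invented placeholders, not the generated INTEL_UC_FIRMWARE_DEFS table.

#include <stdio.h>
#include <stddef.h>

/* Placeholder platform ids: newer platforms get larger values, as in i915. */
enum platform { PLAT_SKYLAKE = 23, PLAT_COFFEELAKE = 26, PLAT_ICELAKE = 30 };

struct blob {
	enum platform p;
	unsigned char rev;	/* first hardware revision using this blob */
	const char *path;
};

/* Like fw_blobs[]: ordered from newest platform/revision to oldest. */
static const struct blob blobs[] = {
	{ PLAT_ICELAKE,    0, "i915/icl_guc.bin" },
	{ PLAT_COFFEELAKE, 0, "i915/kbl_guc.bin" },
	{ PLAT_SKYLAKE,    0, "i915/skl_guc.bin" },
};

/*
 * Walk while the entry is for a platform at least as new as ours, and take
 * the first entry whose platform matches and whose starting revision is not
 * above the hardware revision.
 */
static const char *select_path(enum platform p, unsigned char rev)
{
	for (size_t i = 0; i < sizeof(blobs) / sizeof(blobs[0]) && p <= blobs[i].p; i++)
		if (p == blobs[i].p && rev >= blobs[i].rev)
			return blobs[i].path;
	return NULL;	/* no blob: the firmware ends up NOT_SUPPORTED/DISABLED */
}

int main(void)
{
	printf("%s\n", select_path(PLAT_COFFEELAKE, 0));	/* i915/kbl_guc.bin */
	return 0;
}

The self-test guarded by CONFIG_DRM_I915_SELFTEST only verifies that the generated table keeps this newest-first ordering; the lookup itself relies on that ordering to stop early.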
+ */ +int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; + struct device *dev = i915->drm.dev; + struct drm_i915_gem_object *obj; + const struct firmware *fw = NULL; + struct uc_css_header *css; + size_t size; + int err; + + GEM_BUG_ON(!i915->wopcm.size); + GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw)); + + err = i915_inject_probe_error(i915, -ENXIO); + if (err) + return err; + + __force_fw_fetch_failures(uc_fw, -EINVAL); + __force_fw_fetch_failures(uc_fw, -ESTALE); + + err = request_firmware(&fw, uc_fw->path, dev); + if (err) + goto fail; + + /* Check the size of the blob before examining buffer contents */ + if (unlikely(fw->size < sizeof(struct uc_css_header))) { + dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw->size, sizeof(struct uc_css_header)); + err = -ENODATA; + goto fail; + } + + css = (struct uc_css_header *)fw->data; + + /* Check integrity of size values inside CSS header */ + size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw - + css->exponent_size_dw) * sizeof(u32); + if (unlikely(size != sizeof(struct uc_css_header))) { + dev_warn(dev, + "%s firmware %s: unexpected header size: %zu != %zu\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw->size, sizeof(struct uc_css_header)); + err = -EPROTO; + goto fail; + } + + /* uCode size must calculated from other sizes */ + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + + /* now RSA */ + if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) { + dev_warn(dev, "%s firmware %s: unexpected key size: %u != %u\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + css->key_size_dw, UOS_RSA_SCRATCH_COUNT); + err = -EPROTO; + goto fail; + } + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + + /* At least, it should have header, uCode and RSA. Size of all three. */ + size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size; + if (unlikely(fw->size < size)) { + dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw->size, size); + err = -ENOEXEC; + goto fail; + } + + /* Sanity check whether this fw is not larger than whole WOPCM memory */ + size = __intel_uc_fw_get_upload_size(uc_fw); + if (unlikely(size >= i915->wopcm.size)) { + dev_warn(dev, "%s firmware %s: invalid size: %zu > %zu\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + size, (size_t)i915->wopcm.size); + err = -E2BIG; + goto fail; + } + + /* Get version numbers from the CSS header */ + uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->sw_version); + uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->sw_version); + + if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + dev_notice(dev, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + if (!intel_uc_fw_is_overridden(uc_fw)) { + err = -ENOEXEC; + goto fail; + } + } + + obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto fail; + } + + uc_fw->obj = obj; + uc_fw->size = fw->size; + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE); + + release_firmware(fw); + return 0; + +fail: + intel_uc_fw_change_status(uc_fw, err == -ENOENT ? 
+ INTEL_UC_FIRMWARE_MISSING : + INTEL_UC_FIRMWARE_ERROR); + + dev_notice(dev, "%s firmware %s: fetch failed with error %d\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + dev_info(dev, "%s firmware(s) can be downloaded from %s\n", + intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); + + release_firmware(fw); /* OK even if fw is NULL */ + return err; +} + +static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw) +{ + struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; + struct drm_mm_node *node = &ggtt->uc_fw; + + GEM_BUG_ON(!drm_mm_node_allocated(node)); + GEM_BUG_ON(upper_32_bits(node->start)); + GEM_BUG_ON(upper_32_bits(node->start + node->size - 1)); + + return lower_32_bits(node->start); +} + +static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj = uc_fw->obj; + struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; + struct i915_vma dummy = { + .node.start = uc_fw_ggtt_offset(uc_fw), + .node.size = obj->base.size, + .pages = obj->mm.pages, + .vm = &ggtt->vm, + }; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size); + + /* uc_fw->obj cache domains were not controlled across suspend */ + drm_clflush_sg(dummy.pages); + + ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); +} + +static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj = uc_fw->obj; + struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; + u64 start = uc_fw_ggtt_offset(uc_fw); + + ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size); +} + +static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) +{ + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + struct intel_uncore *uncore = gt->uncore; + u64 offset; + int ret; + + ret = i915_inject_probe_error(gt->i915, -ETIMEDOUT); + if (ret) + return ret; + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + /* Set the source address for the uCode */ + offset = uc_fw_ggtt_offset(uc_fw); + GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000); + intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset)); + intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset)); + + /* Set the DMA destination */ + intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, dst_offset); + intel_uncore_write_fw(uncore, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); + + /* + * Set the transfer size. The header plus uCode will be copied to WOPCM + * via DMA, excluding any other components + */ + intel_uncore_write_fw(uncore, DMA_COPY_SIZE, + sizeof(struct uc_css_header) + uc_fw->ucode_size); + + /* Start the DMA */ + intel_uncore_write_fw(uncore, DMA_CTRL, + _MASKED_BIT_ENABLE(dma_flags | START_DMA)); + + /* Wait for DMA to finish */ + ret = intel_wait_for_register_fw(uncore, DMA_CTRL, START_DMA, 0, 100); + if (ret) + dev_err(gt->i915->drm.dev, "DMA for %s fw failed, DMA_CTRL=%u\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uncore_read_fw(uncore, DMA_CTRL)); + + /* Disable the bits once DMA is over */ + intel_uncore_write_fw(uncore, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags)); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + return ret; +} + +/** + * intel_uc_fw_upload - load uC firmware using custom loader + * @uc_fw: uC firmware + * @dst_offset: destination offset + * @dma_flags: flags for flags for dma ctrl + * + * Loads uC firmware and updates internal flags. + * + * Return: 0 on success, non-zero on failure. 
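A side note on uc_fw_xfer() above: the only values the DMA engine needs are the GGTT source address split into two 32-bit halves and the copy size. Below is a rough user-space sketch of that arithmetic, with reg_write() standing in for intel_uncore_write_fw() and all numeric values invented for illustration.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for intel_uncore_write_fw(uncore, REG, val). */
static void reg_write(const char *reg, uint32_t val)
{
	printf("%-14s <- 0x%08x\n", reg, val);
}

int main(void)
{
	uint64_t ggtt_offset = 0x00100000;	/* uc_fw_ggtt_offset() result */
	uint32_t css_header  = 128;		/* sizeof(struct uc_css_header) */
	uint32_t ucode_size  = 0x00024000;	/* taken from the CSS header */

	/* Source address of the pinned firmware object in the GGTT. */
	reg_write("DMA_ADDR_0_LOW",  (uint32_t)ggtt_offset);
	reg_write("DMA_ADDR_0_HIGH", (uint32_t)(ggtt_offset >> 32));

	/*
	 * Only the CSS header plus the uCode payload is DMA'd into WOPCM;
	 * the RSA signature and any trailing components are not part of
	 * this transfer.
	 */
	reg_write("DMA_COPY_SIZE", css_header + ucode_size);
	return 0;
}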
+ */ +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) +{ + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + int err; + + /* make sure the status was cleared the last time we reset the uc */ + GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw)); + + err = i915_inject_probe_error(gt->i915, -ENOEXEC); + if (err) + return err; + + if (!intel_uc_fw_is_available(uc_fw)) + return -ENOEXEC; + + /* Call custom loader */ + uc_fw_bind_ggtt(uc_fw); + err = uc_fw_xfer(uc_fw, dst_offset, dma_flags); + uc_fw_unbind_ggtt(uc_fw); + if (err) + goto fail; + + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_TRANSFERRED); + return 0; + +fail: + i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + err); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL); + return err; +} + +int intel_uc_fw_init(struct intel_uc_fw *uc_fw) +{ + int err; + + /* this should happen before the load! */ + GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw)); + + if (!intel_uc_fw_is_available(uc_fw)) + return -ENOEXEC; + + err = i915_gem_object_pin_pages(uc_fw->obj); + if (err) { + DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL); + } + + return err; +} + +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + intel_uc_fw_cleanup_fetch(uc_fw); +} + +/** + * intel_uc_fw_cleanup_fetch - cleanup uC firmware + * @uc_fw: uC firmware + * + * Cleans up uC firmware by releasing the firmware GEM obj. + */ +void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw) +{ + if (!intel_uc_fw_is_available(uc_fw)) + return; + + i915_gem_object_put(fetch_and_zero(&uc_fw->obj)); + + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_SELECTED); +} + +/** + * intel_uc_fw_copy_rsa - copy fw RSA to buffer + * + * @uc_fw: uC firmware + * @dst: dst buffer + * @max_len: max number of bytes to copy + * + * Return: number of copied bytes. + */ +size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) +{ + struct sg_table *pages = uc_fw->obj->mm.pages; + u32 size = min_t(u32, uc_fw->rsa_size, max_len); + u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size; + + GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw)); + + return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset); +} + +/** + * intel_uc_fw_dump - dump information about uC firmware + * @uc_fw: uC firmware + * @p: the &drm_printer + * + * Pretty printer for uC firmware. 
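The offset arithmetic in intel_uc_fw_copy_rsa() above is easy to miss: the RSA key sits directly behind the header and the uCode payload in the fetched blob. A compilable sketch of the same computation, with plain memcpy() standing in for sg_pcopy_to_buffer() and the header size hard-coded to the 128-byte CSS header:

#include <stdint.h>
#include <string.h>
#include <stddef.h>

#define CSS_HEADER_BYTES 128u	/* sizeof(struct uc_css_header) */

static size_t copy_rsa(const uint8_t *blob, uint32_t ucode_size,
		       uint32_t rsa_size, void *dst, uint32_t max_len)
{
	uint32_t offset = CSS_HEADER_BYTES + ucode_size;	/* key follows header + uCode */
	uint32_t size = rsa_size < max_len ? rsa_size : max_len;	/* min_t() */

	memcpy(dst, blob + offset, size);
	return size;
}

Keeping this behind a helper means callers never need to know the blob layout themselves.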
+ */ +void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) +{ + drm_printf(p, "%s firmware: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + drm_printf(p, "\tstatus: %s\n", + intel_uc_fw_status_repr(uc_fw->status)); + drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, + uc_fw->major_ver_found, uc_fw->minor_ver_found); + drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); + drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h new file mode 100644 index 000000000000..1f30543d0d2d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_UC_FW_H_ +#define _INTEL_UC_FW_H_ + +#include <linux/types.h> +#include "intel_uc_fw_abi.h" +#include "intel_device_info.h" +#include "i915_gem.h" + +struct drm_printer; +struct drm_i915_private; +struct intel_gt; + +/* Home of GuC, HuC and DMC firmwares */ +#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" + +/* + * +------------+---------------------------------------------------+ + * | PHASE | FIRMWARE STATUS TRANSITIONS | + * +============+===================================================+ + * | | UNINITIALIZED | + * +------------+- / | \ -+ + * | | DISABLED <--/ | \--> NOT_SUPPORTED | + * | init_early | V | + * | | SELECTED | + * +------------+- / | \ -+ + * | | MISSING <--/ | \--> ERROR | + * | fetch | | | + * | | /------> AVAILABLE <---<-----------\ | + * +------------+- \ / \ \ \ -+ + * | | FAIL <--< \--> TRANSFERRED \ | + * | upload | \ / \ / | + * | | \---------/ \--> RUNNING | + * +------------+---------------------------------------------------+ + */ + +enum intel_uc_fw_status { + INTEL_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */ + INTEL_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */ + INTEL_UC_FIRMWARE_DISABLED, /* disabled */ + INTEL_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */ + INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */ + INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */ + INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ + INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */ + INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ + INTEL_UC_FIRMWARE_RUNNING /* init/auth done */ +}; + +enum intel_uc_fw_type { + INTEL_UC_FW_TYPE_GUC = 0, + INTEL_UC_FW_TYPE_HUC +}; +#define INTEL_UC_FW_NUM_TYPES 2 + +/* + * This structure encapsulates all the data needed during the process + * of fetching, caching, and loading the firmware image into the uC. + */ +struct intel_uc_fw { + enum intel_uc_fw_type type; + union { + const enum intel_uc_fw_status status; + enum intel_uc_fw_status __status; /* no accidental overwrites */ + }; + const char *path; + bool user_overridden; + size_t size; + struct drm_i915_gem_object *obj; + + /* + * The firmware build process will generate a version header file with major and + * minor version defined. The versions are built into CSS header of firmware. + * i915 kernel driver set the minimal firmware version required per platform. 
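For the version fields that follow, the acceptance rule applied back in intel_uc_fw_fetch() is worth spelling out: the major version from the CSS header must match exactly, a newer minor is fine, and a mismatch is only fatal when the blob was not user-overridden. A minimal restatement of that predicate:

#include <stdbool.h>
#include <stdint.h>

static bool uc_fw_version_ok(uint16_t major_wanted, uint16_t minor_wanted,
			     uint16_t major_found, uint16_t minor_found)
{
	/* Major must match exactly; a newer minor than requested is accepted. */
	return major_found == major_wanted && minor_found >= minor_wanted;
}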
+ */ + u16 major_ver_wanted; + u16 minor_ver_wanted; + u16 major_ver_found; + u16 minor_ver_found; + + u32 rsa_size; + u32 ucode_size; +}; + +#ifdef CONFIG_DRM_I915_DEBUG_GUC +void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_status status); +#else +static inline void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_status status) +{ + uc_fw->__status = status; +} +#endif + +static inline +const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) +{ + switch (status) { + case INTEL_UC_FIRMWARE_NOT_SUPPORTED: + return "N/A"; + case INTEL_UC_FIRMWARE_UNINITIALIZED: + return "UNINITIALIZED"; + case INTEL_UC_FIRMWARE_DISABLED: + return "DISABLED"; + case INTEL_UC_FIRMWARE_SELECTED: + return "SELECTED"; + case INTEL_UC_FIRMWARE_MISSING: + return "MISSING"; + case INTEL_UC_FIRMWARE_ERROR: + return "ERROR"; + case INTEL_UC_FIRMWARE_AVAILABLE: + return "AVAILABLE"; + case INTEL_UC_FIRMWARE_FAIL: + return "FAIL"; + case INTEL_UC_FIRMWARE_TRANSFERRED: + return "TRANSFERRED"; + case INTEL_UC_FIRMWARE_RUNNING: + return "RUNNING"; + } + return "<invalid>"; +} + +static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status) +{ + switch (status) { + case INTEL_UC_FIRMWARE_NOT_SUPPORTED: + return -ENODEV; + case INTEL_UC_FIRMWARE_UNINITIALIZED: + return -EACCES; + case INTEL_UC_FIRMWARE_DISABLED: + return -EPERM; + case INTEL_UC_FIRMWARE_MISSING: + return -ENOENT; + case INTEL_UC_FIRMWARE_ERROR: + return -ENOEXEC; + case INTEL_UC_FIRMWARE_FAIL: + return -EIO; + case INTEL_UC_FIRMWARE_SELECTED: + return -ESTALE; + case INTEL_UC_FIRMWARE_AVAILABLE: + case INTEL_UC_FIRMWARE_TRANSFERRED: + case INTEL_UC_FIRMWARE_RUNNING: + return 0; + } + return -EINVAL; +} + +static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) +{ + switch (type) { + case INTEL_UC_FW_TYPE_GUC: + return "GuC"; + case INTEL_UC_FW_TYPE_HUC: + return "HuC"; + } + return "uC"; +} + +static inline enum intel_uc_fw_status +__intel_uc_fw_status(struct intel_uc_fw *uc_fw) +{ + /* shouldn't call this before checking hw/blob availability */ + GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED); + return uc_fw->status; +} + +static inline bool intel_uc_fw_is_supported(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) != INTEL_UC_FIRMWARE_NOT_SUPPORTED; +} + +static inline bool intel_uc_fw_is_enabled(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) > INTEL_UC_FIRMWARE_DISABLED; +} + +static inline bool intel_uc_fw_is_available(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_AVAILABLE; +} + +static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_TRANSFERRED; +} + +static inline bool intel_uc_fw_is_running(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) == INTEL_UC_FIRMWARE_RUNNING; +} + +static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw) +{ + return uc_fw->user_overridden; +} + +static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw) +{ + if (intel_uc_fw_is_loaded(uc_fw)) + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE); +} + +static inline u32 __intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) +{ + return sizeof(struct uc_css_header) + uc_fw->ucode_size; +} + +/** + * intel_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded. + * @uc_fw: uC firmware. 
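One design note on the status predicates above: they work with a single comparison because enum intel_uc_fw_status is declared in pipeline order. A condensed stand-alone restatement (same ordering, shortened names):

#include <stdbool.h>

enum fw_status {
	FW_NOT_SUPPORTED = -1,
	FW_UNINITIALIZED = 0,
	FW_DISABLED,
	FW_SELECTED,
	FW_MISSING,
	FW_ERROR,
	FW_AVAILABLE,
	FW_FAIL,
	FW_TRANSFERRED,
	FW_RUNNING,
};

/* Threshold checks instead of per-state switch statements. */
static bool fw_is_enabled(enum fw_status s)   { return s > FW_DISABLED; }
static bool fw_is_available(enum fw_status s) { return s >= FW_AVAILABLE; }
static bool fw_is_loaded(enum fw_status s)    { return s >= FW_TRANSFERRED; }
static bool fw_is_running(enum fw_status s)   { return s == FW_RUNNING; }
/* Note: FW_FAIL still satisfies fw_is_available(); the blob stays in memory. */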
+ * + * Get the size of the firmware and header that will be uploaded to WOPCM. + * + * Return: Upload firmware size, or zero on firmware fetch failure. + */ +static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) +{ + if (!intel_uc_fw_is_available(uc_fw)) + return 0; + + return __intel_uc_fw_get_upload_size(uc_fw); +} + +void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_type type, bool supported, + enum intel_platform platform, u8 rev); +int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw); +void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags); +int intel_uc_fw_init(struct intel_uc_fw *uc_fw); +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); +size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len); +void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h new file mode 100644 index 000000000000..029214cdedd5 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef _INTEL_UC_FW_ABI_H +#define _INTEL_UC_FW_ABI_H + +#include <linux/types.h> +#include <linux/build_bug.h> + +/** + * DOC: Firmware Layout + * + * The GuC/HuC firmware layout looks like this:: + * + * +======================================================================+ + * | Firmware blob | + * +===============+===============+============+============+============+ + * | CSS header | uCode | RSA key | modulus | exponent | + * +===============+===============+============+============+============+ + * <-header size-> <---header size continued -----------> + * <--- size -----------------------------------------------------------> + * <-key size-> + * <-mod size-> + * <-exp size-> + * + * The firmware may or may not have modulus key and exponent data. The header, + * uCode and RSA signature are must-have components that will be used by driver. + * Length of each components, which is all in dwords, can be found in header. + * In the case that modulus and exponent are not present in fw, a.k.a truncated + * image, the length value still appears in header. + * + * Driver will do some basic fw size validation based on the following rules: + * + * 1. Header, uCode and RSA are must-have components. + * 2. All firmware components, if they present, are in the sequence illustrated + * in the layout table above. + * 3. Length info of each component can be found in header, in dwords. + * 4. Modulus and exponent key are not required by driver. They may not appear + * in fw. So driver will load a truncated firmware in this case. + */ + +struct uc_css_header { + u32 module_type; + /* + * header_size includes all non-uCode bits, including css_header, rsa + * key, modulus key and exponent data. 
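Putting the layout rules above into numbers: every size in the CSS header is in dwords, and the header/uCode byte counts used later in intel_uc_fw_fetch() fall out of simple arithmetic. A stand-alone sketch of those two derivations (field names mirror the header below, values unchecked):

#include <stdint.h>
#include <stddef.h>

struct css_sizes {
	uint32_t header_size_dw;	/* header + key + modulus + exponent */
	uint32_t size_dw;		/* uCode plus header_size_dw */
	uint32_t key_size_dw;
	uint32_t modulus_size_dw;
	uint32_t exponent_size_dw;
};

/* Must come out to sizeof(struct uc_css_header) == 128, as fetch verifies. */
static size_t css_header_bytes(const struct css_sizes *c)
{
	return (c->header_size_dw - c->key_size_dw - c->modulus_size_dw -
		c->exponent_size_dw) * sizeof(uint32_t);
}

/* Size of the uCode payload that follows the header in the blob. */
static size_t ucode_bytes(const struct css_sizes *c)
{
	return (c->size_dw - c->header_size_dw) * sizeof(uint32_t);
}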
+ */ + u32 header_size_dw; + u32 header_version; + u32 module_id; + u32 module_vendor; + u32 date; +#define CSS_DATE_DAY (0xFF << 0) +#define CSS_DATE_MONTH (0xFF << 8) +#define CSS_DATE_YEAR (0xFFFF << 16) + u32 size_dw; /* uCode plus header_size_dw */ + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; + u32 time; +#define CSS_TIME_HOUR (0xFF << 0) +#define CSS_DATE_MIN (0xFF << 8) +#define CSS_DATE_SEC (0xFFFF << 16) + char username[8]; + char buildnumber[12]; + u32 sw_version; +#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) +#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) +#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) + u32 reserved[14]; + u32 header_info; +} __packed; +static_assert(sizeof(struct uc_css_header) == 128); + +#endif /* _INTEL_UC_FW_ABI_H */ diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index c3d19d88da40..771420453f82 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -61,14 +61,14 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm) flags = PIN_MAPPABLE; } - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); mmio_hw_access_pre(dev_priv); ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node, size, I915_GTT_PAGE_SIZE, I915_COLOR_UNEVICTABLE, start, end, flags); mmio_hw_access_post(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); if (ret) gvt_err("fail to alloc %s gm space from host\n", high_gm ? "high" : "low"); @@ -98,9 +98,9 @@ static int alloc_vgpu_gm(struct intel_vgpu *vgpu) return 0; out_free_aperture: - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); drm_mm_remove_node(&vgpu->gm.low_gm_node); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); return ret; } @@ -108,10 +108,10 @@ static void free_vgpu_gm(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); drm_mm_remove_node(&vgpu->gm.low_gm_node); drm_mm_remove_node(&vgpu->gm.high_gm_node); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); } /** @@ -172,14 +172,14 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu) intel_runtime_pm_get(&dev_priv->runtime_pm); - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); _clear_vgpu_fence(vgpu); for (i = 0; i < vgpu_fence_sz(vgpu); i++) { reg = vgpu->fence.regs[i]; i915_unreserve_fence(reg); vgpu->fence.regs[i] = NULL; } - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm); } @@ -195,10 +195,10 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) intel_runtime_pm_get(rpm); /* Request fences from host */ - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); for (i = 0; i < vgpu_fence_sz(vgpu); i++) { - reg = i915_reserve_fence(dev_priv); + reg = i915_reserve_fence(&dev_priv->ggtt); if (IS_ERR(reg)) goto out_free_fence; @@ -207,7 +207,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) _clear_vgpu_fence(vgpu); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); intel_runtime_pm_put_unchecked(rpm); return 0; out_free_fence: @@ -220,7 +220,7 @@ out_free_fence: i915_unreserve_fence(reg); vgpu->fence.regs[i] = NULL; } - mutex_unlock(&dev_priv->drm.struct_mutex); + 
mutex_unlock(&dev_priv->ggtt.vm.mutex); intel_runtime_pm_put_unchecked(rpm); return -ENOSPC; } diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index b09dc315e2da..21a176cd8acc 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -35,7 +35,9 @@ */ #include <linux/slab.h> + #include "i915_drv.h" +#include "gt/intel_ring.h" #include "gvt.h" #include "i915_pvinfo.h" #include "trace.h" @@ -374,21 +376,37 @@ typedef int (*parser_cmd_handler)(struct parser_exec_state *s); #define ADDR_FIX_4(x1, x2, x3, x4) (ADDR_FIX_1(x1) | ADDR_FIX_3(x2, x3, x4)) #define ADDR_FIX_5(x1, x2, x3, x4, x5) (ADDR_FIX_1(x1) | ADDR_FIX_4(x2, x3, x4, x5)) +#define DWORD_FIELD(dword, end, start) \ + FIELD_GET(GENMASK(end, start), cmd_val(s, dword)) + +#define OP_LENGTH_BIAS 2 +#define CMD_LEN(value) (value + OP_LENGTH_BIAS) + +static int gvt_check_valid_cmd_length(int len, int valid_len) +{ + if (valid_len != len) { + gvt_err("len is not valid: len=%u valid_len=%u\n", + len, valid_len); + return -EFAULT; + } + return 0; +} + struct cmd_info { const char *name; u32 opcode; -#define F_LEN_MASK (1U<<0) +#define F_LEN_MASK 3U #define F_LEN_CONST 1U #define F_LEN_VAR 0U +/* value is const although LEN maybe variable */ +#define F_LEN_VAR_FIXED (1<<1) /* * command has its own ip advance logic * e.g. MI_BATCH_START, MI_BATCH_END */ -#define F_IP_ADVANCE_CUSTOM (1<<1) - -#define F_POST_HANDLE (1<<2) +#define F_IP_ADVANCE_CUSTOM (1<<2) u32 flag; #define R_RCS BIT(RCS0) @@ -418,9 +436,12 @@ struct cmd_info { * flag == F_LEN_VAR : length bias bits * Note: length is in DWord */ - u8 len; + u32 len; parser_cmd_handler handler; + + /* valid length in DWord */ + u32 valid_len; }; struct cmd_entry { @@ -944,6 +965,18 @@ static int cmd_handler_lri(struct parser_exec_state *s) int i, ret = 0; int cmd_len = cmd_length(s); struct intel_gvt *gvt = s->vgpu->gvt; + u32 valid_len = CMD_LEN(1); + + /* + * Official intel docs are somewhat sloppy , check the definition of + * MI_LOAD_REGISTER_IMM. + */ + #define MAX_VALID_LEN 127 + if ((cmd_len < valid_len) || (cmd_len > MAX_VALID_LEN)) { + gvt_err("len is not valid: len=%u valid_len=%u\n", + cmd_len, valid_len); + return -EFAULT; + } for (i = 1; i < cmd_len; i += 2) { if (IS_BROADWELL(gvt->dev_priv) && s->ring_id != RCS0) { @@ -1375,6 +1408,15 @@ static int cmd_handler_mi_display_flip(struct parser_exec_state *s) int ret; int i; int len = cmd_length(s); + u32 valid_len = CMD_LEN(1); + + /* Flip Type == Stereo 3D Flip */ + if (DWORD_FIELD(2, 1, 0) == 2) + valid_len++; + ret = gvt_check_valid_cmd_length(cmd_length(s), + valid_len); + if (ret) + return ret; ret = decode_mi_display_flip(s, &info); if (ret) { @@ -1494,12 +1536,21 @@ static int cmd_handler_mi_store_data_imm(struct parser_exec_state *s) int op_size = (cmd_length(s) - 3) * sizeof(u32); int core_id = (cmd_val(s, 2) & (1 << 0)) ? 
1 : 0; unsigned long gma, gma_low, gma_high; + u32 valid_len = CMD_LEN(2); int ret = 0; /* check ppggt */ if (!(cmd_val(s, 0) & (1 << 22))) return 0; + /* check if QWORD */ + if (DWORD_FIELD(0, 21, 21)) + valid_len++; + ret = gvt_check_valid_cmd_length(cmd_length(s), + valid_len); + if (ret) + return ret; + gma = cmd_val(s, 2) & GENMASK(31, 2); if (gmadr_bytes == 8) { @@ -1542,11 +1593,20 @@ static int cmd_handler_mi_op_2f(struct parser_exec_state *s) int op_size = (1 << ((cmd_val(s, 0) & GENMASK(20, 19)) >> 19)) * sizeof(u32); unsigned long gma, gma_high; + u32 valid_len = CMD_LEN(1); int ret = 0; if (!(cmd_val(s, 0) & (1 << 22))) return ret; + /* check inline data */ + if (cmd_val(s, 0) & BIT(18)) + valid_len = CMD_LEN(9); + ret = gvt_check_valid_cmd_length(cmd_length(s), + valid_len); + if (ret) + return ret; + gma = cmd_val(s, 1) & GENMASK(31, 2); if (gmadr_bytes == 8) { gma_high = cmd_val(s, 2) & GENMASK(15, 0); @@ -1584,6 +1644,16 @@ static int cmd_handler_mi_flush_dw(struct parser_exec_state *s) bool index_mode = false; int ret = 0; u32 hws_pga, val; + u32 valid_len = CMD_LEN(2); + + ret = gvt_check_valid_cmd_length(cmd_length(s), + valid_len); + if (ret) { + /* Check again for Qword */ + ret = gvt_check_valid_cmd_length(cmd_length(s), + ++valid_len); + return ret; + } /* Check post-sync and ppgtt bit */ if (((cmd_val(s, 0) >> 14) & 0x3) && (cmd_val(s, 1) & (1 << 2))) { @@ -1661,7 +1731,9 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) return 1; } -static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) +static int find_bb_size(struct parser_exec_state *s, + unsigned long *bb_size, + unsigned long *bb_end_cmd_offset) { unsigned long gma = 0; const struct cmd_info *info; @@ -1673,6 +1745,7 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; *bb_size = 0; + *bb_end_cmd_offset = 0; /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); @@ -1708,6 +1781,10 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) /* chained batch buffer */ bb_end = true; } + + if (bb_end) + *bb_end_cmd_offset = *bb_size; + cmd_len = get_cmd_length(info, cmd) << 2; *bb_size += cmd_len; gma += cmd_len; @@ -1716,12 +1793,36 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) return 0; } +static int audit_bb_end(struct parser_exec_state *s, void *va) +{ + struct intel_vgpu *vgpu = s->vgpu; + u32 cmd = *(u32 *)va; + const struct cmd_info *info; + + info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); + if (info == NULL) { + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); + return -EBADRQC; + } + + if ((info->opcode == OP_MI_BATCH_BUFFER_END) || + ((info->opcode == OP_MI_BATCH_BUFFER_START) && + (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0))) + return 0; + + return -EBADRQC; +} + static int perform_bb_shadow(struct parser_exec_state *s) { struct intel_vgpu *vgpu = s->vgpu; struct intel_vgpu_shadow_bb *bb; unsigned long gma = 0; unsigned long bb_size; + unsigned long bb_end_cmd_offset; int ret = 0; struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? 
s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; @@ -1732,7 +1833,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (gma == INTEL_GVT_INVALID_ADDR) return -EFAULT; - ret = find_bb_size(s, &bb_size); + ret = find_bb_size(s, &bb_size, &bb_end_cmd_offset); if (ret) return ret; @@ -1788,6 +1889,10 @@ static int perform_bb_shadow(struct parser_exec_state *s) goto err_unmap; } + ret = audit_bb_end(s, bb->va + start_offset + bb_end_cmd_offset); + if (ret) + goto err_unmap; + INIT_LIST_HEAD(&bb->list); list_add(&bb->list, &s->workload->shadow_bb); @@ -1912,21 +2017,24 @@ static const struct cmd_info cmd_info[] = { {"MI_RS_CONTEXT", OP_MI_RS_CONTEXT, F_LEN_CONST, R_RCS, D_ALL, 0, 1, NULL}, - {"MI_DISPLAY_FLIP", OP_MI_DISPLAY_FLIP, F_LEN_VAR | F_POST_HANDLE, + {"MI_DISPLAY_FLIP", OP_MI_DISPLAY_FLIP, F_LEN_VAR, R_RCS | R_BCS, D_ALL, 0, 8, cmd_handler_mi_display_flip}, - {"MI_SEMAPHORE_MBOX", OP_MI_SEMAPHORE_MBOX, F_LEN_VAR, R_ALL, D_ALL, - 0, 8, NULL}, + {"MI_SEMAPHORE_MBOX", OP_MI_SEMAPHORE_MBOX, F_LEN_VAR | F_LEN_VAR_FIXED, + R_ALL, D_ALL, 0, 8, NULL, CMD_LEN(1)}, {"MI_MATH", OP_MI_MATH, F_LEN_VAR, R_ALL, D_ALL, 0, 8, NULL}, - {"MI_URB_CLEAR", OP_MI_URB_CLEAR, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL}, + {"MI_URB_CLEAR", OP_MI_URB_CLEAR, F_LEN_VAR | F_LEN_VAR_FIXED, R_RCS, + D_ALL, 0, 8, NULL, CMD_LEN(0)}, - {"MI_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL, F_LEN_VAR, R_ALL, - D_BDW_PLUS, 0, 8, NULL}, + {"MI_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, 0, 8, + NULL, CMD_LEN(0)}, - {"MI_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT, F_LEN_VAR, R_ALL, - D_BDW_PLUS, ADDR_FIX_1(2), 8, cmd_handler_mi_semaphore_wait}, + {"MI_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, ADDR_FIX_1(2), + 8, cmd_handler_mi_semaphore_wait, CMD_LEN(2)}, {"MI_STORE_DATA_IMM", OP_MI_STORE_DATA_IMM, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_1(1), 10, cmd_handler_mi_store_data_imm}, @@ -1940,8 +2048,9 @@ static const struct cmd_info cmd_info[] = { {"MI_UPDATE_GTT", OP_MI_UPDATE_GTT, F_LEN_VAR, R_ALL, D_BDW_PLUS, 0, 10, cmd_handler_mi_update_gtt}, - {"MI_STORE_REGISTER_MEM", OP_MI_STORE_REGISTER_MEM, F_LEN_VAR, R_ALL, - D_ALL, ADDR_FIX_1(2), 8, cmd_handler_srm}, + {"MI_STORE_REGISTER_MEM", OP_MI_STORE_REGISTER_MEM, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8, + cmd_handler_srm, CMD_LEN(2)}, {"MI_FLUSH_DW", OP_MI_FLUSH_DW, F_LEN_VAR, R_ALL, D_ALL, 0, 6, cmd_handler_mi_flush_dw}, @@ -1949,26 +2058,30 @@ static const struct cmd_info cmd_info[] = { {"MI_CLFLUSH", OP_MI_CLFLUSH, F_LEN_VAR, R_ALL, D_ALL, ADDR_FIX_1(1), 10, cmd_handler_mi_clflush}, - {"MI_REPORT_PERF_COUNT", OP_MI_REPORT_PERF_COUNT, F_LEN_VAR, R_ALL, - D_ALL, ADDR_FIX_1(1), 6, cmd_handler_mi_report_perf_count}, + {"MI_REPORT_PERF_COUNT", OP_MI_REPORT_PERF_COUNT, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(1), 6, + cmd_handler_mi_report_perf_count, CMD_LEN(2)}, - {"MI_LOAD_REGISTER_MEM", OP_MI_LOAD_REGISTER_MEM, F_LEN_VAR, R_ALL, - D_ALL, ADDR_FIX_1(2), 8, cmd_handler_lrm}, + {"MI_LOAD_REGISTER_MEM", OP_MI_LOAD_REGISTER_MEM, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8, + cmd_handler_lrm, CMD_LEN(2)}, - {"MI_LOAD_REGISTER_REG", OP_MI_LOAD_REGISTER_REG, F_LEN_VAR, R_ALL, - D_ALL, 0, 8, cmd_handler_lrr}, + {"MI_LOAD_REGISTER_REG", OP_MI_LOAD_REGISTER_REG, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, 0, 8, + cmd_handler_lrr, CMD_LEN(1)}, - {"MI_RS_STORE_DATA_IMM", OP_MI_RS_STORE_DATA_IMM, F_LEN_VAR, R_RCS, - D_ALL, 0, 8, NULL}, + 
{"MI_RS_STORE_DATA_IMM", OP_MI_RS_STORE_DATA_IMM, + F_LEN_VAR | F_LEN_VAR_FIXED, R_RCS, D_ALL, 0, + 8, NULL, CMD_LEN(2)}, - {"MI_LOAD_URB_MEM", OP_MI_LOAD_URB_MEM, F_LEN_VAR, R_RCS, D_ALL, - ADDR_FIX_1(2), 8, NULL}, + {"MI_LOAD_URB_MEM", OP_MI_LOAD_URB_MEM, F_LEN_VAR | F_LEN_VAR_FIXED, + R_RCS, D_ALL, ADDR_FIX_1(2), 8, NULL, CMD_LEN(2)}, {"MI_STORE_URM_MEM", OP_MI_STORE_URM_MEM, F_LEN_VAR, R_RCS, D_ALL, ADDR_FIX_1(2), 8, NULL}, - {"MI_OP_2E", OP_MI_2E, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_2(1, 2), - 8, cmd_handler_mi_op_2e}, + {"MI_OP_2E", OP_MI_2E, F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, + ADDR_FIX_2(1, 2), 8, cmd_handler_mi_op_2e, CMD_LEN(3)}, {"MI_OP_2F", OP_MI_2F, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_1(1), 8, cmd_handler_mi_op_2f}, @@ -1978,8 +2091,8 @@ static const struct cmd_info cmd_info[] = { cmd_handler_mi_batch_buffer_start}, {"MI_CONDITIONAL_BATCH_BUFFER_END", OP_MI_CONDITIONAL_BATCH_BUFFER_END, - F_LEN_VAR, R_ALL, D_ALL, ADDR_FIX_1(2), 8, - cmd_handler_mi_conditional_batch_buffer_end}, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8, + cmd_handler_mi_conditional_batch_buffer_end, CMD_LEN(2)}, {"MI_LOAD_SCAN_LINES_INCL", OP_MI_LOAD_SCAN_LINES_INCL, F_LEN_CONST, R_RCS | R_BCS, D_ALL, 0, 2, NULL}, @@ -2569,6 +2682,13 @@ static int cmd_parser_exec(struct parser_exec_state *s) cmd_length(s), s->buf_type, s->buf_addr_type, s->workload, info->name); + if ((info->flag & F_LEN_MASK) == F_LEN_VAR_FIXED) { + ret = gvt_check_valid_cmd_length(cmd_length(s), + info->valid_len); + if (ret) + return ret; + } + if (info->handler) { ret = info->handler(s); if (ret < 0) { diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.h b/drivers/gpu/drm/i915/gvt/cmd_parser.h index 286703643002..ab25d151932a 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.h +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.h @@ -38,6 +38,10 @@ #define GVT_CMD_HASH_BITS 7 +struct intel_gvt; +struct intel_shadow_wa_ctx; +struct intel_vgpu_workload; + void intel_gvt_clean_cmd_parser(struct intel_gvt *gvt); int intel_gvt_init_cmd_parser(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index 2fb7b73b260d..285f6011a537 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -189,36 +189,19 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops, /** * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU * @vgpu: a vGPU - * - * Returns: - * Zero on success, negative error code if failed. 
*/ -int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) +void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) { - struct dentry *ent; char name[16] = ""; snprintf(name, 16, "vgpu%d", vgpu->id); vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root); - if (!vgpu->debugfs) - return -ENOMEM; - - ent = debugfs_create_bool("active", 0444, vgpu->debugfs, - &vgpu->active); - if (!ent) - return -ENOMEM; - - ent = debugfs_create_file("mmio_diff", 0444, vgpu->debugfs, - vgpu, &vgpu_mmio_diff_fops); - if (!ent) - return -ENOMEM; - ent = debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, - vgpu, &vgpu_scan_nonprivbb_fops); - if (!ent) - return -ENOMEM; - - return 0; + debugfs_create_bool("active", 0444, vgpu->debugfs, &vgpu->active); + debugfs_create_file("mmio_diff", 0444, vgpu->debugfs, vgpu, + &vgpu_mmio_diff_fops); + debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, vgpu, + &vgpu_scan_nonprivbb_fops); } /** @@ -234,27 +217,15 @@ void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) /** * intel_gvt_debugfs_init - register gvt debugfs root entry * @gvt: GVT device - * - * Returns: - * zero on success, negative if failed. */ -int intel_gvt_debugfs_init(struct intel_gvt *gvt) +void intel_gvt_debugfs_init(struct intel_gvt *gvt) { struct drm_minor *minor = gvt->dev_priv->drm.primary; - struct dentry *ent; gvt->debugfs_root = debugfs_create_dir("gvt", minor->debugfs_root); - if (!gvt->debugfs_root) { - gvt_err("Cannot create debugfs dir\n"); - return -ENOMEM; - } - ent = debugfs_create_ulong("num_tracked_mmio", 0444, gvt->debugfs_root, - &gvt->mmio.num_tracked_mmio); - if (!ent) - return -ENOMEM; - - return 0; + debugfs_create_ulong("num_tracked_mmio", 0444, gvt->debugfs_root, + &gvt->mmio.num_tracked_mmio); } /** diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index a87f33e6a23c..b59b34046e1e 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -35,6 +35,11 @@ #ifndef _GVT_DISPLAY_H_ #define _GVT_DISPLAY_H_ +#include <linux/types.h> + +struct intel_gvt; +struct intel_vgpu; + #define SBI_REG_MAX 20 #define DPCD_SIZE 0x700 diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 41c8ebc60c63..2477a1e5a166 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -36,13 +36,32 @@ #define GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12)) +static int vgpu_pin_dma_address(struct intel_vgpu *vgpu, + unsigned long size, + dma_addr_t dma_addr) +{ + int ret = 0; + + if (intel_gvt_hypervisor_dma_pin_guest_page(vgpu, dma_addr)) + ret = -EINVAL; + + return ret; +} + +static void vgpu_unpin_dma_address(struct intel_vgpu *vgpu, + dma_addr_t dma_addr) +{ + intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, dma_addr); +} + static int vgpu_gem_get_pages( struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct intel_vgpu *vgpu; struct sg_table *st; struct scatterlist *sg; - int i, ret; + int i, j, ret; gen8_pte_t __iomem *gtt_entries; struct intel_vgpu_fb_info *fb_info; u32 page_num; @@ -51,6 +70,10 @@ static int vgpu_gem_get_pages( if (WARN_ON(!fb_info)) return -ENODEV; + vgpu = fb_info->obj->vgpu; + if (WARN_ON(!vgpu)) + return -ENODEV; + st = kmalloc(sizeof(*st), GFP_KERNEL); if (unlikely(!st)) return -ENOMEM; @@ -64,21 +87,53 @@ static int vgpu_gem_get_pages( gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + (fb_info->start >> PAGE_SHIFT); for_each_sg(st->sgl, sg, page_num, i) { + 
dma_addr_t dma_addr = + GEN8_DECODE_PTE(readq(>t_entries[i])); + if (vgpu_pin_dma_address(vgpu, PAGE_SIZE, dma_addr)) { + ret = -EINVAL; + goto out; + } + sg->offset = 0; sg->length = PAGE_SIZE; - sg_dma_address(sg) = - GEN8_DECODE_PTE(readq(>t_entries[i])); sg_dma_len(sg) = PAGE_SIZE; + sg_dma_address(sg) = dma_addr; } __i915_gem_object_set_pages(obj, st, PAGE_SIZE); +out: + if (ret) { + dma_addr_t dma_addr; + + for_each_sg(st->sgl, sg, i, j) { + dma_addr = sg_dma_address(sg); + if (dma_addr) + vgpu_unpin_dma_address(vgpu, dma_addr); + } + sg_free_table(st); + kfree(st); + } + + return ret; - return 0; } static void vgpu_gem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { + struct scatterlist *sg; + + if (obj->base.dma_buf) { + struct intel_vgpu_fb_info *fb_info = obj->gvt_info; + struct intel_vgpu_dmabuf_obj *obj = fb_info->obj; + struct intel_vgpu *vgpu = obj->vgpu; + int i; + + for_each_sg(pages->sgl, sg, fb_info->size, i) + vgpu_unpin_dma_address(vgpu, + sg_dma_address(sg)); + } + sg_free_table(pages); kfree(pages); } @@ -152,6 +207,7 @@ static const struct drm_i915_gem_object_ops intel_vgpu_gem_ops = { static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, struct intel_vgpu_fb_info *info) { + static struct lock_class_key lock_class; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; @@ -161,7 +217,8 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, roundup(info->size, PAGE_SIZE)); - i915_gem_object_init(obj, &intel_vgpu_gem_ops); + i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class); + i915_gem_object_set_readonly(obj); obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = 0; @@ -491,15 +548,13 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id) obj->gvt_info = dmabuf_obj->info; - dmabuf = i915_gem_prime_export(dev, &obj->base, DRM_CLOEXEC | DRM_RDWR); + dmabuf = i915_gem_prime_export(&obj->base, DRM_CLOEXEC | DRM_RDWR); if (IS_ERR(dmabuf)) { gvt_vgpu_err("export dma-buf failed\n"); ret = PTR_ERR(dmabuf); goto out_free_gem; } - i915_gem_object_put(obj); - ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR); if (ret < 0) { gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret); @@ -524,6 +579,8 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id) file_count(dmabuf->file), kref_read(&obj->base.refcount)); + i915_gem_object_put(obj); + return dmabuf_fd; out_free_dmabuf: diff --git a/drivers/gpu/drm/i915/gvt/edid.h b/drivers/gpu/drm/i915/gvt/edid.h index f6dfc8b795ec..dfe0cbc6aad8 100644 --- a/drivers/gpu/drm/i915/gvt/edid.h +++ b/drivers/gpu/drm/i915/gvt/edid.h @@ -35,6 +35,10 @@ #ifndef _GVT_EDID_H_ #define _GVT_EDID_H_ +#include <linux/types.h> + +struct intel_vgpu; + #define EDID_SIZE 128 #define EDID_ADDR 0x50 /* Linux hvm EDID addr */ diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index f21b8fb5b37e..d6e7a1189bad 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -534,7 +534,7 @@ static void clean_execlist(struct intel_vgpu *vgpu, struct intel_vgpu_submission *s = &vgpu->submission; intel_engine_mask_t tmp; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { kfree(s->ring_scan_buffer[engine->id]); s->ring_scan_buffer[engine->id] = NULL; s->ring_scan_buffer_size[engine->id] = 0; @@ -548,7 +548,7 @@ static void 
reset_execlist(struct intel_vgpu *vgpu, struct intel_engine_cs *engine; intel_engine_mask_t tmp; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) init_vgpu_execlist(vgpu, engine->id); } diff --git a/drivers/gpu/drm/i915/gvt/execlist.h b/drivers/gpu/drm/i915/gvt/execlist.h index 5ccc2c695848..5c0c1fd30c83 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.h +++ b/drivers/gpu/drm/i915/gvt/execlist.h @@ -35,6 +35,8 @@ #ifndef _GVT_EXECLIST_H_ #define _GVT_EXECLIST_H_ +#include <linux/types.h> + struct execlist_ctx_descriptor_format { union { u32 ldw; diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.h b/drivers/gpu/drm/i915/gvt/fb_decoder.h index 60c155085029..67b6ede9e707 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.h +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.h @@ -36,6 +36,8 @@ #ifndef _GVT_FB_DECODER_H_ #define _GVT_FB_DECODER_H_ +#include <linux/types.h> + #define _PLANE_CTL_FORMAT_SHIFT 24 #define _PLANE_CTL_TILED_SHIFT 10 #define _PIPE_V_SRCSZ_SHIFT 0 diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index 049775e8e350..b0c1fda32977 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -146,7 +146,7 @@ void intel_gvt_free_firmware(struct intel_gvt *gvt) clean_firmware_sysfs(gvt); kfree(gvt->firmware.cfg_space); - kfree(gvt->firmware.mmio); + vfree(gvt->firmware.mmio); } static int verify_firmware(struct intel_gvt *gvt, @@ -229,7 +229,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt) firmware->cfg_space = mem; - mem = kmalloc(info->mmio_size, GFP_KERNEL); + mem = vmalloc(info->mmio_size); if (!mem) { kfree(path); kfree(firmware->cfg_space); diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 4b04af569c05..4a4828074cb7 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1282,7 +1282,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, return -EINVAL; default: GEM_BUG_ON(1); - }; + } /* direct shadow */ ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size, @@ -1956,7 +1956,11 @@ void _intel_vgpu_mm_release(struct kref *mm_ref) if (mm->type == INTEL_GVT_MM_PPGTT) { list_del(&mm->ppgtt_mm.list); + + mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); list_del(&mm->ppgtt_mm.lru_list); + mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); + invalidate_ppgtt_mm(mm); } else { vfree(mm->ggtt_mm.virtual_ggtt); diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 42d0394f0de2..88789316807d 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -205,17 +205,18 @@ struct intel_vgpu_gtt { struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX]; }; -extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); -extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu); +int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); +void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu); void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old); void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu); -extern int intel_gvt_init_gtt(struct intel_gvt *gvt); +int intel_gvt_init_gtt(struct intel_gvt *gvt); void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu); -extern void intel_gvt_clean_gtt(struct intel_gvt *gvt); +void intel_gvt_clean_gtt(struct intel_gvt *gvt); -extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level, void *root_entry); +struct intel_vgpu_mm 
*intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu, + int page_table_level, + void *root_entry); struct intel_vgpu_oos_page { struct intel_vgpu_ppgtt_spt *spt; diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 43f4242062dd..8f37eefa0a02 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -375,9 +375,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) } gvt->idle_vgpu = vgpu; - ret = intel_gvt_debugfs_init(gvt); - if (ret) - gvt_err("debugfs registration failed, go on.\n"); + intel_gvt_debugfs_init(gvt); gvt_dbg_core("gvt device initialization is done\n"); dev_priv->gvt = gvt; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 7a1fe44d45af..b47c6acaf9c0 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -334,6 +334,10 @@ struct intel_gvt { struct { struct engine_mmio *mmio; int ctx_mmio_count[I915_NUM_ENGINES]; + u32 *tlb_mmio_offset_list; + u32 tlb_mmio_offset_list_cnt; + u32 *mocs_mmio_offset_list; + u32 mocs_mmio_offset_list_cnt; } engine_mmio_list; struct dentry *debugfs_root; @@ -682,9 +686,9 @@ static inline void intel_gvt_mmio_set_in_ctx( gvt->mmio.mmio_attribute[offset >> 2] |= F_IN_CTX; } -int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu); +void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu); void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu); -int intel_gvt_debugfs_init(struct intel_gvt *gvt); +void intel_gvt_debugfs_init(struct intel_gvt *gvt); void intel_gvt_debugfs_clean(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 25f78196b964..6d28d72e6c7e 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -341,6 +341,10 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id); engine_mask |= BIT(VCS1); } + if (data & GEN9_GRDOM_GUC) { + gvt_dbg_mmio("vgpu%d: request GUC Reset\n", vgpu->id); + vgpu_vreg_t(vgpu, GUC_STATUS) |= GS_MIA_IN_RESET; + } engine_mask &= INTEL_INFO(vgpu->gvt->dev_priv)->engine_mask; } @@ -460,6 +464,7 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, static i915_reg_t force_nonpriv_white_list[] = { GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec) GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248) + PS_INVOCATION_COUNT,//_MMIO(0x2348) GEN8_CS_CHICKEN1,//_MMIO(0x2580) _MMIO(0x2690), _MMIO(0x2694), @@ -508,7 +513,7 @@ static inline bool in_whitelist(unsigned int reg) static int force_nonpriv_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - u32 reg_nonpriv = *(u32 *)p_data; + u32 reg_nonpriv = (*(u32 *)p_data) & REG_GENMASK(25, 2); int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); u32 ring_base; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -528,7 +533,7 @@ static int force_nonpriv_write(struct intel_vgpu *vgpu, bytes); } else gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n", - vgpu->id, reg_nonpriv, offset); + vgpu->id, *(u32 *)p_data, offset); return 0; } @@ -819,13 +824,16 @@ static int trigger_aux_channel_interrupt(struct intel_vgpu *vgpu, struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; enum intel_gvt_event_type event; - if (reg == _DPA_AUX_CH_CTL) + if (reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_A))) event = AUX_CHANNEL_A; - else if (reg == _PCH_DPB_AUX_CH_CTL || reg == _DPB_AUX_CH_CTL) + else if (reg == 
_PCH_DPB_AUX_CH_CTL || + reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_B))) event = AUX_CHANNEL_B; - else if (reg == _PCH_DPC_AUX_CH_CTL || reg == _DPC_AUX_CH_CTL) + else if (reg == _PCH_DPC_AUX_CH_CTL || + reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_C))) event = AUX_CHANNEL_C; - else if (reg == _PCH_DPD_AUX_CH_CTL || reg == _DPD_AUX_CH_CTL) + else if (reg == _PCH_DPD_AUX_CH_CTL || + reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_D))) event = AUX_CHANNEL_D; else { WARN_ON(true); @@ -1632,6 +1640,16 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu, return 0; } +static int guc_status_read(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, + unsigned int bytes) +{ + /* keep MIA_IN_RESET before clearing */ + read_vreg(vgpu, offset, p_data, bytes); + vgpu_vreg(vgpu, offset) &= ~GS_MIA_IN_RESET; + return 0; +} + static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2668,10 +2686,12 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(EDP_PSR_IMR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write); MMIO_DH(EDP_PSR_IIR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write); + MMIO_DH(GUC_STATUS, D_ALL, guc_status_read, NULL); + return 0; } -static int init_broadwell_mmio_info(struct intel_gvt *gvt) +static int init_bdw_mmio_info(struct intel_gvt *gvt) { struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; @@ -2796,7 +2816,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS); MMIO_D(WM_MISC, D_BDW); - MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW); + MMIO_D(_MMIO(_SRD_CTL_EDP), D_BDW); MMIO_D(_MMIO(0x6671c), D_BDW_PLUS); MMIO_D(_MMIO(0x66c00), D_BDW_PLUS); @@ -2872,11 +2892,11 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL); - MMIO_F(_MMIO(_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(DP_AUX_CH_CTL(AUX_CH_B), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_MMIO(_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(DP_AUX_CH_CTL(AUX_CH_C), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_MMIO(_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(DP_AUX_CH_CTL(AUX_CH_D), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); MMIO_D(HSW_PWR_WELL_CTL1, D_SKL_PLUS); @@ -3360,20 +3380,20 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) goto err; if (IS_BROADWELL(dev_priv)) { - ret = init_broadwell_mmio_info(gvt); + ret = init_bdw_mmio_info(gvt); if (ret) goto err; } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - ret = init_broadwell_mmio_info(gvt); + ret = init_bdw_mmio_info(gvt); if (ret) goto err; ret = init_skl_mmio_info(gvt); if (ret) goto err; } else if (IS_BROXTON(dev_priv)) { - ret = init_broadwell_mmio_info(gvt); + ret = init_bdw_mmio_info(gvt); if (ret) goto err; ret = init_skl_mmio_info(gvt); @@ -3417,6 +3437,10 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt, } for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) { + /* pvinfo data doesn't come from hw mmio */ + if (i915_mmio_reg_offset(block->offset) == VGT_PVINFO_PAGE) + continue; + for (j = 0; j < block->size; j += 4) { ret = handler(gvt, i915_mmio_reg_offset(block->offset) + j, diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 4862fb12778e..b17c4a1599cd 100644 --- 
a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -33,6 +33,10 @@ #ifndef _GVT_HYPERCALL_H_ #define _GVT_HYPERCALL_H_ +#include <linux/types.h> + +struct device; + enum hypervisor_type { INTEL_GVT_HYPERVISOR_XEN = 0, INTEL_GVT_HYPERVISOR_KVM, @@ -62,6 +66,8 @@ struct intel_gvt_mpt { unsigned long size, dma_addr_t *dma_addr); void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); + int (*dma_pin_guest_page)(unsigned long handle, dma_addr_t dma_addr); + int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, unsigned long mfn, unsigned int nr, bool map); int (*set_trap_area)(unsigned long handle, u64 start, u64 end, diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index 951681813230..11accd3e1023 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -672,7 +672,7 @@ void intel_gvt_clean_irq(struct intel_gvt *gvt) hrtimer_cancel(&irq->vblank_timer.timer); } -#define VBLNAK_TIMER_PERIOD 16000000 +#define VBLANK_TIMER_PERIOD 16000000 /** * intel_gvt_init_irq - initialize GVT-g IRQ emulation subsystem @@ -704,7 +704,7 @@ int intel_gvt_init_irq(struct intel_gvt *gvt) hrtimer_init(&vblank_timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); vblank_timer->timer.function = vblank_timer_fn; - vblank_timer->period = VBLNAK_TIMER_PERIOD; + vblank_timer->period = VBLANK_TIMER_PERIOD; return 0; } diff --git a/drivers/gpu/drm/i915/gvt/interrupt.h b/drivers/gpu/drm/i915/gvt/interrupt.h index 5313fb1b33e1..fcd663811d37 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.h +++ b/drivers/gpu/drm/i915/gvt/interrupt.h @@ -32,6 +32,8 @@ #ifndef _GVT_INTERRUPT_H_ #define _GVT_INTERRUPT_H_ +#include <linux/types.h> + enum intel_gvt_event_type { RCS_MI_USER_INTERRUPT = 0, RCS_DEBUG, @@ -135,6 +137,7 @@ enum intel_gvt_event_type { struct intel_gvt_irq; struct intel_gvt; +struct intel_vgpu; typedef void (*gvt_event_virt_handler_t)(struct intel_gvt_irq *irq, enum intel_gvt_event_type event, struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 23aa3e50cbf8..3259a1fa69e1 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1306,7 +1306,6 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, unsigned int i; int ret; struct vfio_region_info_cap_sparse_mmap *sparse = NULL; - size_t size; int nr_areas = 1; int cap_type_id; @@ -1349,9 +1348,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, VFIO_REGION_INFO_FLAG_WRITE; info.size = gvt_aperture_sz(vgpu->gvt); - size = sizeof(*sparse) + - (nr_areas * sizeof(*sparse->areas)); - sparse = kzalloc(size, GFP_KERNEL); + sparse = kzalloc(struct_size(sparse, areas, nr_areas), + GFP_KERNEL); if (!sparse) return -ENOMEM; @@ -1416,9 +1414,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, switch (cap_type_id) { case VFIO_REGION_INFO_CAP_SPARSE_MMAP: ret = vfio_info_add_capability(&caps, - &sparse->header, sizeof(*sparse) + - (sparse->nr_areas * - sizeof(*sparse->areas))); + &sparse->header, + struct_size(sparse, areas, + sparse->nr_areas)); if (ret) { kfree(sparse); return ret; @@ -1566,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "\n"); } -static ssize_t -hw_id_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct mdev_device *mdev = mdev_from_dev(dev); - - if (mdev) { - struct intel_vgpu *vgpu = (struct 
intel_vgpu *) - mdev_get_drvdata(mdev); - return sprintf(buf, "%u\n", - vgpu->submission.shadow[0]->gem_context->hw_id); - } - return sprintf(buf, "\n"); -} - static DEVICE_ATTR_RO(vgpu_id); -static DEVICE_ATTR_RO(hw_id); static struct attribute *intel_vgpu_attrs[] = { &dev_attr_vgpu_id.attr, - &dev_attr_hw_id.attr, NULL }; @@ -1798,9 +1779,6 @@ static int kvmgt_guest_init(struct mdev_device *mdev) "kvmgt_nr_cache_entries", 0444, vgpu->debugfs, &vgpu->vdev.nr_cache_entries); - if (!info->debugfs_cache_entries) - gvt_vgpu_err("Cannot create kvmgt debugfs entry\n"); - return 0; } @@ -1938,6 +1916,28 @@ err_unlock: return ret; } +static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr) +{ + struct kvmgt_guest_info *info; + struct gvt_dma *entry; + int ret = 0; + + if (!handle_valid(handle)) + return -ENODEV; + + info = (struct kvmgt_guest_info *)handle; + + mutex_lock(&info->vgpu->vdev.cache_lock); + entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr); + if (entry) + kref_get(&entry->ref); + else + ret = -ENOMEM; + mutex_unlock(&info->vgpu->vdev.cache_lock); + + return ret; +} + static void __gvt_dma_release(struct kref *ref) { struct gvt_dma *entry = container_of(ref, typeof(*entry), ref); @@ -2049,6 +2049,7 @@ static struct intel_gvt_mpt kvmgt_mpt = { .gfn_to_mfn = kvmgt_gfn_to_pfn, .dma_map_guest_page = kvmgt_dma_map_guest_page, .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, + .dma_pin_guest_page = kvmgt_dma_pin_guest_page, .set_opregion = kvmgt_set_opregion, .set_edid = kvmgt_set_edid, .get_vfio_device = kvmgt_get_vfio_device, diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 5874f1cb4306..2e68f4b02c94 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -36,6 +36,8 @@ #ifndef _GVT_MMIO_H_ #define _GVT_MMIO_H_ +#include <linux/types.h> + struct intel_gvt; struct intel_vgpu; diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 2998999e8568..aaf15916d29a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -35,6 +35,7 @@ #include "i915_drv.h" #include "gt/intel_context.h" +#include "gt/intel_ring.h" #include "gvt.h" #include "trace.h" @@ -148,19 +149,27 @@ static struct { u32 l3cc_table[GEN9_MOCS_SIZE / 2]; } gen9_render_mocs; +static u32 gen9_mocs_mmio_offset_list[] = { + [RCS0] = 0xc800, + [VCS0] = 0xc900, + [VCS1] = 0xca00, + [BCS0] = 0xcc00, + [VECS0] = 0xcb00, +}; + static void load_render_mocs(struct drm_i915_private *dev_priv) { + struct intel_gvt *gvt = dev_priv->gvt; i915_reg_t offset; - u32 regs[] = { - [RCS0] = 0xc800, - [VCS0] = 0xc900, - [VCS1] = 0xca00, - [BCS0] = 0xcc00, - [VECS0] = 0xcb00, - }; + u32 cnt = gvt->engine_mmio_list.mocs_mmio_offset_list_cnt; + u32 *regs = gvt->engine_mmio_list.mocs_mmio_offset_list; int ring_id, i; - for (ring_id = 0; ring_id < ARRAY_SIZE(regs); ring_id++) { + /* Platform doesn't have mocs mmios. 
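The mmio_context.c hunks above replace the function-local offset arrays with tables hung off gvt->engine_mmio_list, each paired with a count, so load_render_mocs() and handle_tlb_pending_event() can simply bail out on platforms that register no table. A standalone sketch of that table-plus-guard shape follows; the struct and identifier names in it are illustrative, not the driver's:

/*
 * Sketch of the "per-platform offset table + count + NULL guard" shape
 * used above; names and values are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

struct engine_mmio_list {
    const uint32_t *mocs_offsets;   /* NULL when the platform has no MOCS mmios */
    unsigned int mocs_offsets_cnt;
};

static const uint32_t gen9_mocs_offsets[] = {
    0xc800, 0xc900, 0xca00, 0xcc00, 0xcb00,
};

static void load_mocs(const struct engine_mmio_list *list)
{
    unsigned int i;

    if (!list->mocs_offsets)        /* platform registered no table: skip */
        return;

    for (i = 0; i < list->mocs_offsets_cnt; i++)
        printf("would save MOCS bank at 0x%x\n",
               (unsigned int)list->mocs_offsets[i]);
}

int main(void)
{
    const struct engine_mmio_list gen9 = {
        .mocs_offsets = gen9_mocs_offsets,
        .mocs_offsets_cnt = sizeof(gen9_mocs_offsets) / sizeof(gen9_mocs_offsets[0]),
    };
    const struct engine_mmio_list gen8 = { 0 };     /* no MOCS table registered */

    load_mocs(&gen9);
    load_mocs(&gen8);       /* silently skipped */
    return 0;
}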
*/ + if (!regs) + return; + + for (ring_id = 0; ring_id < cnt; ring_id++) { if (!HAS_ENGINE(dev_priv, ring_id)) continue; offset.reg = regs[ring_id]; @@ -327,22 +336,28 @@ out: return ret; } +static u32 gen8_tlb_mmio_offset_list[] = { + [RCS0] = 0x4260, + [VCS0] = 0x4264, + [VCS1] = 0x4268, + [BCS0] = 0x426c, + [VECS0] = 0x4270, +}; + static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_uncore *uncore = &dev_priv->uncore; struct intel_vgpu_submission *s = &vgpu->submission; + u32 *regs = vgpu->gvt->engine_mmio_list.tlb_mmio_offset_list; + u32 cnt = vgpu->gvt->engine_mmio_list.tlb_mmio_offset_list_cnt; enum forcewake_domains fw; i915_reg_t reg; - u32 regs[] = { - [RCS0] = 0x4260, - [VCS0] = 0x4264, - [VCS1] = 0x4268, - [BCS0] = 0x426c, - [VECS0] = 0x4270, - }; - if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) + if (!regs) + return; + + if (WARN_ON(ring_id >= cnt)) return; if (!test_and_clear_bit(ring_id, (void *)s->tlb_handle_pending)) @@ -565,10 +580,17 @@ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) { struct engine_mmio *mmio; - if (INTEL_GEN(gvt->dev_priv) >= 9) + if (INTEL_GEN(gvt->dev_priv) >= 9) { gvt->engine_mmio_list.mmio = gen9_engine_mmio_list; - else + gvt->engine_mmio_list.tlb_mmio_offset_list = gen8_tlb_mmio_offset_list; + gvt->engine_mmio_list.tlb_mmio_offset_list_cnt = ARRAY_SIZE(gen8_tlb_mmio_offset_list); + gvt->engine_mmio_list.mocs_mmio_offset_list = gen9_mocs_mmio_offset_list; + gvt->engine_mmio_list.mocs_mmio_offset_list_cnt = ARRAY_SIZE(gen9_mocs_mmio_offset_list); + } else { gvt->engine_mmio_list.mmio = gen8_engine_mmio_list; + gvt->engine_mmio_list.tlb_mmio_offset_list = gen8_tlb_mmio_offset_list; + gvt->engine_mmio_list.tlb_mmio_offset_list_cnt = ARRAY_SIZE(gen8_tlb_mmio_offset_list); + } for (mmio = gvt->engine_mmio_list.mmio; i915_mmio_reg_valid(mmio->reg); mmio++) { diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 0f9440128123..9ad224df9c68 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -255,6 +255,21 @@ static inline void intel_gvt_hypervisor_dma_unmap_guest_page( } /** + * intel_gvt_hypervisor_dma_pin_guest_page - pin guest dma buf + * @vgpu: a vGPU + * @dma_addr: guest dma addr + * + * Returns: + * 0 on success, negative error code if failed. 
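kvmgt_dma_pin_guest_page() above pins an already-mapped guest page by looking its dma_addr up in the per-vGPU cache under cache_lock and taking an extra kref, failing if the address was never mapped; intel_gvt_hypervisor_dma_pin_guest_page() is the thin wrapper the rest of GVT calls through the mpt table. A userspace sketch of the same look-up-and-reference pattern, with a plain mutex and counter standing in for the kernel primitives:

/*
 * Userspace sketch of "pin a cached mapping by taking an extra reference
 * under the cache lock"; not the kvmgt implementation.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <errno.h>
#include <pthread.h>

struct dma_entry {
    uint64_t dma_addr;
    unsigned int ref;       /* protected by cache_lock */
};

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static struct dma_entry cache[] = { { 0x1000, 1 }, { 0x2000, 1 } };

static int dma_pin_guest_page(uint64_t dma_addr)
{
    int ret = -ENOMEM;      /* mirrors the error returned by the hunk above */
    size_t i;

    pthread_mutex_lock(&cache_lock);
    for (i = 0; i < sizeof(cache) / sizeof(cache[0]); i++) {
        if (cache[i].dma_addr == dma_addr) {
            cache[i].ref++;     /* extra reference keeps the mapping alive */
            ret = 0;
            break;
        }
    }
    pthread_mutex_unlock(&cache_lock);
    return ret;
}

int main(void)
{
    printf("pin 0x1000 -> %d\n", dma_pin_guest_page(0x1000));   /* 0 */
    printf("pin 0x3000 -> %d\n", dma_pin_guest_page(0x3000));   /* -ENOMEM */
    return 0;
}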
+ */ +static inline int +intel_gvt_hypervisor_dma_pin_guest_page(struct intel_vgpu *vgpu, + dma_addr_t dma_addr) +{ + return intel_gvt_host.mpt->dma_pin_guest_page(vgpu->handle, dma_addr); +} + +/** * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN * @vgpu: a vGPU * @gfn: guest PFN diff --git a/drivers/gpu/drm/i915/gvt/page_track.h b/drivers/gpu/drm/i915/gvt/page_track.h index fa607a71c3c0..f6eb7135583c 100644 --- a/drivers/gpu/drm/i915/gvt/page_track.h +++ b/drivers/gpu/drm/i915/gvt/page_track.h @@ -25,6 +25,9 @@ #ifndef _GVT_PAGE_TRACK_H_ #define _GVT_PAGE_TRACK_H_ +#include <linux/types.h> + +struct intel_vgpu; struct intel_vgpu_page_track; typedef int (*gvt_page_track_handler_t)( diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.h b/drivers/gpu/drm/i915/gvt/sched_policy.h index 7b59e3e88b8b..3dacdad5f529 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.h +++ b/drivers/gpu/drm/i915/gvt/sched_policy.h @@ -34,6 +34,9 @@ #ifndef __GVT_SCHED_POLICY__ #define __GVT_SCHED_POLICY__ +struct intel_gvt; +struct intel_vgpu; + struct intel_gvt_sched_policy_ops { int (*init)(struct intel_gvt *gvt); void (*clean)(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 75baff657e43..685d1e04a5ff 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -35,11 +35,12 @@ #include <linux/kthread.h> -#include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_ring.h" #include "i915_drv.h" +#include "i915_gem_gtt.h" #include "gvt.h" #define RING_CTX_OFF(x) \ @@ -58,7 +59,7 @@ static void set_context_pdp_root_pointer( static void update_shadow_pdps(struct intel_vgpu_workload *workload) { struct drm_i915_gem_object *ctx_obj = - workload->req->hw_context->state->obj; + workload->req->context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -84,8 +85,8 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload, u32 *reg_state, bool save) { struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; - u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; - u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; + u32 ctx_oactxctrl = dev_priv->perf.ctx_oactxctrl_offset; + u32 ctx_flexeu0 = dev_priv->perf.ctx_flexeu0_offset; int i = 0; u32 flex_mmio[] = { i915_mmio_reg_offset(EU_PERF_CNTL0), @@ -129,7 +130,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) struct intel_gvt *gvt = vgpu->gvt; int ring_id = workload->ring_id; struct drm_i915_gem_object *ctx_obj = - workload->req->hw_context->state->obj; + workload->req->context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *dst; @@ -194,7 +195,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return -EFAULT; } - page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); + page = i915_gem_object_get_page(ctx_obj, i); dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, I915_GTT_PAGE_SIZE); @@ -204,9 +205,9 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return 0; } -static inline bool is_gvt_request(struct i915_request *req) +static inline bool is_gvt_request(struct i915_request *rq) { - return i915_gem_context_force_single_submission(req->gem_context); + return intel_context_force_single_submission(rq->context); } static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) @@ -291,9 +292,6 
@@ shadow_context_descriptor_update(struct intel_context *ce, * Update bits 0-11 of the context descriptor which includes flags * like GEN8_CTX_* cached in desc_template */ - desc &= U64_MAX << 12; - desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1); - desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT); desc |= workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; @@ -309,7 +307,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) u32 *cs; int err; - if (IS_GEN(req->i915, 9) && is_inhibit_context(req->hw_context)) + if (IS_GEN(req->i915, 9) && is_inhibit_context(req->context)) intel_vgpu_restore_inhibit_context(vgpu, req); /* @@ -365,10 +363,10 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) } static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, - struct i915_gem_context *ctx) + struct intel_context *ce) { struct intel_vgpu_mm *mm = workload->shadow_mm; - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm); + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm); int i = 0; if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { @@ -388,11 +386,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct i915_request *rq; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (workload->req) return 0; @@ -418,10 +413,9 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&vgpu->vgpu_lock); if (workload->shadow) return 0; @@ -532,7 +526,7 @@ static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx); struct i915_request *rq = workload->req; struct execlist_ring_context *shadow_ring_context = - (struct execlist_ring_context *)rq->hw_context->lrc_reg_state; + (struct execlist_ring_context *)rq->context->lrc_reg_state; shadow_ring_context->bb_per_ctx_ptr.val = (shadow_ring_context->bb_per_ctx_ptr.val & @@ -571,10 +565,18 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) return 0; } -static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) +static void update_vreg_in_ctx(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + u32 ring_base; + + ring_base = dev_priv->engine[workload->ring_id]->mmio_base; + vgpu_vreg_t(vgpu, RING_START(ring_base)) = workload->rb_start; +} + +static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) +{ struct intel_vgpu_shadow_bb *bb, *pos; if (list_empty(&workload->shadow_bb)) @@ -583,8 +585,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) bb = list_first_entry(&workload->shadow_bb, struct intel_vgpu_shadow_bb, list); - mutex_lock(&dev_priv->drm.struct_mutex); - list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { if (bb->obj) { if (bb->accessing) @@ -602,8 +602,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) list_del(&bb->list); kfree(bb); } - - mutex_unlock(&dev_priv->drm.struct_mutex); } static int 
prepare_workload(struct intel_vgpu_workload *workload) @@ -627,7 +625,7 @@ static int prepare_workload(struct intel_vgpu_workload *workload) update_shadow_pdps(workload); - set_context_ppgtt_from_shadow(workload, s->shadow[ring]->gem_context); + set_context_ppgtt_from_shadow(workload, s->shadow[ring]); ret = intel_vgpu_sync_oos_pages(workload->vgpu); if (ret) { @@ -678,7 +676,6 @@ err_unpin_mm: static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@ -687,7 +684,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ring_id, workload); mutex_lock(&vgpu->vgpu_lock); - mutex_lock(&dev_priv->drm.struct_mutex); ret = intel_gvt_workload_req_alloc(workload); if (ret) @@ -722,7 +718,6 @@ out: err_req: if (ret) workload->status = ret; - mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&vgpu->vgpu_lock); return ret; } @@ -789,7 +784,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) struct i915_request *rq = workload->req; struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; - struct drm_i915_gem_object *ctx_obj = rq->hw_context->state->obj; + struct drm_i915_gem_object *ctx_obj = rq->context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *src; @@ -837,7 +832,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) return; } - page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); + page = i915_gem_object_get_page(ctx_obj, i); src = kmap(page); intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, I915_GTT_PAGE_SIZE); @@ -880,7 +875,7 @@ void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, intel_engine_mask_t tmp; /* free the unsubmited workloads in the queues. */ - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { list_for_each_entry_safe(pos, n, &s->workload_q_head[engine->id], list) { list_del_init(&pos->list); @@ -1019,6 +1014,13 @@ static int workload_thread(void *priv) if (need_force_wake) intel_uncore_forcewake_get(&gvt->dev_priv->uncore, FORCEWAKE_ALL); + /* + * Update the vReg of the vGPU which submitted this + * workload. The vGPU may use these registers for checking + * the context state. The value comes from GPU commands + * in this workload. 
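As the comment above explains, the workload thread now mirrors the ring start from the queued workload into the vGPU's virtual RING_START register before dispatch, because the guest may check that register against its own context state. A minimal sketch of that mirroring step; the array and field names are made up for illustration:

/*
 * Sketch of copying a value taken from the guest's submitted workload into
 * the vGPU's virtual register file before dispatch; illustrative names only.
 */
#include <stdio.h>
#include <stdint.h>

#define NUM_RINGS 5

struct workload { unsigned int ring_id; uint32_t rb_start; };

static uint32_t vreg_ring_start[NUM_RINGS];     /* per-vGPU shadow of RING_START */

static void update_vreg_in_ctx(const struct workload *w)
{
    /* guest may read this back to sanity-check its own context state */
    vreg_ring_start[w->ring_id] = w->rb_start;
}

int main(void)
{
    struct workload w = { .ring_id = 0, .rb_start = 0x00345000 };

    update_vreg_in_ctx(&w);
    printf("vreg RING_START[0] = 0x%08x\n", (unsigned int)vreg_ring_start[0]);
    return 0;
}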
+ */ + update_vreg_in_ctx(workload); ret = dispatch_workload(workload); @@ -1157,7 +1159,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0); - i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->gem_context->vm)); + i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm)); for_each_engine(engine, vgpu->gvt->dev_priv, id) intel_context_unpin(s->shadow[id]); @@ -1215,30 +1217,41 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission *s, */ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) { + struct drm_i915_private *i915 = vgpu->gvt->dev_priv; struct intel_vgpu_submission *s = &vgpu->submission; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; + struct i915_ppgtt *ppgtt; enum intel_engine_id i; int ret; - ctx = i915_gem_context_create_gvt(&vgpu->gvt->dev_priv->drm); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); + ppgtt = i915_ppgtt_create(&i915->gt); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); - i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm)); + i915_context_ppgtt_root_save(s, ppgtt); - for_each_engine(engine, vgpu->gvt->dev_priv, i) { + for_each_engine(engine, i915, i) { struct intel_context *ce; INIT_LIST_HEAD(&s->workload_q_head[i]); s->shadow[i] = ERR_PTR(-EINVAL); - ce = i915_gem_context_get_engine(ctx, i); + ce = intel_context_create(engine); if (IS_ERR(ce)) { ret = PTR_ERR(ce); goto out_shadow_ctx; } + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(&ppgtt->vm); + intel_context_set_single_submission(ce); + + if (!USES_GUC_SUBMISSION(i915)) { /* Max ring buffer size */ + const unsigned int ring_size = 512 * SZ_4K; + + ce->ring = __intel_context_ring_size(ring_size); + } + ret = intel_context_pin(ce); intel_context_put(ce); if (ret) @@ -1264,18 +1277,19 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) atomic_set(&s->running_workload_num, 0); bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); - i915_gem_context_put(ctx); + i915_vm_put(&ppgtt->vm); return 0; out_shadow_ctx: - i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm)); - for_each_engine(engine, vgpu->gvt->dev_priv, i) { + i915_context_ppgtt_root_restore(s, ppgtt); + for_each_engine(engine, i915, i) { if (IS_ERR(s->shadow[i])) break; intel_context_unpin(s->shadow[i]); + intel_context_put(s->shadow[i]); } - i915_gem_context_put(ctx); + i915_vm_put(&ppgtt->vm); return ret; } @@ -1424,9 +1438,6 @@ static int prepare_mm(struct intel_vgpu_workload *workload) #define same_context(a, b) (((a)->context_id == (b)->context_id) && \ ((a)->lrca == (b)->lrca)) -#define get_last_workload(q) \ - (list_empty(q) ? 
NULL : container_of(q->prev, \ - struct intel_vgpu_workload, list)) /** * intel_vgpu_create_workload - create a vGPU workload * @vgpu: a vGPU @@ -1446,7 +1457,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, { struct intel_vgpu_submission *s = &vgpu->submission; struct list_head *q = workload_q_head(vgpu, ring_id); - struct intel_vgpu_workload *last_workload = get_last_workload(q); + struct intel_vgpu_workload *last_workload = NULL; struct intel_vgpu_workload *workload = NULL; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; u64 ring_context_gpa; @@ -1472,15 +1483,20 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, head &= RB_HEAD_OFF_MASK; tail &= RB_TAIL_OFF_MASK; - if (last_workload && same_context(&last_workload->ctx_desc, desc)) { - gvt_dbg_el("ring id %d cur workload == last\n", ring_id); - gvt_dbg_el("ctx head %x real head %lx\n", head, - last_workload->rb_tail); - /* - * cannot use guest context head pointer here, - * as it might not be updated at this time - */ - head = last_workload->rb_tail; + list_for_each_entry_reverse(last_workload, q, list) { + + if (same_context(&last_workload->ctx_desc, desc)) { + gvt_dbg_el("ring id %d cur workload == last\n", + ring_id); + gvt_dbg_el("ctx head %x real head %lx\n", head, + last_workload->rb_tail); + /* + * cannot use guest context head pointer here, + * as it might not be updated at this time + */ + head = last_workload->rb_tail; + break; + } } gvt_dbg_el("ring id %d begin a new workload\n", ring_id); @@ -1564,9 +1580,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, */ if (list_empty(workload_q_head(vgpu, ring_id))) { intel_runtime_pm_get(&dev_priv->runtime_pm); - mutex_lock(&dev_priv->drm.struct_mutex); ret = intel_gvt_scan_and_shadow_workload(workload); - mutex_unlock(&dev_priv->drm.struct_mutex); intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm); } diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 44ce3c2b9ac1..85bd9bf4f6ee 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -212,9 +212,9 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) */ void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu) { - mutex_lock(&vgpu->gvt->lock); + mutex_lock(&vgpu->vgpu_lock); vgpu->active = true; - mutex_unlock(&vgpu->gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); } /** @@ -420,9 +420,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_submission; - ret = intel_gvt_debugfs_add_vgpu(vgpu); - if (ret) - goto out_clean_sched_policy; + intel_gvt_debugfs_add_vgpu(vgpu); ret = intel_gvt_hypervisor_set_opregion(vgpu); if (ret) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 293e5bcc4b6c..b0a499753526 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -4,14 +4,16 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/debugobjects.h> + +#include "gt/intel_context.h" #include "gt/intel_engine_pm.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "i915_active.h" #include "i915_globals.h" -#define BKL(ref) (&(ref)->i915->drm.struct_mutex) - /* * Active refs memory management * @@ -25,55 +27,200 @@ static struct i915_global_active { } global; struct active_node { - struct i915_active_request base; + struct i915_active_fence base; struct i915_active *ref; struct rb_node node; u64 timeline; }; +static inline struct active_node * +node_from_active(struct 
i915_active_fence *active) +{ + return container_of(active, struct active_node, base); +} + +#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) + +static inline bool is_barrier(const struct i915_active_fence *active) +{ + return IS_ERR(rcu_access_pointer(active->fence)); +} + +static inline struct llist_node *barrier_to_ll(struct active_node *node) +{ + GEM_BUG_ON(!is_barrier(&node->base)); + return (struct llist_node *)&node->base.cb.node; +} + +static inline struct intel_engine_cs * +__barrier_to_engine(struct active_node *node) +{ + return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev); +} + +static inline struct intel_engine_cs * +barrier_to_engine(struct active_node *node) +{ + GEM_BUG_ON(!is_barrier(&node->base)); + return __barrier_to_engine(node); +} + +static inline struct active_node *barrier_from_ll(struct llist_node *x) +{ + return container_of((struct list_head *)x, + struct active_node, base.cb.node); +} + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) + +static void *active_debug_hint(void *addr) +{ + struct i915_active *ref = addr; + + return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref; +} + +static struct debug_obj_descr active_debug_desc = { + .name = "i915_active", + .debug_hint = active_debug_hint, +}; + +static void debug_active_init(struct i915_active *ref) +{ + debug_object_init(ref, &active_debug_desc); +} + +static void debug_active_activate(struct i915_active *ref) +{ + lockdep_assert_held(&ref->tree_lock); + if (!atomic_read(&ref->count)) /* before the first inc */ + debug_object_activate(ref, &active_debug_desc); +} + +static void debug_active_deactivate(struct i915_active *ref) +{ + lockdep_assert_held(&ref->tree_lock); + if (!atomic_read(&ref->count)) /* after the last dec */ + debug_object_deactivate(ref, &active_debug_desc); +} + +static void debug_active_fini(struct i915_active *ref) +{ + debug_object_free(ref, &active_debug_desc); +} + +static void debug_active_assert(struct i915_active *ref) +{ + debug_object_assert_init(ref, &active_debug_desc); +} + +#else + +static inline void debug_active_init(struct i915_active *ref) { } +static inline void debug_active_activate(struct i915_active *ref) { } +static inline void debug_active_deactivate(struct i915_active *ref) { } +static inline void debug_active_fini(struct i915_active *ref) { } +static inline void debug_active_assert(struct i915_active *ref) { } + +#endif + static void -__active_park(struct i915_active *ref) +__active_retire(struct i915_active *ref) { struct active_node *it, *n; + struct rb_root root; + unsigned long flags; - rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - GEM_BUG_ON(i915_active_request_isset(&it->base)); + GEM_BUG_ON(i915_active_is_idle(ref)); + + /* return the unused nodes to our slabcache -- flushing the allocator */ + if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags)) + return; + + GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); + debug_active_deactivate(ref); + + root = ref->tree; + ref->tree = RB_ROOT; + ref->cache = NULL; + + spin_unlock_irqrestore(&ref->tree_lock, flags); + + /* After the final retire, the entire struct may be freed */ + if (ref->retire) + ref->retire(ref); + + /* ... except if you wait on it, you must manage your own references! 
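__active_retire() above only tears the tracker down when the final reference is dropped while holding tree_lock (atomic_dec_and_lock_irqsave()), then steals the rbtree and releases it outside the lock. A userspace sketch of that shape, with a hand-rolled dec-and-lock analogue and a flat allocation standing in for the node tree:

/*
 * Userspace sketch of "only the final reference drops under the lock, then
 * the tree is torn down outside it"; the dec-and-lock helper is hand-rolled
 * and illustrative, not the kernel's atomic_dec_and_lock_irqsave().
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <pthread.h>
#include <stdatomic.h>

struct tracker {
    atomic_int count;
    pthread_mutex_t lock;
    int *nodes;     /* stand-in for the rbtree of per-timeline nodes */
};

/* Decrement; only when the count hits zero, return true with the lock held. */
static bool dec_and_lock(struct tracker *t)
{
    int old = atomic_load(&t->count);

    while (old > 1) {   /* fast path: not the last reference */
        if (atomic_compare_exchange_weak(&t->count, &old, old - 1))
            return false;
    }

    pthread_mutex_lock(&t->lock);
    if (atomic_fetch_sub(&t->count, 1) == 1)
        return true;    /* we were last; caller retires under the lock */
    pthread_mutex_unlock(&t->lock);
    return false;
}

static void tracker_put(struct tracker *t)
{
    int *stale;

    if (!dec_and_lock(t))
        return;

    stale = t->nodes;   /* steal the tree while holding the lock ... */
    t->nodes = NULL;
    pthread_mutex_unlock(&t->lock);

    free(stale);        /* ... and release it outside the lock */
    printf("retired\n");
}

int main(void)
{
    struct tracker t;

    pthread_mutex_init(&t.lock, NULL);
    atomic_init(&t.count, 2);
    t.nodes = calloc(4, sizeof(*t.nodes));

    tracker_put(&t);    /* not the last reference: nothing happens */
    tracker_put(&t);    /* last reference: tree torn down */
    return 0;
}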
*/ + wake_up_var(ref); + + rbtree_postorder_for_each_entry_safe(it, n, &root, node) { + GEM_BUG_ON(i915_active_fence_isset(&it->base)); kmem_cache_free(global.slab_cache, it); } - ref->tree = RB_ROOT; } static void -__active_retire(struct i915_active *ref) +active_work(struct work_struct *wrk) { - GEM_BUG_ON(!ref->count); - if (--ref->count) + struct i915_active *ref = container_of(wrk, typeof(*ref), work); + + GEM_BUG_ON(!atomic_read(&ref->count)); + if (atomic_add_unless(&ref->count, -1, 1)) return; - /* return the unused nodes to our slabcache */ - __active_park(ref); + __active_retire(ref); +} + +static void +active_retire(struct i915_active *ref) +{ + GEM_BUG_ON(!atomic_read(&ref->count)); + if (atomic_add_unless(&ref->count, -1, 1)) + return; + + if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) { + queue_work(system_unbound_wq, &ref->work); + return; + } + + __active_retire(ref); +} + +static inline struct dma_fence ** +__active_fence_slot(struct i915_active_fence *active) +{ + return (struct dma_fence ** __force)&active->fence; +} - ref->retire(ref); +static inline bool +active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct i915_active_fence *active = + container_of(cb, typeof(*active), cb); + + return cmpxchg(__active_fence_slot(active), fence, NULL) == fence; } static void -node_retire(struct i915_active_request *base, struct i915_request *rq) +node_retire(struct dma_fence *fence, struct dma_fence_cb *cb) { - __active_retire(container_of(base, struct active_node, base)->ref); + if (active_fence_cb(fence, cb)) + active_retire(container_of(cb, struct active_node, base.cb)->ref); } static void -last_retire(struct i915_active_request *base, struct i915_request *rq) +excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) { - __active_retire(container_of(base, struct i915_active, last)); + if (active_fence_cb(fence, cb)) + active_retire(container_of(cb, struct i915_active, excl.cb)); } -static struct i915_active_request * -active_instance(struct i915_active *ref, u64 idx) +static struct i915_active_fence * +active_instance(struct i915_active *ref, struct intel_timeline *tl) { - struct active_node *node; + struct active_node *node, *prealloc; struct rb_node **p, *parent; - struct i915_request *old; + u64 idx = tl->fence_context; /* * We track the most recently used timeline to skip a rbtree search @@ -81,20 +228,18 @@ active_instance(struct i915_active *ref, u64 idx) * at all. We can reuse the last slot if it is empty, that is * after the previous activity has been retired, or if it matches the * current timeline. - * - * Note that we allow the timeline to be active simultaneously in - * the rbtree and the last cache. We do this to avoid having - * to search and replace the rbtree element for a new timeline, with - * the cost being that we must be aware that the ref may be retired - * twice for the same timeline (as the older rbtree element will be - * retired before the new request added to last). 
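active_fence_cb() above uses cmpxchg() so that exactly one path, either the fence signalling or an explicit replacement, claims the slot and runs the retirement. A small C11-atomics sketch of that one-shot claim; the fence type here is a placeholder, not dma_fence:

/*
 * Sketch of the cmpxchg-based "exactly one caller claims the callback" idiom
 * from active_fence_cb(), using C11 atomics in place of the kernel helpers.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stdatomic.h>

struct fence { int id; };

/* Slot holding the fence we are currently tracking (NULL when idle). */
static _Atomic(struct fence *) slot;

/*
 * Returns true only for the caller that actually transitions the slot from
 * @fence to NULL; a concurrent replacement or a second signal loses the race
 * and must not run the retirement path.
 */
static bool claim_cb(struct fence *fence)
{
    struct fence *expected = fence;

    return atomic_compare_exchange_strong(&slot, &expected, NULL);
}

int main(void)
{
    struct fence f = { .id = 1 };

    atomic_store(&slot, &f);

    printf("first signal claims:  %d\n", claim_cb(&f));     /* 1 */
    printf("second signal claims: %d\n", claim_cb(&f));     /* 0: slot already NULL */
    return 0;
}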
*/ - old = i915_active_request_raw(&ref->last, BKL(ref)); - if (!old || old->fence.context == idx) - goto out; + node = READ_ONCE(ref->cache); + if (node && node->timeline == idx) + return &node->base; - /* Move the currently active fence into the rbtree */ - idx = old->fence.context; + /* Preallocate a replacement, just in case */ + prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); + if (!prealloc) + return NULL; + + spin_lock_irq(&ref->tree_lock); + GEM_BUG_ON(i915_active_is_idle(ref)); parent = NULL; p = &ref->tree.rb_node; @@ -102,8 +247,10 @@ active_instance(struct i915_active *ref, u64 idx) parent = *p; node = rb_entry(parent, struct active_node, node); - if (node->timeline == idx) - goto replace; + if (node->timeline == idx) { + kmem_cache_free(global.slab_cache, prealloc); + goto out; + } if (node->timeline < idx) p = &parent->rb_right; @@ -111,202 +258,428 @@ active_instance(struct i915_active *ref, u64 idx) p = &parent->rb_left; } - node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); - - /* kmalloc may retire the ref->last (thanks shrinker)! */ - if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) { - kmem_cache_free(global.slab_cache, node); - goto out; - } - - if (unlikely(!node)) - return ERR_PTR(-ENOMEM); - - i915_active_request_init(&node->base, NULL, node_retire); + node = prealloc; + __i915_active_fence_init(&node->base, NULL, node_retire); node->ref = ref; node->timeline = idx; rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); -replace: +out: + ref->cache = node; + spin_unlock_irq(&ref->tree_lock); + + BUILD_BUG_ON(offsetof(typeof(*node), base)); + return &node->base; +} + +void __i915_active_init(struct i915_active *ref, + int (*active)(struct i915_active *ref), + void (*retire)(struct i915_active *ref), + struct lock_class_key *mkey, + struct lock_class_key *wkey) +{ + unsigned long bits; + + debug_active_init(ref); + + ref->flags = 0; + ref->active = active; + ref->retire = ptr_unpack_bits(retire, &bits, 2); + if (bits & I915_ACTIVE_MAY_SLEEP) + ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; + + spin_lock_init(&ref->tree_lock); + ref->tree = RB_ROOT; + ref->cache = NULL; + + init_llist_head(&ref->preallocated_barriers); + atomic_set(&ref->count, 0); + __mutex_init(&ref->mutex, "i915_active", mkey); + __i915_active_fence_init(&ref->excl, NULL, excl_retire); + INIT_WORK(&ref->work, active_work); +#if IS_ENABLED(CONFIG_LOCKDEP) + lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0); +#endif +} + +static bool ____active_del_barrier(struct i915_active *ref, + struct active_node *node, + struct intel_engine_cs *engine) + +{ + struct llist_node *head = NULL, *tail = NULL; + struct llist_node *pos, *next; + + GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context); + /* - * Overwrite the previous active slot in the rbtree with last, - * leaving last zeroed. If the previous slot is still active, - * we must be careful as we now only expect to receive one retire - * callback not two, and so much undo the active counting for the - * overwritten slot. + * Rebuild the llist excluding our node. We may perform this + * outside of the kernel_context timeline mutex and so someone + * else may be manipulating the engine->barrier_tasks, in + * which case either we or they will be upset :) + * + * A second __active_del_barrier() will report failure to claim + * the active_node and the caller will just shrug and know not to + * claim ownership of its node. 
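The reworked active_instance() above checks a most-recently-used cache, preallocates a node before taking tree_lock, and then either links the preallocation into the rbtree or throws it away if a node for that timeline already exists. A userspace sketch of the preallocate-then-insert-or-discard pattern, with a sorted singly-linked list standing in for the rbtree:

/*
 * Userspace sketch of "preallocate outside the lock, then either link it in
 * or free it under the lock"; a sorted list stands in for the timeline tree,
 * and the unlocked cache read is only illustrative.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>

struct node { uint64_t timeline; struct node *next; };

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *tree;       /* sorted by timeline */
static struct node *cache;      /* most recently used node */

static struct node *lookup_or_insert(uint64_t idx)
{
    struct node *prealloc, **p, *it;

    if (cache && cache->timeline == idx)    /* fast path: same timeline again */
        return cache;

    prealloc = malloc(sizeof(*prealloc));   /* may sleep, so done before locking */
    if (!prealloc)
        return NULL;

    pthread_mutex_lock(&tree_lock);
    for (p = &tree; (it = *p) && it->timeline < idx; p = &it->next)
        ;
    if (it && it->timeline == idx) {
        free(prealloc);                     /* already tracked: discard */
    } else {
        prealloc->timeline = idx;           /* link the preallocated node in */
        prealloc->next = it;
        *p = prealloc;
        it = prealloc;
    }
    cache = it;
    pthread_mutex_unlock(&tree_lock);
    return it;
}

int main(void)
{
    printf("%p\n", (void *)lookup_or_insert(3));
    printf("%p\n", (void *)lookup_or_insert(3));    /* same node, via the cache */
    printf("%p\n", (void *)lookup_or_insert(1));    /* new node, inserted before 3 */
    return 0;
}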
+ * + * A concurrent i915_request_add_active_barriers() will miss adding + * any of the tasks, but we will try again on the next -- and since + * we are actively using the barrier, we know that there will be + * at least another opportunity when we idle. */ - if (i915_active_request_isset(&node->base)) { - /* Retire ourselves from the old rq->active_list */ - __list_del_entry(&node->base.link); - ref->count--; - GEM_BUG_ON(!ref->count); + llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) { + if (node == barrier_from_ll(pos)) { + node = NULL; + continue; + } + + pos->next = head; + head = pos; + if (!tail) + tail = pos; } - GEM_BUG_ON(list_empty(&ref->last.link)); - list_replace_init(&ref->last.link, &node->base.link); - node->base.request = fetch_and_zero(&ref->last.request); + if (head) + llist_add_batch(head, tail, &engine->barrier_tasks); -out: - return &ref->last; + return !node; } -void i915_active_init(struct drm_i915_private *i915, - struct i915_active *ref, - void (*retire)(struct i915_active *ref)) +static bool +__active_del_barrier(struct i915_active *ref, struct active_node *node) { - ref->i915 = i915; - ref->retire = retire; - ref->tree = RB_ROOT; - i915_active_request_init(&ref->last, NULL, last_retire); - init_llist_head(&ref->barriers); - ref->count = 0; + return ____active_del_barrier(ref, node, barrier_to_engine(node)); } int i915_active_ref(struct i915_active *ref, - u64 timeline, - struct i915_request *rq) + struct intel_timeline *tl, + struct dma_fence *fence) { - struct i915_active_request *active; - int err = 0; + struct i915_active_fence *active; + int err; + + lockdep_assert_held(&tl->mutex); /* Prevent reaping in case we malloc/wait while building the tree */ - i915_active_acquire(ref); + err = i915_active_acquire(ref); + if (err) + return err; - active = active_instance(ref, timeline); - if (IS_ERR(active)) { - err = PTR_ERR(active); + active = active_instance(ref, tl); + if (!active) { + err = -ENOMEM; goto out; } - if (!i915_active_request_isset(active)) - ref->count++; - __i915_active_request_set(active, rq); + if (is_barrier(active)) { /* proto-node used by our idle barrier */ + /* + * This request is on the kernel_context timeline, and so + * we can use it to substitute for the pending idle-barrer + * request that we want to emit on the kernel_context. 
+ */ + __active_del_barrier(ref, node_from_active(active)); + RCU_INIT_POINTER(active->fence, NULL); + atomic_dec(&ref->count); + } + if (!__i915_active_fence_set(active, fence)) + atomic_inc(&ref->count); - GEM_BUG_ON(!ref->count); out: i915_active_release(ref); return err; } -bool i915_active_acquire(struct i915_active *ref) +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) { - lockdep_assert_held(BKL(ref)); - return !ref->count++; + /* We expect the caller to manage the exclusive timeline ordering */ + GEM_BUG_ON(i915_active_is_idle(ref)); + + if (!__i915_active_fence_set(&ref->excl, f)) + atomic_inc(&ref->count); } -void i915_active_release(struct i915_active *ref) +bool i915_active_acquire_if_busy(struct i915_active *ref) { - lockdep_assert_held(BKL(ref)); - __active_retire(ref); + debug_active_assert(ref); + return atomic_add_unless(&ref->count, 1, 0); } -int i915_active_wait(struct i915_active *ref) +int i915_active_acquire(struct i915_active *ref) { - struct active_node *it, *n; - int ret = 0; + int err; - if (i915_active_acquire(ref)) - goto out_release; + if (i915_active_acquire_if_busy(ref)) + return 0; - ret = i915_active_request_retire(&ref->last, BKL(ref)); - if (ret) - goto out_release; + err = mutex_lock_interruptible(&ref->mutex); + if (err) + return err; - rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - ret = i915_active_request_retire(&it->base, BKL(ref)); - if (ret) - break; + if (likely(!i915_active_acquire_if_busy(ref))) { + if (ref->active) + err = ref->active(ref); + if (!err) { + spin_lock_irq(&ref->tree_lock); /* __active_retire() */ + debug_active_activate(ref); + atomic_inc(&ref->count); + spin_unlock_irq(&ref->tree_lock); + } } -out_release: - i915_active_release(ref); - return ret; + mutex_unlock(&ref->mutex); + + return err; +} + +void i915_active_release(struct i915_active *ref) +{ + debug_active_assert(ref); + active_retire(ref); } -int i915_request_await_active_request(struct i915_request *rq, - struct i915_active_request *active) +static void enable_signaling(struct i915_active_fence *active) { - struct i915_request *barrier = - i915_active_request_raw(active, &rq->i915->drm.struct_mutex); + struct dma_fence *fence; - return barrier ? 
i915_request_await_dma_fence(rq, &barrier->fence) : 0; + fence = i915_active_fence_get(active); + if (!fence) + return; + + dma_fence_enable_sw_signaling(fence); + dma_fence_put(fence); } -int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) +int i915_active_wait(struct i915_active *ref) { struct active_node *it, *n; int err = 0; - /* await allocates and so we need to avoid hitting the shrinker */ - if (i915_active_acquire(ref)) - goto out; /* was idle */ + might_sleep(); - err = i915_request_await_active_request(rq, &ref->last); - if (err) - goto out; + if (!i915_active_acquire_if_busy(ref)) + return 0; + /* Flush lazy signals */ + enable_signaling(&ref->excl); rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - err = i915_request_await_active_request(rq, &it->base); - if (err) - goto out; + if (is_barrier(&it->base)) /* unconnected idle barrier */ + continue; + + enable_signaling(&it->base); } + /* Any fence added after the wait begins will not be auto-signaled */ -out: i915_active_release(ref); + if (err) + return err; + + if (wait_var_event_interruptible(ref, i915_active_is_idle(ref))) + return -EINTR; + + flush_work(&ref->work); + return 0; +} + +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) +{ + int err = 0; + + if (rcu_access_pointer(ref->excl.fence)) { + struct dma_fence *fence; + + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&ref->excl.fence); + rcu_read_unlock(); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + } + + /* In the future we may choose to await on all fences */ + return err; } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref) { - GEM_BUG_ON(i915_active_request_isset(&ref->last)); + debug_active_fini(ref); + GEM_BUG_ON(atomic_read(&ref->count)); + GEM_BUG_ON(work_pending(&ref->work)); GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); - GEM_BUG_ON(ref->count); + mutex_destroy(&ref->mutex); } #endif +static inline bool is_idle_barrier(struct active_node *node, u64 idx) +{ + return node->timeline == idx && !i915_active_fence_isset(&node->base); +} + +static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) +{ + struct rb_node *prev, *p; + + if (RB_EMPTY_ROOT(&ref->tree)) + return NULL; + + spin_lock_irq(&ref->tree_lock); + GEM_BUG_ON(i915_active_is_idle(ref)); + + /* + * Try to reuse any existing barrier nodes already allocated for this + * i915_active, due to overlapping active phases there is likely a + * node kept alive (as we reuse before parking). We prefer to reuse + * completely idle barriers (less hassle in manipulating the llists), + * but otherwise any will do. + */ + if (ref->cache && is_idle_barrier(ref->cache, idx)) { + p = &ref->cache->node; + goto match; + } + + prev = NULL; + p = ref->tree.rb_node; + while (p) { + struct active_node *node = + rb_entry(p, struct active_node, node); + + if (is_idle_barrier(node, idx)) + goto match; + + prev = p; + if (node->timeline < idx) + p = p->rb_right; + else + p = p->rb_left; + } + + /* + * No quick match, but we did find the leftmost rb_node for the + * kernel_context. Walk the rb_tree in-order to see if there were + * any idle-barriers on this timeline that we missed, or just use + * the first pending barrier. 
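The i915_active_acquire_if_busy()/i915_active_acquire() pair introduced above splits acquisition into a lock-free fast path, take a reference only if one already exists (atomic_add_unless(&count, 1, 0)), and a mutex-protected slow path that performs the first-use setup. A sketch of that increment-unless-zero fast path in C11 atomics; the slow path here is only a stand-in:

/*
 * Sketch of the acquire-if-busy fast path: take a reference only if the
 * tracker already has one, never resurrecting it from zero.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stdatomic.h>

static atomic_int count;

static bool acquire_if_busy(void)
{
    int old = atomic_load(&count);

    do {
        if (old == 0)
            return false;   /* idle: caller must take the slow path */
    } while (!atomic_compare_exchange_weak(&count, &old, old + 1));

    return true;
}

static void first_acquire(void)
{
    /* slow-path stand-in: one-time setup, then publish the first count */
    atomic_store(&count, 1);
}

int main(void)
{
    printf("busy? %d\n", acquire_if_busy());        /* 0: still idle */
    first_acquire();
    printf("busy? %d\n", acquire_if_busy());        /* 1: piggybacks on the first ref */
    printf("count = %d\n", atomic_load(&count));    /* 2 */
    return 0;
}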
+ */ + for (p = prev; p; p = rb_next(p)) { + struct active_node *node = + rb_entry(p, struct active_node, node); + struct intel_engine_cs *engine; + + if (node->timeline > idx) + break; + + if (node->timeline < idx) + continue; + + if (is_idle_barrier(node, idx)) + goto match; + + /* + * The list of pending barriers is protected by the + * kernel_context timeline, which notably we do not hold + * here. i915_request_add_active_barriers() may consume + * the barrier before we claim it, so we have to check + * for success. + */ + engine = __barrier_to_engine(node); + smp_rmb(); /* serialise with add_active_barriers */ + if (is_barrier(&node->base) && + ____active_del_barrier(ref, node, engine)) + goto match; + } + + spin_unlock_irq(&ref->tree_lock); + + return NULL; + +match: + rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ + if (p == &ref->cache->node) + ref->cache = NULL; + spin_unlock_irq(&ref->tree_lock); + + return rb_entry(p, struct active_node, node); +} + int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = engine->i915; - struct llist_node *pos, *next; - unsigned long tmp; + intel_engine_mask_t tmp, mask = engine->mask; + struct llist_node *first = NULL, *last = NULL; + struct intel_gt *gt = engine->gt; int err; - GEM_BUG_ON(!engine->mask); - for_each_engine_masked(engine, i915, engine->mask, tmp) { - struct intel_context *kctx = engine->kernel_context; + GEM_BUG_ON(i915_active_is_idle(ref)); + + /* Wait until the previous preallocation is completed */ + while (!llist_empty(&ref->preallocated_barriers)) + cond_resched(); + + /* + * Preallocate a node for each physical engine supporting the target + * engine (remember virtual engines have more than one sibling). + * We can then use the preallocated nodes in + * i915_active_acquire_barrier() + */ + for_each_engine_masked(engine, gt, mask, tmp) { + u64 idx = engine->kernel_context->timeline->fence_context; + struct llist_node *prev = first; struct active_node *node; - node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); - if (unlikely(!node)) { - err = -ENOMEM; - goto unwind; + node = reuse_idle_barrier(ref, idx); + if (!node) { + node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); + if (!node) { + err = ENOMEM; + goto unwind; + } + + RCU_INIT_POINTER(node->base.fence, NULL); + node->base.cb.func = node_retire; + node->timeline = idx; + node->ref = ref; } - i915_active_request_init(&node->base, - (void *)engine, node_retire); - node->timeline = kctx->ring->timeline->fence_context; - node->ref = ref; - ref->count++; + if (!i915_active_fence_isset(&node->base)) { + /* + * Mark this as being *our* unconnected proto-node. + * + * Since this node is not in any list, and we have + * decoupled it from the rbtree, we can reuse the + * request to indicate this is an idle-barrier node + * and then we can use the rb_node and list pointers + * for our tracking of the pending barrier. 
+ */ + RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); + node->base.cb.node.prev = (void *)engine; + atomic_inc(&ref->count); + } + GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); + GEM_BUG_ON(barrier_to_engine(node) != engine); + first = barrier_to_ll(node); + first->next = prev; + if (!last) + last = first; intel_engine_pm_get(engine); - llist_add((struct llist_node *)&node->base.link, - &ref->barriers); } + GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); + llist_add_batch(first, last, &ref->preallocated_barriers); + return 0; unwind: - llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) { - struct active_node *node; + while (first) { + struct active_node *node = barrier_from_ll(first); - node = container_of((struct list_head *)pos, - typeof(*node), base.link); - engine = (void *)rcu_access_pointer(node->base.request); + first = first->next; + + atomic_dec(&ref->count); + intel_engine_pm_put(barrier_to_engine(node)); - intel_engine_pm_put(engine); kmem_cache_free(global.slab_cache, node); } return err; @@ -315,68 +688,160 @@ unwind: void i915_active_acquire_barrier(struct i915_active *ref) { struct llist_node *pos, *next; + unsigned long flags; - i915_active_acquire(ref); + GEM_BUG_ON(i915_active_is_idle(ref)); - llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) { - struct intel_engine_cs *engine; - struct active_node *node; + /* + * Transfer the list of preallocated barriers into the + * i915_active rbtree, but only as proto-nodes. They will be + * populated by i915_request_add_active_barriers() to point to the + * request that will eventually release them. + */ + llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { + struct active_node *node = barrier_from_ll(pos); + struct intel_engine_cs *engine = barrier_to_engine(node); struct rb_node **p, *parent; - node = container_of((struct list_head *)pos, - typeof(*node), base.link); - - engine = (void *)rcu_access_pointer(node->base.request); - RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN)); - + spin_lock_irqsave_nested(&ref->tree_lock, flags, + SINGLE_DEPTH_NESTING); parent = NULL; p = &ref->tree.rb_node; while (*p) { + struct active_node *it; + parent = *p; - if (rb_entry(parent, - struct active_node, - node)->timeline < node->timeline) + + it = rb_entry(parent, struct active_node, node); + if (it->timeline < node->timeline) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); + spin_unlock_irqrestore(&ref->tree_lock, flags); - llist_add((struct llist_node *)&node->base.link, - &engine->barrier_tasks); + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + llist_add(barrier_to_ll(node), &engine->barrier_tasks); intel_engine_pm_put(engine); } - i915_active_release(ref); } -void i915_request_add_barriers(struct i915_request *rq) +static struct dma_fence **ll_to_fence_slot(struct llist_node *node) +{ + return __active_fence_slot(&barrier_from_ll(node)->base); +} + +void i915_request_add_active_barriers(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct llist_node *node, *next; + unsigned long flags; - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) - list_add_tail((struct list_head *)node, &rq->active_list); + GEM_BUG_ON(!intel_context_is_barrier(rq->context)); + GEM_BUG_ON(intel_engine_is_virtual(engine)); + GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline); + + node = llist_del_all(&engine->barrier_tasks); + if 
(!node) + return; + /* + * Attach the list of proto-fences to the in-flight request such + * that the parent i915_active will be released when this request + * is retired. + */ + spin_lock_irqsave(&rq->lock, flags); + llist_for_each_safe(node, next, node) { + /* serialise with reuse_idle_barrier */ + smp_store_mb(*ll_to_fence_slot(node), &rq->fence); + list_add_tail((struct list_head *)node, &rq->fence.cb_list); + } + spin_unlock_irqrestore(&rq->lock, flags); } -int i915_active_request_set(struct i915_active_request *active, - struct i915_request *rq) +/* + * __i915_active_fence_set: Update the last active fence along its timeline + * @active: the active tracker + * @fence: the new fence (under construction) + * + * Records the new @fence as the last active fence along its timeline in + * this active tracker, moving the tracking callbacks from the previous + * fence onto this one. Returns the previous fence (if not already completed), + * which the caller must ensure is executed before the new fence. To ensure + * that the order of fences within the timeline of the i915_active_fence is + * understood, it should be locked by the caller. + */ +struct dma_fence * +__i915_active_fence_set(struct i915_active_fence *active, + struct dma_fence *fence) { - int err; + struct dma_fence *prev; + unsigned long flags; - /* Must maintain ordering wrt previous active requests */ - err = i915_request_await_active_request(rq, active); - if (err) - return err; + if (fence == rcu_access_pointer(active->fence)) + return fence; - __i915_active_request_set(active, rq); - return 0; + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); + + /* + * Consider that we have two threads arriving (A and B), with + * C already resident as the active->fence. + * + * A does the xchg first, and so it sees C or NULL depending + * on the timing of the interrupt handler. If it is NULL, the + * previous fence must have been signaled and we know that + * we are first on the timeline. If it is still present, + * we acquire the lock on that fence and serialise with the interrupt + * handler, in the process removing it from any future interrupt + * callback. A will then wait on C before executing (if present). + * + * As B is second, it sees A as the previous fence and so waits for + * it to complete its transition and takes over the occupancy for + * itself -- remembering that it needs to wait on A before executing. + * + * Note the strong ordering of the timeline also provides consistent + * nesting rules for the fence->lock; the inner lock is always the + * older lock. + */ + spin_lock_irqsave(fence->lock, flags); + prev = xchg(__active_fence_slot(active), fence); + if (prev) { + GEM_BUG_ON(prev == fence); + spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); + __list_del_entry(&active->cb.node); + spin_unlock(prev->lock); /* serialise with prev->cb_list */ + } + GEM_BUG_ON(rcu_access_pointer(active->fence) != fence); + list_add_tail(&active->cb.node, &fence->cb_list); + spin_unlock_irqrestore(fence->lock, flags); + + return prev; +} + +int i915_active_fence_set(struct i915_active_fence *active, + struct i915_request *rq) +{ + struct dma_fence *fence; + int err = 0; + + /* Must maintain timeline ordering wrt previous active requests */ + rcu_read_lock(); + fence = __i915_active_fence_set(active, &rq->fence); + if (fence) /* but the previous fence may not belong to that timeline! 
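__i915_active_fence_set(), documented above, atomically installs the new fence as the last one on the timeline and returns the previous occupant so the caller can order the new work after it. The reduced sketch below keeps only the slot exchange and that ordering contract; the callback re-threading between cb_lists done by the real helper is left out:

/*
 * Reduced sketch of the "swap in the new fence, hand back the previous one
 * for ordering" contract; placeholder types, not dma_fence.
 */
#include <stdio.h>
#include <stdatomic.h>

struct fence { int seqno; };

static _Atomic(struct fence *) last_fence;

static struct fence *active_fence_set(struct fence *fence)
{
    /* new fence becomes the tracked one; previous (if any) is returned */
    return atomic_exchange(&last_fence, fence);
}

int main(void)
{
    struct fence a = { .seqno = 1 }, b = { .seqno = 2 };
    struct fence *prev;

    prev = active_fence_set(&a);
    printf("prev before a: %s\n", prev ? "set" : "none");

    prev = active_fence_set(&b);
    if (prev)   /* caller must wait on (or chain after) the old fence */
        printf("b must execute after seqno %d\n", prev->seqno);
    return 0;
}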
*/ + fence = dma_fence_get_rcu(fence); + rcu_read_unlock(); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + + return err; } -void i915_active_retire_noop(struct i915_active_request *active, - struct i915_request *request) +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) { - /* Space left intentionally blank */ + active_fence_cb(fence, cb); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index c14eebf6d074..51e1e854ca55 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -12,6 +12,10 @@ #include "i915_active_types.h" #include "i915_request.h" +struct i915_request; +struct intel_engine_cs; +struct intel_timeline; + /* * We treat requests as fences. This is not be to confused with our * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. @@ -28,319 +32,95 @@ * write access so that we can perform concurrent read operations between * the CPU and GPU engines, as well as waiting for all rendering to * complete, or waiting for the last GPU user of a "fence register". The - * object then embeds a #i915_active_request to track the most recent (in + * object then embeds a #i915_active_fence to track the most recent (in * retirement order) request relevant for the desired mode of access. - * The #i915_active_request is updated with i915_active_request_set() to + * The #i915_active_fence is updated with i915_active_fence_set() to * track the most recent fence request, typically this is done as part of * i915_vma_move_to_active(). * - * When the #i915_active_request completes (is retired), it will + * When the #i915_active_fence completes (is retired), it will * signal its completion to the owner through a callback as well as mark - * itself as idle (i915_active_request.request == NULL). The owner + * itself as idle (i915_active_fence.request == NULL). The owner * can then perform any action, such as delayed freeing of an active * resource including itself. */ -void i915_active_retire_noop(struct i915_active_request *active, - struct i915_request *request); +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb); /** - * i915_active_request_init - prepares the activity tracker for use + * __i915_active_fence_init - prepares the activity tracker for use * @active - the active tracker - * @rq - initial request to track, can be NULL + * @fence - initial fence to track, can be NULL * @func - a callback when then the tracker is retired (becomes idle), * can be NULL * - * i915_active_request_init() prepares the embedded @active struct for use as - * an activity tracker, that is for tracking the last known active request - * associated with it. When the last request becomes idle, when it is retired + * i915_active_fence_init() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active fence + * associated with it. When the last fence becomes idle, when it is retired * after completion, the optional callback @func is invoked. 
*/ static inline void -i915_active_request_init(struct i915_active_request *active, - struct i915_request *rq, - i915_active_retire_fn retire) +__i915_active_fence_init(struct i915_active_fence *active, + void *fence, + dma_fence_func_t fn) { - RCU_INIT_POINTER(active->request, rq); - INIT_LIST_HEAD(&active->link); - active->retire = retire ?: i915_active_retire_noop; + RCU_INIT_POINTER(active->fence, fence); + active->cb.func = fn ?: i915_active_noop; } -#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL) - -/** - * i915_active_request_set - updates the tracker to watch the current request - * @active - the active tracker - * @request - the request to watch - * - * __i915_active_request_set() watches the given @request for completion. Whilst - * that @request is busy, the @active reports busy. When that @request is - * retired, the @active tracker is updated to report idle. - */ -static inline void -__i915_active_request_set(struct i915_active_request *active, - struct i915_request *request) -{ - list_move(&active->link, &request->active_list); - rcu_assign_pointer(active->request, request); -} +#define INIT_ACTIVE_FENCE(A) \ + __i915_active_fence_init((A), NULL, NULL) -int __must_check -i915_active_request_set(struct i915_active_request *active, - struct i915_request *rq); +struct dma_fence * +__i915_active_fence_set(struct i915_active_fence *active, + struct dma_fence *fence); /** - * i915_active_request_set_retire_fn - updates the retirement callback + * i915_active_fence_set - updates the tracker to watch the current fence * @active - the active tracker - * @fn - the routine called when the request is retired - * @mutex - struct_mutex used to guard retirements + * @rq - the request to watch * - * i915_active_request_set_retire_fn() updates the function pointer that - * is called when the final request associated with the @active tracker - * is retired. + * i915_active_fence_set() watches the given @rq for completion. While + * that @rq is busy, the @active reports busy. When that @rq is signaled + * (or else retired) the @active tracker is updated to report idle. */ -static inline void -i915_active_request_set_retire_fn(struct i915_active_request *active, - i915_active_retire_fn fn, - struct mutex *mutex) -{ - lockdep_assert_held(mutex); - active->retire = fn ?: i915_active_retire_noop; -} - -/** - * i915_active_request_raw - return the active request - * @active - the active tracker - * - * i915_active_request_raw() returns the current request being tracked, or NULL. - * It does not obtain a reference on the request for the caller, so the caller - * must hold struct_mutex. - */ -static inline struct i915_request * -i915_active_request_raw(const struct i915_active_request *active, - struct mutex *mutex) -{ - return rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); -} - -/** - * i915_active_request_peek - report the active request being monitored - * @active - the active tracker - * - * i915_active_request_peek() returns the current request being tracked if - * still active, or NULL. It does not obtain a reference on the request - * for the caller, so the caller must hold struct_mutex. 
- */ -static inline struct i915_request * -i915_active_request_peek(const struct i915_active_request *active, - struct mutex *mutex) -{ - struct i915_request *request; - - request = i915_active_request_raw(active, mutex); - if (!request || i915_request_completed(request)) - return NULL; - - return request; -} - -/** - * i915_active_request_get - return a reference to the active request - * @active - the active tracker - * - * i915_active_request_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold struct_mutex. - */ -static inline struct i915_request * -i915_active_request_get(const struct i915_active_request *active, - struct mutex *mutex) -{ - return i915_request_get(i915_active_request_peek(active, mutex)); -} - -/** - * __i915_active_request_get_rcu - return a reference to the active request - * @active - the active tracker - * - * __i915_active_request_get() returns a reference to the active request, - * or NULL if the active tracker is idle. The caller must hold the RCU read - * lock, but the returned pointer is safe to use outside of RCU. - */ -static inline struct i915_request * -__i915_active_request_get_rcu(const struct i915_active_request *active) -{ - /* - * Performing a lockless retrieval of the active request is super - * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing - * slab of request objects will not be freed whilst we hold the - * RCU read lock. It does not guarantee that the request itself - * will not be freed and then *reused*. Viz, - * - * Thread A Thread B - * - * rq = active.request - * retire(rq) -> free(rq); - * (rq is now first on the slab freelist) - * active.request = NULL - * - * rq = new submission on a new object - * ref(rq) - * - * To prevent the request from being reused whilst the caller - * uses it, we take a reference like normal. Whilst acquiring - * the reference we check that it is not in a destroyed state - * (refcnt == 0). That prevents the request being reallocated - * whilst the caller holds on to it. To check that the request - * was not reallocated as we acquired the reference we have to - * check that our request remains the active request across - * the lookup, in the same manner as a seqlock. The visibility - * of the pointer versus the reference counting is controlled - * by using RCU barriers (rcu_dereference and rcu_assign_pointer). - * - * In the middle of all that, we inspect whether the request is - * complete. Retiring is lazy so the request may be completed long - * before the active tracker is updated. Querying whether the - * request is complete is far cheaper (as it involves no locked - * instructions setting cachelines to exclusive) than acquiring - * the reference, so we do it first. The RCU read lock ensures the - * pointer dereference is valid, but does not ensure that the - * seqno nor HWS is the right one! However, if the request was - * reallocated, that means the active tracker's request was complete. - * If the new request is also complete, then both are and we can - * just report the active tracker is idle. If the new request is - * incomplete, then we acquire a reference on it and check that - * it remained the active request. - * - * It is then imperative that we do not zero the request on - * reallocation, so that we can chase the dangling pointers! - * See i915_request_alloc(). 
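The long comment being deleted above spells out the lockless lookup it used to guard: take a reference only if the refcount is still non-zero, then re-check that the slot still points at the same object, retrying otherwise. A standalone sketch of that inc-unless-zero-plus-recheck loop; RCU and SLAB_TYPESAFE_BY_RCU themselves are not modelled here:

/*
 * Sketch of the "get a reference unless the object is dying, then confirm
 * the slot still points at it" lookup described in the comment above.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stdatomic.h>

struct request {
    atomic_int refcount;    /* 0 means the request is being destroyed */
    int seqno;
};

static _Atomic(struct request *) active_slot;

static bool get_unless_zero(struct request *rq)
{
    int old = atomic_load(&rq->refcount);

    do {
        if (old == 0)
            return false;
    } while (!atomic_compare_exchange_weak(&rq->refcount, &old, old + 1));

    return true;
}

static void request_put(struct request *rq)
{
    atomic_fetch_sub(&rq->refcount, 1);
}

static struct request *active_request_get(void)
{
    struct request *rq;

    do {
        rq = atomic_load(&active_slot);
        if (!rq || !get_unless_zero(rq))
            return NULL;

        /* still the tracked request? then the reference is ours */
        if (rq == atomic_load(&active_slot))
            return rq;

        request_put(rq);    /* raced with a replacement: retry */
    } while (1);
}

int main(void)
{
    struct request rq = { .seqno = 42 };
    struct request *got;

    atomic_init(&rq.refcount, 1);
    atomic_store(&active_slot, &rq);

    got = active_request_get();
    printf("got seqno %d, refcount now %d\n",
           got ? got->seqno : -1, atomic_load(&rq.refcount));
    return 0;
}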
- */ - do { - struct i915_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_request_completed(request)) - return NULL; - - /* - * An especially silly compiler could decide to recompute the - * result of i915_request_completed, more specifically - * re-emit the load for request->fence.seqno. A race would catch - * a later seqno value, which could flip the result from true to - * false. Which means part of the instructions below might not - * be executed, while later on instructions are executed. Due to - * barriers within the refcounting the inconsistency can't reach - * past the call to i915_request_get_rcu, but not executing - * that while still executing i915_request_put() creates - * havoc enough. Prevent this with a compiler barrier. - */ - barrier(); - - request = i915_request_get_rcu(request); - - /* - * What stops the following rcu_access_pointer() from occurring - * before the above i915_request_get_rcu()? If we were - * to read the value before pausing to get the reference to - * the request, we may not notice a change in the active - * tracker. - * - * The rcu_access_pointer() is a mere compiler barrier, which - * means both the CPU and compiler are free to perform the - * memory read without constraint. The compiler only has to - * ensure that any operations after the rcu_access_pointer() - * occur afterwards in program order. This means the read may - * be performed earlier by an out-of-order CPU, or adventurous - * compiler. - * - * The atomic operation at the heart of - * i915_request_get_rcu(), see dma_fence_get_rcu(), is - * atomic_inc_not_zero() which is only a full memory barrier - * when successful. That is, if i915_request_get_rcu() - * returns the request (and so with the reference counted - * incremented) then the following read for rcu_access_pointer() - * must occur after the atomic operation and so confirm - * that this request is the one currently being tracked. - * - * The corresponding write barrier is part of - * rcu_assign_pointer(). - */ - if (!request || request == rcu_access_pointer(active->request)) - return rcu_pointer_handoff(request); - - i915_request_put(request); - } while (1); -} - +int __must_check +i915_active_fence_set(struct i915_active_fence *active, + struct i915_request *rq); /** - * i915_active_request_get_unlocked - return a reference to the active request + * i915_active_fence_get - return a reference to the active fence * @active - the active tracker * - * i915_active_request_get_unlocked() returns a reference to the active request, + * i915_active_fence_get() returns a reference to the active fence, * or NULL if the active tracker is idle. The reference is obtained under RCU, * so no locking is required by the caller. * - * The reference should be freed with i915_request_put(). + * The reference should be freed with dma_fence_put(). 
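+ * A minimal usage sketch (illustrative only): + * + *	fence = i915_active_fence_get(active); + *	if (fence) { ...use the fence...; dma_fence_put(fence); }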
*/ -static inline struct i915_request * -i915_active_request_get_unlocked(const struct i915_active_request *active) +static inline struct dma_fence * +i915_active_fence_get(struct i915_active_fence *active) { - struct i915_request *request; + struct dma_fence *fence; rcu_read_lock(); - request = __i915_active_request_get_rcu(active); + fence = dma_fence_get_rcu_safe(&active->fence); rcu_read_unlock(); - return request; + return fence; } /** - * i915_active_request_isset - report whether the active tracker is assigned + * i915_active_fence_isset - report whether the active tracker is assigned * @active - the active tracker * - * i915_active_request_isset() returns true if the active tracker is currently - * assigned to a request. Due to the lazy retiring, that request may be idle + * i915_active_fence_isset() returns true if the active tracker is currently + * assigned to a fence. Due to the lazy retiring, that fence may be idle * and this may report stale information. */ static inline bool -i915_active_request_isset(const struct i915_active_request *active) -{ - return rcu_access_pointer(active->request); -} - -/** - * i915_active_request_retire - waits until the request is retired - * @active - the active request on which to wait - * - * i915_active_request_retire() waits until the request is completed, - * and then ensures that at least the retirement handler for this - * @active tracker is called before returning. If the @active - * tracker is idle, the function returns immediately. - */ -static inline int __must_check -i915_active_request_retire(struct i915_active_request *active, - struct mutex *mutex) +i915_active_fence_isset(const struct i915_active_fence *active) { - struct i915_request *request; - long ret; - - request = i915_active_request_raw(active, mutex); - if (!request) - return 0; - - ret = i915_request_wait(request, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (ret < 0) - return ret; - - list_del_init(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, request); - - return 0; + return rcu_access_pointer(active->fence); } /* @@ -369,35 +149,55 @@ i915_active_request_retire(struct i915_active_request *active, * synchronisation. */ -void i915_active_init(struct drm_i915_private *i915, - struct i915_active *ref, - void (*retire)(struct i915_active *ref)); +void __i915_active_init(struct i915_active *ref, + int (*active)(struct i915_active *ref), + void (*retire)(struct i915_active *ref), + struct lock_class_key *mkey, + struct lock_class_key *wkey); -int i915_active_ref(struct i915_active *ref, - u64 timeline, - struct i915_request *rq); +/* Specialise each class of i915_active to avoid impossible lockdep cycles. 
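+ * Each expansion of i915_active_init() below declares its own pair of static + * lock_class_key objects and hands them to __i915_active_init(), so every + * class of i915_active gets distinct lockdep classes.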
*/ +#define i915_active_init(ref, active, retire) do { \ + static struct lock_class_key __mkey; \ + static struct lock_class_key __wkey; \ + \ + __i915_active_init(ref, active, retire, &__mkey, &__wkey); \ +} while (0) -int i915_active_wait(struct i915_active *ref); +int i915_active_ref(struct i915_active *ref, + struct intel_timeline *tl, + struct dma_fence *fence); -int i915_request_await_active(struct i915_request *rq, - struct i915_active *ref); -int i915_request_await_active_request(struct i915_request *rq, - struct i915_active_request *active); +static inline int +i915_active_add_request(struct i915_active *ref, struct i915_request *rq) +{ + return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence); +} -bool i915_active_acquire(struct i915_active *ref); +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f); -static inline void i915_active_cancel(struct i915_active *ref) +static inline bool i915_active_has_exclusive(struct i915_active *ref) { - GEM_BUG_ON(ref->count != 1); - ref->count = 0; + return rcu_access_pointer(ref->excl.fence); } +int i915_active_wait(struct i915_active *ref); + +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref); + +int i915_active_acquire(struct i915_active *ref); +bool i915_active_acquire_if_busy(struct i915_active *ref); void i915_active_release(struct i915_active *ref); +static inline void __i915_active_acquire(struct i915_active *ref) +{ + GEM_BUG_ON(!atomic_read(&ref->count)); + atomic_inc(&ref->count); +} + static inline bool i915_active_is_idle(const struct i915_active *ref) { - return !ref->count; + return !atomic_read(&ref->count); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) @@ -409,6 +209,9 @@ static inline void i915_active_fini(struct i915_active *ref) { } int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine); void i915_active_acquire_barrier(struct i915_active *ref); -void i915_request_add_barriers(struct i915_request *rq); +void i915_request_add_active_barriers(struct i915_request *rq); + +void i915_active_print(struct i915_active *ref, struct drm_printer *m); +void i915_active_unlock_wait(struct i915_active *ref); #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index c025991b9233..6360c3e4b765 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -7,33 +7,48 @@ #ifndef _I915_ACTIVE_TYPES_H_ #define _I915_ACTIVE_TYPES_H_ +#include <linux/atomic.h> +#include <linux/dma-fence.h> #include <linux/llist.h> +#include <linux/mutex.h> #include <linux/rbtree.h> #include <linux/rcupdate.h> +#include <linux/workqueue.h> -struct drm_i915_private; -struct i915_active_request; -struct i915_request; +#include "i915_utils.h" -typedef void (*i915_active_retire_fn)(struct i915_active_request *, - struct i915_request *); - -struct i915_active_request { - struct i915_request __rcu *request; - struct list_head link; - i915_active_retire_fn retire; +struct i915_active_fence { + struct dma_fence __rcu *fence; + struct dma_fence_cb cb; }; +struct active_node; + +#define I915_ACTIVE_MAY_SLEEP BIT(0) + +#define __i915_active_call __aligned(4) +#define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2) + struct i915_active { - struct drm_i915_private *i915; + atomic_t count; + struct mutex mutex; + spinlock_t tree_lock; + struct active_node *cache; struct rb_root tree; - struct i915_active_request last; - unsigned int 
count; + /* Preallocated "exclusive" node */ + struct i915_active_fence excl; + + unsigned long flags; +#define I915_ACTIVE_RETIRE_SLEEPS BIT(0) + + int (*active)(struct i915_active *ref); void (*retire)(struct i915_active *ref); - struct llist_head barriers; + struct work_struct work; + + struct llist_head preallocated_barriers; }; #endif /* _I915_ACTIVE_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c new file mode 100644 index 000000000000..66883af64ca1 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -0,0 +1,431 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/kmemleak.h> +#include <linux/slab.h> + +#include "i915_buddy.h" + +#include "i915_gem.h" +#include "i915_globals.h" +#include "i915_utils.h" + +static struct i915_global_block { + struct i915_global base; + struct kmem_cache *slab_blocks; +} global; + +static void i915_global_buddy_shrink(void) +{ + kmem_cache_shrink(global.slab_blocks); +} + +static void i915_global_buddy_exit(void) +{ + kmem_cache_destroy(global.slab_blocks); +} + +static struct i915_global_block global = { { + .shrink = i915_global_buddy_shrink, + .exit = i915_global_buddy_exit, +} }; + +int __init i915_global_buddy_init(void) +{ + global.slab_blocks = KMEM_CACHE(i915_buddy_block, SLAB_HWCACHE_ALIGN); + if (!global.slab_blocks) + return -ENOMEM; + + i915_global_register(&global.base); + return 0; +} + +static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_block *parent, + unsigned int order, + u64 offset) +{ + struct i915_buddy_block *block; + + block = kmem_cache_zalloc(global.slab_blocks, GFP_KERNEL); + if (!block) + return NULL; + + block->header = offset; + block->header |= order; + block->parent = parent; + + return block; +} + +static void i915_block_free(struct i915_buddy_block *block) +{ + kmem_cache_free(global.slab_blocks, block); +} + +static void mark_allocated(struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_ALLOCATED; + + list_del(&block->link); +} + +static void mark_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_FREE; + + list_add(&block->link, + &mm->free_list[i915_buddy_block_order(block)]); +} + +static void mark_split(struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_SPLIT; + + list_del(&block->link); +} + +int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size) +{ + unsigned int i; + u64 offset; + + if (size < chunk_size) + return -EINVAL; + + if (chunk_size < PAGE_SIZE) + return -EINVAL; + + if (!is_power_of_2(chunk_size)) + return -EINVAL; + + size = round_down(size, chunk_size); + + mm->size = size; + mm->chunk_size = chunk_size; + mm->max_order = ilog2(size) - ilog2(chunk_size); + + GEM_BUG_ON(mm->max_order > I915_BUDDY_MAX_ORDER); + + mm->free_list = kmalloc_array(mm->max_order + 1, + sizeof(struct list_head), + GFP_KERNEL); + if (!mm->free_list) + return -ENOMEM; + + for (i = 0; i <= mm->max_order; ++i) + INIT_LIST_HEAD(&mm->free_list[i]); + + mm->n_roots = hweight64(size); + + mm->roots = kmalloc_array(mm->n_roots, + sizeof(struct i915_buddy_block *), + GFP_KERNEL); + if (!mm->roots) + goto out_free_list; + + offset = 0; + i = 0; + + /* + * Split into power-of-two blocks, in case we are given a size that is + * not itself a power-of-two. 
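+ * For example, a (hypothetical) 12MiB region with a 4KiB chunk_size would be + * split into an 8MiB root followed by a 4MiB root.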
+ */ + do { + struct i915_buddy_block *root; + unsigned int order; + u64 root_size; + + root_size = rounddown_pow_of_two(size); + order = ilog2(root_size) - ilog2(chunk_size); + + root = i915_block_alloc(NULL, order, offset); + if (!root) + goto out_free_roots; + + mark_free(mm, root); + + GEM_BUG_ON(i > mm->max_order); + GEM_BUG_ON(i915_buddy_block_size(mm, root) < chunk_size); + + mm->roots[i] = root; + + offset += root_size; + size -= root_size; + i++; + } while (size); + + return 0; + +out_free_roots: + while (i--) + i915_block_free(mm->roots[i]); + kfree(mm->roots); +out_free_list: + kfree(mm->free_list); + return -ENOMEM; +} + +void i915_buddy_fini(struct i915_buddy_mm *mm) +{ + int i; + + for (i = 0; i < mm->n_roots; ++i) { + GEM_WARN_ON(!i915_buddy_block_is_free(mm->roots[i])); + i915_block_free(mm->roots[i]); + } + + kfree(mm->roots); + kfree(mm->free_list); +} + +static int split_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + unsigned int block_order = i915_buddy_block_order(block) - 1; + u64 offset = i915_buddy_block_offset(block); + + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + GEM_BUG_ON(!i915_buddy_block_order(block)); + + block->left = i915_block_alloc(block, block_order, offset); + if (!block->left) + return -ENOMEM; + + block->right = i915_block_alloc(block, block_order, + offset + (mm->chunk_size << block_order)); + if (!block->right) { + i915_block_free(block->left); + return -ENOMEM; + } + + mark_free(mm, block->left); + mark_free(mm, block->right); + + mark_split(block); + + return 0; +} + +static struct i915_buddy_block * +get_buddy(struct i915_buddy_block *block) +{ + struct i915_buddy_block *parent; + + parent = block->parent; + if (!parent) + return NULL; + + if (parent->left == block) + return parent->right; + + return parent->left; +} + +static void __i915_buddy_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *parent; + + while ((parent = block->parent)) { + struct i915_buddy_block *buddy; + + buddy = get_buddy(block); + + if (!i915_buddy_block_is_free(buddy)) + break; + + list_del(&buddy->link); + + i915_block_free(block); + i915_block_free(buddy); + + block = parent; + } + + mark_free(mm, block); +} + +void i915_buddy_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + GEM_BUG_ON(!i915_buddy_block_is_allocated(block)); + __i915_buddy_free(mm, block); +} + +void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects) +{ + struct i915_buddy_block *block, *on; + + list_for_each_entry_safe(block, on, objects, link) { + i915_buddy_free(mm, block); + cond_resched(); + } + INIT_LIST_HEAD(objects); +} + +/* + * Allocate power-of-two block. The order value here translates to: + * + * 0 = 2^0 * mm->chunk_size + * 1 = 2^1 * mm->chunk_size + * 2 = 2^2 * mm->chunk_size + * ... 
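+ * e.g. with a (hypothetical) 4KiB chunk_size, order 4 describes a + * 2^4 * 4KiB = 64KiB block.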
+ */ +struct i915_buddy_block * +i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order) +{ + struct i915_buddy_block *block = NULL; + unsigned int i; + int err; + + for (i = order; i <= mm->max_order; ++i) { + block = list_first_entry_or_null(&mm->free_list[i], + struct i915_buddy_block, + link); + if (block) + break; + } + + if (!block) + return ERR_PTR(-ENOSPC); + + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + + while (i != order) { + err = split_block(mm, block); + if (unlikely(err)) + goto out_free; + + /* Go low */ + block = block->left; + i--; + } + + mark_allocated(block); + kmemleak_update_trace(block); + return block; + +out_free: + __i915_buddy_free(mm, block); + return ERR_PTR(err); +} + +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= e2 && e1 >= s2; +} + +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= s2 && e1 >= e2; +} + +/* + * Allocate range. Note that it's safe to chain together multiple alloc_ranges + * with the same blocks list. + * + * Intended for pre-allocating portions of the address space, for example to + * reserve a block for the initial framebuffer or similar, hence the expectation + * here is that i915_buddy_alloc() is still the main vehicle for + * allocations, so if that's not the case then the drm_mm range allocator is + * probably a much better fit, and so you should probably go use that instead. + */ +int i915_buddy_alloc_range(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 start, u64 size) +{ + struct i915_buddy_block *block; + struct i915_buddy_block *buddy; + LIST_HEAD(allocated); + LIST_HEAD(dfs); + u64 end; + int err; + int i; + + if (size < mm->chunk_size) + return -EINVAL; + + if (!IS_ALIGNED(size | start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(start, size, mm->size)) + return -EINVAL; + + for (i = 0; i < mm->n_roots; ++i) + list_add_tail(&mm->roots[i]->tmp_link, &dfs); + + end = start + size - 1; + + do { + u64 block_start; + u64 block_end; + + block = list_first_entry_or_null(&dfs, + struct i915_buddy_block, + tmp_link); + if (!block) + break; + + list_del(&block->tmp_link); + + block_start = i915_buddy_block_offset(block); + block_end = block_start + i915_buddy_block_size(mm, block) - 1; + + if (!overlaps(start, end, block_start, block_end)) + continue; + + if (i915_buddy_block_is_allocated(block)) { + err = -ENOSPC; + goto err_free; + } + + if (contains(start, end, block_start, block_end)) { + if (!i915_buddy_block_is_free(block)) { + err = -ENOSPC; + goto err_free; + } + + mark_allocated(block); + list_add_tail(&block->link, &allocated); + continue; + } + + if (!i915_buddy_block_is_split(block)) { + err = split_block(mm, block); + if (unlikely(err)) + goto err_undo; + } + + list_add(&block->right->tmp_link, &dfs); + list_add(&block->left->tmp_link, &dfs); + } while (1); + + list_splice_tail(&allocated, blocks); + return 0; + +err_undo: + /* + * We really don't want to leave around a bunch of split blocks, since + * bigger is better, so make sure we merge everything back before we + * free the allocated blocks. 
+ */ + buddy = get_buddy(block); + if (buddy && + (i915_buddy_block_is_free(block) && + i915_buddy_block_is_free(buddy))) + __i915_buddy_free(mm, block); + +err_free: + i915_buddy_free_list(mm, &allocated); + return err; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_buddy.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h new file mode 100644 index 000000000000..ed41f3507cdc --- /dev/null +++ b/drivers/gpu/drm/i915/i915_buddy.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_BUDDY_H__ +#define __I915_BUDDY_H__ + +#include <linux/bitops.h> +#include <linux/list.h> + +struct i915_buddy_block { +#define I915_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) +#define I915_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) +#define I915_BUDDY_ALLOCATED (1 << 10) +#define I915_BUDDY_FREE (2 << 10) +#define I915_BUDDY_SPLIT (3 << 10) +#define I915_BUDDY_HEADER_ORDER GENMASK_ULL(9, 0) + u64 header; + + struct i915_buddy_block *left; + struct i915_buddy_block *right; + struct i915_buddy_block *parent; + + void *private; /* owned by creator */ + + /* + * While the block is allocated by the user through i915_buddy_alloc*, + * the user has ownership of the link, for example to maintain within + * a list, if so desired. As soon as the block is freed with + * i915_buddy_free* ownership is given back to the mm. + */ + struct list_head link; + struct list_head tmp_link; +}; + +#define I915_BUDDY_MAX_ORDER I915_BUDDY_HEADER_ORDER + +/* + * Binary Buddy System. + * + * Locking should be handled by the user, a simple mutex around + * i915_buddy_alloc* and i915_buddy_free* should suffice. + */ +struct i915_buddy_mm { + /* Maintain a free list for each order. */ + struct list_head *free_list; + + /* + * Maintain explicit binary tree(s) to track the allocation of the + * address space. This gives us a simple way of finding a buddy block + * and performing the potentially recursive merge step when freeing a + * block. Nodes are either allocated or free, in which case they will + * also exist on the respective free list. + */ + struct i915_buddy_block **roots; + + /* + * Anything from here is public, and remains static for the lifetime of + * the mm. Everything above is considered do-not-touch. 
+ */ + unsigned int n_roots; + unsigned int max_order; + + /* Must be at least PAGE_SIZE */ + u64 chunk_size; + u64 size; +}; + +static inline u64 +i915_buddy_block_offset(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_OFFSET; +} + +static inline unsigned int +i915_buddy_block_order(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_ORDER; +} + +static inline unsigned int +i915_buddy_block_state(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_STATE; +} + +static inline bool +i915_buddy_block_is_allocated(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_ALLOCATED; +} + +static inline bool +i915_buddy_block_is_free(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_FREE; +} + +static inline bool +i915_buddy_block_is_split(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_SPLIT; +} + +static inline u64 +i915_buddy_block_size(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + return mm->chunk_size << i915_buddy_block_order(block); +} + +int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size); + +void i915_buddy_fini(struct i915_buddy_mm *mm); + +struct i915_buddy_block * +i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order); + +int i915_buddy_alloc_range(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 start, u64 size); + +void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block); + +void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects); + +#endif diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index a28bcd2d7c09..a0e437aa65b7 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -28,6 +28,7 @@ #include "gt/intel_engine.h" #include "i915_drv.h" +#include "i915_memcpy.h" /** * DOC: batch buffer command parser @@ -52,13 +53,11 @@ * granting userspace undue privileges. There are three categories of privilege. * * First, commands which are explicitly defined as privileged or which should - * only be used by the kernel driver. The parser generally rejects such - * commands, though it may allow some from the drm master process. + * only be used by the kernel driver. The parser rejects such commands * * Second, commands which access registers. To support correct/enhanced * userspace functionality, particularly certain OpenGL extensions, the parser - * provides a whitelist of registers which userspace may safely access (for both - * normal and drm master processes). + * provides a whitelist of registers which userspace may safely access * * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). * The parser always rejects such commands. @@ -83,9 +82,9 @@ * in the per-engine command tables. * * Other command table entries map fairly directly to high level categories - * mentioned above: rejected, master-only, register whitelist. The parser - * implements a number of checks, including the privileged memory checks, via a - * general bitmasking mechanism. + * mentioned above: rejected, register whitelist. The parser implements a number + * of checks, including the privileged memory checks, via a general bitmasking + * mechanism. 
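+ * For instance, a bitmask check supplies an offset, mask and expected value + * that the corresponding dword of the command must match (see check_cmd()).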
*/ /* @@ -103,8 +102,6 @@ struct drm_i915_cmd_descriptor { * CMD_DESC_REJECT: The command is never allowed * CMD_DESC_REGISTER: The command should be checked against the * register whitelist for the appropriate ring - * CMD_DESC_MASTER: The command is allowed if the submitting process - * is the DRM master */ u32 flags; #define CMD_DESC_FIXED (1<<0) @@ -112,7 +109,6 @@ struct drm_i915_cmd_descriptor { #define CMD_DESC_REJECT (1<<2) #define CMD_DESC_REGISTER (1<<3) #define CMD_DESC_BITMASK (1<<4) -#define CMD_DESC_MASTER (1<<5) /* * The command's unique identification bits and the bitmask to get them. @@ -193,7 +189,7 @@ struct drm_i915_cmd_table { #define CMD(op, opm, f, lm, fl, ...) \ { \ .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ - .cmd = { (op), ~0u << (opm) }, \ + .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \ .length = { (lm) }, \ __VA_ARGS__ \ } @@ -208,14 +204,13 @@ struct drm_i915_cmd_table { #define R CMD_DESC_REJECT #define W CMD_DESC_REGISTER #define B CMD_DESC_BITMASK -#define M CMD_DESC_MASTER /* Command Mask Fixed Len Action ---------------------------------------------------------- */ -static const struct drm_i915_cmd_descriptor common_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { CMD( MI_NOOP, SMI, F, 1, S ), CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), - CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ), CMD( MI_ARB_CHECK, SMI, F, 1, S ), CMD( MI_REPORT_HEAD, SMI, F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), @@ -240,12 +235,12 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { /* * MI_BATCH_BUFFER_START requires some special handling. It's not * really a 'skip' action but it doesn't seem like it's worth adding - * a new action. See i915_parse_cmds(). + * a new action. See intel_engine_cmd_parser(). 
*/ CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), }; -static const struct drm_i915_cmd_descriptor render_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = { CMD( MI_FLUSH, SMI, F, 1, S ), CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_PREDICATE, SMI, F, 1, S ), @@ -312,7 +307,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_RS_CONTEXT, SMI, F, 1, S ), - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), @@ -329,7 +324,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), }; -static const struct drm_i915_cmd_descriptor video_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -373,7 +368,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { CMD( MFX_WAIT, SMFX, F, 1, S ), }; -static const struct drm_i915_cmd_descriptor vecs_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -411,7 +406,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { }}, ), }; -static const struct drm_i915_cmd_descriptor blt_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, .bits = {{ @@ -445,10 +440,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { }; static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; +/* + * For Gen9 we can still rely on the h/w to enforce cmd security, and only + * need to re-enforce the register access checks. We therefore only need to + * teach the cmdparser how to find the end of each command, and identify + * register accesses. The table doesn't need to reject any commands, and so + * the only commands listed here are: + * 1) Those that touch registers + * 2) Those that do not have the default 8-bit length + * + * Note that the default MI length mask chosen for this table is 0xFF, not + * the 0x3F used on older devices. This is because the vast majority of MI + * cmds on Gen9 use a standard 8-bit Length field. + * All the Gen9 blitter instructions are standard 0xFF length mask, and + * none allow access to non-general registers, so in fact no BLT cmds are + * included in the table at all. 
+ * + */ +static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, S ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ), + CMD( MI_FLUSH, SMI, F, 1, S ), + CMD( MI_ARB_CHECK, SMI, F, 1, S ), + CMD( MI_REPORT_HEAD, SMI, F, 1, S ), + CMD( MI_ARB_ON_OFF, SMI, F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), + CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ), + CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), + + /* + * We allow BB_START but apply further checks. We just sanitize the + * basic fields here. + */ +#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0) +#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1) + CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B, + .bits = {{ + .offset = 0, + .mask = MI_BB_START_OPERAND_MASK, + .expected = MI_BB_START_OPERAND_EXPECT, + }}, ), +}; + static const struct drm_i915_cmd_descriptor noop_desc = CMD(MI_NOOP, SMI, F, 1, S); @@ -462,40 +511,44 @@ static const struct drm_i915_cmd_descriptor noop_desc = #undef R #undef W #undef B -#undef M -static const struct drm_i915_cmd_table gen7_render_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, }; -static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, }; -static const struct drm_i915_cmd_table gen7_video_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { video_cmds, ARRAY_SIZE(video_cmds) }, +static const struct drm_i915_cmd_table gen7_video_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) }, }; -static const struct drm_i915_cmd_table hsw_vebox_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { vecs_cmds, ARRAY_SIZE(vecs_cmds) }, +static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) }, }; -static const struct drm_i915_cmd_table gen7_blt_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, }; -static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + 
{ gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, }; +static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = { + { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) }, +}; + + /* * Register whitelists, sorted by increasing register offset. */ @@ -611,17 +664,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; -static const struct drm_i915_reg_descriptor ivb_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), -}; - -static const struct drm_i915_reg_descriptor hsw_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), +static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { + REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), + REG32(BCS_SWCTRL), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + REG64_IDX(BCS_GPR, 0), + REG64_IDX(BCS_GPR, 1), + REG64_IDX(BCS_GPR, 2), + REG64_IDX(BCS_GPR, 3), + REG64_IDX(BCS_GPR, 4), + REG64_IDX(BCS_GPR, 5), + REG64_IDX(BCS_GPR, 6), + REG64_IDX(BCS_GPR, 7), + REG64_IDX(BCS_GPR, 8), + REG64_IDX(BCS_GPR, 9), + REG64_IDX(BCS_GPR, 10), + REG64_IDX(BCS_GPR, 11), + REG64_IDX(BCS_GPR, 12), + REG64_IDX(BCS_GPR, 13), + REG64_IDX(BCS_GPR, 14), + REG64_IDX(BCS_GPR, 15), }; #undef REG64 @@ -630,28 +693,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = { struct drm_i915_reg_table { const struct drm_i915_reg_descriptor *regs; int num_regs; - bool master; }; static const struct drm_i915_reg_table ivb_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, }; static const struct drm_i915_reg_table ivb_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, }; static const struct drm_i915_reg_table hsw_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, + { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) }, }; static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, +}; + +static const struct drm_i915_reg_table gen9_blt_reg_tables[] = { + { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) }, }; static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) @@ -669,7 +731,7 @@ static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) return 0xFF; } - DRM_DEBUG_DRIVER("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header); + DRM_DEBUG("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header); return 0; } @@ -692,7 +754,7 @@ static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header) return 0xFF; } - DRM_DEBUG_DRIVER("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header); + DRM_DEBUG("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header); return 0; } @@ -705,7 +767,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) else if (client == INSTR_BC_CLIENT) return 0xFF; - DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); + DRM_DEBUG("CMD: Abnormal blt cmd length! 
0x%08X\n", cmd_header); + return 0; +} + +static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = cmd_header >> INSTR_CLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT) + return 0xFF; + + DRM_DEBUG("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); return 0; } @@ -866,18 +939,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) int cmd_table_count; int ret; - if (!IS_GEN(engine->i915, 7)) + if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) && + engine->class == COPY_ENGINE_CLASS)) return; switch (engine->class) { case RENDER_CLASS: if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_render_ring_cmds; + cmd_tables = hsw_render_ring_cmd_table; cmd_table_count = - ARRAY_SIZE(hsw_render_ring_cmds); + ARRAY_SIZE(hsw_render_ring_cmd_table); } else { - cmd_tables = gen7_render_cmds; - cmd_table_count = ARRAY_SIZE(gen7_render_cmds); + cmd_tables = gen7_render_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table); } if (IS_HASWELL(engine->i915)) { @@ -887,36 +961,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->reg_tables = ivb_render_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables); } - engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VIDEO_DECODE_CLASS: - cmd_tables = gen7_video_cmds; - cmd_table_count = ARRAY_SIZE(gen7_video_cmds); + cmd_tables = gen7_video_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table); engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; case COPY_ENGINE_CLASS: - if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_blt_ring_cmds; - cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); + engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + if (IS_GEN(engine->i915, 9)) { + cmd_tables = gen9_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table); + engine->get_cmd_length_mask = + gen9_blt_get_cmd_length_mask; + + /* BCS Engine unsafe without parser */ + engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER; + } else if (IS_HASWELL(engine->i915)) { + cmd_tables = hsw_blt_ring_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); } else { - cmd_tables = gen7_blt_cmds; - cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); + cmd_tables = gen7_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); } - if (IS_HASWELL(engine->i915)) { + if (IS_GEN(engine->i915, 9)) { + engine->reg_tables = gen9_blt_reg_tables; + engine->reg_table_count = + ARRAY_SIZE(gen9_blt_reg_tables); + } else if (IS_HASWELL(engine->i915)) { engine->reg_tables = hsw_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables); } else { engine->reg_tables = ivb_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables); } - - engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VIDEO_ENHANCEMENT_CLASS: - cmd_tables = hsw_vebox_cmds; - cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); + cmd_tables = hsw_vebox_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table); /* VECS can use the same length_mask function as VCS */ engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; @@ -942,7 +1026,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) return; } - engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; + engine->flags |= I915_ENGINE_USING_CMD_PARSER; } /** @@ -954,7 +1038,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) */ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { - if 
(!intel_engine_needs_cmd_parser(engine)) + if (!intel_engine_using_cmd_parser(engine)) return; fini_hash_table(engine); @@ -1028,119 +1112,98 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr) } static const struct drm_i915_reg_descriptor * -find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) +find_reg(const struct intel_engine_cs *engine, u32 addr) { const struct drm_i915_reg_table *table = engine->reg_tables; + const struct drm_i915_reg_descriptor *reg = NULL; int count = engine->reg_table_count; - for (; count > 0; ++table, --count) { - if (!table->master || is_master) { - const struct drm_i915_reg_descriptor *reg; + for (; !reg && (count > 0); ++table, --count) + reg = __find_reg(table->regs, table->num_regs, addr); - reg = __find_reg(table->regs, table->num_regs, addr); - if (reg != NULL) - return reg; - } - } - - return NULL; + return reg; } /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, - u32 batch_start_offset, - u32 batch_len, - bool *needs_clflush_after) + u32 offset, u32 length) { - unsigned int src_needs_clflush; - unsigned int dst_needs_clflush; + bool needs_clflush; void *dst, *src; int ret; - ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush); - if (ret) - return ERR_PTR(ret); - dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB); - i915_gem_object_finish_access(dst_obj); if (IS_ERR(dst)) return dst; - ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush); + ret = i915_gem_object_pin_pages(src_obj); if (ret) { i915_gem_object_unpin_map(dst_obj); return ERR_PTR(ret); } + needs_clflush = + !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); + src = ERR_PTR(-ENODEV); - if (src_needs_clflush && - i915_can_memcpy_from_wc(NULL, batch_start_offset, 0)) { + if (needs_clflush && i915_has_memcpy_from_wc()) { src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); if (!IS_ERR(src)) { - i915_memcpy_from_wc(dst, - src + batch_start_offset, - ALIGN(batch_len, 16)); + i915_unaligned_memcpy_from_wc(dst, + src + offset, + length); i915_gem_object_unpin_map(src_obj); } } if (IS_ERR(src)) { void *ptr; - int offset, n; - - offset = offset_in_page(batch_start_offset); + int x, n; - /* We can avoid clflushing partial cachelines before the write + /* + * We can avoid clflushing partial cachelines before the write * if we only every write full cache-lines. Since we know that * both the source and destination are in multiples of * PAGE_SIZE, we can simply round up to the next cacheline. * We don't care about copying too much here as we only * validate up to the end of the batch. 
*/ - if (dst_needs_clflush & CLFLUSH_BEFORE) - batch_len = roundup(batch_len, - boot_cpu_data.x86_clflush_size); + if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + length = round_up(length, + boot_cpu_data.x86_clflush_size); ptr = dst; - for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) { - int len = min_t(int, batch_len, PAGE_SIZE - offset); + x = offset_in_page(offset); + for (n = offset >> PAGE_SHIFT; length; n++) { + int len = min_t(int, length, PAGE_SIZE - x); src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); - if (src_needs_clflush) - drm_clflush_virt_range(src + offset, len); - memcpy(ptr, src + offset, len); + if (needs_clflush) + drm_clflush_virt_range(src + x, len); + memcpy(ptr, src + x, len); kunmap_atomic(src); ptr += len; - batch_len -= len; - offset = 0; + length -= len; + x = 0; } } - i915_gem_object_finish_access(src_obj); + i915_gem_object_unpin_pages(src_obj); /* dst_obj is returned with vmap pinned */ - *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER; - return dst; } static bool check_cmd(const struct intel_engine_cs *engine, const struct drm_i915_cmd_descriptor *desc, - const u32 *cmd, u32 length, - const bool is_master) + const u32 *cmd, u32 length) { if (desc->flags & CMD_DESC_SKIP) return true; if (desc->flags & CMD_DESC_REJECT) { - DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd); - return false; - } - - if ((desc->flags & CMD_DESC_MASTER) && !is_master) { - DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", - *cmd); + DRM_DEBUG("CMD: Rejected command: 0x%08X\n", *cmd); return false; } @@ -1157,11 +1220,11 @@ static bool check_cmd(const struct intel_engine_cs *engine, offset += step) { const u32 reg_addr = cmd[offset] & desc->reg.mask; const struct drm_i915_reg_descriptor *reg = - find_reg(engine, is_master, reg_addr); + find_reg(engine, reg_addr); if (!reg) { - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", - reg_addr, *cmd, engine->name); + DRM_DEBUG("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", + reg_addr, *cmd, engine->name); return false; } @@ -1171,22 +1234,22 @@ static bool check_cmd(const struct intel_engine_cs *engine, */ if (reg->mask) { if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { - DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n", - reg_addr); + DRM_DEBUG("CMD: Rejected LRM to masked register 0x%08X\n", + reg_addr); return false; } if (desc->cmd.value == MI_LOAD_REGISTER_REG) { - DRM_DEBUG_DRIVER("CMD: Rejected LRR to masked register 0x%08X\n", - reg_addr); + DRM_DEBUG("CMD: Rejected LRR to masked register 0x%08X\n", + reg_addr); return false; } if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) && (offset + 2 > length || (cmd[offset + 1] & reg->mask) != reg->value)) { - DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n", - reg_addr); + DRM_DEBUG("CMD: Rejected LRI to masked register 0x%08X\n", + reg_addr); return false; } } @@ -1213,8 +1276,8 @@ static bool check_cmd(const struct intel_engine_cs *engine, } if (desc->bits[i].offset >= length) { - DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n", - *cmd, engine->name); + DRM_DEBUG("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n", + *cmd, engine->name); return false; } @@ -1222,11 +1285,11 @@ static bool check_cmd(const struct intel_engine_cs *engine, desc->bits[i].mask; if (dword != desc->bits[i].expected) { - DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n", - *cmd, - 
desc->bits[i].mask, - desc->bits[i].expected, - dword, engine->name); + DRM_DEBUG("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n", + *cmd, + desc->bits[i].mask, + desc->bits[i].expected, + dword, engine->name); return false; } } @@ -1235,16 +1298,98 @@ static bool check_cmd(const struct intel_engine_cs *engine, return true; } +static int check_bbstart(u32 *cmd, u32 offset, u32 length, + u32 batch_length, + u64 batch_addr, + u64 shadow_addr, + const unsigned long *jump_whitelist) +{ + u64 jump_offset, jump_target; + u32 target_cmd_offset, target_cmd_index; + + /* For igt compatibility on older platforms */ + if (!jump_whitelist) { + DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n"); + return -EACCES; + } + + if (length != 3) { + DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n", + length); + return -EINVAL; + } + + jump_target = *(u64 *)(cmd + 1); + jump_offset = jump_target - batch_addr; + + /* + * Any underflow of jump_target is guaranteed to be outside the range + * of a u32, so >= test catches both too large and too small + */ + if (jump_offset >= batch_length) { + DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n", + jump_target); + return -EINVAL; + } + + /* + * This cannot overflow a u32 because we already checked jump_offset + * is within the BB, and the batch_length is a u32 + */ + target_cmd_offset = lower_32_bits(jump_offset); + target_cmd_index = target_cmd_offset / sizeof(u32); + + *(u64 *)(cmd + 1) = shadow_addr + target_cmd_offset; + + if (target_cmd_index == offset) + return 0; + + if (IS_ERR(jump_whitelist)) + return PTR_ERR(jump_whitelist); + + if (!test_bit(target_cmd_index, jump_whitelist)) { + DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", + jump_target); + return -EINVAL; + } + + return 0; +} + +static unsigned long *alloc_whitelist(u32 batch_length) +{ + unsigned long *jmp; + + /* + * We expect batch_length to be less than 256KiB for known users, + * i.e. we need at most an 8KiB bitmap allocation which should be + * reasonably cheap due to kmalloc caches. + */ + + /* Prefer to report transient allocation failure rather than hit oom */ + jmp = bitmap_zalloc(DIV_ROUND_UP(batch_length, sizeof(u32)), + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (!jmp) + return ERR_PTR(-ENOMEM); + + return jmp; +} + #define LENGTH_BIAS 2 +static bool shadow_needs_clflush(struct drm_i915_gem_object *obj) +{ + return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); +} + /** - * i915_parse_cmds() - parse a submitted batch buffer for privilege violations + * intel_engine_cmd_parser() - parse a batch buffer for privilege violations * @engine: the engine on which the batch is to execute - * @batch_obj: the batch buffer in question - * @shadow_batch_obj: copy of the batch buffer in question - * @batch_start_offset: byte offset in the batch at which execution starts - * @batch_len: length of the commands in batch_obj - * @is_master: is the submitting process the drm master? + * @batch: the batch buffer in question + * @batch_offset: byte offset in the batch at which execution starts + * @batch_length: length of the commands in batch_obj + * @shadow: validated copy of the batch buffer in question + * @trampoline: whether to emit a conditional trampoline at the end of the batch * * Parses the specified batch buffer looking for privilege violations as * described in the overview. 
@@ -1253,90 +1398,146 @@ static bool check_cmd(const struct intel_engine_cs *engine, * if the batch appears legal but should use hardware parsing */ int intel_engine_cmd_parser(struct intel_engine_cs *engine, - struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, - u32 batch_start_offset, - u32 batch_len, - bool is_master) + struct i915_vma *batch, + u32 batch_offset, + u32 batch_length, + struct i915_vma *shadow, + bool trampoline) { - u32 *cmd, *batch_end; + u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; - bool needs_clflush_after = false; + unsigned long *jump_whitelist; + u64 batch_addr, shadow_addr; int ret = 0; - cmd = copy_batch(shadow_batch_obj, batch_obj, - batch_start_offset, batch_len, - &needs_clflush_after); + GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd))); + GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd))); + GEM_BUG_ON(range_overflows_t(u64, batch_offset, batch_length, + batch->size)); + GEM_BUG_ON(!batch_length); + + cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length); if (IS_ERR(cmd)) { - DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n"); + DRM_DEBUG("CMD: Failed to copy batch\n"); return PTR_ERR(cmd); } + jump_whitelist = NULL; + if (!trampoline) + /* Defer failure until attempted use */ + jump_whitelist = alloc_whitelist(batch_length); + + shadow_addr = gen8_canonical_addr(shadow->node.start); + batch_addr = gen8_canonical_addr(batch->node.start + batch_offset); + /* * We use the batch length as size because the shadow object is as * large or larger and copy_batch() will write MI_NOPs to the extra * space. Parsing should be faster in some cases this way. */ - batch_end = cmd + (batch_len / sizeof(*batch_end)); + batch_end = cmd + batch_length / sizeof(*batch_end); do { u32 length; - if (*cmd == MI_BATCH_BUFFER_END) { - if (needs_clflush_after) { - void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); - drm_clflush_virt_range(ptr, - (void *)(cmd + 1) - ptr); - } + if (*cmd == MI_BATCH_BUFFER_END) break; - } desc = find_cmd(engine, *cmd, desc, &default_desc); if (!desc) { - DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", - *cmd); + DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd); ret = -EINVAL; break; } - /* - * If the batch buffer contains a chained batch, return an - * error that tells the caller to abort and dispatch the - * workload as a non-secure batch. 
- */ - if (desc->cmd.value == MI_BATCH_BUFFER_START) { - ret = -EACCES; - break; - } - if (desc->flags & CMD_DESC_FIXED) length = desc->length.fixed; else - length = ((*cmd & desc->length.mask) + LENGTH_BIAS); + length = (*cmd & desc->length.mask) + LENGTH_BIAS; if ((batch_end - cmd) < length) { - DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n", - *cmd, - length, - batch_end - cmd); + DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n", + *cmd, + length, + batch_end - cmd); ret = -EINVAL; break; } - if (!check_cmd(engine, desc, cmd, length, is_master)) { + if (!check_cmd(engine, desc, cmd, length)) { ret = -EACCES; break; } + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = check_bbstart(cmd, offset, length, batch_length, + batch_addr, shadow_addr, + jump_whitelist); + break; + } + + if (!IS_ERR_OR_NULL(jump_whitelist)) + __set_bit(offset, jump_whitelist); + cmd += length; + offset += length; if (cmd >= batch_end) { - DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); + DRM_DEBUG("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); ret = -EINVAL; break; } } while (1); - i915_gem_object_unpin_map(shadow_batch_obj); + if (trampoline) { + /* + * With the trampoline, the shadow is executed twice. + * + * 1 - starting at offset 0, in privileged mode + * 2 - starting at offset batch_len, as non-privileged + * + * Only if the batch is valid and safe to execute, do we + * allow the first privileged execution to proceed. If not, + * we terminate the first batch and use the second batchbuffer + * entry to chain to the original unsafe non-privileged batch, + * leaving it to the HW to validate. + */ + *batch_end = MI_BATCH_BUFFER_END; + + if (ret) { + /* Batch unsafe to execute with privileges, cancel! */ + cmd = page_mask_bits(shadow->obj->mm.mapping); + *cmd = MI_BATCH_BUFFER_END; + + /* If batch is unsafe but valid, jump to the original */ + if (ret == -EACCES) { + unsigned int flags; + + flags = MI_BATCH_NON_SECURE_I965; + if (IS_HASWELL(engine->i915)) + flags = MI_BATCH_NON_SECURE_HSW; + + GEM_BUG_ON(!IS_GEN_RANGE(engine->i915, 6, 7)); + __gen6_emit_bb_start(batch_end, + batch_addr, + flags); + + ret = 0; /* allow execution */ + } + } + + if (shadow_needs_clflush(shadow->obj)) + drm_clflush_virt_range(batch_end, 8); + } + + if (shadow_needs_clflush(shadow->obj)) { + void *ptr = page_mask_bits(shadow->obj->mm.mapping); + + drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); + } + + if (!IS_ERR_OR_NULL(jump_whitelist)) + kfree(jump_whitelist); + i915_gem_object_unpin_map(shadow->obj); return ret; } @@ -1352,12 +1553,11 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - enum intel_engine_id id; bool active = false; /* If the command parser is not enabled, report 0 - unsupported */ - for_each_engine(engine, dev_priv, id) { - if (intel_engine_needs_cmd_parser(engine)) { + for_each_uabi_engine(engine, dev_priv) { + if (intel_engine_using_cmd_parser(engine)) { active = true; break; } @@ -1382,6 +1582,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) * the parser enabled. * 9. Don't whitelist or handle oacontrol specially, as ownership * for oacontrol state is moving to i915-perf. + * 10. 
Support for Gen9 BCS Parsing */ - return 9; + return 10; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 62cf34db9280..d5a9b8a964c2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,6 +32,7 @@ #include <drm/drm_debugfs.h> #include <drm/drm_fourcc.h> +#include "display/intel_display_types.h" #include "display/intel_dp.h" #include "display/intel_fbc.h" #include "display/intel_hdcp.h" @@ -39,13 +40,17 @@ #include "display/intel_psr.h" #include "gem/i915_gem_context.h" +#include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" +#include "gt/intel_rc6.h" +#include "gt/intel_rps.h" +#include "gt/uc/intel_guc_submission.h" #include "i915_debugfs.h" #include "i915_irq.h" +#include "i915_trace.h" #include "intel_csr.h" -#include "intel_drv.h" -#include "intel_guc_submission.h" #include "intel_pm.h" #include "intel_sideband.h" @@ -56,17 +61,14 @@ static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) static int i915_capabilities(struct seq_file *m, void *data) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - const struct intel_device_info *info = INTEL_INFO(dev_priv); + struct drm_i915_private *i915 = node_to_i915(m->private); struct drm_printer p = drm_seq_file_printer(m); - seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); - seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); - seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); + seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(i915)); - intel_device_info_dump_flags(info, &p); - intel_device_info_dump_runtime(RUNTIME_INFO(dev_priv), &p); - intel_driver_caps_print(&dev_priv->caps, &p); + intel_device_info_print_static(INTEL_INFO(i915), &p); + intel_device_info_print_runtime(RUNTIME_INFO(i915), &p); + intel_driver_caps_print(&i915->caps, &p); kernel_param_lock(THIS_MODULE); i915_params_dump(&i915_modparams, &p); @@ -75,16 +77,6 @@ static int i915_capabilities(struct seq_file *m, void *data) return 0; } -static char get_active_flag(struct drm_i915_gem_object *obj) -{ - return i915_gem_object_is_active(obj) ? '*' : ' '; -} - -static char get_pin_flag(struct drm_i915_gem_object *obj) -{ - return obj->pin_global ? 'p' : ' '; -} - static char get_tiling_flag(struct drm_i915_gem_object *obj) { switch (i915_gem_object_get_tiling(obj)) { @@ -97,7 +89,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj) static char get_global_flag(struct drm_i915_gem_object *obj) { - return obj->userfault_count ? 'g' : ' '; + return READ_ONCE(obj->userfault_count) ? 'g' : ' '; } static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) @@ -141,13 +133,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct intel_engine_cs *engine; struct i915_vma *vma; - unsigned int frontbuffer_bits; int pin_count = 0; - seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s", + seq_printf(m, "%pK: %c%c%c %8zdKiB %02x %02x %s%s%s", &obj->base, - get_active_flag(obj), - get_pin_flag(obj), get_tiling_flag(obj), get_global_flag(obj), get_pin_mapped_flag(obj), @@ -216,9 +205,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) } } if (vma->fence) - seq_printf(m, " , fence: %d%s", - vma->fence->id, - i915_active_request_isset(&vma->last_fence) ? 
"*" : ""); + seq_printf(m, " , fence: %d", vma->fence->id); seq_puts(m, ")"); spin_lock(&obj->vma.lock); @@ -228,23 +215,18 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (pinned x %d)", pin_count); if (obj->stolen) seq_printf(m, " (stolen: %08llx)", obj->stolen->start); - if (obj->pin_global) - seq_printf(m, " (global)"); + if (i915_gem_object_is_framebuffer(obj)) + seq_printf(m, " (fb)"); engine = i915_gem_object_last_write_engine(obj); if (engine) seq_printf(m, " (%s)", engine->name); - - frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); - if (frontbuffer_bits) - seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits); } struct file_stats { struct i915_address_space *vm; unsigned long count; u64 total, unbound; - u64 global, shared; u64 active, inactive; u64 closed; }; @@ -255,101 +237,113 @@ static int per_file_stats(int id, void *ptr, void *data) struct file_stats *stats = data; struct i915_vma *vma; - lockdep_assert_held(&obj->base.dev->struct_mutex); + if (!kref_get_unless_zero(&obj->base.refcount)) + return 0; stats->count++; stats->total += obj->base.size; if (!atomic_read(&obj->bind_count)) stats->unbound += obj->base.size; - if (obj->base.name || obj->base.dma_buf) - stats->shared += obj->base.size; - list_for_each_entry(vma, &obj->vma.list, obj_link) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - if (i915_vma_is_ggtt(vma)) { - stats->global += vma->node.size; - } else { - if (vma->vm != stats->vm) + spin_lock(&obj->vma.lock); + if (!stats->vm) { + for_each_ggtt_vma(vma, obj) { + if (!drm_mm_node_allocated(&vma->node)) continue; - } - if (i915_vma_is_active(vma)) - stats->active += vma->node.size; - else - stats->inactive += vma->node.size; + if (i915_vma_is_active(vma)) + stats->active += vma->node.size; + else + stats->inactive += vma->node.size; - if (i915_vma_is_closed(vma)) - stats->closed += vma->node.size; + if (i915_vma_is_closed(vma)) + stats->closed += vma->node.size; + } + } else { + struct rb_node *p = obj->vma.tree.rb_node; + + while (p) { + long cmp; + + vma = rb_entry(p, typeof(*vma), obj_node); + cmp = i915_vma_compare(vma, stats->vm, NULL); + if (cmp == 0) { + if (drm_mm_node_allocated(&vma->node)) { + if (i915_vma_is_active(vma)) + stats->active += vma->node.size; + else + stats->inactive += vma->node.size; + + if (i915_vma_is_closed(vma)) + stats->closed += vma->node.size; + } + break; + } + if (cmp < 0) + p = p->rb_right; + else + p = p->rb_left; + } } + spin_unlock(&obj->vma.lock); + i915_gem_object_put(obj); return 0; } #define print_file_stats(m, name, stats) do { \ if (stats.count) \ - seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \ + seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu unbound, %llu closed)\n", \ name, \ stats.count, \ stats.total, \ stats.active, \ stats.inactive, \ - stats.global, \ - stats.shared, \ stats.unbound, \ stats.closed); \ } while (0) -static void print_batch_pool_stats(struct seq_file *m, - struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - struct file_stats stats = {}; - enum intel_engine_id id; - int j; - - for_each_engine(engine, dev_priv, id) { - for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) { - list_for_each_entry(obj, - &engine->batch_pool.cache_list[j], - batch_pool_link) - per_file_stats(0, obj, &stats); - } - } - - print_file_stats(m, "[k]batch pool", stats); -} - static void 
print_context_stats(struct seq_file *m, struct drm_i915_private *i915) { struct file_stats kstats = {}; - struct i915_gem_context *ctx; + struct i915_gem_context *ctx, *cn; - list_for_each_entry(ctx, &i915->contexts.list, link) { + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { struct i915_gem_engines_iter it; struct intel_context *ce; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - if (ce->state) - per_file_stats(0, ce->state->obj, &kstats); - if (ce->ring) + if (intel_context_pin_if_active(ce)) { + rcu_read_lock(); + if (ce->state) + per_file_stats(0, + ce->state->obj, &kstats); per_file_stats(0, ce->ring->vma->obj, &kstats); + rcu_read_unlock(); + intel_context_unpin(ce); + } } i915_gem_context_unlock_engines(ctx); if (!IS_ERR_OR_NULL(ctx->file_priv)) { - struct file_stats stats = { .vm = ctx->vm, }; + struct file_stats stats = { + .vm = rcu_access_pointer(ctx->vm), + }; struct drm_file *file = ctx->file_priv->file; struct task_struct *task; char name[80]; - spin_lock(&file->table_lock); + rcu_read_lock(); idr_for_each(&file->object_idr, per_file_stats, &stats); - spin_unlock(&file->table_lock); + rcu_read_unlock(); rcu_read_lock(); task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID); @@ -359,7 +353,12 @@ static void print_context_stats(struct seq_file *m, print_file_stats(m, name, stats); } + + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); } + spin_unlock(&i915->gem.contexts.lock); print_file_stats(m, "[k]contexts", kstats); } @@ -367,66 +366,19 @@ static void print_context_stats(struct seq_file *m, static int i915_gem_object_info(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); - int ret; + struct intel_memory_region *mr; + enum intel_region_id id; - seq_printf(m, "%u shrinkable objects, %llu bytes\n", + seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n", i915->mm.shrink_count, + atomic_read(&i915->mm.free_count), i915->mm.shrink_memory); - + for_each_memory_region(mr, i915, id) + seq_printf(m, "%s: total:%pa, available:%pa bytes\n", + mr->name, &mr->total, &mr->avail); seq_putc(m, '\n'); - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - - print_batch_pool_stats(m, i915); print_context_stats(m, i915); - mutex_unlock(&i915->drm.struct_mutex); - - return 0; -} - -static int i915_gem_batch_pool_info(struct seq_file *m, void *data) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int total = 0; - int ret, j; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - for_each_engine(engine, dev_priv, id) { - for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) { - int count; - - count = 0; - list_for_each_entry(obj, - &engine->batch_pool.cache_list[j], - batch_pool_link) - count++; - seq_printf(m, "%s cache[%d]: %d objects\n", - engine->name, j, count); - - list_for_each_entry(obj, - &engine->batch_pool.cache_list[j], - batch_pool_link) { - seq_puts(m, " "); - describe_obj(m, obj); - seq_putc(m, '\n'); - } - - total += count; - } - } - - seq_printf(m, "total: %d\n", total); - - mutex_unlock(&dev->struct_mutex); return 0; } @@ -434,7 +386,7 @@ static int 
i915_gem_batch_pool_info(struct seq_file *m, void *data) static void gen8_display_interrupt_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - int pipe; + enum pipe pipe; for_each_pipe(dev_priv, pipe) { enum intel_display_power_domain power_domain; @@ -487,7 +439,6 @@ static int i915_interrupt_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_engine_cs *engine; - enum intel_engine_id id; intel_wakeref_t wakeref; int i, pipe; @@ -586,6 +537,8 @@ static int i915_interrupt_info(struct seq_file *m, void *data) gen8_display_interrupt_info(m); } else if (IS_VALLEYVIEW(dev_priv)) { + intel_wakeref_t pref; + seq_printf(m, "Display IER:\t%08x\n", I915_READ(VLV_IER)); seq_printf(m, "Display IIR:\t%08x\n", @@ -596,7 +549,6 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IMR)); for_each_pipe(dev_priv, pipe) { enum intel_display_power_domain power_domain; - intel_wakeref_t pref; power_domain = POWER_DOMAIN_PIPE(pipe); pref = intel_display_power_get_if_enabled(dev_priv, @@ -630,12 +582,14 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "PM IMR:\t\t%08x\n", I915_READ(GEN6_PMIMR)); + pref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); seq_printf(m, "Port hotplug:\t%08x\n", I915_READ(PORT_HOTPLUG_EN)); seq_printf(m, "DPFLIPSTAT:\t%08x\n", I915_READ(VLV_DPFLIPSTAT)); seq_printf(m, "DPINVGTT:\t%08x\n", I915_READ(DPINVGTT)); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, pref); } else if (!HAS_PCH_SPLIT(dev_priv)) { seq_printf(m, "Interrupt enable: %08x\n", @@ -690,7 +644,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(GEN11_GUNIT_CSME_INTR_MASK)); } else if (INTEL_GEN(dev_priv) >= 6) { - for_each_engine(engine, dev_priv, id) { + for_each_uabi_engine(engine, dev_priv) { seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", engine->name, ENGINE_READ(engine, RING_IMR)); @@ -711,10 +665,11 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) rcu_read_lock(); for (i = 0; i < i915->ggtt.num_fences; i++) { - struct i915_vma *vma = i915->ggtt.fence_regs[i].vma; + struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i]; + struct i915_vma *vma = reg->vma; seq_printf(m, "Fence %d, pin count = %d, object = ", - i, i915->ggtt.fence_regs[i].pin_count); + i, atomic_read(&reg->pin_count)); if (!vma) seq_puts(m, "unused"); else @@ -730,7 +685,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) static ssize_t gpu_state_read(struct file *file, char __user *ubuf, size_t count, loff_t *pos) { - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; ssize_t ret; void *buf; @@ -743,7 +698,7 @@ static ssize_t gpu_state_read(struct file *file, char __user *ubuf, if (!buf) return -ENOMEM; - ret = i915_gpu_state_copy_to_buffer(error, buf, *pos, count); + ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count); if (ret <= 0) goto out; @@ -759,19 +714,19 @@ out: static int gpu_state_release(struct inode *inode, struct file *file) { - i915_gpu_state_put(file->private_data); + i915_gpu_coredump_put(file->private_data); return 0; } static int i915_gpu_info_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; - struct i915_gpu_state *gpu; + struct i915_gpu_coredump *gpu; intel_wakeref_t wakeref; gpu = NULL; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - gpu = i915_capture_gpu_state(i915); + gpu = i915_gpu_coredump(i915); if
(IS_ERR(gpu)) return PTR_ERR(gpu); @@ -793,7 +748,7 @@ i915_error_state_write(struct file *filp, size_t cnt, loff_t *ppos) { - struct i915_gpu_state *error = filp->private_data; + struct i915_gpu_coredump *error = filp->private_data; if (!error) return 0; @@ -806,7 +761,7 @@ i915_error_state_write(struct file *filp, static int i915_error_state_open(struct inode *inode, struct file *file) { - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; error = i915_first_error_state(inode->i_private); if (IS_ERR(error)) @@ -830,7 +785,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_uncore *uncore = &dev_priv->uncore; - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; intel_wakeref_t wakeref; int ret = 0; @@ -866,23 +821,23 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); seq_printf(m, "actual GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff)); + intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); seq_printf(m, "current GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->cur_freq)); + intel_gpu_freq(rps, rps->cur_freq)); seq_printf(m, "max GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, "min GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->min_freq)); + intel_gpu_freq(rps, rps->min_freq)); seq_printf(m, "idle GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->idle_freq)); + intel_gpu_freq(rps, rps->idle_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, rps->efficient_freq)); + intel_gpu_freq(rps, rps->efficient_freq)); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -916,7 +871,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) else reqf >>= 25; } - reqf = intel_gpu_freq(dev_priv, reqf); + reqf = intel_gpu_freq(rps, reqf); rpmodectl = I915_READ(GEN6_RP_CONTROL); rpinclimit = I915_READ(GEN6_RP_UP_THRESHOLD); @@ -929,8 +884,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - cagf = intel_gpu_freq(dev_priv, - intel_get_cagf(dev_priv, rpstat)); + cagf = intel_rps_read_actual_frequency(rps); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); @@ -1007,37 +961,37 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", - intel_gpu_freq(dev_priv, max_freq)); + intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", - intel_gpu_freq(dev_priv, max_freq)); + intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? 
GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", - intel_gpu_freq(dev_priv, max_freq)); + intel_gpu_freq(rps, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, "Current freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->cur_freq)); + intel_gpu_freq(rps, rps->cur_freq)); seq_printf(m, "Actual freq: %d MHz\n", cagf); seq_printf(m, "Idle freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->idle_freq)); + intel_gpu_freq(rps, rps->idle_freq)); seq_printf(m, "Min freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->min_freq)); + intel_gpu_freq(rps, rps->min_freq)); seq_printf(m, "Boost freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->boost_freq)); + intel_gpu_freq(rps, rps->boost_freq)); seq_printf(m, "Max freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, rps->efficient_freq)); + intel_gpu_freq(rps, rps->efficient_freq)); } else { seq_puts(m, "no P-state info available\n"); } @@ -1050,115 +1004,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) return ret; } -static void i915_instdone_info(struct drm_i915_private *dev_priv, - struct seq_file *m, - struct intel_instdone *instdone) -{ - int slice; - int subslice; - - seq_printf(m, "\t\tINSTDONE: 0x%08x\n", - instdone->instdone); - - if (INTEL_GEN(dev_priv) <= 3) - return; - - seq_printf(m, "\t\tSC_INSTDONE: 0x%08x\n", - instdone->slice_common); - - if (INTEL_GEN(dev_priv) <= 6) - return; - - for_each_instdone_slice_subslice(dev_priv, slice, subslice) - seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, instdone->sampler[slice][subslice]); - - for_each_instdone_slice_subslice(dev_priv, slice, subslice) - seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, instdone->row[slice][subslice]); -} - -static int i915_hangcheck_info(struct seq_file *m, void *unused) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_engine_cs *engine; - u64 acthd[I915_NUM_ENGINES]; - struct intel_instdone instdone; - intel_wakeref_t wakeref; - enum intel_engine_id id; - - seq_printf(m, "Reset flags: %lx\n", dev_priv->gpu_error.flags); - if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) - seq_puts(m, "\tWedged\n"); - if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) - seq_puts(m, "\tDevice (global) reset in progress\n"); - - if (!i915_modparams.enable_hangcheck) { - seq_puts(m, "Hangcheck disabled\n"); - return 0; - } - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - for_each_engine(engine, dev_priv, id) - acthd[id] = intel_engine_get_active_head(engine); - - intel_engine_get_instdone(dev_priv->engine[RCS0], &instdone); - } - - if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) - seq_printf(m, "Hangcheck active, timer fires in %dms\n", - jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires - - jiffies)); - else if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) - seq_puts(m, "Hangcheck active, work pending\n"); - else - seq_puts(m, "Hangcheck inactive\n"); - - seq_printf(m, "GT active? 
%s\n", yesno(dev_priv->gt.awake)); - - for_each_engine(engine, dev_priv, id) { - seq_printf(m, "%s: %d ms ago\n", - engine->name, - jiffies_to_msecs(jiffies - - engine->hangcheck.action_timestamp)); - - seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", - (long long)engine->hangcheck.acthd, - (long long)acthd[id]); - - if (engine->id == RCS0) { - seq_puts(m, "\tinstdone read =\n"); - - i915_instdone_info(dev_priv, m, &instdone); - - seq_puts(m, "\tinstdone accu =\n"); - - i915_instdone_info(dev_priv, m, - &engine->hangcheck.instdone); - } - } - - return 0; -} - -static int i915_reset_info(struct seq_file *m, void *unused) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct i915_gpu_error *error = &dev_priv->gpu_error; - struct intel_engine_cs *engine; - enum intel_engine_id id; - - seq_printf(m, "full gpu reset = %u\n", i915_reset_count(error)); - - for_each_engine(engine, dev_priv, id) { - seq_printf(m, "%s = %u\n", engine->name, - i915_reset_engine_count(error, engine)); - } - - return 0; -} - -static int ironlake_drpc_info(struct seq_file *m) +static int ilk_drpc_info(struct seq_file *m) { struct drm_i915_private *i915 = node_to_i915(m->private); struct intel_uncore *uncore = &i915->uncore; @@ -1224,7 +1070,7 @@ static int i915_forcewake_domains(struct seq_file *m, void *data) unsigned int tmp; seq_printf(m, "user.bypass_count = %u\n", - uncore->user_forcewake.count); + uncore->user_forcewake_count); for_each_fw_domain(fw_domain, uncore, tmp) seq_printf(m, "%s.wake_count = %u\n", @@ -1238,11 +1084,13 @@ static void print_rc6_res(struct seq_file *m, const char *title, const i915_reg_t reg) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_i915_private *i915 = node_to_i915(m->private); + intel_wakeref_t wakeref; - seq_printf(m, "%s %u (%llu us)\n", - title, I915_READ(reg), - intel_rc6_residency_us(dev_priv, reg)); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + seq_printf(m, "%s %u (%llu us)\n", title, + intel_uncore_read(&i915->uncore, reg), + intel_rc6_residency_us(&i915->gt.rc6, reg)); } static int vlv_drpc_info(struct seq_file *m) @@ -1364,7 +1212,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused) else if (INTEL_GEN(dev_priv) >= 6) err = gen6_drpc_info(m); else - err = ironlake_drpc_info(m); + err = ilk_drpc_info(m); } return err; @@ -1517,34 +1365,10 @@ static int i915_sr_status(struct seq_file *m, void *unused) return 0; } -static int i915_emon_status(struct seq_file *m, void *unused) -{ - struct drm_i915_private *i915 = node_to_i915(m->private); - intel_wakeref_t wakeref; - - if (!IS_GEN(i915, 5)) - return -ENODEV; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - unsigned long temp, chipset, gfx; - - temp = i915_mch_val(i915); - chipset = i915_chipset_val(i915); - gfx = i915_gfx_val(i915); - - seq_printf(m, "GMCH temp: %ld\n", temp); - seq_printf(m, "Chipset power: %ld\n", chipset); - seq_printf(m, "GFX power: %ld\n", gfx); - seq_printf(m, "Total power: %ld\n", chipset + gfx); - } - - return 0; -} - static int i915_ring_freq_table(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; unsigned int max_gpu_freq, min_gpu_freq; intel_wakeref_t wakeref; int gpu_freq, ia_freq; @@ -1569,10 +1393,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) GEN6_PCODE_READ_MIN_FREQ_TABLE, &ia_freq, NULL); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", - 
intel_gpu_freq(dev_priv, (gpu_freq * - (IS_GEN9_BC(dev_priv) || - INTEL_GEN(dev_priv) >= 10 ? - GEN9_FREQ_SCALER : 1))), + intel_gpu_freq(rps, + (gpu_freq * + (IS_GEN9_BC(dev_priv) || + INTEL_GEN(dev_priv) >= 10 ? + GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } @@ -1583,21 +1408,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) static int i915_opregion(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct intel_opregion *opregion = &dev_priv->opregion; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out; + struct intel_opregion *opregion = &node_to_i915(m->private)->opregion; if (opregion->header) seq_write(m, opregion->header, OPREGION_SIZE); - mutex_unlock(&dev->struct_mutex); - -out: return 0; } @@ -1617,11 +1432,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) struct drm_device *dev = &dev_priv->drm; struct intel_framebuffer *fbdev_fb = NULL; struct drm_framebuffer *drm_fb; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; #ifdef CONFIG_DRM_FBDEV_EMULATION if (dev_priv->fbdev && dev_priv->fbdev->helper.fb) { @@ -1656,7 +1466,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) seq_putc(m, '\n'); } mutex_unlock(&dev->mode_config.fb_lock); - mutex_unlock(&dev->struct_mutex); return 0; } @@ -1669,23 +1478,20 @@ static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) static int i915_context_status(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct i915_gem_context *ctx; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; + struct drm_i915_private *i915 = node_to_i915(m->private); + struct i915_gem_context *ctx, *cn; - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { struct i915_gem_engines_iter it; struct intel_context *ce; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + seq_puts(m, "HW context "); - if (!list_empty(&ctx->hw_id_link)) - seq_printf(m, "%x [pin %u]", ctx->hw_id, - atomic_read(&ctx->hw_id_pin_count)); if (ctx->pid) { struct task_struct *task; @@ -1706,19 +1512,24 @@ static int i915_context_status(struct seq_file *m, void *unused) for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - seq_printf(m, "%s: ", ce->engine->name); - if (ce->state) - describe_obj(m, ce->state->obj); - if (ce->ring) + if (intel_context_pin_if_active(ce)) { + seq_printf(m, "%s: ", ce->engine->name); + if (ce->state) + describe_obj(m, ce->state->obj); describe_ctx_ring(m, ce->ring); - seq_putc(m, '\n'); + seq_putc(m, '\n'); + intel_context_unpin(ce); + } } i915_gem_context_unlock_engines(ctx); seq_putc(m, '\n'); - } - mutex_unlock(&dev->struct_mutex); + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); + } + spin_unlock(&i915->gem.contexts.lock); return 0; } @@ -1756,9 +1567,9 @@ static int i915_swizzle_info(struct seq_file *m, void *data) wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); seq_printf(m, "bit6 swizzle for X-tiling = %s\n", - swizzle_string(dev_priv->mm.bit_6_swizzle_x)); + 
swizzle_string(dev_priv->ggtt.bit_6_swizzle_x)); seq_printf(m, "bit6 swizzle for Y-tiling = %s\n", - swizzle_string(dev_priv->mm.bit_6_swizzle_y)); + swizzle_string(dev_priv->ggtt.bit_6_swizzle_y)); if (IS_GEN_RANGE(dev_priv, 3, 4)) { seq_printf(m, "DDC = 0x%08x\n", @@ -1813,22 +1624,7 @@ static const char *rps_power_to_str(unsigned int power) static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 act_freq = rps->cur_freq; - intel_wakeref_t wakeref; - - with_intel_runtime_pm_if_in_use(&dev_priv->runtime_pm, wakeref) { - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - vlv_punit_get(dev_priv); - act_freq = vlv_punit_read(dev_priv, - PUNIT_REG_GPU_FREQ_STS); - vlv_punit_put(dev_priv); - act_freq = (act_freq >> 8) & 0xff; - } else { - act_freq = intel_get_cagf(dev_priv, - I915_READ(GEN6_RPSTAT1)); - } - } + struct intel_rps *rps = &dev_priv->gt.rps; seq_printf(m, "RPS enabled? %d\n", rps->enabled); seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake)); @@ -1836,17 +1632,17 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); seq_printf(m, "Frequency requested %d, actual %d\n", - intel_gpu_freq(dev_priv, rps->cur_freq), - intel_gpu_freq(dev_priv, act_freq)); + intel_gpu_freq(rps, rps->cur_freq), + intel_rps_read_actual_frequency(rps)); seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(dev_priv, rps->min_freq), - intel_gpu_freq(dev_priv, rps->min_freq_softlimit), - intel_gpu_freq(dev_priv, rps->max_freq_softlimit), - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->min_freq), + intel_gpu_freq(rps, rps->min_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", - intel_gpu_freq(dev_priv, rps->idle_freq), - intel_gpu_freq(dev_priv, rps->efficient_freq), - intel_gpu_freq(dev_priv, rps->boost_freq)); + intel_gpu_freq(rps, rps->idle_freq), + intel_gpu_freq(rps, rps->efficient_freq), + intel_gpu_freq(rps, rps->boost_freq)); seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); @@ -1894,11 +1690,11 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) intel_wakeref_t wakeref; struct drm_printer p; - if (!HAS_HUC(dev_priv)) + if (!HAS_GT_UC(dev_priv)) return -ENODEV; p = drm_seq_file_printer(m); - intel_uc_fw_dump(&dev_priv->huc.fw, &p); + intel_uc_fw_dump(&dev_priv->gt.uc.huc.fw, &p); with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); @@ -1912,11 +1708,11 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) intel_wakeref_t wakeref; struct drm_printer p; - if (!HAS_GUC(dev_priv)) + if (!HAS_GT_UC(dev_priv)) return -ENODEV; p = drm_seq_file_printer(m); - intel_uc_fw_dump(&dev_priv->guc.fw, &p); + intel_uc_fw_dump(&dev_priv->gt.uc.guc.fw, &p); with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { u32 tmp = I915_READ(GUC_STATUS); @@ -1959,11 +1755,11 @@ stringify_guc_log_type(enum guc_log_buffer_type type) static void i915_guc_log_info(struct seq_file *m, struct drm_i915_private *dev_priv) { - struct intel_guc_log *log = &dev_priv->guc.log; + struct intel_guc_log *log = &dev_priv->gt.uc.guc.log; enum guc_log_buffer_type type; - if (!intel_guc_log_relay_enabled(log)) { - seq_puts(m, "GuC 
log relay disabled\n"); + if (!intel_guc_log_relay_created(log)) { + seq_puts(m, "GuC log relay not created\n"); return; } @@ -1980,55 +1776,15 @@ static void i915_guc_log_info(struct seq_file *m, } } -static void i915_guc_client_info(struct seq_file *m, - struct drm_i915_private *dev_priv, - struct intel_guc_client *client) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u64 tot = 0; - - seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n", - client->priority, client->stage_id, client->proc_desc_offset); - seq_printf(m, "\tDoorbell id %d, offset: 0x%lx\n", - client->doorbell_id, client->doorbell_offset); - - for_each_engine(engine, dev_priv, id) { - u64 submissions = client->submissions[id]; - tot += submissions; - seq_printf(m, "\tSubmissions: %llu %s\n", - submissions, engine->name); - } - seq_printf(m, "\tTotal: %llu\n", tot); -} - static int i915_guc_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - const struct intel_guc *guc = &dev_priv->guc; if (!USES_GUC(dev_priv)) return -ENODEV; i915_guc_log_info(m, dev_priv); - if (!USES_GUC_SUBMISSION(dev_priv)) - return 0; - - GEM_BUG_ON(!guc->execbuf_client); - - seq_printf(m, "\nDoorbell map:\n"); - seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); - seq_printf(m, "Doorbell next cacheline: 0x%x\n", guc->db_cacheline); - - seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client); - i915_guc_client_info(m, dev_priv, guc->execbuf_client); - if (guc->preempt_client) { - seq_printf(m, "\nGuC preempt client @ %p:\n", - guc->preempt_client); - i915_guc_client_info(m, dev_priv, guc->preempt_client); - } - /* Add more as required ... */ return 0; @@ -2037,10 +1793,8 @@ static int i915_guc_info(struct seq_file *m, void *data) static int i915_guc_stage_pool(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - const struct intel_guc *guc = &dev_priv->guc; + const struct intel_guc *guc = &dev_priv->gt.uc.guc; struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr; - struct intel_guc_client *client = guc->execbuf_client; - intel_engine_mask_t tmp; int index; if (!USES_GUC_SUBMISSION(dev_priv)) @@ -2069,7 +1823,7 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data) desc->wq_addr, desc->wq_size); seq_putc(m, '\n'); - for_each_engine_masked(engine, dev_priv, client->engines, tmp) { + for_each_uabi_engine(engine, dev_priv) { u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; @@ -2097,13 +1851,13 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) u32 *log; int i = 0; - if (!HAS_GUC(dev_priv)) + if (!HAS_GT_UC(dev_priv)) return -ENODEV; if (dump_load_err) - obj = dev_priv->guc.load_err_log; - else if (dev_priv->guc.log.vma) - obj = dev_priv->guc.log.vma->obj; + obj = dev_priv->gt.uc.load_err_log; + else if (dev_priv->gt.uc.guc.log.vma) + obj = dev_priv->gt.uc.guc.log.vma->obj; if (!obj) return 0; @@ -2134,7 +1888,7 @@ static int i915_guc_log_level_get(void *data, u64 *val) if (!USES_GUC(dev_priv)) return -ENODEV; - *val = intel_guc_log_get_level(&dev_priv->guc.log); + *val = intel_guc_log_get_level(&dev_priv->gt.uc.guc.log); return 0; } @@ -2146,7 +1900,7 @@ static int i915_guc_log_level_set(void *data, u64 val) if (!USES_GUC(dev_priv)) return -ENODEV; - return intel_guc_log_set_level(&dev_priv->guc.log, val); + return intel_guc_log_set_level(&dev_priv->gt.uc.guc.log, val); } DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_level_fops, @@ 
-2155,14 +1909,16 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_level_fops, static int i915_guc_log_relay_open(struct inode *inode, struct file *file) { - struct drm_i915_private *dev_priv = inode->i_private; + struct drm_i915_private *i915 = inode->i_private; + struct intel_guc *guc = &i915->gt.uc.guc; + struct intel_guc_log *log = &guc->log; - if (!USES_GUC(dev_priv)) + if (!intel_guc_is_running(guc)) return -ENODEV; - file->private_data = &dev_priv->guc.log; + file->private_data = log; - return intel_guc_log_relay_open(&dev_priv->guc.log); + return intel_guc_log_relay_open(log); } static ssize_t @@ -2172,18 +1928,31 @@ i915_guc_log_relay_write(struct file *filp, loff_t *ppos) { struct intel_guc_log *log = filp->private_data; + int val; + int ret; - intel_guc_log_relay_flush(log); + ret = kstrtoint_from_user(ubuf, cnt, 0, &val); + if (ret < 0) + return ret; - return cnt; + /* + * Enable and start the guc log relay on value of 1. + * Flush log relay for any other value. + */ + if (val == 1) + ret = intel_guc_log_relay_start(log); + else + intel_guc_log_relay_flush(log); + + return ret ?: cnt; } static int i915_guc_log_relay_release(struct inode *inode, struct file *file) { - struct drm_i915_private *dev_priv = inode->i_private; - - intel_guc_log_relay_close(&dev_priv->guc.log); + struct drm_i915_private *i915 = inode->i_private; + struct intel_guc *guc = &i915->gt.uc.guc; + intel_guc_log_relay_close(&guc->log); return 0; } @@ -2210,7 +1979,7 @@ static int i915_psr_sink_status_show(struct seq_file *m, void *data) struct drm_connector *connector = m->private; struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_dp *intel_dp = - enc_to_intel_dp(&intel_attached_encoder(connector)->base); + enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector))); int ret; if (!CAN_PSR(dev_priv)) { @@ -2258,7 +2027,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) "BUF_ON", "TG_ON" }; - val = I915_READ(EDP_PSR2_STATUS); + val = I915_READ(EDP_PSR2_STATUS(dev_priv->psr.transcoder)); status_val = (val & EDP_PSR2_STATUS_STATE_MASK) >> EDP_PSR2_STATUS_STATE_SHIFT; if (status_val < ARRAY_SIZE(live_status)) @@ -2274,7 +2043,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) "SRDOFFACK", "SRDENT_ON", }; - val = I915_READ(EDP_PSR_STATUS); + val = I915_READ(EDP_PSR_STATUS(dev_priv->psr.transcoder)); status_val = (val & EDP_PSR_STATUS_STATE_MASK) >> EDP_PSR_STATUS_STATE_SHIFT; if (status_val < ARRAY_SIZE(live_status)) @@ -2313,14 +2082,18 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) status = "disabled"; seq_printf(m, "PSR mode: %s\n", status); - if (!psr->enabled) + if (!psr->enabled) { + seq_printf(m, "PSR sink not reliable: %s\n", + yesno(psr->sink_not_reliable)); + goto unlock; + } if (psr->psr2_enabled) { - val = I915_READ(EDP_PSR2_CTL); + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); enabled = val & EDP_PSR2_ENABLE; } else { - val = I915_READ(EDP_PSR_CTL); + val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)); enabled = val & EDP_PSR_ENABLE; } seq_printf(m, "Source PSR ctl: %s [0x%08x]\n", @@ -2333,7 +2106,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) * SKL+ Perf counter is reset to 0 everytime DC state is entered */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - val = I915_READ(EDP_PSR_PERF_CNT) & EDP_PSR_PERF_CNT_MASK; + val = I915_READ(EDP_PSR_PERF_CNT(dev_priv->psr.transcoder)); + val &= EDP_PSR_PERF_CNT_MASK; seq_printf(m, "Performance counter: %u\n", val); } @@ 
-2351,8 +2125,11 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) * Reading all 3 registers before hand to minimize crossing a * frame boundary between register reads */ - for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame += 3) - su_frames_val[frame / 3] = I915_READ(PSR2_SU_STATUS(frame)); + for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame += 3) { + val = I915_READ(PSR2_SU_STATUS(dev_priv->psr.transcoder, + frame)); + su_frames_val[frame / 3] = val; + } seq_puts(m, "Frame:\tPSR2 SU blocks:\n"); @@ -2499,6 +2276,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused) struct drm_i915_private *dev_priv = node_to_i915(m->private); intel_wakeref_t wakeref; struct intel_csr *csr; + i915_reg_t dc5_reg, dc6_reg = {}; if (!HAS_CSR(dev_priv)) return -ENODEV; @@ -2516,15 +2294,26 @@ static int i915_dmc_info(struct seq_file *m, void *unused) seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version), CSR_VERSION_MINOR(csr->version)); - if (WARN_ON(INTEL_GEN(dev_priv) > 11)) - goto out; + if (INTEL_GEN(dev_priv) >= 12) { + dc5_reg = TGL_DMC_DEBUG_DC5_COUNT; + dc6_reg = TGL_DMC_DEBUG_DC6_COUNT; + /* + * NOTE: DMC_DEBUG3 is a general purpose reg. + * According to B.Specs:49196 DMC f/w reuses DC5/6 counter + * reg for DC3CO debugging and validation, + * but TGL DMC f/w is using DMC_DEBUG3 reg for DC3CO counter. + */ + seq_printf(m, "DC3CO count: %d\n", I915_READ(DMC_DEBUG3)); + } else { + dc5_reg = IS_BROXTON(dev_priv) ? BXT_CSR_DC3_DC5_COUNT : + SKL_CSR_DC3_DC5_COUNT; + if (!IS_GEN9_LP(dev_priv)) + dc6_reg = SKL_CSR_DC5_DC6_COUNT; + } - seq_printf(m, "DC3 -> DC5 count: %d\n", - I915_READ(IS_BROXTON(dev_priv) ? BXT_CSR_DC3_DC5_COUNT : - SKL_CSR_DC3_DC5_COUNT)); - if (!IS_GEN9_LP(dev_priv)) - seq_printf(m, "DC5 -> DC6 count: %d\n", - I915_READ(SKL_CSR_DC5_DC6_COUNT)); + seq_printf(m, "DC3 -> DC5 count: %d\n", I915_READ(dc5_reg)); + if (dc6_reg.reg) + seq_printf(m, "DC5 -> DC6 count: %d\n", I915_READ(dc6_reg)); out: seq_printf(m, "program base: 0x%08x\n", I915_READ(CSR_PROGRAM(0))); @@ -2537,7 +2326,7 @@ out: } static void intel_seq_print_mode(struct seq_file *m, int tabs, - struct drm_display_mode *mode) + const struct drm_display_mode *mode) { int i; @@ -2548,66 +2337,61 @@ static void intel_seq_print_mode(struct seq_file *m, int tabs, } static void intel_encoder_info(struct seq_file *m, - struct intel_crtc *intel_crtc, - struct intel_encoder *intel_encoder) + struct intel_crtc *crtc, + struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct drm_crtc *crtc = &intel_crtc->base; - struct intel_connector *intel_connector; - struct drm_encoder *encoder; - - encoder = &intel_encoder->base; - seq_printf(m, "\tencoder %d: type: %s, connectors:\n", - encoder->base.id, encoder->name); - for_each_connector_on_encoder(dev, encoder, intel_connector) { - struct drm_connector *connector = &intel_connector->base; - seq_printf(m, "\t\tconnector %d: type: %s, status: %s", - connector->base.id, - connector->name, - drm_get_connector_status_name(connector->status)); - if (connector->status == connector_status_connected) { - struct drm_display_mode *mode = &crtc->mode; - seq_printf(m, ", mode:\n"); - intel_seq_print_mode(m, 2, mode); - } else { - seq_putc(m, '\n'); - } + struct drm_connector_list_iter conn_iter; + struct drm_connector *connector; + + seq_printf(m, "\t[ENCODER:%d:%s]: connectors:\n", + encoder->base.base.id, encoder->base.name); + + drm_connector_list_iter_begin(&dev_priv->drm, 
&conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + const struct drm_connector_state *conn_state = + connector->state; + + if (conn_state->best_encoder != &encoder->base) + continue; + + seq_printf(m, "\t\t[CONNECTOR:%d:%s]\n", + connector->base.id, connector->name); } + drm_connector_list_iter_end(&conn_iter); } -static void intel_crtc_info(struct seq_file *m, struct intel_crtc *intel_crtc) +static void intel_panel_info(struct seq_file *m, struct intel_panel *panel) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct drm_crtc *crtc = &intel_crtc->base; - struct intel_encoder *intel_encoder; - struct drm_plane_state *plane_state = crtc->primary->state; - struct drm_framebuffer *fb = plane_state->fb; + const struct drm_display_mode *mode = panel->fixed_mode; - if (fb) - seq_printf(m, "\tfb: %d, pos: %dx%d, size: %dx%d\n", - fb->base.id, plane_state->src_x >> 16, - plane_state->src_y >> 16, fb->width, fb->height); - else - seq_puts(m, "\tprimary plane disabled\n"); - for_each_encoder_on_crtc(dev, crtc, intel_encoder) - intel_encoder_info(m, intel_crtc, intel_encoder); + seq_printf(m, "\tfixed mode: " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode)); } -static void intel_panel_info(struct seq_file *m, struct intel_panel *panel) +static void intel_hdcp_info(struct seq_file *m, + struct intel_connector *intel_connector) { - struct drm_display_mode *mode = panel->fixed_mode; + bool hdcp_cap, hdcp2_cap; - seq_printf(m, "\tfixed mode:\n"); - intel_seq_print_mode(m, 2, mode); + hdcp_cap = intel_hdcp_capable(intel_connector); + hdcp2_cap = intel_hdcp2_capable(intel_connector); + + if (hdcp_cap) + seq_puts(m, "HDCP1.4 "); + if (hdcp2_cap) + seq_puts(m, "HDCP2.2 "); + + if (!hdcp_cap && !hdcp2_cap) + seq_puts(m, "None"); + + seq_puts(m, "\n"); } static void intel_dp_info(struct seq_file *m, struct intel_connector *intel_connector) { struct intel_encoder *intel_encoder = intel_connector->encoder; - struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder); seq_printf(m, "\tDPCD rev: %x\n", intel_dp->dpcd[DP_DPCD_REV]); seq_printf(m, "\taudio support: %s\n", yesno(intel_dp->has_audio)); @@ -2616,6 +2400,10 @@ static void intel_dp_info(struct seq_file *m, drm_dp_downstream_debug(m, intel_dp->dpcd, intel_dp->downstream_ports, &intel_dp->aux); + if (intel_connector->hdcp.shim) { + seq_puts(m, "\tHDCP version: "); + intel_hdcp_info(m, intel_connector); + } } static void intel_dp_mst_info(struct seq_file *m, @@ -2623,7 +2411,7 @@ static void intel_dp_mst_info(struct seq_file *m, { struct intel_encoder *intel_encoder = intel_connector->encoder; struct intel_dp_mst_encoder *intel_mst = - enc_to_mst(&intel_encoder->base); + enc_to_mst(intel_encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; bool has_audio = drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, @@ -2636,9 +2424,13 @@ static void intel_hdmi_info(struct seq_file *m, struct intel_connector *intel_connector) { struct intel_encoder *intel_encoder = intel_connector->encoder; - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&intel_encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(intel_encoder); seq_printf(m, "\taudio support: %s\n", yesno(intel_hdmi->has_audio)); + if (intel_connector->hdcp.shim) { + seq_puts(m, "\tHDCP version: "); + intel_hdcp_info(m, intel_connector); + } } static void intel_lvds_info(struct seq_file *m, @@ 
-2651,10 +2443,12 @@ static void intel_connector_info(struct seq_file *m, struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); - struct intel_encoder *intel_encoder = intel_connector->encoder; - struct drm_display_mode *mode; + const struct drm_connector_state *conn_state = connector->state; + struct intel_encoder *encoder = + to_intel_encoder(conn_state->best_encoder); + const struct drm_display_mode *mode; - seq_printf(m, "connector %d: type %s, status: %s\n", + seq_printf(m, "[CONNECTOR:%d:%s]: status: %s\n", connector->base.id, connector->name, drm_get_connector_status_name(connector->status)); @@ -2668,24 +2462,24 @@ static void intel_connector_info(struct seq_file *m, drm_get_subpixel_order_name(connector->display_info.subpixel_order)); seq_printf(m, "\tCEA rev: %d\n", connector->display_info.cea_rev); - if (!intel_encoder) + if (!encoder) return; switch (connector->connector_type) { case DRM_MODE_CONNECTOR_DisplayPort: case DRM_MODE_CONNECTOR_eDP: - if (intel_encoder->type == INTEL_OUTPUT_DP_MST) + if (encoder->type == INTEL_OUTPUT_DP_MST) intel_dp_mst_info(m, intel_connector); else intel_dp_info(m, intel_connector); break; case DRM_MODE_CONNECTOR_LVDS: - if (intel_encoder->type == INTEL_OUTPUT_LVDS) + if (encoder->type == INTEL_OUTPUT_LVDS) intel_lvds_info(m, intel_connector); break; case DRM_MODE_CONNECTOR_HDMIA: - if (intel_encoder->type == INTEL_OUTPUT_HDMI || - intel_encoder->type == INTEL_OUTPUT_DDI) + if (encoder->type == INTEL_OUTPUT_HDMI || + encoder->type == INTEL_OUTPUT_DDI) intel_hdmi_info(m, intel_connector); break; default: @@ -2732,70 +2526,88 @@ static void plane_rotation(char *buf, size_t bufsize, unsigned int rotation) rotation); } -static void intel_plane_info(struct seq_file *m, struct intel_crtc *intel_crtc) +static void intel_plane_uapi_info(struct seq_file *m, struct intel_plane *plane) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct intel_plane *intel_plane; + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + const struct drm_framebuffer *fb = plane_state->uapi.fb; + struct drm_format_name_buf format_name; + struct drm_rect src, dst; + char rot_str[48]; - for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { - struct drm_plane_state *state; - struct drm_plane *plane = &intel_plane->base; - struct drm_format_name_buf format_name; - char rot_str[48]; + src = drm_plane_state_src(&plane_state->uapi); + dst = drm_plane_state_dest(&plane_state->uapi); - if (!plane->state) { - seq_puts(m, "plane->state is NULL!\n"); - continue; - } + if (fb) + drm_get_format_name(fb->format->format, &format_name); - state = plane->state; + plane_rotation(rot_str, sizeof(rot_str), + plane_state->uapi.rotation); - if (state->fb) { - drm_get_format_name(state->fb->format->format, - &format_name); - } else { - sprintf(format_name.str, "N/A"); - } + seq_printf(m, "\t\tuapi: fb=%d,%s,%dx%d, src=" DRM_RECT_FP_FMT ", dst=" DRM_RECT_FMT ", rotation=%s\n", + fb ? fb->base.id : 0, fb ? format_name.str : "n/a", + fb ? fb->width : 0, fb ? 
fb->height : 0, + DRM_RECT_FP_ARG(&src), + DRM_RECT_ARG(&dst), + rot_str); +} + +static void intel_plane_hw_info(struct seq_file *m, struct intel_plane *plane) +{ + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + const struct drm_framebuffer *fb = plane_state->hw.fb; + struct drm_format_name_buf format_name; + char rot_str[48]; + + if (!fb) + return; + + drm_get_format_name(fb->format->format, &format_name); - plane_rotation(rot_str, sizeof(rot_str), state->rotation); - - seq_printf(m, "\t--Plane id %d: type=%s, crtc_pos=%4dx%4d, crtc_size=%4dx%4d, src_pos=%d.%04ux%d.%04u, src_size=%d.%04ux%d.%04u, format=%s, rotation=%s\n", - plane->base.id, - plane_type(intel_plane->base.type), - state->crtc_x, state->crtc_y, - state->crtc_w, state->crtc_h, - (state->src_x >> 16), - ((state->src_x & 0xffff) * 15625) >> 10, - (state->src_y >> 16), - ((state->src_y & 0xffff) * 15625) >> 10, - (state->src_w >> 16), - ((state->src_w & 0xffff) * 15625) >> 10, - (state->src_h >> 16), - ((state->src_h & 0xffff) * 15625) >> 10, - format_name.str, - rot_str); + plane_rotation(rot_str, sizeof(rot_str), + plane_state->hw.rotation); + + seq_printf(m, "\t\thw: fb=%d,%s,%dx%d, visible=%s, src=" DRM_RECT_FP_FMT ", dst=" DRM_RECT_FMT ", rotation=%s\n", + fb->base.id, format_name.str, + fb->width, fb->height, + yesno(plane_state->uapi.visible), + DRM_RECT_FP_ARG(&plane_state->uapi.src), + DRM_RECT_ARG(&plane_state->uapi.dst), + rot_str); +} + +static void intel_plane_info(struct seq_file *m, struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_plane *plane; + + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { + seq_printf(m, "\t[PLANE:%d:%s]: type=%s\n", + plane->base.base.id, plane->base.name, + plane_type(plane->base.type)); + intel_plane_uapi_info(m, plane); + intel_plane_hw_info(m, plane); } } -static void intel_scaler_info(struct seq_file *m, struct intel_crtc *intel_crtc) +static void intel_scaler_info(struct seq_file *m, struct intel_crtc *crtc) { - struct intel_crtc_state *pipe_config; - int num_scalers = intel_crtc->num_scalers; + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + int num_scalers = crtc->num_scalers; int i; - pipe_config = to_intel_crtc_state(intel_crtc->base.state); - /* Not all platformas have a scaler */ if (num_scalers) { seq_printf(m, "\tnum_scalers=%d, scaler_users=%x scaler_id=%d", num_scalers, - pipe_config->scaler_state.scaler_users, - pipe_config->scaler_state.scaler_id); + crtc_state->scaler_state.scaler_users, + crtc_state->scaler_state.scaler_id); for (i = 0; i < num_scalers; i++) { - struct intel_scaler *sc = - &pipe_config->scaler_state.scalers[i]; + const struct intel_scaler *sc = + &crtc_state->scaler_state.scalers[i]; seq_printf(m, ", scalers[%d]: use=%s, mode=%x", i, yesno(sc->in_use), sc->mode); @@ -2806,6 +2618,44 @@ static void intel_scaler_info(struct seq_file *m, struct intel_crtc *intel_crtc) } } +static void intel_crtc_info(struct seq_file *m, struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_encoder *encoder; + + seq_printf(m, "[CRTC:%d:%s]:\n", + crtc->base.base.id, crtc->base.name); + + seq_printf(m, "\tuapi: enable=%s, active=%s, mode=" DRM_MODE_FMT "\n", + yesno(crtc_state->uapi.enable), + yesno(crtc_state->uapi.active), + DRM_MODE_ARG(&crtc_state->uapi.mode)); + + if 
(crtc_state->hw.enable) { + seq_printf(m, "\thw: active=%s, adjusted_mode=" DRM_MODE_FMT "\n", + yesno(crtc_state->hw.active), + DRM_MODE_ARG(&crtc_state->hw.adjusted_mode)); + + seq_printf(m, "\tpipe src size=%dx%d, dither=%s, bpp=%d\n", + crtc_state->pipe_src_w, crtc_state->pipe_src_h, + yesno(crtc_state->dither), crtc_state->pipe_bpp); + + intel_scaler_info(m, crtc); + } + + for_each_intel_encoder_mask(&dev_priv->drm, encoder, + crtc_state->uapi.encoder_mask) + intel_encoder_info(m, crtc, encoder); + + intel_plane_info(m, crtc); + + seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s\n", + yesno(!crtc->cpu_fifo_underrun_disabled), + yesno(!crtc->pch_fifo_underrun_disabled)); +} + static int i915_display_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -2817,52 +2667,22 @@ static int i915_display_info(struct seq_file *m, void *unused) wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); + drm_modeset_lock_all(dev); + seq_printf(m, "CRTC info\n"); seq_printf(m, "---------\n"); - for_each_intel_crtc(dev, crtc) { - struct intel_crtc_state *pipe_config; - - drm_modeset_lock(&crtc->base.mutex, NULL); - pipe_config = to_intel_crtc_state(crtc->base.state); - - seq_printf(m, "CRTC %d: pipe: %c, active=%s, (size=%dx%d), dither=%s, bpp=%d\n", - crtc->base.base.id, pipe_name(crtc->pipe), - yesno(pipe_config->base.active), - pipe_config->pipe_src_w, pipe_config->pipe_src_h, - yesno(pipe_config->dither), pipe_config->pipe_bpp); - - if (pipe_config->base.active) { - struct intel_plane *cursor = - to_intel_plane(crtc->base.cursor); - - intel_crtc_info(m, crtc); - - seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x\n", - yesno(cursor->base.state->visible), - cursor->base.state->crtc_x, - cursor->base.state->crtc_y, - cursor->base.state->crtc_w, - cursor->base.state->crtc_h, - cursor->cursor.base); - intel_scaler_info(m, crtc); - intel_plane_info(m, crtc); - } - - seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s \n", - yesno(!crtc->cpu_fifo_underrun_disabled), - yesno(!crtc->pch_fifo_underrun_disabled)); - drm_modeset_unlock(&crtc->base.mutex); - } + for_each_intel_crtc(dev, crtc) + intel_crtc_info(m, crtc); seq_printf(m, "\n"); seq_printf(m, "Connector info\n"); seq_printf(m, "--------------\n"); - mutex_lock(&dev->mode_config.mutex); drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) intel_connector_info(m, connector); drm_connector_list_iter_end(&conn_iter); - mutex_unlock(&dev->mode_config.mutex); + + drm_modeset_unlock_all(dev); intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); @@ -2874,7 +2694,6 @@ static int i915_engine_info(struct seq_file *m, void *unused) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_engine_cs *engine; intel_wakeref_t wakeref; - enum intel_engine_id id; struct drm_printer p; wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); @@ -2886,7 +2705,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); p = drm_seq_file_printer(m); - for_each_engine(engine, dev_priv, id) + for_each_uabi_engine(engine, dev_priv) intel_engine_dump(engine, &p, "%s\n", engine->name); intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); @@ -2899,7 +2718,7 @@ static int i915_rcs_topology(struct seq_file *m, void *unused) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_printer p = drm_seq_file_printer(m); - 
intel_device_info_dump_topology(&RUNTIME_INFO(dev_priv)->sseu, &p); + intel_device_info_print_topology(&RUNTIME_INFO(dev_priv)->sseu, &p); return 0; } @@ -2966,14 +2785,27 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) static int i915_wa_registers(struct seq_file *m, void *unused) { struct drm_i915_private *i915 = node_to_i915(m->private); - const struct i915_wa_list *wal = &i915->engine[RCS0]->ctx_wa_list; - struct i915_wa *wa; - unsigned int i; + struct intel_engine_cs *engine; + + for_each_uabi_engine(engine, i915) { + const struct i915_wa_list *wal = &engine->ctx_wa_list; + const struct i915_wa *wa; + unsigned int count; + + count = wal->count; + if (!count) + continue; + + seq_printf(m, "%s: Workarounds applied: %u\n", + engine->name, count); - seq_printf(m, "Workarounds applied: %u\n", wal->count); - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n", - i915_mmio_reg_offset(wa->reg), wa->val, wa->mask); + for (wa = wal->list; count--; wa++) + seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n", + i915_mmio_reg_offset(wa->reg), + wa->val, wa->mask); + + seq_printf(m, "\n"); + } return 0; } @@ -3182,16 +3014,17 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused) if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; - intel_encoder = intel_attached_encoder(connector); + intel_encoder = intel_attached_encoder(to_intel_connector(connector)); if (!intel_encoder || intel_encoder->type == INTEL_OUTPUT_DP_MST) continue; - intel_dig_port = enc_to_dig_port(&intel_encoder->base); + intel_dig_port = enc_to_dig_port(intel_encoder); if (!intel_dig_port->dp.can_mst) continue; - seq_printf(m, "MST Source Port %c\n", - port_name(intel_dig_port->base.port)); + seq_printf(m, "MST Source Port [ENCODER:%d:%s]\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); drm_dp_mst_dump_topology(m, &intel_dig_port->dp.mst_mgr); } drm_connector_list_iter_end(&conn_iter); @@ -3235,7 +3068,7 @@ static ssize_t i915_displayport_test_active_write(struct file *file, continue; if (encoder && connector->status == connector_status_connected) { - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); status = kstrtoint(input_buffer, 10, &val); if (status < 0) break; @@ -3244,9 +3077,9 @@ static ssize_t i915_displayport_test_active_write(struct file *file, * testing code, only accept an actual value of 1 here */ if (val == 1) - intel_dp->compliance.test_active = 1; + intel_dp->compliance.test_active = true; else - intel_dp->compliance.test_active = 0; + intel_dp->compliance.test_active = false; } } drm_connector_list_iter_end(&conn_iter); @@ -3279,7 +3112,7 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data) continue; if (encoder && connector->status == connector_status_connected) { - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); if (intel_dp->compliance.test_active) seq_puts(m, "1"); else @@ -3329,7 +3162,7 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) continue; if (encoder && connector->status == connector_status_connected) { - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); if (intel_dp->compliance.test_type == DP_TEST_LINK_EDID_READ) seq_printf(m, "%lx", @@ -3373,7 +3206,7 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data) continue; if (encoder && connector->status == connector_status_connected) { - intel_dp = 
enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); seq_printf(m, "%02lx", intel_dp->compliance.test_type); } else seq_puts(m, "0"); @@ -3620,7 +3453,8 @@ static const struct file_operations i915_cur_wm_latency_fops = { static int i915_wedged_get(void *data, u64 *val) { - int ret = i915_terminally_wedged(data); + struct drm_i915_private *i915 = data; + int ret = intel_gt_terminally_wedged(&i915->gt); switch (ret) { case -EIO: @@ -3640,11 +3474,11 @@ i915_wedged_set(void *data, u64 val) struct drm_i915_private *i915 = data; /* Flush any previous reset before applying for a new one */ - wait_event(i915->gpu_error.reset_queue, - !test_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)); + wait_event(i915->gt.reset.queue, + !test_bit(I915_RESET_BACKOFF, &i915->gt.reset.flags)); - i915_handle_error(i915, val, I915_ERROR_CAPTURE, - "Manually set wedged engine mask = %llx", val); + intel_gt_handle_error(&i915->gt, val, I915_ERROR_CAPTURE, + "Manually set wedged engine mask = %llx", val); return 0; } @@ -3652,6 +3486,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, i915_wedged_get, i915_wedged_set, "%llu\n"); +static int +i915_perf_noa_delay_set(void *data, u64 val) +{ + struct drm_i915_private *i915 = data; + const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz; + + /* + * This would lead to infinite waits as we're doing timestamp + * difference on the CS with only 32bits. + */ + if (val > mul_u32_u32(U32_MAX, clk)) + return -EINVAL; + + atomic64_set(&i915->perf.noa_programming_delay, val); + return 0; +} + +static int +i915_perf_noa_delay_get(void *data, u64 *val) +{ + struct drm_i915_private *i915 = data; + + *val = atomic64_read(&i915->perf.noa_programming_delay); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops, + i915_perf_noa_delay_get, + i915_perf_noa_delay_set, + "%llu\n"); + #define DROP_UNBOUND BIT(0) #define DROP_BOUND BIT(1) #define DROP_RETIRE BIT(2) @@ -3661,6 +3526,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, #define DROP_IDLE BIT(6) #define DROP_RESET_ACTIVE BIT(7) #define DROP_RESET_SEQNO BIT(8) +#define DROP_RCU BIT(9) #define DROP_ALL (DROP_UNBOUND | \ DROP_BOUND | \ DROP_RETIRE | \ @@ -3669,7 +3535,8 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, DROP_SHRINK_ALL |\ DROP_IDLE | \ DROP_RESET_ACTIVE | \ - DROP_RESET_SEQNO) + DROP_RESET_SEQNO | \ + DROP_RCU) static int i915_drop_caches_get(void *data, u64 *val) { @@ -3677,54 +3544,48 @@ i915_drop_caches_get(void *data, u64 *val) return 0; } - static int -i915_drop_caches_set(void *data, u64 val) +gt_drop_caches(struct intel_gt *gt, u64 val) { - struct drm_i915_private *i915 = data; - - DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", - val, val & DROP_ALL); + int ret; if (val & DROP_RESET_ACTIVE && - wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) - i915_gem_set_wedged(i915); + wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT)) + intel_gt_set_wedged(gt); - /* No need to check and wait for gpu resets, only libdrm auto-restarts - * on ioctls on -EAGAIN. 
*/ - if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) { - int ret; + if (val & DROP_RETIRE) + intel_gt_retire_requests(gt); - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); + if (val & (DROP_IDLE | DROP_ACTIVE)) { + ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; + } - /* - * To finish the flush of the idle_worker, we must complete - * the switch-to-kernel-context, which requires a double - * pass through wait_for_idle: first queues the switch, - * second waits for the switch. - */ - if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE)) - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - if (ret == 0 && val & DROP_IDLE) - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - if (val & DROP_RETIRE) - i915_retire_requests(i915); - - mutex_unlock(&i915->drm.struct_mutex); + if (val & DROP_IDLE) { + ret = intel_gt_pm_wait_for_idle(gt); + if (ret) + return ret; } - if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(i915)) - i915_handle_error(i915, ALL_ENGINES, 0, NULL); + if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt)) + intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL); + + return 0; +} + +static int +i915_drop_caches_set(void *data, u64 val) +{ + struct drm_i915_private *i915 = data; + int ret; + + DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", + val, val & DROP_ALL); + + ret = gt_drop_caches(&i915->gt, val); + if (ret) + return ret; fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) @@ -3737,10 +3598,8 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_shrink_all(i915); fs_reclaim_release(GFP_KERNEL); - if (val & DROP_IDLE) { - flush_delayed_work(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); - } + if (val & DROP_RCU) + rcu_barrier(); if (val & DROP_FREED) i915_gem_drain_freed_objects(i915); @@ -3796,6 +3655,15 @@ i915_cache_sharing_set(void *data, u64 val) return 0; } +static void +intel_sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice, + u8 *to_mask) +{ + int offset = slice * sseu->ss_stride; + + memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride); +} + DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops, i915_cache_sharing_get, i915_cache_sharing_set, "%llu\n"); @@ -3869,12 +3737,13 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask |= BIT(s); - sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; + intel_sseu_copy_subslices(&info->sseu, s, sseu->subslice_mask); for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + if (info->sseu.has_subslice_pg && + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) /* skip disabled subslice */ continue; @@ -3920,18 +3789,21 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); if (IS_GEN9_BC(dev_priv)) - sseu->subslice_mask[s] = - RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s]; + intel_sseu_copy_subslices(&info->sseu, s, + sseu->subslice_mask); for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; + u8 ss_idx = s * info->sseu.ss_stride + + ss / BITS_PER_BYTE; if (IS_GEN9_LP(dev_priv)) { if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) /* skip disabled subslice */ continue; - sseu->subslice_mask[s] |= BIT(ss); + sseu->subslice_mask[ss_idx] |= + BIT(ss % BITS_PER_BYTE); } eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & @@ -3945,28 +3817,26 @@ static void 
gen9_sseu_device_status(struct drm_i915_private *dev_priv, #undef SS_MAX } -static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, - struct sseu_dev_info *sseu) +static void bdw_sseu_device_status(struct drm_i915_private *dev_priv, + struct sseu_dev_info *sseu) { + const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv); u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO); int s; sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; if (sseu->slice_mask) { - sseu->eu_per_subslice = - RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice; - for (s = 0; s < fls(sseu->slice_mask); s++) { - sseu->subslice_mask[s] = - RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s]; - } + sseu->eu_per_subslice = info->sseu.eu_per_subslice; + for (s = 0; s < fls(sseu->slice_mask); s++) + intel_sseu_copy_subslices(&info->sseu, s, + sseu->subslice_mask); sseu->eu_total = sseu->eu_per_subslice * intel_sseu_subslice_total(sseu); /* subtract fused off EU(s) from enabled slice(s) */ for (s = 0; s < fls(sseu->slice_mask); s++) { - u8 subslice_7eu = - RUNTIME_INFO(dev_priv)->sseu.subslice_7eu[s]; + u8 subslice_7eu = info->sseu.subslice_7eu[s]; sseu->eu_total -= hweight8(subslice_7eu); } @@ -4013,6 +3883,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, static int i915_sseu_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv); struct sseu_dev_info sseu; intel_wakeref_t wakeref; @@ -4020,20 +3891,19 @@ static int i915_sseu_status(struct seq_file *m, void *unused) return -ENODEV; seq_puts(m, "SSEU Device Info\n"); - i915_print_sseu_info(m, true, &RUNTIME_INFO(dev_priv)->sseu); + i915_print_sseu_info(m, true, &info->sseu); seq_puts(m, "SSEU Device Status\n"); memset(&sseu, 0, sizeof(sseu)); - sseu.max_slices = RUNTIME_INFO(dev_priv)->sseu.max_slices; - sseu.max_subslices = RUNTIME_INFO(dev_priv)->sseu.max_subslices; - sseu.max_eus_per_subslice = - RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice; + intel_sseu_set_info(&sseu, info->sseu.max_slices, + info->sseu.max_subslices, + info->sseu.max_eus_per_subslice); with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { if (IS_CHERRYVIEW(dev_priv)) cherryview_sseu_device_status(dev_priv, &sseu); else if (IS_BROADWELL(dev_priv)) - broadwell_sseu_device_status(dev_priv, &sseu); + bdw_sseu_device_status(dev_priv, &sseu); else if (IS_GEN(dev_priv, 9)) gen9_sseu_device_status(dev_priv, &sseu); else if (INTEL_GEN(dev_priv) >= 10) @@ -4048,13 +3918,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused) static int i915_forcewake_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; + struct intel_gt *gt = &i915->gt; - if (INTEL_GEN(i915) < 6) - return 0; - - file->private_data = - (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm); - intel_uncore_forcewake_user_get(&i915->uncore); + atomic_inc(>->user_wakeref); + intel_gt_pm_get(gt); + if (INTEL_GEN(i915) >= 6) + intel_uncore_forcewake_user_get(gt->uncore); return 0; } @@ -4062,13 +3931,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file) static int i915_forcewake_release(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; + struct intel_gt *gt = &i915->gt; - if (INTEL_GEN(i915) < 6) - return 0; - - intel_uncore_forcewake_user_put(&i915->uncore); - intel_runtime_pm_put(&i915->runtime_pm, - (intel_wakeref_t)(uintptr_t)file->private_data); + if 
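The gen9/gen10 SSEU hunks above stop treating subslice_mask as one byte per slice and address it as a strided byte array instead: slice s begins at byte s * ss_stride, and subslice ss maps to bit ss % BITS_PER_BYTE of byte ss / BITS_PER_BYTE within that slice, which is the layout intel_sseu_copy_subslices() copies wholesale. A small sketch of that addressing (not part of the patch; the example_ helpers are hypothetical):

#include <linux/bitops.h>
#include <linux/bits.h>
#include <linux/types.h>

/* mark (slice, subslice) as present in a strided mask */
static void example_set_subslice(u8 *mask, u8 ss_stride, int slice, int ss)
{
	int idx = slice * ss_stride + ss / BITS_PER_BYTE;

	mask[idx] |= BIT(ss % BITS_PER_BYTE);
}

/* test whether (slice, subslice) is present */
static bool example_has_subslice(const u8 *mask, u8 ss_stride,
				 int slice, int ss)
{
	int idx = slice * ss_stride + ss / BITS_PER_BYTE;

	return mask[idx] & BIT(ss % BITS_PER_BYTE);
}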
(INTEL_GEN(i915) >= 6) + intel_uncore_forcewake_user_put(&i915->uncore); + intel_gt_pm_put(gt); + atomic_dec(>->user_wakeref); return 0; } @@ -4087,9 +3955,9 @@ static int i915_hpd_storm_ctl_show(struct seq_file *m, void *data) /* Synchronize with everything first in case there's been an HPD * storm, but we haven't finished handling it in the kernel yet */ - synchronize_irq(dev_priv->drm.irq); + intel_synchronize_irq(dev_priv); flush_work(&dev_priv->hotplug.dig_port_work); - flush_work(&dev_priv->hotplug.hotplug_work); + flush_delayed_work(&dev_priv->hotplug.hotplug_work); seq_printf(m, "Threshold: %d\n", hotplug->hpd_storm_threshold); seq_printf(m, "Detected: %s\n", @@ -4256,11 +4124,11 @@ static int i915_drrs_ctl_set(void *data, u64 val) crtc_state = to_intel_crtc_state(crtc->base.state); - if (!crtc_state->base.active || + if (!crtc_state->hw.active || !crtc_state->has_drrs) goto out; - commit = crtc_state->base.commit; + commit = crtc_state->uapi.commit; if (commit) { ret = wait_for_completion_interruptible(&commit->hw_done); if (ret) @@ -4272,18 +4140,18 @@ static int i915_drrs_ctl_set(void *data, u64 val) struct intel_encoder *encoder; struct intel_dp *intel_dp; - if (!(crtc_state->base.connector_mask & + if (!(crtc_state->uapi.connector_mask & drm_connector_mask(connector))) continue; - encoder = intel_attached_encoder(connector); + encoder = intel_attached_encoder(to_intel_connector(connector)); if (encoder->type != INTEL_OUTPUT_EDP) continue; DRM_DEBUG_DRIVER("Manually %sabling DRRS. %llu\n", val ? "en" : "dis", val); - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); if (val) intel_edp_drrs_enable(intel_dp, crtc_state); @@ -4331,14 +4199,14 @@ i915_fifo_underrun_reset_write(struct file *filp, return ret; crtc_state = to_intel_crtc_state(intel_crtc->base.state); - commit = crtc_state->base.commit; + commit = crtc_state->uapi.commit; if (commit) { ret = wait_for_completion_interruptible(&commit->hw_done); if (!ret) ret = wait_for_completion_interruptible(&commit->flip_done); } - if (!ret && crtc_state->base.active) { + if (!ret && crtc_state->hw.active) { DRM_DEBUG_KMS("Re-arming FIFO underruns on pipe %c\n", pipe_name(intel_crtc->pipe)); @@ -4370,7 +4238,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_gem_objects", i915_gem_object_info, 0}, {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, {"i915_gem_interrupt", i915_interrupt_info, 0}, - {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0}, {"i915_guc_info", i915_guc_info, 0}, {"i915_guc_load_status", i915_guc_load_status_info, 0}, {"i915_guc_log_dump", i915_guc_log_dump, 0}, @@ -4378,10 +4245,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_guc_stage_pool", i915_guc_stage_pool, 0}, {"i915_huc_load_status", i915_huc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, - {"i915_hangcheck_info", i915_hangcheck_info, 0}, - {"i915_reset_info", i915_reset_info, 0}, {"i915_drpc_info", i915_drpc_info, 0}, - {"i915_emon_status", i915_emon_status, 0}, {"i915_ring_freq_table", i915_ring_freq_table, 0}, {"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0}, {"i915_fbc_status", i915_fbc_status, 0}, @@ -4417,6 +4281,7 @@ static const struct i915_debugfs_files { const char *name; const struct file_operations *fops; } i915_debugfs_files[] = { + {"i915_perf_noa_delay", &i915_perf_noa_delay_fops}, {"i915_wedged", &i915_wedged_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, @@ -4490,7 
+4355,7 @@ static int i915_dpcd_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; struct intel_dp *intel_dp = - enc_to_intel_dp(&intel_attached_encoder(connector)->base); + enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector))); u8 buf[16]; ssize_t err; int i; @@ -4525,7 +4390,7 @@ static int i915_panel_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; struct intel_dp *intel_dp = - enc_to_intel_dp(&intel_attached_encoder(connector)->base); + enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector))); if (connector->status != connector_status_connected) return -ENODEV; @@ -4547,7 +4412,6 @@ static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; struct intel_connector *intel_connector = to_intel_connector(connector); - bool hdcp_cap, hdcp2_cap; if (connector->status != connector_status_connected) return -ENODEV; @@ -4558,17 +4422,7 @@ static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) seq_printf(m, "%s:%d HDCP version: ", connector->name, connector->base.id); - hdcp_cap = intel_hdcp_capable(intel_connector); - hdcp2_cap = intel_hdcp2_capable(intel_connector); - - if (hdcp_cap) - seq_puts(m, "HDCP1.4 "); - if (hdcp2_cap) - seq_puts(m, "HDCP2.2 "); - - if (!hdcp_cap && !hdcp2_cap) - seq_puts(m, "None"); - seq_puts(m, "\n"); + intel_hdcp_info(m, intel_connector); return 0; } @@ -4614,10 +4468,10 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data) } else if (ret) { break; } - intel_dp = enc_to_intel_dp(&intel_attached_encoder(connector)->base); + intel_dp = enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector))); crtc_state = to_intel_crtc_state(crtc->state); seq_printf(m, "DSC_Enabled: %s\n", - yesno(crtc_state->dsc_params.compression_enable)); + yesno(crtc_state->dsc.compression_enable)); seq_printf(m, "DSC_Sink_Support: %s\n", yesno(drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd))); seq_printf(m, "Force_DSC_Enable: %s\n", @@ -4641,8 +4495,8 @@ static ssize_t i915_dsc_fec_support_write(struct file *file, int ret; struct drm_connector *connector = ((struct seq_file *)file->private_data)->private; - struct intel_encoder *encoder = intel_attached_encoder(connector); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); if (len == 0) return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f62e3397d936..f7385abdd74b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -36,7 +36,6 @@ #include <linux/pm_runtime.h> #include <linux/pnp.h> #include <linux/slab.h> -#include <linux/vgaarb.h> #include <linux/vga_switcheroo.h> #include <linux/vt.h> #include <acpi/video.h> @@ -51,446 +50,100 @@ #include "display/intel_audio.h" #include "display/intel_bw.h" #include "display/intel_cdclk.h" +#include "display/intel_display_types.h" #include "display/intel_dp.h" #include "display/intel_fbdev.h" -#include "display/intel_gmbus.h" #include "display/intel_hotplug.h" #include "display/intel_overlay.h" #include "display/intel_pipe_crc.h" #include "display/intel_sprite.h" +#include "display/intel_vga.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" +#include "gem/i915_gem_mman.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" -#include 
"gt/intel_reset.h" -#include "gt/intel_workarounds.h" +#include "gt/intel_rc6.h" #include "i915_debugfs.h" #include "i915_drv.h" #include "i915_irq.h" -#include "i915_pmu.h" +#include "i915_memcpy.h" +#include "i915_perf.h" #include "i915_query.h" +#include "i915_suspend.h" +#include "i915_switcheroo.h" +#include "i915_sysfs.h" #include "i915_trace.h" #include "i915_vgpu.h" #include "intel_csr.h" -#include "intel_drv.h" +#include "intel_memory_region.h" #include "intel_pm.h" -#include "intel_uc.h" static struct drm_driver driver; -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) -static unsigned int i915_load_fail_count; - -bool __i915_inject_load_failure(const char *func, int line) -{ - if (i915_load_fail_count >= i915_modparams.inject_load_failure) - return false; - - if (++i915_load_fail_count == i915_modparams.inject_load_failure) { - DRM_INFO("Injecting failure at checkpoint %u [%s:%d]\n", - i915_modparams.inject_load_failure, func, line); - i915_modparams.inject_load_failure = 0; - return true; - } - - return false; -} - -bool i915_error_injected(void) -{ - return i915_load_fail_count && !i915_modparams.inject_load_failure; -} - -#endif - -#define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI" -#define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \ - "providing the dmesg log by booting with drm.debug=0xf" - -void -__i915_printk(struct drm_i915_private *dev_priv, const char *level, - const char *fmt, ...) -{ - static bool shown_bug_once; - struct device *kdev = dev_priv->drm.dev; - bool is_error = level[1] <= KERN_ERR[1]; - bool is_debug = level[1] == KERN_DEBUG[1]; - struct va_format vaf; - va_list args; - - if (is_debug && !(drm_debug & DRM_UT_DRIVER)) - return; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - if (is_error) - dev_printk(level, kdev, "%pV", &vaf); - else - dev_printk(level, kdev, "[" DRM_NAME ":%ps] %pV", - __builtin_return_address(0), &vaf); - - va_end(args); - - if (is_error && !shown_bug_once) { - /* - * Ask the user to file a bug report for the error, except - * if they may have caused the bug by fiddling with unsafe - * module parameters. - */ - if (!test_taint(TAINT_USER)) - dev_notice(kdev, "%s", FDO_BUG_MSG); - shown_bug_once = true; - } -} - -/* Map PCH device id to PCH type, or PCH_NONE if unknown. 
*/ -static enum intel_pch -intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) -{ - switch (id) { - case INTEL_PCH_IBX_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Ibex Peak PCH\n"); - WARN_ON(!IS_GEN(dev_priv, 5)); - return PCH_IBX; - case INTEL_PCH_CPT_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found CougarPoint PCH\n"); - WARN_ON(!IS_GEN(dev_priv, 6) && !IS_IVYBRIDGE(dev_priv)); - return PCH_CPT; - case INTEL_PCH_PPT_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found PantherPoint PCH\n"); - WARN_ON(!IS_GEN(dev_priv, 6) && !IS_IVYBRIDGE(dev_priv)); - /* PantherPoint is CPT compatible */ - return PCH_CPT; - case INTEL_PCH_LPT_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found LynxPoint PCH\n"); - WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)); - return PCH_LPT; - case INTEL_PCH_LPT_LP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found LynxPoint LP PCH\n"); - WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv)); - return PCH_LPT; - case INTEL_PCH_WPT_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found WildcatPoint PCH\n"); - WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)); - /* WildcatPoint is LPT compatible */ - return PCH_LPT; - case INTEL_PCH_WPT_LP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found WildcatPoint LP PCH\n"); - WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv)); - /* WildcatPoint is LPT compatible */ - return PCH_LPT; - case INTEL_PCH_SPT_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found SunrisePoint PCH\n"); - WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); - return PCH_SPT; - case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); - WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); - return PCH_SPT; - case INTEL_PCH_KBP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n"); - WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv)); - /* KBP is SPT compatible */ - return PCH_SPT; - case INTEL_PCH_CNP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); - WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); - return PCH_CNP; - case INTEL_PCH_CNP_LP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Cannon Lake LP PCH (CNP-LP)\n"); - WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); - return PCH_CNP; - case INTEL_PCH_CMP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n"); - WARN_ON(!IS_COFFEELAKE(dev_priv)); - /* CometPoint is CNP Compatible */ - return PCH_CNP; - case INTEL_PCH_ICP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Ice Lake PCH\n"); - WARN_ON(!IS_ICELAKE(dev_priv)); - return PCH_ICP; - case INTEL_PCH_MCC_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n"); - WARN_ON(!IS_ELKHARTLAKE(dev_priv)); - return PCH_MCC; - default: - return PCH_NONE; - } -} - -static bool intel_is_virt_pch(unsigned short id, - unsigned short svendor, unsigned short sdevice) -{ - return (id == INTEL_PCH_P2X_DEVICE_ID_TYPE || - id == INTEL_PCH_P3X_DEVICE_ID_TYPE || - (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE && - svendor == PCI_SUBVENDOR_ID_REDHAT_QUMRANET && - sdevice == PCI_SUBDEVICE_ID_QEMU)); -} - -static unsigned short -intel_virt_detect_pch(const struct drm_i915_private *dev_priv) -{ - unsigned short id = 0; - - /* - * In a virtualized passthrough environment we can be in a - * setup where the ISA bridge is not able to be passed through. 
- * In this case, a south bridge can be emulated and we have to - * make an educated guess as to which PCH is really there. - */ - - if (IS_ELKHARTLAKE(dev_priv)) - id = INTEL_PCH_MCC_DEVICE_ID_TYPE; - else if (IS_ICELAKE(dev_priv)) - id = INTEL_PCH_ICP_DEVICE_ID_TYPE; - else if (IS_CANNONLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) - id = INTEL_PCH_CNP_DEVICE_ID_TYPE; - else if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) - id = INTEL_PCH_SPT_DEVICE_ID_TYPE; - else if (IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)) - id = INTEL_PCH_LPT_LP_DEVICE_ID_TYPE; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - id = INTEL_PCH_LPT_DEVICE_ID_TYPE; - else if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) - id = INTEL_PCH_CPT_DEVICE_ID_TYPE; - else if (IS_GEN(dev_priv, 5)) - id = INTEL_PCH_IBX_DEVICE_ID_TYPE; - - if (id) - DRM_DEBUG_KMS("Assuming PCH ID %04x\n", id); - else - DRM_DEBUG_KMS("Assuming no PCH\n"); - - return id; -} - -static void intel_detect_pch(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pch = NULL; - - /* - * The reason to probe ISA bridge instead of Dev31:Fun0 is to - * make graphics device passthrough work easy for VMM, that only - * need to expose ISA bridge to let driver know the real hardware - * underneath. This is a requirement from virtualization team. - * - * In some virtualized environments (e.g. XEN), there is irrelevant - * ISA bridge in the system. To work reliably, we should scan trhough - * all the ISA bridge devices and check for the first match, instead - * of only checking the first one. - */ - while ((pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, pch))) { - unsigned short id; - enum intel_pch pch_type; - - if (pch->vendor != PCI_VENDOR_ID_INTEL) - continue; - - id = pch->device & INTEL_PCH_DEVICE_ID_MASK; - - pch_type = intel_pch_type(dev_priv, id); - if (pch_type != PCH_NONE) { - dev_priv->pch_type = pch_type; - dev_priv->pch_id = id; - break; - } else if (intel_is_virt_pch(id, pch->subsystem_vendor, - pch->subsystem_device)) { - id = intel_virt_detect_pch(dev_priv); - pch_type = intel_pch_type(dev_priv, id); - - /* Sanity check virtual PCH id */ - if (WARN_ON(id && pch_type == PCH_NONE)) - id = 0; - - dev_priv->pch_type = pch_type; - dev_priv->pch_id = id; - break; - } - } - - /* - * Use PCH_NOP (PCH but no South Display) for PCH platforms without - * display. - */ - if (pch && !HAS_DISPLAY(dev_priv)) { - DRM_DEBUG_KMS("Display disabled, reverting to NOP PCH\n"); - dev_priv->pch_type = PCH_NOP; - dev_priv->pch_id = 0; - } - - if (!pch) - DRM_DEBUG_KMS("No PCH found.\n"); - - pci_dev_put(pch); -} - -static int i915_getparam_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct pci_dev *pdev = dev_priv->drm.pdev; - const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; - drm_i915_getparam_t *param = data; - int value; - - switch (param->param) { - case I915_PARAM_IRQ_ACTIVE: - case I915_PARAM_ALLOW_BATCHBUFFER: - case I915_PARAM_LAST_DISPATCH: - case I915_PARAM_HAS_EXEC_CONSTANTS: - /* Reject all old ums/dri params. */ - return -ENODEV; - case I915_PARAM_CHIPSET_ID: - value = pdev->device; - break; - case I915_PARAM_REVISION: - value = pdev->revision; - break; - case I915_PARAM_NUM_FENCES_AVAIL: - value = dev_priv->ggtt.num_fences; - break; - case I915_PARAM_HAS_OVERLAY: - value = dev_priv->overlay ? 
1 : 0; - break; - case I915_PARAM_HAS_BSD: - value = !!dev_priv->engine[VCS0]; - break; - case I915_PARAM_HAS_BLT: - value = !!dev_priv->engine[BCS0]; - break; - case I915_PARAM_HAS_VEBOX: - value = !!dev_priv->engine[VECS0]; - break; - case I915_PARAM_HAS_BSD2: - value = !!dev_priv->engine[VCS1]; - break; - case I915_PARAM_HAS_LLC: - value = HAS_LLC(dev_priv); - break; - case I915_PARAM_HAS_WT: - value = HAS_WT(dev_priv); - break; - case I915_PARAM_HAS_ALIASING_PPGTT: - value = INTEL_PPGTT(dev_priv); - break; - case I915_PARAM_HAS_SEMAPHORES: - value = !!(dev_priv->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES); - break; - case I915_PARAM_HAS_SECURE_BATCHES: - value = capable(CAP_SYS_ADMIN); - break; - case I915_PARAM_CMD_PARSER_VERSION: - value = i915_cmd_parser_get_version(dev_priv); - break; - case I915_PARAM_SUBSLICE_TOTAL: - value = intel_sseu_subslice_total(sseu); - if (!value) - return -ENODEV; - break; - case I915_PARAM_EU_TOTAL: - value = sseu->eu_total; - if (!value) - return -ENODEV; - break; - case I915_PARAM_HAS_GPU_RESET: - value = i915_modparams.enable_hangcheck && - intel_has_gpu_reset(dev_priv); - if (value && intel_has_reset_engine(dev_priv)) - value = 2; - break; - case I915_PARAM_HAS_RESOURCE_STREAMER: - value = 0; - break; - case I915_PARAM_HAS_POOLED_EU: - value = HAS_POOLED_EU(dev_priv); - break; - case I915_PARAM_MIN_EU_IN_POOL: - value = sseu->min_eu_in_pool; - break; - case I915_PARAM_HUC_STATUS: - value = intel_huc_check_status(&dev_priv->huc); - if (value < 0) - return value; - break; - case I915_PARAM_MMAP_GTT_VERSION: - /* Though we've started our numbering from 1, and so class all - * earlier versions as 0, in effect their value is undefined as - * the ioctl will report EINVAL for the unknown param! - */ - value = i915_gem_mmap_gtt_version(); - break; - case I915_PARAM_HAS_SCHEDULER: - value = dev_priv->caps.scheduler; - break; - - case I915_PARAM_MMAP_VERSION: - /* Remember to bump this if the version changes! */ - case I915_PARAM_HAS_GEM: - case I915_PARAM_HAS_PAGEFLIPPING: - case I915_PARAM_HAS_EXECBUF2: /* depends on GEM */ - case I915_PARAM_HAS_RELAXED_FENCING: - case I915_PARAM_HAS_COHERENT_RINGS: - case I915_PARAM_HAS_RELAXED_DELTA: - case I915_PARAM_HAS_GEN7_SOL_RESET: - case I915_PARAM_HAS_WAIT_TIMEOUT: - case I915_PARAM_HAS_PRIME_VMAP_FLUSH: - case I915_PARAM_HAS_PINNED_BATCHES: - case I915_PARAM_HAS_EXEC_NO_RELOC: - case I915_PARAM_HAS_EXEC_HANDLE_LUT: - case I915_PARAM_HAS_COHERENT_PHYS_GTT: - case I915_PARAM_HAS_EXEC_SOFTPIN: - case I915_PARAM_HAS_EXEC_ASYNC: - case I915_PARAM_HAS_EXEC_FENCE: - case I915_PARAM_HAS_EXEC_CAPTURE: - case I915_PARAM_HAS_EXEC_BATCH_FIRST: - case I915_PARAM_HAS_EXEC_FENCE_ARRAY: - case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: - /* For the time being all of these are always true; - * if some supported hardware does not have one of these - * features this value needs to be provided from - * INTEL_INFO(), a feature macro, or similar. 
- */ - value = 1; - break; - case I915_PARAM_HAS_CONTEXT_ISOLATION: - value = intel_engines_has_context_isolation(dev_priv); - break; - case I915_PARAM_SLICE_MASK: - value = sseu->slice_mask; - if (!value) - return -ENODEV; - break; - case I915_PARAM_SUBSLICE_MASK: - value = sseu->subslice_mask[0]; - if (!value) - return -ENODEV; - break; - case I915_PARAM_CS_TIMESTAMP_FREQUENCY: - value = 1000 * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz; - break; - case I915_PARAM_MMAP_GTT_COHERENT: - value = INTEL_INFO(dev_priv)->has_coherent_ggtt; - break; - default: - DRM_DEBUG("Unknown parameter %d\n", param->param); - return -EINVAL; - } - - if (put_user(value, param->value)) - return -EFAULT; - - return 0; -} +struct vlv_s0ix_state { + /* GAM */ + u32 wr_watermark; + u32 gfx_prio_ctrl; + u32 arb_mode; + u32 gfx_pend_tlb0; + u32 gfx_pend_tlb1; + u32 lra_limits[GEN7_LRA_LIMITS_REG_NUM]; + u32 media_max_req_count; + u32 gfx_max_req_count; + u32 render_hwsp; + u32 ecochk; + u32 bsd_hwsp; + u32 blt_hwsp; + u32 tlb_rd_addr; + + /* MBC */ + u32 g3dctl; + u32 gsckgctl; + u32 mbctl; + + /* GCP */ + u32 ucgctl1; + u32 ucgctl3; + u32 rcgctl1; + u32 rcgctl2; + u32 rstctl; + u32 misccpctl; + + /* GPM */ + u32 gfxpause; + u32 rpdeuhwtc; + u32 rpdeuc; + u32 ecobus; + u32 pwrdwnupctl; + u32 rp_down_timeout; + u32 rp_deucsw; + u32 rcubmabdtmr; + u32 rcedata; + u32 spare2gh; + + /* Display 1 CZ domain */ + u32 gt_imr; + u32 gt_ier; + u32 pm_imr; + u32 pm_ier; + u32 gt_scratch[GEN7_GT_SCRATCH_REG_NUM]; + + /* GT SA CZ domain */ + u32 tilectl; + u32 gt_fifoctl; + u32 gtlc_wake_ctrl; + u32 gtlc_survive; + u32 pmwgicz; + + /* Display 2 CZ domain */ + u32 gu_ctl0; + u32 gu_ctl1; + u32 pcbr; + u32 clock_gate_dis2; +}; static int i915_get_bridge_dev(struct drm_i915_private *dev_priv) { @@ -619,176 +272,97 @@ intel_teardown_mchbar(struct drm_i915_private *dev_priv) release_resource(&dev_priv->mch_res); } -/* true = enable decode, false = disable decoder */ -static unsigned int i915_vga_set_decode(void *cookie, bool state) +static int i915_driver_modeset_probe(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = cookie; - - intel_modeset_vga_set_state(dev_priv, state); - if (state) - return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | - VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; - else - return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; -} - -static int i915_resume_switcheroo(struct drm_device *dev); -static int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state); - -static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) -{ - struct drm_device *dev = pci_get_drvdata(pdev); - pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; - - if (state == VGA_SWITCHEROO_ON) { - pr_info("switched on\n"); - dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; - /* i915 resume handler doesn't set to D0 */ - pci_set_power_state(pdev, PCI_D0); - i915_resume_switcheroo(dev); - dev->switch_power_state = DRM_SWITCH_POWER_ON; - } else { - pr_info("switched off\n"); - dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; - i915_suspend_switcheroo(dev, pmm); - dev->switch_power_state = DRM_SWITCH_POWER_OFF; - } -} - -static bool i915_switcheroo_can_switch(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); - - /* - * FIXME: open_count is protected by drm_global_mutex but that would lead to - * locking inversion with the driver load path. And the access here is - * completely racy anyway. So don't bother with locking for now. 
- */ - return dev->open_count == 0; -} - -static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { - .set_gpu_state = i915_switcheroo_set_state, - .reprobe = NULL, - .can_switch = i915_switcheroo_can_switch, -}; - -static int i915_load_modeset_init(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct pci_dev *pdev = dev_priv->drm.pdev; int ret; - if (i915_inject_load_failure()) + if (i915_inject_probe_failure(i915)) return -ENODEV; - if (HAS_DISPLAY(dev_priv)) { - ret = drm_vblank_init(&dev_priv->drm, - INTEL_INFO(dev_priv)->num_pipes); + if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) { + ret = drm_vblank_init(&i915->drm, + INTEL_NUM_PIPES(i915)); if (ret) goto out; } - intel_bios_init(dev_priv); + intel_bios_init(i915); - /* If we have > 1 VGA cards, then we need to arbitrate access - * to the common VGA resources. - * - * If we are a secondary display controller (!PCI_DISPLAY_CLASS_VGA), - * then we do not take part in VGA arbitration and the - * vga_client_register() fails with -ENODEV. - */ - ret = vga_client_register(pdev, dev_priv, NULL, i915_vga_set_decode); - if (ret && ret != -ENODEV) + ret = intel_vga_register(i915); + if (ret) goto out; intel_register_dsm_handler(); - ret = vga_switcheroo_register_client(pdev, &i915_switcheroo_ops, false); + ret = i915_switcheroo_register(i915); if (ret) goto cleanup_vga_client; - /* must happen before intel_power_domains_init_hw() on VLV/CHV */ - intel_update_rawclk(dev_priv); - - intel_power_domains_init_hw(dev_priv, false); + intel_power_domains_init_hw(i915, false); - intel_csr_ucode_init(dev_priv); + intel_csr_ucode_init(i915); - ret = intel_irq_install(dev_priv); + ret = intel_irq_install(i915); if (ret) goto cleanup_csr; - intel_gmbus_setup(dev_priv); - /* Important: The output setup functions called by modeset_init need * working irqs for e.g. gmbus and dp aux transfers. */ - ret = intel_modeset_init(dev); + ret = intel_modeset_init(i915); if (ret) goto cleanup_irq; - ret = i915_gem_init(dev_priv); + ret = i915_gem_init(i915); if (ret) goto cleanup_modeset; - intel_overlay_setup(dev_priv); + intel_overlay_setup(i915); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(i915) || !INTEL_DISPLAY_ENABLED(i915)) return 0; - ret = intel_fbdev_init(dev); + ret = intel_fbdev_init(&i915->drm); if (ret) goto cleanup_gem; /* Only enable hotplug handling once the fbdev is fully set up. 
*/ - intel_hpd_init(dev_priv); + intel_hpd_init(i915); - intel_init_ipc(dev_priv); + intel_init_ipc(i915); return 0; cleanup_gem: - i915_gem_suspend(dev_priv); - i915_gem_fini_hw(dev_priv); - i915_gem_fini(dev_priv); + i915_gem_suspend(i915); + i915_gem_driver_remove(i915); + i915_gem_driver_release(i915); cleanup_modeset: - intel_modeset_cleanup(dev); + intel_modeset_driver_remove(i915); cleanup_irq: - drm_irq_uninstall(dev); - intel_gmbus_teardown(dev_priv); + intel_irq_uninstall(i915); cleanup_csr: - intel_csr_ucode_fini(dev_priv); - intel_power_domains_fini_hw(dev_priv); - vga_switcheroo_unregister_client(pdev); + intel_csr_ucode_fini(i915); + intel_power_domains_driver_remove(i915); + i915_switcheroo_unregister(i915); cleanup_vga_client: - vga_client_register(pdev, NULL, NULL, NULL); + intel_vga_unregister(i915); out: return ret; } -static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) +static void i915_driver_modeset_remove(struct drm_i915_private *i915) { - struct apertures_struct *ap; - struct pci_dev *pdev = dev_priv->drm.pdev; - struct i915_ggtt *ggtt = &dev_priv->ggtt; - bool primary; - int ret; - - ap = alloc_apertures(1); - if (!ap) - return -ENOMEM; + intel_modeset_driver_remove(i915); - ap->ranges[0].base = ggtt->gmadr.start; - ap->ranges[0].size = ggtt->mappable_end; + intel_irq_uninstall(i915); - primary = - pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; + intel_bios_driver_remove(i915); - ret = drm_fb_helper_remove_conflicting_framebuffers(ap, "inteldrmfb", primary); + i915_switcheroo_unregister(i915); - kfree(ap); + intel_vga_unregister(i915); - return ret; + intel_csr_ucode_fini(i915); } static void intel_init_dpio(struct drm_i915_private *dev_priv) @@ -840,15 +414,6 @@ out_err: return -ENOMEM; } -static void i915_engines_cleanup(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) - kfree(engine); -} - static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) { destroy_workqueue(dev_priv->hotplug.dp_wq); @@ -881,8 +446,37 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) } } +static int vlv_alloc_s0ix_state(struct drm_i915_private *i915) +{ + if (!IS_VALLEYVIEW(i915)) + return 0; + + /* we write all the values in the struct, so no need to zero it out */ + i915->vlv_s0ix_state = kmalloc(sizeof(*i915->vlv_s0ix_state), + GFP_KERNEL); + if (!i915->vlv_s0ix_state) + return -ENOMEM; + + return 0; +} + +static void vlv_free_s0ix_state(struct drm_i915_private *i915) +{ + if (!i915->vlv_s0ix_state) + return; + + kfree(i915->vlv_s0ix_state); + i915->vlv_s0ix_state = NULL; +} + +static void sanitize_gpu(struct drm_i915_private *i915) +{ + if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) + __intel_gt_reset(&i915->gt, ALL_ENGINES); +} + /** - * i915_driver_init_early - setup state not requiring device access + * i915_driver_early_probe - setup state not requiring device access * @dev_priv: device private * * Initialize everything that is a "SW-only" state, that is state not @@ -891,16 +485,17 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) * system memory allocation, setting up device specific attributes and * function hooks not requiring accessing the device. 
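The vlv_alloc_s0ix_state()/vlv_free_s0ix_state() hunks above turn the Valleyview S0ix register snapshot into a pointer that is only allocated on platforms that need it, and the save/restore code later in this diff simply returns when that pointer is NULL. A stripped-down sketch of the same pattern (not part of the patch; example_state and friends are hypothetical):

#include <linux/slab.h>
#include <linux/types.h>

struct example_state {
	u32 saved_reg;
};

struct example_priv {
	bool needs_state;		/* e.g. only true on one platform */
	struct example_state *state;
};

static int example_alloc_state(struct example_priv *priv)
{
	if (!priv->needs_state)
		return 0;

	/* every field is written before it is read, so kmalloc() is enough */
	priv->state = kmalloc(sizeof(*priv->state), GFP_KERNEL);
	if (!priv->state)
		return -ENOMEM;

	return 0;
}

static void example_free_state(struct example_priv *priv)
{
	kfree(priv->state);		/* kfree(NULL) is a no-op */
	priv->state = NULL;
}

static void example_save_state(struct example_priv *priv)
{
	struct example_state *s = priv->state;

	if (!s)				/* never allocated: nothing to save */
		return;

	s->saved_reg = 0;		/* read the real register here */
}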
*/ -static int i915_driver_init_early(struct drm_i915_private *dev_priv) +static int i915_driver_early_probe(struct drm_i915_private *dev_priv) { int ret = 0; - if (i915_inject_load_failure()) + if (i915_inject_probe_failure(dev_priv)) return -ENODEV; intel_device_info_subplatform_init(dev_priv); - intel_uncore_init_early(&dev_priv->uncore); + intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug); + intel_uncore_init_early(&dev_priv->uncore, dev_priv); spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); @@ -920,24 +515,27 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv) ret = i915_workqueues_init(dev_priv); if (ret < 0) - goto err_engines; + return ret; - ret = i915_gem_init_early(dev_priv); + ret = vlv_alloc_s0ix_state(dev_priv); if (ret < 0) goto err_workqueues; + intel_wopcm_init_early(&dev_priv->wopcm); + + intel_gt_init_early(&dev_priv->gt, dev_priv); + + i915_gem_init_early(dev_priv); + /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(dev_priv); - intel_wopcm_init_early(&dev_priv->wopcm); - intel_uc_init_early(dev_priv); intel_pm_setup(dev_priv); intel_init_dpio(dev_priv); ret = intel_power_domains_init(dev_priv); if (ret < 0) - goto err_uc; + goto err_gem; intel_irq_init(dev_priv); - intel_hangcheck_init(dev_priv); intel_init_display_hooks(dev_priv); intel_init_clock_gating_hooks(dev_priv); intel_init_audio_hooks(dev_priv); @@ -947,35 +545,35 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv) return 0; -err_uc: - intel_uc_cleanup_early(dev_priv); +err_gem: i915_gem_cleanup_early(dev_priv); + intel_gt_driver_late_release(&dev_priv->gt); + vlv_free_s0ix_state(dev_priv); err_workqueues: i915_workqueues_cleanup(dev_priv); -err_engines: - i915_engines_cleanup(dev_priv); return ret; } /** - * i915_driver_cleanup_early - cleanup the setup done in i915_driver_init_early() + * i915_driver_late_release - cleanup the setup done in + * i915_driver_early_probe() * @dev_priv: device private */ -static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) +static void i915_driver_late_release(struct drm_i915_private *dev_priv) { intel_irq_fini(dev_priv); intel_power_domains_cleanup(dev_priv); - intel_uc_cleanup_early(dev_priv); i915_gem_cleanup_early(dev_priv); + intel_gt_driver_late_release(&dev_priv->gt); + vlv_free_s0ix_state(dev_priv); i915_workqueues_cleanup(dev_priv); - i915_engines_cleanup(dev_priv); pm_qos_remove_request(&dev_priv->sb_qos); mutex_destroy(&dev_priv->sb_lock); } /** - * i915_driver_init_mmio - setup device MMIO + * i915_driver_mmio_probe - setup device MMIO * @dev_priv: device private * * Setup minimal device state necessary for MMIO accesses later in the @@ -983,11 +581,11 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) * side effects or exposing the driver via kernel internal or user space * interfaces. 
*/ -static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) +static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) { int ret; - if (i915_inject_load_failure()) + if (i915_inject_probe_failure(dev_priv)) return -ENODEV; if (i915_get_bridge_dev(dev_priv)) @@ -1004,13 +602,14 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) intel_uncore_prune_mmio_domains(&dev_priv->uncore); - intel_uc_init_mmio(dev_priv); + intel_uc_init_mmio(&dev_priv->gt.uc); - ret = intel_engines_init_mmio(dev_priv); + ret = intel_engines_init_mmio(&dev_priv->gt); if (ret) goto err_uncore; - i915_gem_init_mmio(dev_priv); + /* As early as possible, scrub existing GPU state before clobbering */ + sanitize_gpu(dev_priv); return 0; @@ -1024,10 +623,10 @@ err_bridge: } /** - * i915_driver_cleanup_mmio - cleanup the setup done in i915_driver_init_mmio() + * i915_driver_mmio_release - cleanup the setup done in i915_driver_mmio_probe() * @dev_priv: device private */ -static void i915_driver_cleanup_mmio(struct drm_i915_private *dev_priv) +static void i915_driver_mmio_release(struct drm_i915_private *dev_priv) { intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); @@ -1462,7 +1061,7 @@ intel_get_dram_info(struct drm_i915_private *dev_priv) */ dram_info->is_16gb_dimm = !IS_GEN9_LP(dev_priv); - if (INTEL_GEN(dev_priv) < 9) + if (INTEL_GEN(dev_priv) < 9 || !HAS_DISPLAY(dev_priv)) return; if (IS_GEN9_LP(dev_priv)) @@ -1482,8 +1081,8 @@ intel_get_dram_info(struct drm_i915_private *dev_priv) static u32 gen9_edram_size_mb(struct drm_i915_private *dev_priv, u32 cap) { - const unsigned int ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 }; - const unsigned int sets[4] = { 1, 1, 2, 2 }; + static const u8 ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 }; + static const u8 sets[4] = { 1, 1, 2, 2 }; return EDRAM_NUM_BANKS(cap) * ways[EDRAM_WAYS_IDX(cap)] * @@ -1516,22 +1115,23 @@ static void edram_detect(struct drm_i915_private *dev_priv) dev_priv->edram_size_mb = gen9_edram_size_mb(dev_priv, edram_cap); - DRM_INFO("Found %uMB of eDRAM\n", dev_priv->edram_size_mb); + dev_info(dev_priv->drm.dev, + "Found %uMB of eDRAM\n", dev_priv->edram_size_mb); } /** - * i915_driver_init_hw - setup state requiring device access + * i915_driver_hw_probe - setup state requiring device access * @dev_priv: device private * * Setup state that requires accessing the device, but doesn't require * exposing the driver via kernel internal or userspace interfaces. */ -static int i915_driver_init_hw(struct drm_i915_private *dev_priv) +static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; int ret; - if (i915_inject_load_failure()) + if (i915_inject_probe_failure(dev_priv)) return -ENODEV; intel_device_info_runtime_init(dev_priv); @@ -1570,41 +1170,41 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) if (ret) goto err_perf; - /* - * WARNING: Apparently we must kick fbdev drivers before vgacon, - * otherwise the vga fbdev driver falls over. 
- */ - ret = i915_kick_out_firmware_fb(dev_priv); - if (ret) { - DRM_ERROR("failed to remove conflicting framebuffer drivers\n"); + ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "inteldrmfb"); + if (ret) goto err_ggtt; - } - ret = vga_remove_vgacon(pdev); - if (ret) { - DRM_ERROR("failed to remove conflicting VGA console\n"); + ret = i915_ggtt_init_hw(dev_priv); + if (ret) goto err_ggtt; - } - ret = i915_ggtt_init_hw(dev_priv); + ret = intel_memory_regions_hw_probe(dev_priv); if (ret) goto err_ggtt; + intel_gt_init_hw_early(&dev_priv->gt, &dev_priv->ggtt); + ret = i915_ggtt_enable_hw(dev_priv); if (ret) { DRM_ERROR("failed to enable GGTT\n"); - goto err_ggtt; + goto err_mem_regions; } pci_set_master(pdev); + /* + * We don't have a max segment size, so set it to the max so sg's + * debugging layer doesn't complain + */ + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + /* overlay on gen2 is broken and can't address above 1G */ if (IS_GEN(dev_priv, 2)) { ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(30)); if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto err_ggtt; + goto err_mem_regions; } } @@ -1622,15 +1222,13 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto err_ggtt; + goto err_mem_regions; } } pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); - intel_uncore_sanitize(dev_priv); - intel_gt_init_workarounds(dev_priv); /* On the 945G/GM, the chipset reports the MSI capability on the @@ -1676,18 +1274,20 @@ err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); pm_qos_remove_request(&dev_priv->pm_qos); +err_mem_regions: + intel_memory_regions_driver_release(dev_priv); err_ggtt: - i915_ggtt_cleanup_hw(dev_priv); + i915_ggtt_driver_release(dev_priv); err_perf: i915_perf_fini(dev_priv); return ret; } /** - * i915_driver_cleanup_hw - cleanup the setup done in i915_driver_init_hw() + * i915_driver_hw_remove - cleanup the setup done in i915_driver_hw_probe() * @dev_priv: device private */ -static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv) +static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; @@ -1710,7 +1310,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - i915_gem_shrinker_register(dev_priv); + i915_gem_driver_register(dev_priv); i915_pmu_register(dev_priv); /* @@ -1730,14 +1330,13 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) } else DRM_ERROR("Failed to register driver for userspace access!\n"); - if (HAS_DISPLAY(dev_priv)) { + if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) { /* Must be done after probing outputs */ intel_opregion_register(dev_priv); acpi_video_register(); } - if (IS_GEN(dev_priv, 5)) - intel_gpu_ips_init(dev_priv); + intel_gt_driver_register(&dev_priv->gt); intel_audio_init(dev_priv); @@ -1754,7 +1353,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) * We need to coordinate the hotplugs with the asynchronous fbdev * configuration, for which we use the fbdev->async_cookie. 
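The i915_driver_hw_probe() hunk above adds a dma_set_max_seg_size() call so the scatterlist debugging layer sees an explicit (maximal) segment limit, while keeping the narrower coherent mask for gen2's 1 GiB-limited overlay. A reduced probe-time sequence under those assumptions (not part of the patch; example_setup_dma() is hypothetical):

#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/pci.h>

static int example_setup_dma(struct pci_dev *pdev, bool limit_to_1g)
{
	int ret;

	pci_set_master(pdev);

	/* no real limit, but state one so sg debugging does not complain */
	dma_set_max_seg_size(&pdev->dev, UINT_MAX);

	if (limit_to_1g) {
		/* e.g. old hardware that cannot address above 1 GiB */
		ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(30));
		if (ret)
			return ret;
	}

	return 0;
}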
*/ - if (HAS_DISPLAY(dev_priv)) + if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) drm_kms_helper_poll_init(dev); intel_power_domains_enable(dev_priv); @@ -1780,7 +1379,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) */ drm_kms_helper_poll_fini(&dev_priv->drm); - intel_gpu_ips_teardown(); + intel_gt_driver_unregister(&dev_priv->gt); acpi_video_unregister(); intel_opregion_unregister(dev_priv); @@ -1790,12 +1389,12 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_teardown_sysfs(dev_priv); drm_dev_unplug(&dev_priv->drm); - i915_gem_shrinker_unregister(dev_priv); + i915_gem_driver_unregister(dev_priv); } static void i915_welcome_messages(struct drm_i915_private *dev_priv) { - if (drm_debug & DRM_UT_DRIVER) { + if (drm_debug_enabled(DRM_UT_DRIVER)) { struct drm_printer p = drm_debug_printer("i915 device info:"); drm_printf(&p, "pciid=0x%04x rev=0x%02x platform=%s (subplatform=0x%x) gen=%i\n", @@ -1806,8 +1405,8 @@ static void i915_welcome_messages(struct drm_i915_private *dev_priv) INTEL_INFO(dev_priv)->platform), INTEL_GEN(dev_priv)); - intel_device_info_dump_flags(INTEL_INFO(dev_priv), &p); - intel_device_info_dump_runtime(RUNTIME_INFO(dev_priv), &p); + intel_device_info_print_static(INTEL_INFO(dev_priv), &p); + intel_device_info_print_runtime(RUNTIME_INFO(dev_priv), &p); } if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) @@ -1837,9 +1436,10 @@ i915_driver_create(struct pci_dev *pdev, const struct pci_device_id *ent) return ERR_PTR(err); } - i915->drm.pdev = pdev; i915->drm.dev_private = i915; - pci_set_drvdata(pdev, &i915->drm); + + i915->drm.pdev = pdev; + pci_set_drvdata(pdev, i915); /* Setup the write-once "constant" device info */ device_info = mkwrite_device_info(i915); @@ -1863,17 +1463,17 @@ static void i915_driver_destroy(struct drm_i915_private *i915) } /** - * i915_driver_load - setup chip and create an initial config + * i915_driver_probe - setup chip and create an initial config * @pdev: PCI device * @ent: matching PCI ID entry * - * The driver load routine has to do several things: + * The driver probe routine has to do several things: * - drive output discovery via intel_modeset_init() * - initialize the memory manager * - allocate initial config memory * - setup the DRM framebuffer with the allocated memory */ -int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) +int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct intel_device_info *match_info = (struct intel_device_info *)ent->driver_data; @@ -1888,25 +1488,44 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) if (!i915_modparams.nuclear_pageflip && match_info->gen < 5) dev_priv->drm.driver_features &= ~DRIVER_ATOMIC; + /* + * Check if we support fake LMEM -- for now we only unleash this for + * the live selftests(test-and-exit). 
+ */ +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) + if (IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) { + if (INTEL_GEN(dev_priv) >= 9 && i915_selftest.live < 0 && + i915_modparams.fake_lmem_start) { + mkwrite_device_info(dev_priv)->memory_regions = + REGION_SMEM | REGION_LMEM | REGION_STOLEN; + mkwrite_device_info(dev_priv)->is_dgfx = true; + GEM_BUG_ON(!HAS_LMEM(dev_priv)); + GEM_BUG_ON(!IS_DGFX(dev_priv)); + } + } +#endif + ret = pci_enable_device(pdev); if (ret) goto out_fini; - ret = i915_driver_init_early(dev_priv); + ret = i915_driver_early_probe(dev_priv); if (ret < 0) goto out_pci_disable; disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - ret = i915_driver_init_mmio(dev_priv); + i915_detect_vgpu(dev_priv); + + ret = i915_driver_mmio_probe(dev_priv); if (ret < 0) goto out_runtime_pm_put; - ret = i915_driver_init_hw(dev_priv); + ret = i915_driver_hw_probe(dev_priv); if (ret < 0) goto out_cleanup_mmio; - ret = i915_load_modeset_init(&dev_priv->drm); + ret = i915_driver_modeset_probe(dev_priv); if (ret < 0) goto out_cleanup_hw; @@ -1919,66 +1538,54 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; out_cleanup_hw: - i915_driver_cleanup_hw(dev_priv); - i915_ggtt_cleanup_hw(dev_priv); + i915_driver_hw_remove(dev_priv); + intel_memory_regions_driver_release(dev_priv); + i915_ggtt_driver_release(dev_priv); out_cleanup_mmio: - i915_driver_cleanup_mmio(dev_priv); + i915_driver_mmio_release(dev_priv); out_runtime_pm_put: enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - i915_driver_cleanup_early(dev_priv); + i915_driver_late_release(dev_priv); out_pci_disable: pci_disable_device(pdev); out_fini: - i915_load_error(dev_priv, "Device initialization failed (%d)\n", ret); + i915_probe_error(dev_priv, "Device initialization failed (%d)\n", ret); i915_driver_destroy(dev_priv); return ret; } -void i915_driver_unload(struct drm_device *dev) +void i915_driver_remove(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct pci_dev *pdev = dev_priv->drm.pdev; + disable_rpm_wakeref_asserts(&i915->runtime_pm); - disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - - i915_driver_unregister(dev_priv); + i915_driver_unregister(i915); /* * After unregistering the device to prevent any new users, cancel * all in-flight requests so that we can quickly unbind the active * resources. */ - i915_gem_set_wedged(dev_priv); + intel_gt_set_wedged(&i915->gt); /* Flush any external code that still may be under the RCU lock */ synchronize_rcu(); - i915_gem_suspend(dev_priv); - - drm_atomic_helper_shutdown(dev); - - intel_gvt_cleanup(dev_priv); - - intel_modeset_cleanup(dev); - - intel_bios_cleanup(dev_priv); + i915_gem_suspend(i915); - vga_switcheroo_unregister_client(pdev); - vga_client_register(pdev, NULL, NULL, NULL); + drm_atomic_helper_shutdown(&i915->drm); - intel_csr_ucode_fini(dev_priv); + intel_gvt_driver_remove(i915); - /* Free error state after interrupts are fully disabled. 
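The probe and unwind hunks above keep the usual kernel goto ladder: each stage that succeeds adds one more cleanup label to the error path, and a failure jumps to the label that unwinds exactly what has been set up so far, in reverse order. A minimal skeleton of that shape (not part of the patch; the stage functions are hypothetical stubs):

static int example_stage_a(void) { return 0; }
static void example_stage_a_release(void) { }
static int example_stage_b(void) { return 0; }
static void example_stage_b_remove(void) { }
static int example_stage_c(void) { return 0; }

static int example_probe(void)
{
	int ret;

	ret = example_stage_a();	/* early, SW-only setup */
	if (ret)
		return ret;

	ret = example_stage_b();	/* MMIO-level setup */
	if (ret)
		goto err_a;

	ret = example_stage_c();	/* full HW setup */
	if (ret)
		goto err_b;

	return 0;

err_b:
	example_stage_b_remove();
err_a:
	example_stage_a_release();
	return ret;
}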
*/ - cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); - i915_reset_error_state(dev_priv); + i915_driver_modeset_remove(i915); - i915_gem_fini_hw(dev_priv); + i915_reset_error_state(i915); + i915_gem_driver_remove(i915); - intel_power_domains_fini_hw(dev_priv); + intel_power_domains_driver_remove(i915); - i915_driver_cleanup_hw(dev_priv); + i915_driver_hw_remove(i915); - enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); + enable_rpm_wakeref_asserts(&i915->runtime_pm); } static void i915_driver_release(struct drm_device *dev) @@ -1988,15 +1595,17 @@ static void i915_driver_release(struct drm_device *dev) disable_rpm_wakeref_asserts(rpm); - i915_gem_fini(dev_priv); + i915_gem_driver_release(dev_priv); + + intel_memory_regions_driver_release(dev_priv); + i915_ggtt_driver_release(dev_priv); - i915_ggtt_cleanup_hw(dev_priv); - i915_driver_cleanup_mmio(dev_priv); + i915_driver_mmio_release(dev_priv); enable_rpm_wakeref_asserts(rpm); - intel_runtime_pm_cleanup(rpm); + intel_runtime_pm_driver_release(rpm); - i915_driver_cleanup_early(dev_priv); + i915_driver_late_release(dev_priv); i915_driver_destroy(dev_priv); } @@ -2034,12 +1643,13 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; - mutex_lock(&dev->struct_mutex); i915_gem_context_close(file); i915_gem_release(dev, file); - mutex_unlock(&dev->struct_mutex); - kfree(file_priv); + kfree_rcu(file_priv, rcu); + + /* Catch up with all the deferred frees from "this" client */ + i915_gem_flush_free_objects(to_i915(dev)); } static void intel_suspend_encoders(struct drm_i915_private *dev_priv) @@ -2144,7 +1754,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = dev_priv->drm.pdev; struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; - int ret; + int ret = 0; disable_rpm_wakeref_asserts(rpm); @@ -2155,12 +1765,9 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) intel_power_domains_suspend(dev_priv, get_suspend_mode(dev_priv, hibernation)); - ret = 0; - if (INTEL_GEN(dev_priv) >= 11 || IS_GEN9_LP(dev_priv)) - bxt_enable_dc9(dev_priv); - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - hsw_enable_pc8(dev_priv); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + intel_display_power_suspend_late(dev_priv); + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) ret = vlv_suspend_complete(dev_priv); if (ret) { @@ -2188,34 +1795,28 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) out: enable_rpm_wakeref_asserts(rpm); - if (!dev_priv->uncore.user_forcewake.count) - intel_runtime_pm_cleanup(rpm); + if (!dev_priv->uncore.user_forcewake_count) + intel_runtime_pm_driver_release(rpm); return ret; } -static int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state) +int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state) { int error; - if (!dev) { - DRM_ERROR("dev: %p\n", dev); - DRM_ERROR("DRM not initialized, aborting suspend.\n"); - return -ENODEV; - } - if (WARN_ON_ONCE(state.event != PM_EVENT_SUSPEND && state.event != PM_EVENT_FREEZE)) return -EINVAL; - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - error = i915_drm_suspend(dev); + error = i915_drm_suspend(&i915->drm); if (error) return error; - return i915_drm_suspend_late(dev, false); + return 
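The postclose hunk above stops taking struct_mutex and frees the per-file state with kfree_rcu() instead, which defers the actual kfree() until after an RCU grace period so any reader still dereferencing the structure under rcu_read_lock() stays safe; the structure must embed a struct rcu_head for this. The shape of that pattern (not part of the patch; example_file is hypothetical):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_file {
	int id;
	struct rcu_head rcu;	/* storage kfree_rcu() uses for the deferral */
};

static void example_file_close(struct example_file *file)
{
	/*
	 * Readers that looked this up under rcu_read_lock() may still be
	 * using it; the memory is only returned after they have finished.
	 */
	kfree_rcu(file, rcu);
}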
i915_drm_suspend_late(&i915->drm, false); } static int i915_drm_resume(struct drm_device *dev) @@ -2224,14 +1825,16 @@ static int i915_drm_resume(struct drm_device *dev) int ret; disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - intel_sanitize_gt_powersave(dev_priv); - i915_gem_sanitize(dev_priv); + sanitize_gpu(dev_priv); ret = i915_ggtt_enable_hw(dev_priv); if (ret) DRM_ERROR("failed to re-enable GGTT\n"); + i915_gem_restore_gtt_mappings(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); + intel_csr_ucode_resume(dev_priv); i915_restore_state(dev_priv); @@ -2255,7 +1858,7 @@ static int i915_drm_resume(struct drm_device *dev) i915_gem_resume(dev_priv); - intel_modeset_init_hw(dev); + intel_modeset_init_hw(dev_priv); intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); @@ -2348,75 +1951,64 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_uncore_resume_early(&dev_priv->uncore); - i915_check_and_clear_faults(dev_priv); - - if (INTEL_GEN(dev_priv) >= 11 || IS_GEN9_LP(dev_priv)) { - gen9_sanitize_dc_state(dev_priv); - bxt_disable_dc9(dev_priv); - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - hsw_disable_pc8(dev_priv); - } + intel_gt_check_and_clear_faults(&dev_priv->gt); - intel_uncore_sanitize(dev_priv); + intel_display_power_resume_early(dev_priv); intel_power_domains_resume(dev_priv); - intel_gt_sanitize(dev_priv, true); - enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; } -static int i915_resume_switcheroo(struct drm_device *dev) +int i915_resume_switcheroo(struct drm_i915_private *i915) { int ret; - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - ret = i915_drm_resume_early(dev); + ret = i915_drm_resume_early(&i915->drm); if (ret) return ret; - return i915_drm_resume(dev); + return i915_drm_resume(&i915->drm); } static int i915_pm_prepare(struct device *kdev) { - struct pci_dev *pdev = to_pci_dev(kdev); - struct drm_device *dev = pci_get_drvdata(pdev); + struct drm_i915_private *i915 = kdev_to_i915(kdev); - if (!dev) { + if (!i915) { dev_err(kdev, "DRM not initialized, aborting suspend.\n"); return -ENODEV; } - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_prepare(dev); + return i915_drm_prepare(&i915->drm); } static int i915_pm_suspend(struct device *kdev) { - struct pci_dev *pdev = to_pci_dev(kdev); - struct drm_device *dev = pci_get_drvdata(pdev); + struct drm_i915_private *i915 = kdev_to_i915(kdev); - if (!dev) { + if (!i915) { dev_err(kdev, "DRM not initialized, aborting suspend.\n"); return -ENODEV; } - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_suspend(dev); + return i915_drm_suspend(&i915->drm); } static int i915_pm_suspend_late(struct device *kdev) { - struct drm_device *dev = &kdev_to_i915(kdev)->drm; + struct drm_i915_private *i915 = kdev_to_i915(kdev); /* * We have a suspend ordering issue with the snd-hda driver also @@ -2427,55 +2019,55 @@ static int i915_pm_suspend_late(struct device *kdev) * FIXME: This should be solved with a special hdmi sink device or * similar so that power domains can be employed. 
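The PM-callback hunks above replace the to_pci_dev()/pci_get_drvdata() chain with kdev_to_i915() and gate every callback on drm.switch_power_state. Assuming drvdata now holds the driver-private pointer directly (as the pci_set_drvdata() change earlier in this diff suggests), the per-callback boilerplate reduces to roughly the following sketch (not part of the patch; example_ names are hypothetical):

#include <linux/device.h>
#include <linux/pm.h>

struct example_priv {
	bool switched_off;	/* stand-in for DRM_SWITCH_POWER_OFF */
};

static int example_pm_suspend(struct device *kdev)
{
	/* drvdata was set to the driver-private struct at probe time */
	struct example_priv *priv = dev_get_drvdata(kdev);

	if (!priv) {
		dev_err(kdev, "not initialized, aborting suspend\n");
		return -ENODEV;
	}

	if (priv->switched_off)	/* vga_switcheroo already powered us off */
		return 0;

	/* do the real suspend work here */
	return 0;
}

static const struct dev_pm_ops example_pm_ops = {
	.suspend = example_pm_suspend,
};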
*/ - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_suspend_late(dev, false); + return i915_drm_suspend_late(&i915->drm, false); } static int i915_pm_poweroff_late(struct device *kdev) { - struct drm_device *dev = &kdev_to_i915(kdev)->drm; + struct drm_i915_private *i915 = kdev_to_i915(kdev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_suspend_late(dev, true); + return i915_drm_suspend_late(&i915->drm, true); } static int i915_pm_resume_early(struct device *kdev) { - struct drm_device *dev = &kdev_to_i915(kdev)->drm; + struct drm_i915_private *i915 = kdev_to_i915(kdev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_resume_early(dev); + return i915_drm_resume_early(&i915->drm); } static int i915_pm_resume(struct device *kdev) { - struct drm_device *dev = &kdev_to_i915(kdev)->drm; + struct drm_i915_private *i915 = kdev_to_i915(kdev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_resume(dev); + return i915_drm_resume(&i915->drm); } /* freeze: before creating the hibernation_image */ static int i915_pm_freeze(struct device *kdev) { - struct drm_device *dev = &kdev_to_i915(kdev)->drm; + struct drm_i915_private *i915 = kdev_to_i915(kdev); int ret; - if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { - ret = i915_drm_suspend(dev); + if (i915->drm.switch_power_state != DRM_SWITCH_POWER_OFF) { + ret = i915_drm_suspend(&i915->drm); if (ret) return ret; } - ret = i915_gem_freeze(kdev_to_i915(kdev)); + ret = i915_gem_freeze(i915); if (ret) return ret; @@ -2484,16 +2076,16 @@ static int i915_pm_freeze(struct device *kdev) static int i915_pm_freeze_late(struct device *kdev) { - struct drm_device *dev = &kdev_to_i915(kdev)->drm; + struct drm_i915_private *i915 = kdev_to_i915(kdev); int ret; - if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { - ret = i915_drm_suspend_late(dev, true); + if (i915->drm.switch_power_state != DRM_SWITCH_POWER_OFF) { + ret = i915_drm_suspend_late(&i915->drm, true); if (ret) return ret; } - ret = i915_gem_freeze_late(kdev_to_i915(kdev)); + ret = i915_gem_freeze_late(i915); if (ret) return ret; @@ -2550,9 +2142,12 @@ static int i915_pm_restore(struct device *kdev) */ static void vlv_save_gunit_s0ix_state(struct drm_i915_private *dev_priv) { - struct vlv_s0ix_state *s = &dev_priv->vlv_s0ix_state; + struct vlv_s0ix_state *s = dev_priv->vlv_s0ix_state; int i; + if (!s) + return; + /* GAM 0x4000-0x4770 */ s->wr_watermark = I915_READ(GEN7_WR_WATERMARK); s->gfx_prio_ctrl = I915_READ(GEN7_GFX_PRIO_CTRL); @@ -2631,10 +2226,13 @@ static void vlv_save_gunit_s0ix_state(struct drm_i915_private *dev_priv) static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv) { - struct vlv_s0ix_state *s = &dev_priv->vlv_s0ix_state; + struct vlv_s0ix_state *s = dev_priv->vlv_s0ix_state; u32 val; int i; + if (!s) + return; + /* GAM 0x4000-0x4770 */ I915_WRITE(GEN7_WR_WATERMARK, s->wr_watermark); I915_WRITE(GEN7_GFX_PRIO_CTRL, s->gfx_prio_ctrl); @@ -2843,8 +2441,7 @@ static int vlv_suspend_complete(struct drm_i915_private *dev_priv) if (err) goto err2; - if (!IS_CHERRYVIEW(dev_priv)) - vlv_save_gunit_s0ix_state(dev_priv); + vlv_save_gunit_s0ix_state(dev_priv); err = vlv_force_gfx_clock(dev_priv, 
false); if (err) @@ -2874,8 +2471,7 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv, */ ret = vlv_force_gfx_clock(dev_priv, true); - if (!IS_CHERRYVIEW(dev_priv)) - vlv_restore_gunit_s0ix_state(dev_priv); + vlv_restore_gunit_s0ix_state(dev_priv); err = vlv_allow_gt_wake(dev_priv, true); if (!ret) @@ -2895,14 +2491,9 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv, static int intel_runtime_suspend(struct device *kdev) { - struct pci_dev *pdev = to_pci_dev(kdev); - struct drm_device *dev = pci_get_drvdata(pdev); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = kdev_to_i915(kdev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; - int ret; - - if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv)))) - return -ENODEV; + int ret = 0; if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) return -ENODEV; @@ -2917,24 +2508,16 @@ static int intel_runtime_suspend(struct device *kdev) */ i915_gem_runtime_suspend(dev_priv); - intel_uc_runtime_suspend(dev_priv); + intel_gt_runtime_suspend(&dev_priv->gt); intel_runtime_pm_disable_interrupts(dev_priv); intel_uncore_suspend(&dev_priv->uncore); - ret = 0; - if (INTEL_GEN(dev_priv) >= 11) { - icl_display_core_uninit(dev_priv); - bxt_enable_dc9(dev_priv); - } else if (IS_GEN9_LP(dev_priv)) { - bxt_display_core_uninit(dev_priv); - bxt_enable_dc9(dev_priv); - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - hsw_enable_pc8(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + intel_display_power_suspend(dev_priv); + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) ret = vlv_suspend_complete(dev_priv); - } if (ret) { DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret); @@ -2942,10 +2525,9 @@ static int intel_runtime_suspend(struct device *kdev) intel_runtime_pm_enable_interrupts(dev_priv); - intel_uc_resume(dev_priv); + intel_gt_runtime_resume(&dev_priv->gt); - i915_gem_init_swizzling(dev_priv); - i915_gem_restore_fences(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); enable_rpm_wakeref_asserts(rpm); @@ -2953,7 +2535,7 @@ static int intel_runtime_suspend(struct device *kdev) } enable_rpm_wakeref_asserts(rpm); - intel_runtime_pm_cleanup(rpm); + intel_runtime_pm_driver_release(rpm); if (intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore)) DRM_ERROR("Unclaimed access detected prior to suspending\n"); @@ -2994,9 +2576,7 @@ static int intel_runtime_suspend(struct device *kdev) static int intel_runtime_resume(struct device *kdev) { - struct pci_dev *pdev = to_pci_dev(kdev); - struct drm_device *dev = pci_get_drvdata(pdev); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = kdev_to_i915(kdev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; int ret = 0; @@ -3013,41 +2593,21 @@ static int intel_runtime_resume(struct device *kdev) if (intel_uncore_unclaimed_mmio(&dev_priv->uncore)) DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n"); - if (INTEL_GEN(dev_priv) >= 11) { - bxt_disable_dc9(dev_priv); - icl_display_core_init(dev_priv, true); - if (dev_priv->csr.dmc_payload) { - if (dev_priv->csr.allowed_dc_mask & - DC_STATE_EN_UPTO_DC6) - skl_enable_dc6(dev_priv); - else if (dev_priv->csr.allowed_dc_mask & - DC_STATE_EN_UPTO_DC5) - gen9_enable_dc5(dev_priv); - } - } else if (IS_GEN9_LP(dev_priv)) { - bxt_disable_dc9(dev_priv); - bxt_display_core_init(dev_priv, true); - if (dev_priv->csr.dmc_payload && - (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC5)) - 
gen9_enable_dc5(dev_priv); - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - hsw_disable_pc8(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + intel_display_power_resume(dev_priv); + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) ret = vlv_resume_prepare(dev_priv, true); - } intel_uncore_runtime_resume(&dev_priv->uncore); intel_runtime_pm_enable_interrupts(dev_priv); - intel_uc_resume(dev_priv); - /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). */ - i915_gem_init_swizzling(dev_priv); - i915_gem_restore_fences(dev_priv); + intel_gt_runtime_resume(&dev_priv->gt); + i915_gem_restore_fences(&dev_priv->ggtt); /* * On VLV/CHV display interrupts are part of the display @@ -3109,18 +2669,12 @@ const struct dev_pm_ops i915_pm_ops = { .runtime_resume = intel_runtime_resume, }; -static const struct vm_operations_struct i915_gem_vm_ops = { - .fault = i915_gem_fault, - .open = drm_gem_vm_open, - .close = drm_gem_vm_close, -}; - static const struct file_operations i915_driver_fops = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, .unlocked_ioctl = drm_ioctl, - .mmap = drm_gem_mmap, + .mmap = i915_gem_mmap, .poll = drm_poll, .read = drm_read, .compat_ioctl = i915_compat_ioctl, @@ -3167,7 +2721,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_OFFSET, i915_gem_mmap_offset_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling_ioctl, DRM_RENDER_ALLOW), @@ -3188,9 +2742,9 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW), }; @@ -3200,7 +2754,7 @@ static struct drm_driver driver = { * deal with them for Intel hardware. 
*/ .driver_features = - DRIVER_GEM | DRIVER_PRIME | + DRIVER_GEM | DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, .release = i915_driver_release, .open = i915_driver_open, @@ -3209,15 +2763,18 @@ static struct drm_driver driver = { .gem_close_object = i915_gem_close_object, .gem_free_object_unlocked = i915_gem_free_object, - .gem_vm_ops = &i915_gem_vm_ops, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = i915_gem_prime_export, .gem_prime_import = i915_gem_prime_import, + .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, + .get_scanout_position = i915_get_crtc_scanoutpos, + .dumb_create = i915_gem_dumb_create, - .dumb_map_offset = i915_gem_mmap_gtt, + .dumb_map_offset = i915_gem_dumb_mmap_offset, + .ioctls = i915_ioctls, .num_ioctls = ARRAY_SIZE(i915_ioctls), .fops = &i915_driver_fops, @@ -3228,7 +2785,3 @@ static struct drm_driver driver = { .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, }; - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_drm.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fe7a6ec2c199..077af22b8340 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -43,9 +43,10 @@ #include <linux/mm_types.h> #include <linux/perf_event.h> #include <linux/pm_qos.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include <linux/shmem_fs.h> #include <linux/stackdepot.h> +#include <linux/xarray.h> #include <drm/intel-gtt.h> #include <drm/drm_legacy.h> /* for struct drm_dma_handle */ @@ -67,29 +68,41 @@ #include "display/intel_display.h" #include "display/intel_display_power.h" #include "display/intel_dpll_mgr.h" +#include "display/intel_dsb.h" #include "display/intel_frontbuffer.h" +#include "display/intel_gmbus.h" #include "display/intel_opregion.h" +#include "gem/i915_gem_context_types.h" +#include "gem/i915_gem_shrinker.h" +#include "gem/i915_gem_stolen.h" + #include "gt/intel_lrc.h" #include "gt/intel_engine.h" +#include "gt/intel_gt_types.h" #include "gt/intel_workarounds.h" +#include "gt/uc/intel_uc.h" #include "intel_device_info.h" +#include "intel_pch.h" #include "intel_runtime_pm.h" -#include "intel_uc.h" +#include "intel_memory_region.h" #include "intel_uncore.h" #include "intel_wakeref.h" #include "intel_wopcm.h" #include "i915_gem.h" -#include "gem/i915_gem_context_types.h" #include "i915_gem_fence_reg.h" #include "i915_gem_gtt.h" #include "i915_gpu_error.h" +#include "i915_perf_types.h" #include "i915_request.h" #include "i915_scheduler.h" -#include "i915_timeline.h" +#include "gt/intel_timeline.h" #include "i915_vma.h" +#include "i915_irq.h" + +#include "intel_region_lmem.h" #include "intel_gvt.h" @@ -98,45 +111,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190619" -#define DRIVER_TIMESTAMP 1560947544 - -/* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and - * WARN_ON()) for hw state sanity checks to check for unexpected conditions - * which may not necessarily be a user visible problem. This will either - * WARN() or DRM_ERROR() depending on the verbose_checks moduleparam, to - * enable distros and users to tailor their preferred amount of i915 abrt - * spam. - */ -#define I915_STATE_WARN(condition, format...) 
({ \ - int __ret_warn_on = !!(condition); \ - if (unlikely(__ret_warn_on)) \ - if (!WARN(i915_modparams.verbose_state_checks, format)) \ - DRM_ERROR(format); \ - unlikely(__ret_warn_on); \ -}) - -#define I915_STATE_WARN_ON(x) \ - I915_STATE_WARN((x), "%s", "WARN_ON(" __stringify(x) ")") - -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) - -bool __i915_inject_load_failure(const char *func, int line); -#define i915_inject_load_failure() \ - __i915_inject_load_failure(__func__, __LINE__) - -bool i915_error_injected(void); - -#else - -#define i915_inject_load_failure() false -#define i915_error_injected() false - -#endif - -#define i915_load_error(i915, fmt, ...) \ - __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ - fmt, ##__VA_ARGS__) +#define DRIVER_DATE "20200114" +#define DRIVER_TIMESTAMP 1579001978 struct drm_i915_gem_object; @@ -152,6 +128,10 @@ enum hpd_pin { HPD_PORT_D, HPD_PORT_E, HPD_PORT_F, + HPD_PORT_G, + HPD_PORT_H, + HPD_PORT_I, + HPD_NUM_PINS }; @@ -162,7 +142,7 @@ enum hpd_pin { #define HPD_STORM_DEFAULT_THRESHOLD 50 struct i915_hotplug { - struct work_struct hotplug_work; + struct delayed_work hotplug_work; struct { unsigned long last_jiffies; @@ -174,6 +154,7 @@ struct i915_hotplug { } state; } stats[HPD_NUM_PINS]; u32 event_bits; + u32 retry_bits; struct delayed_work reenable_work; u32 long_port_mask; @@ -210,15 +191,18 @@ struct i915_mmu_object; struct drm_i915_file_private { struct drm_i915_private *dev_priv; - struct drm_file *file; + + union { + struct drm_file *file; + struct rcu_head rcu; + }; struct { spinlock_t lock; struct list_head request_list; } mm; - struct idr context_idr; - struct mutex context_idr_lock; /* guards context_idr */ + struct xarray context_xa; struct idr vm_idr; struct mutex vm_idr_lock; /* guards vm_idr */ @@ -286,17 +270,18 @@ struct drm_i915_display_funcs { enum pipe pipe); int (*get_fifo_size)(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane); - int (*compute_pipe_wm)(struct intel_crtc_state *cstate); - int (*compute_intermediate_wm)(struct intel_crtc_state *newstate); + int (*compute_pipe_wm)(struct intel_crtc_state *crtc_state); + int (*compute_intermediate_wm)(struct intel_crtc_state *crtc_state); void (*initial_watermarks)(struct intel_atomic_state *state, - struct intel_crtc_state *cstate); + struct intel_crtc *crtc); void (*atomic_update_watermarks)(struct intel_atomic_state *state, - struct intel_crtc_state *cstate); + struct intel_crtc *crtc); void (*optimize_watermarks)(struct intel_atomic_state *state, - struct intel_crtc_state *cstate); + struct intel_crtc *crtc); int (*compute_global_watermarks)(struct intel_atomic_state *state); void (*update_wm)(struct intel_crtc *crtc); int (*modeset_calc_cdclk)(struct intel_atomic_state *state); + u8 (*calc_voltage_level)(int cdclk); /* Returns the active state of the crtc, and if the crtc is active, * fills out the pipe-config with the hw state. 
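
The union of file and rcu in drm_i915_file_private lets the structure double as its own RCU callback head once the file pointer is no longer needed, which is what enables the kfree_rcu(file_priv, rcu) call in i915_driver_postclose() earlier in this diff. A minimal sketch of the idiom, assuming the struct layout shown above; the helper name is illustrative:

static void example_free_file_priv(struct drm_i915_file_private *file_priv)
{
	/*
	 * After the final fput the file pointer is dead, so its storage is
	 * reused as the rcu_head; the actual kfree happens only after a
	 * grace period, keeping RCU readers of file_priv safe.
	 */
	kfree_rcu(file_priv, rcu);
}
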
*/ bool (*get_pipe_config)(struct intel_crtc *, @@ -305,11 +290,12 @@ struct drm_i915_display_funcs { struct intel_initial_plane_config *); int (*crtc_compute_clock)(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); - void (*crtc_enable)(struct intel_crtc_state *pipe_config, - struct drm_atomic_state *old_state); - void (*crtc_disable)(struct intel_crtc_state *old_crtc_state, - struct drm_atomic_state *old_state); - void (*update_crtcs)(struct drm_atomic_state *state); + void (*crtc_enable)(struct intel_atomic_state *state, + struct intel_crtc *crtc); + void (*crtc_disable)(struct intel_atomic_state *state, + struct intel_crtc *crtc); + void (*commit_modeset_enables)(struct intel_atomic_state *state); + void (*commit_modeset_disables)(struct intel_atomic_state *state); void (*audio_codec_enable)(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); @@ -356,6 +342,7 @@ struct intel_csr { i915_reg_t mmioaddr[20]; u32 mmiodata[20]; u32 dc_state; + u32 target_dc_state; u32 allowed_dc_mask; intel_wakeref_t wakeref; }; @@ -379,7 +366,6 @@ struct intel_fbc { unsigned threshold; unsigned int possible_framebuffer_bits; unsigned int busy_bits; - unsigned int visible_pipes_mask; struct intel_crtc *crtc; struct drm_mm_node compressed_fb; @@ -387,8 +373,8 @@ struct intel_fbc { bool false_color; - bool enabled; bool active; + bool activated; bool flip_pending; bool underrun_detected; @@ -400,9 +386,6 @@ struct intel_fbc { * these problems. */ struct intel_fbc_state_cache { - struct i915_vma *vma; - unsigned long flags; - struct { unsigned int mode_flags; u32 hsw_bdw_pixel_rate; @@ -431,6 +414,8 @@ struct intel_fbc { const struct drm_format_info *format; unsigned int stride; } fb; + u16 gen9_wa_cfb_stride; + s8 fence_id; } state_cache; /* @@ -441,9 +426,6 @@ struct intel_fbc { * are supposed to read from it in order to program the registers. */ struct intel_fbc_reg_params { - struct i915_vma *vma; - unsigned long flags; - struct { enum pipe pipe; enum i9xx_plane_id i9xx_plane; @@ -456,7 +438,9 @@ struct intel_fbc { } fb; int cfb_size; - unsigned int gen9_wa_cfb_stride; + u16 gen9_wa_cfb_stride; + s8 fence_id; + bool plane_visible; } params; const char *no_fbc_reason; @@ -504,6 +488,7 @@ struct i915_psr { bool enabled; struct intel_dp *dp; enum pipe pipe; + enum transcoder transcoder; bool active; struct work_struct work; unsigned busy_frontbuffer_bits; @@ -517,24 +502,10 @@ struct i915_psr { bool sink_not_reliable; bool irq_aux_error; u16 su_x_granularity; -}; - -/* - * Sorted by south display engine compatibility. - * If the new PCH comes with a south display engine that is not - * inherited from the latest item, please do not add it to the - * end. Instead, add it right after its "parent" PCH. 
- */ -enum intel_pch { - PCH_NOP = -1, /* PCH without south display */ - PCH_NONE = 0, /* No PCH present */ - PCH_IBX, /* Ibexpeak PCH */ - PCH_CPT, /* Cougarpoint/Pantherpoint PCH */ - PCH_LPT, /* Lynxpoint/Wildcatpoint PCH */ - PCH_SPT, /* Sunrisepoint/Kaby Lake PCH */ - PCH_CNP, /* Cannon/Comet Lake PCH */ - PCH_ICP, /* Ice Lake PCH */ - PCH_MCC, /* Mule Creek Canyon PCH */ + bool dc3co_enabled; + u32 dc3co_exit_delay; + struct delayed_work idle_work; + bool initially_probed; }; #define QUIRK_LVDS_SSC_DISABLE (1<<1) @@ -570,167 +541,7 @@ struct i915_suspend_saved_registers { u16 saveGCDGMBUS; }; -struct vlv_s0ix_state { - /* GAM */ - u32 wr_watermark; - u32 gfx_prio_ctrl; - u32 arb_mode; - u32 gfx_pend_tlb0; - u32 gfx_pend_tlb1; - u32 lra_limits[GEN7_LRA_LIMITS_REG_NUM]; - u32 media_max_req_count; - u32 gfx_max_req_count; - u32 render_hwsp; - u32 ecochk; - u32 bsd_hwsp; - u32 blt_hwsp; - u32 tlb_rd_addr; - - /* MBC */ - u32 g3dctl; - u32 gsckgctl; - u32 mbctl; - - /* GCP */ - u32 ucgctl1; - u32 ucgctl3; - u32 rcgctl1; - u32 rcgctl2; - u32 rstctl; - u32 misccpctl; - - /* GPM */ - u32 gfxpause; - u32 rpdeuhwtc; - u32 rpdeuc; - u32 ecobus; - u32 pwrdwnupctl; - u32 rp_down_timeout; - u32 rp_deucsw; - u32 rcubmabdtmr; - u32 rcedata; - u32 spare2gh; - - /* Display 1 CZ domain */ - u32 gt_imr; - u32 gt_ier; - u32 pm_imr; - u32 pm_ier; - u32 gt_scratch[GEN7_GT_SCRATCH_REG_NUM]; - - /* GT SA CZ domain */ - u32 tilectl; - u32 gt_fifoctl; - u32 gtlc_wake_ctrl; - u32 gtlc_survive; - u32 pmwgicz; - - /* Display 2 CZ domain */ - u32 gu_ctl0; - u32 gu_ctl1; - u32 pcbr; - u32 clock_gate_dis2; -}; - -struct intel_rps_ei { - ktime_t ktime; - u32 render_c0; - u32 media_c0; -}; - -struct intel_rps { - struct mutex lock; /* protects enabling and the worker */ - - /* - * work, interrupts_enabled and pm_iir are protected by - * dev_priv->irq_lock - */ - struct work_struct work; - bool interrupts_enabled; - u32 pm_iir; - - /* PM interrupt bits that should never be masked */ - u32 pm_intrmsk_mbz; - - /* Frequencies are stored in potentially platform dependent multiples. - * In other words, *_freq needs to be multiplied by X to be interesting. - * Soft limits are those which are used for the dynamic reclocking done - * by the driver (raise frequencies under heavy loads, and lower for - * lighter loads). Hard limits are those imposed by the hardware. - * - * A distinction is made for overclocking, which is never enabled by - * default, and is considered to be above the hard limit if it's - * possible at all. - */ - u8 cur_freq; /* Current frequency (cached, may not == HW) */ - u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */ - u8 max_freq_softlimit; /* Max frequency permitted by the driver */ - u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ - u8 min_freq; /* AKA RPn. Minimum frequency */ - u8 boost_freq; /* Frequency to request when wait boosting */ - u8 idle_freq; /* Frequency to request when we are idle */ - u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ - u8 rp1_freq; /* "less than" RP0 power/freqency */ - u8 rp0_freq; /* Non-overclocked max frequency. 
*/ - u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ - - int last_adj; - - struct { - struct mutex mutex; - - enum { LOW_POWER, BETWEEN, HIGH_POWER } mode; - unsigned int interactive; - - u8 up_threshold; /* Current %busy required to uplock */ - u8 down_threshold; /* Current %busy required to downclock */ - } power; - - bool enabled; - atomic_t num_waiters; - atomic_t boosts; - - /* manual wa residency calculations */ - struct intel_rps_ei ei; -}; - -struct intel_rc6 { - bool enabled; - u64 prev_hw_residency[4]; - u64 cur_residency[4]; -}; - -struct intel_llc_pstate { - bool enabled; -}; - -struct intel_gen6_power_mgmt { - struct intel_rps rps; - struct intel_rc6 rc6; - struct intel_llc_pstate llc_pstate; -}; - -/* defined intel_pm.c */ -extern spinlock_t mchdev_lock; - -struct intel_ilk_power_mgmt { - u8 cur_delay; - u8 min_delay; - u8 max_delay; - u8 fmax; - u8 fstart; - - u64 last_count1; - unsigned long last_time1; - unsigned long chipset_power; - u64 last_count2; - u64 last_time2; - unsigned long gfx_power; - u8 corr; - - int c_m; - int r_t; -}; +struct vlv_s0ix_state; #define MAX_L3_SLICES 2 struct intel_l3_parity { @@ -764,7 +575,6 @@ struct i915_gem_mm { */ struct llist_head free_list; struct work_struct free_work; - spinlock_t free_lock; /** * Count of objects pending destructions. Used to skip needlessly * waiting on an RCU barrier if no objects are waiting to be freed. @@ -781,8 +591,7 @@ struct i915_gem_mm { */ struct vfsmount *gemfs; - /** PPGTT used for aliasing the PPGTT with the GTT */ - struct i915_ppgtt *aliasing_ppgtt; + struct intel_memory_region *regions[INTEL_REGION_UNKNOWN]; struct notifier_block oom_notifier; struct notifier_block vmap_notifier; @@ -795,16 +604,6 @@ struct i915_gem_mm { */ struct workqueue_struct *userptr_wq; - u64 unordered_timeline; - - /* the indicator for dispatch video commands on two BSD rings */ - atomic_t bsd_engine_dispatch_index; - - /** Bit 6 swizzling required for X tiling */ - u32 bit_6_swizzle_x; - /** Bit 6 swizzling required for Y tiling */ - u32 bit_6_swizzle_y; - /* shrinker accounting, also useful for userland debugging */ u64 shrink_memory; u32 shrink_count; @@ -820,19 +619,18 @@ struct i915_gem_mm { #define I915_ENGINE_WEDGED_TIMEOUT (60 * HZ) /* Reset but no recovery? */ +/* Amount of SAGV/QGV points, BSpec precisely defines this */ +#define I915_NUM_QGV_POINTS 8 + struct ddi_vbt_port_info { /* Non-NULL if port present. */ const struct child_device_config *child; int max_tmds_clock; - /* - * This is an index in the HDMI/DVI DDI buffer translation table. - * The special value HDMI_LEVEL_SHIFT_UNKNOWN means the VBT didn't - * populate this field. - */ -#define HDMI_LEVEL_SHIFT_UNKNOWN 0xff + /* This is an index in the HDMI/DVI DDI buffer translation table. 
*/ u8 hdmi_level_shift; + u8 hdmi_level_shift_set:1; u8 supports_dvi:1; u8 supports_hdmi:1; @@ -923,8 +721,7 @@ struct intel_vbt_data { int crt_ddc_pin; - int child_dev_num; - struct child_device_config *child_dev; + struct list_head display_devices; struct ddi_vbt_port_info ddi_port_info[I915_MAX_PORTS]; struct sdvo_device_mapping sdvo_mappings[2]; @@ -1073,6 +870,7 @@ struct i915_frontbuffer_tracking { }; struct i915_virtual_gpu { + struct mutex lock; /* serialises sending of g2v_notify command pkts */ bool active; u32 caps; }; @@ -1084,230 +882,15 @@ struct intel_wm_config { bool sprites_scaled; }; -struct i915_oa_format { - u32 format; - int size; -}; - -struct i915_oa_reg { - i915_reg_t addr; - u32 value; -}; - -struct i915_oa_config { - char uuid[UUID_STRING_LEN + 1]; - int id; - - const struct i915_oa_reg *mux_regs; - u32 mux_regs_len; - const struct i915_oa_reg *b_counter_regs; - u32 b_counter_regs_len; - const struct i915_oa_reg *flex_regs; - u32 flex_regs_len; - - struct attribute_group sysfs_metric; - struct attribute *attrs[2]; - struct device_attribute sysfs_metric_id; - - atomic_t ref_count; -}; - -struct i915_perf_stream; - -/** - * struct i915_perf_stream_ops - the OPs to support a specific stream type - */ -struct i915_perf_stream_ops { - /** - * @enable: Enables the collection of HW samples, either in response to - * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened - * without `I915_PERF_FLAG_DISABLED`. - */ - void (*enable)(struct i915_perf_stream *stream); - - /** - * @disable: Disables the collection of HW samples, either in response - * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying - * the stream. - */ - void (*disable)(struct i915_perf_stream *stream); - - /** - * @poll_wait: Call poll_wait, passing a wait queue that will be woken - * once there is something ready to read() for the stream - */ - void (*poll_wait)(struct i915_perf_stream *stream, - struct file *file, - poll_table *wait); - - /** - * @wait_unlocked: For handling a blocking read, wait until there is - * something to ready to read() for the stream. E.g. wait on the same - * wait queue that would be passed to poll_wait(). - */ - int (*wait_unlocked)(struct i915_perf_stream *stream); - - /** - * @read: Copy buffered metrics as records to userspace - * **buf**: the userspace, destination buffer - * **count**: the number of bytes to copy, requested by userspace - * **offset**: zero at the start of the read, updated as the read - * proceeds, it represents how many bytes have been copied so far and - * the buffer offset for copying the next record. - * - * Copy as many buffered i915 perf samples and records for this stream - * to userspace as will fit in the given buffer. - * - * Only write complete records; returning -%ENOSPC if there isn't room - * for a complete record. - * - * Return any error condition that results in a short read such as - * -%ENOSPC or -%EFAULT, even though these may be squashed before - * returning to userspace. - */ - int (*read)(struct i915_perf_stream *stream, - char __user *buf, - size_t count, - size_t *offset); - - /** - * @destroy: Cleanup any stream specific resources. - * - * The stream will always be disabled before this is called. 
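
With the HDMI_LEVEL_SHIFT_UNKNOWN sentinel removed, validity of the VBT-provided level shift is carried by the new hdmi_level_shift_set bit instead of a magic value. A small sketch of how a consumer might honour it; the helper name and fallback parameter are illustrative, not from the patch:

static u8 example_hdmi_level_shift(const struct ddi_vbt_port_info *info,
				   u8 platform_default)
{
	/* Use the VBT value only when the VBT actually populated it. */
	return info->hdmi_level_shift_set ? info->hdmi_level_shift
					  : platform_default;
}
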
- */ - void (*destroy)(struct i915_perf_stream *stream); -}; - -/** - * struct i915_perf_stream - state for a single open stream FD - */ -struct i915_perf_stream { - /** - * @dev_priv: i915 drm device - */ - struct drm_i915_private *dev_priv; - - /** - * @link: Links the stream into ``&drm_i915_private->streams`` - */ - struct list_head link; - - /** - * @wakeref: As we keep the device awake while the perf stream is - * active, we track our runtime pm reference for later release. - */ - intel_wakeref_t wakeref; - - /** - * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` - * properties given when opening a stream, representing the contents - * of a single sample as read() by userspace. - */ - u32 sample_flags; - - /** - * @sample_size: Considering the configured contents of a sample - * combined with the required header size, this is the total size - * of a single sample record. - */ - int sample_size; - - /** - * @ctx: %NULL if measuring system-wide across all contexts or a - * specific context that is being monitored. - */ - struct i915_gem_context *ctx; - - /** - * @enabled: Whether the stream is currently enabled, considering - * whether the stream was opened in a disabled state and based - * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. - */ - bool enabled; - - /** - * @ops: The callbacks providing the implementation of this specific - * type of configured stream. - */ - const struct i915_perf_stream_ops *ops; - - /** - * @oa_config: The OA configuration used by the stream. - */ - struct i915_oa_config *oa_config; -}; - -/** - * struct i915_oa_ops - Gen specific implementation of an OA unit stream - */ -struct i915_oa_ops { - /** - * @is_valid_b_counter_reg: Validates register's address for - * programming boolean counters for a particular platform. - */ - bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, - u32 addr); - - /** - * @is_valid_mux_reg: Validates register's address for programming mux - * for a particular platform. - */ - bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); - - /** - * @is_valid_flex_reg: Validates register's address for programming - * flex EU filtering for a particular platform. - */ - bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); - - /** - * @enable_metric_set: Selects and applies any MUX configuration to set - * up the Boolean and Custom (B/C) counters that are part of the - * counter reports being sampled. May apply system constraints such as - * disabling EU clock gating as required. - */ - int (*enable_metric_set)(struct i915_perf_stream *stream); - - /** - * @disable_metric_set: Remove system constraints associated with using - * the OA unit. - */ - void (*disable_metric_set)(struct drm_i915_private *dev_priv); - - /** - * @oa_enable: Enable periodic sampling - */ - void (*oa_enable)(struct i915_perf_stream *stream); - - /** - * @oa_disable: Disable periodic sampling - */ - void (*oa_disable)(struct i915_perf_stream *stream); - - /** - * @read: Copy data from the circular OA buffer into a given userspace - * buffer. - */ - int (*read)(struct i915_perf_stream *stream, - char __user *buf, - size_t count, - size_t *offset); - - /** - * @oa_hw_tail_read: read the OA tail pointer register - * - * In particular this enables us to share all the fiddly code for - * handling the OA unit tail pointer race that affects multiple - * generations. 
- */ - u32 (*oa_hw_tail_read)(struct drm_i915_private *dev_priv); -}; - struct intel_cdclk_state { unsigned int cdclk, vco, ref, bypass; u8 voltage_level; }; +struct i915_selftest_stash { + atomic_t counter; +}; + struct drm_i915_private { struct drm_device drm; @@ -1340,6 +923,7 @@ struct drm_i915_private { resource_size_t stolen_usable_size; /* Total size minus reserved ranges */ struct intel_uncore uncore; + struct intel_uncore_mmio_debug mmio_debug; struct i915_virtual_gpu vgpu; @@ -1347,9 +931,6 @@ struct drm_i915_private { struct intel_wopcm wopcm; - struct intel_huc huc; - struct intel_guc guc; - struct intel_csr csr; struct intel_gmbus gmbus[GMBUS_NUM_PINS]; @@ -1364,23 +945,19 @@ struct drm_i915_private { */ u32 gpio_mmio_base; + u32 hsw_psr_mmio_adjust; + /* MMIO base address for MIPI regs */ u32 mipi_mmio_base; - u32 psr_mmio_base; - u32 pps_mmio_base; wait_queue_head_t gmbus_wait_queue; struct pci_dev *bridge_dev; + struct intel_engine_cs *engine[I915_NUM_ENGINES]; - /* Context used internally to idle the GPU and setup initial state */ - struct i915_gem_context *kernel_context; - /* Context only to be used for injecting preemption commands */ - struct i915_gem_context *preempt_context; - struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] - [MAX_ENGINE_INSTANCE + 1]; + struct rb_root uabi_engines; struct resource mch_res; @@ -1401,11 +978,6 @@ struct drm_i915_private { u32 irq_mask; u32 de_irq_mask[I915_MAX_PIPES]; }; - u32 gt_irq_mask; - u32 pm_imr; - u32 pm_ier; - u32 pm_rps_events; - u32 pm_guc_events; u32 pipestat_irq_mask[I915_MAX_PIPES]; struct i915_hotplug hotplug; @@ -1422,9 +994,6 @@ struct drm_i915_private { /* backlight registers and fields in struct intel_panel */ struct mutex backlight_lock; - /* LVDS info */ - bool no_aux_handshake; - /* protects panel power sequencer state */ struct mutex pps_mutex; @@ -1438,13 +1007,14 @@ struct drm_i915_private { unsigned int fdi_pll_freq; unsigned int czclk_freq; + /* + * For reading holding any crtc lock is sufficient, + * for writing must hold all of them. + */ struct { /* * The current logical cdclk state. * See intel_atomic_state.cdclk.logical - * - * For reading holding any crtc lock is sufficient, - * for writing must hold all of them. */ struct intel_cdclk_state logical; /* @@ -1455,6 +1025,9 @@ struct drm_i915_private { /* The current hardware cdclk state */ struct intel_cdclk_state hw; + /* cdclk, divider, and ratio table from bspec */ + const struct intel_cdclk_vals *table; + int force_min_cdclk; } cdclk; @@ -1469,6 +1042,8 @@ struct drm_i915_private { /* ordered wq for modesets */ struct workqueue_struct *modeset_wq; + /* unbound hipri wq for page flips/plane updates */ + struct workqueue_struct *flip_wq; /* Display functions */ struct drm_i915_display_funcs display; @@ -1488,8 +1063,6 @@ struct drm_i915_private { DECLARE_HASHTABLE(mm_structs, 7); struct mutex mm_lock; - struct intel_ppat ppat; - /* Kernel Modesetting */ struct intel_crtc *plane_to_crtc_mapping[I915_MAX_PIPES]; @@ -1511,7 +1084,11 @@ struct drm_i915_private { */ struct mutex dpll_lock; - unsigned int active_crtcs; + /* + * For reading active_pipes, min_cdclk, min_voltage_level holding + * any crtc lock is sufficient, for writing must hold all of them. 
+ */ + u8 active_pipes; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; /* minimum acceptable voltage level for each pipe */ @@ -1540,13 +1117,6 @@ struct drm_i915_private { */ u32 edram_size_mb; - /* gen6+ GT PM state */ - struct intel_gen6_power_mgmt gt_pm; - - /* ilk-only ips/rps state. Everything in here is protected by the global - * mchdev_lock in intel_pm.c */ - struct intel_ilk_power_mgmt ips; - struct i915_power_domains power_domains; struct i915_psr psr; @@ -1571,23 +1141,7 @@ struct drm_i915_private { */ struct mutex av_mutex; int audio_power_refcount; - - struct { - struct mutex mutex; - struct list_head list; - struct llist_head free_list; - struct work_struct free_work; - - /* The hw wants to have a stable context identifier for the - * lifetime of the context (for OA, PASID, faults, etc). - * This is limited in execlists to 21 bits. - */ - struct ida hw_ida; -#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ -#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ - struct list_head hw_id_list; - } contexts; + u32 audio_freq_cntrl; u32 fdi_rx_config; @@ -1604,7 +1158,7 @@ struct drm_i915_private { u32 suspend_count; bool power_domains_suspended; struct i915_suspend_saved_registers regfile; - struct vlv_s0ix_state vlv_s0ix_state; + struct vlv_s0ix_state *vlv_s0ix_state; enum { I915_SAGV_UNKNOWN = 0, @@ -1613,6 +1167,8 @@ struct drm_i915_private { I915_SAGV_NOT_CONTROLLED } sagv_status; + u32 sagv_block_time_us; + struct { /* * Raw watermark latency values: @@ -1645,7 +1201,7 @@ struct drm_i915_private { /* * Should be held around atomic WM register writing; also * protects * intel_crtc->wm.active and - * cstate->wm.need_postvbl_update. + * crtc_state->wm.need_postvbl_update. */ struct mutex wm_mutex; @@ -1674,7 +1230,8 @@ struct drm_i915_private { } dram_info; struct intel_bw_info { - unsigned int deratedbw[3]; /* for each QGV point */ + /* for each QGV point */ + unsigned int deratedbw[I915_NUM_QGV_POINTS]; u8 num_qgv_points; u8 num_planes; } max_bw[6]; @@ -1683,210 +1240,35 @@ struct drm_i915_private { struct intel_runtime_pm runtime_pm; - struct { - bool initialized; - - struct kobject *metrics_kobj; - struct ctl_table_header *sysctl_header; - - /* - * Lock associated with adding/modifying/removing OA configs - * in dev_priv->perf.metrics_idr. - */ - struct mutex metrics_lock; - - /* - * List of dynamic configurations, you need to hold - * dev_priv->perf.metrics_lock to access it. - */ - struct idr metrics_idr; - - /* - * Lock associated with anything below within this structure - * except exclusive_stream. - */ - struct mutex lock; - struct list_head streams; - - struct { - /* - * The stream currently using the OA unit. If accessed - * outside a syscall associated to its file - * descriptor, you need to hold - * dev_priv->drm.struct_mutex. 
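
active_crtcs gives way to an active_pipes bitmask indexed by enum pipe, readable under any crtc lock per the comment above. A minimal sketch of testing membership; the helper name is illustrative:

static bool example_pipe_is_active(const struct drm_i915_private *i915,
				   enum pipe pipe)
{
	return i915->active_pipes & BIT(pipe);
}
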
- */ - struct i915_perf_stream *exclusive_stream; - - struct intel_context *pinned_ctx; - u32 specific_ctx_id; - u32 specific_ctx_id_mask; - - struct hrtimer poll_check_timer; - wait_queue_head_t poll_wq; - bool pollin; - - /** - * For rate limiting any notifications of spurious - * invalid OA reports - */ - struct ratelimit_state spurious_report_rs; - - bool periodic; - int period_exponent; - - struct i915_oa_config test_config; - - struct { - struct i915_vma *vma; - u8 *vaddr; - u32 last_ctx_id; - int format; - int format_size; - - /** - * Locks reads and writes to all head/tail state - * - * Consider: the head and tail pointer state - * needs to be read consistently from a hrtimer - * callback (atomic context) and read() fop - * (user context) with tail pointer updates - * happening in atomic context and head updates - * in user context and the (unlikely) - * possibility of read() errors needing to - * reset all head/tail state. - * - * Note: Contention or performance aren't - * currently a significant concern here - * considering the relatively low frequency of - * hrtimer callbacks (5ms period) and that - * reads typically only happen in response to a - * hrtimer event and likely complete before the - * next callback. - * - * Note: This lock is not held *while* reading - * and copying data to userspace so the value - * of head observed in htrimer callbacks won't - * represent any partial consumption of data. - */ - spinlock_t ptr_lock; - - /** - * One 'aging' tail pointer and one 'aged' - * tail pointer ready to used for reading. - * - * Initial values of 0xffffffff are invalid - * and imply that an update is required - * (and should be ignored by an attempted - * read) - */ - struct { - u32 offset; - } tails[2]; - - /** - * Index for the aged tail ready to read() - * data up to. - */ - unsigned int aged_tail_idx; - - /** - * A monotonic timestamp for when the current - * aging tail pointer was read; used to - * determine when it is old enough to trust. - */ - u64 aging_timestamp; - - /** - * Although we can always read back the head - * pointer register, we prefer to avoid - * trusting the HW state, just to avoid any - * risk that some hardware condition could - * somehow bump the head pointer unpredictably - * and cause us to forward the wrong OA buffer - * data to userspace. - */ - u32 head; - } oa_buffer; - - u32 gen7_latched_oastatus1; - u32 ctx_oactxctrl_offset; - u32 ctx_flexeu0_offset; - - /** - * The RPT_ID/reason field for Gen8+ includes a bit - * to determine if the CTX ID in the report is valid - * but the specific bit differs between Gen 8 and 9 - */ - u32 gen8_valid_ctx_bit; - - struct i915_oa_ops ops; - const struct i915_oa_format *oa_formats; - } oa; - } perf; + struct i915_perf perf; /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ - struct { - struct i915_gt_timelines { - struct mutex mutex; /* protects list, tainted by GPU */ - struct list_head active_list; - - /* Pack multiple timelines' seqnos into the same page */ - spinlock_t hwsp_lock; - struct list_head hwsp_free_list; - } timelines; + struct intel_gt gt; - struct list_head active_rings; - - struct intel_wakeref wakeref; - - struct list_head closed_vma; - spinlock_t closed_lock; /* guards the list of closed_vma */ - - /** - * Is the GPU currently considered idle, or busy executing - * userspace requests? Whilst idle, we allow runtime power - * management to power down the hardware and display clocks. 
- * In order to reduce the effect on performance, there - * is a slight delay before we do so. - */ - intel_wakeref_t awake; - - struct blocking_notifier_head pm_notifications; - - ktime_t last_init_time; + struct { + struct i915_gem_contexts { + spinlock_t lock; /* locks list */ + struct list_head list; - struct i915_vma *scratch; - } gt; + struct llist_head free_list; + struct work_struct free_work; + } contexts; - struct { - struct notifier_block pm_notifier; - - /** - * We leave the user IRQ off as much as possible, - * but this means that requests will finish and never - * be retired once the system goes idle. Set a timer to - * fire periodically while the ring is running. When it - * fires, go retire requests. - */ - struct delayed_work retire_work; - - /** - * When we detect an idle GPU, we want to turn on - * powersaving features. So once we see that there - * are no more requests outstanding and no more - * arrive within a small period of time, we fire - * off the idle_work. + /* + * We replace the local file with a global mappings as the + * backing storage for the mmap is on the device and not + * on the struct file, and we do not want to prolong the + * lifetime of the local fd. To minimise the number of + * anonymous inodes we create, we use a global singleton to + * share the global mapping. */ - struct work_struct idle_work; + struct file *mmap_singleton; } gem; - /* For i945gm vblank irq vs. C3 workaround */ - struct { - struct work_struct work; - struct pm_qos_request pm_qos; - u8 c3_disable_latency; - u8 enabled; - } i945gm_vblank; + u8 pch_ssc_use; + + /* For i915gm/i945gm vblank irq workaround */ + u8 vblank_enabled; /* perform PHY state sanity checks? */ bool chv_phy_assert[2]; @@ -1910,6 +1292,8 @@ struct drm_i915_private { /* Mutex to protect the above hdcp component related values. */ struct mutex hdcp_comp_mutex; + I915_SELFTEST_DECLARE(struct i915_selftest_stash selftest;) + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. @@ -1933,27 +1317,12 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) { - return to_i915(dev_get_drvdata(kdev)); -} - -static inline struct drm_i915_private *wopcm_to_i915(struct intel_wopcm *wopcm) -{ - return container_of(wopcm, struct drm_i915_private, wopcm); -} - -static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) -{ - return container_of(guc, struct drm_i915_private, guc); -} - -static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc) -{ - return container_of(huc, struct drm_i915_private, huc); + return dev_get_drvdata(kdev); } -static inline struct drm_i915_private *uncore_to_i915(struct intel_uncore *uncore) +static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) { - return container_of(uncore, struct drm_i915_private, uncore); + return pci_get_drvdata(pdev); } /* Simple iterator over all initialised engines */ @@ -1964,18 +1333,19 @@ static inline struct drm_i915_private *uncore_to_i915(struct intel_uncore *uncor for_each_if ((engine__) = (dev_priv__)->engine[(id__)]) /* Iterator over subset of engines selected by mask */ -#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ - for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->engine_mask; \ +#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \ + for ((tmp__) = (mask__) & INTEL_INFO((gt__)->i915)->engine_mask; \ (tmp__) ? 
\ - ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \ + ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \ 0;) -enum hdmi_force_audio { - HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */ - HDMI_AUDIO_OFF, /* force turn off HDMI audio */ - HDMI_AUDIO_AUTO, /* trust EDID */ - HDMI_AUDIO_ON, /* force turn on HDMI audio */ -}; +#define rb_to_uabi_engine(rb) \ + rb_entry_safe(rb, struct intel_engine_cs, uabi_node) + +#define for_each_uabi_engine(engine__, i915__) \ + for ((engine__) = rb_to_uabi_engine(rb_first(&(i915__)->uabi_engines));\ + (engine__); \ + (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node))) #define I915_GTT_OFFSET_NONE ((u32)-1) @@ -2022,6 +1392,8 @@ enum hdmi_force_audio { (BUILD_BUG_ON_ZERO(!__builtin_constant_p(n)) + \ INTEL_INFO(dev_priv)->gen == (n)) +#define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb) + /* * Return true if revision is in range [since,until] inclusive. * @@ -2093,6 +1465,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, } #define IS_MOBILE(dev_priv) (INTEL_INFO(dev_priv)->is_mobile) +#define IS_DGFX(dev_priv) (INTEL_INFO(dev_priv)->is_dgfx) #define IS_I830(dev_priv) IS_PLATFORM(dev_priv, INTEL_I830) #define IS_I845G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I845G) @@ -2127,6 +1500,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_CANNONLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_CANNONLAKE) #define IS_ICELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ICELAKE) #define IS_ELKHARTLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE) +#define IS_TIGERLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_TIGERLAKE) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev_priv) \ @@ -2226,6 +1600,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICL_REVID(p, since, until) \ (IS_ICELAKE(p) && IS_REVID(p, since, until)) +#define TGL_REVID_A0 0x0 + +#define IS_TGL_REVID(p, since, until) \ + (IS_TIGERLAKE(p) && IS_REVID(p, since, until)) + #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (IS_GEN(dev_priv, 9) && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (IS_GEN(dev_priv, 9) && !IS_LP(dev_priv)) @@ -2243,9 +1622,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define VEBOX_MASK(dev_priv) \ ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS) +/* + * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution + * All later gens can run the final buffer from the ppgtt + */ +#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN(dev_priv, 7) + #define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc) #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) #define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb) +#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6) #define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \ IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv)) @@ -2278,10 +1664,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, /* Early gen2 have a totally busted CS tlb and require pinned batches. 
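
The per-class engine arrays are replaced by an rb-tree of userspace-visible engines walked with for_each_uabi_engine(). A minimal usage sketch, with an illustrative helper name:

static unsigned int example_count_uabi_engines(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int count = 0;

	/* Walks i915->uabi_engines in rb-tree order, as defined above. */
	for_each_uabi_engine(engine, i915)
		count++;

	return count;
}
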
*/ #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) +#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \ + (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9)) + /* WaRsDisableCoarsePowerGating:skl,cnl */ -#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_CANNONLAKE(dev_priv) || \ - IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) +#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ + (IS_CANNONLAKE(dev_priv) || \ + IS_SKL_GT3(dev_priv) || \ + IS_SKL_GT4(dev_priv)) #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \ @@ -2323,63 +1713,19 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) -/* - * For now, anything with a GuC requires uCode loading, and then supports - * command submission once loaded. But these are logically independent - * properties, so we have separate macros to test them. - */ -#define HAS_GUC(dev_priv) (INTEL_INFO(dev_priv)->has_guc) -#define HAS_GUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) -#define HAS_GUC_SCHED(dev_priv) (HAS_GUC(dev_priv)) +#define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) +#define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM) -/* For now, anything with a GuC has also HuC */ -#define HAS_HUC(dev_priv) (HAS_GUC(dev_priv)) -#define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) +#define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc) -/* Having a GuC is not the same as using a GuC */ -#define USES_GUC(dev_priv) intel_uc_is_using_guc(dev_priv) -#define USES_GUC_SUBMISSION(dev_priv) intel_uc_is_using_guc_submission(dev_priv) -#define USES_HUC(dev_priv) intel_uc_is_using_huc(dev_priv) +/* Having GuC is not the same as using GuC */ +#define USES_GUC(dev_priv) intel_uc_uses_guc(&(dev_priv)->gt.uc) +#define USES_GUC_SUBMISSION(dev_priv) intel_uc_uses_guc_submission(&(dev_priv)->gt.uc) #define HAS_POOLED_EU(dev_priv) (INTEL_INFO(dev_priv)->has_pooled_eu) -#define INTEL_PCH_DEVICE_ID_MASK 0xff80 -#define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 -#define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 -#define INTEL_PCH_PPT_DEVICE_ID_TYPE 0x1e00 -#define INTEL_PCH_LPT_DEVICE_ID_TYPE 0x8c00 -#define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE 0x9c00 -#define INTEL_PCH_WPT_DEVICE_ID_TYPE 0x8c80 -#define INTEL_PCH_WPT_LP_DEVICE_ID_TYPE 0x9c80 -#define INTEL_PCH_SPT_DEVICE_ID_TYPE 0xA100 -#define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 -#define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280 -#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 -#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 -#define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 -#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 -#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 -#define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 -#define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 -#define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ - -#define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) -#define INTEL_PCH_ID(dev_priv) ((dev_priv)->pch_id) -#define HAS_PCH_MCC(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_MCC) -#define HAS_PCH_ICP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) -#define HAS_PCH_CNP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CNP) -#define HAS_PCH_SPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_SPT) -#define HAS_PCH_LPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT) -#define HAS_PCH_LPT_LP(dev_priv) \ - (INTEL_PCH_ID(dev_priv) == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE || \ - INTEL_PCH_ID(dev_priv) == INTEL_PCH_WPT_LP_DEVICE_ID_TYPE) -#define HAS_PCH_LPT_H(dev_priv) \ 
- (INTEL_PCH_ID(dev_priv) == INTEL_PCH_LPT_DEVICE_ID_TYPE || \ - INTEL_PCH_ID(dev_priv) == INTEL_PCH_WPT_DEVICE_ID_TYPE) -#define HAS_PCH_CPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CPT) -#define HAS_PCH_IBX(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_IBX) -#define HAS_PCH_NOP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_NOP) -#define HAS_PCH_SPLIT(dev_priv) (INTEL_PCH_TYPE(dev_priv) != PCH_NONE) +#define HAS_GLOBAL_MOCS_REGISTERS(dev_priv) (INTEL_INFO(dev_priv)->has_global_mocs) + #define HAS_GMCH(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch) @@ -2393,9 +1739,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 -#define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->num_pipes > 0) +#define INTEL_NUM_PIPES(dev_priv) (hweight8(INTEL_INFO(dev_priv)->pipe_mask)) + +#define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->pipe_mask != 0) -#include "i915_trace.h" +/* Only valid when HAS_DISPLAY() is true */ +#define INTEL_DISPLAY_ENABLED(dev_priv) (WARN_ON(!HAS_DISPLAY(dev_priv)), !i915_modparams.disable_display) static inline bool intel_vtd_active(void) { @@ -2418,47 +1767,20 @@ intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv) } /* i915_drv.c */ -void __printf(3, 4) -__i915_printk(struct drm_i915_private *dev_priv, const char *level, - const char *fmt, ...); - -#define i915_report_error(dev_priv, fmt, ...) \ - __i915_printk(dev_priv, KERN_ERR, fmt, ##__VA_ARGS__) - #ifdef CONFIG_COMPAT -extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg); +long i915_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); #else #define i915_compat_ioctl NULL #endif extern const struct dev_pm_ops i915_pm_ops; -extern int i915_driver_load(struct pci_dev *pdev, - const struct pci_device_id *ent); -extern void i915_driver_unload(struct drm_device *dev); - -extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); -extern void intel_hangcheck_init(struct drm_i915_private *dev_priv); -int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); - -u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv); +int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent); +void i915_driver_remove(struct drm_i915_private *i915); -static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) -{ - unsigned long delay; - - if (unlikely(!i915_modparams.enable_hangcheck)) - return; - - /* Don't continually defer the hangcheck so that it is always run at - * least once after work has been scheduled on any ring. Otherwise, - * we will ignore a hung ring if a second ring is kept busy. 
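
Because kdev_to_i915() and pdev_to_i915() now return the drvdata pointer directly, the probe path declared here is expected to publish the drm_i915_private pointer, not the drm_device, as drvdata. A hedged sketch of that pairing; the setter shown is an assumption about what i915_driver_probe() must do, not code taken from this diff:

static void example_publish_drvdata(struct pci_dev *pdev,
				    struct drm_i915_private *i915)
{
	/* Read back later by pdev_to_i915()/kdev_to_i915() via *_get_drvdata(). */
	pci_set_drvdata(pdev, i915);
}
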
- */ +int i915_resume_switcheroo(struct drm_i915_private *i915); +int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state); - delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); - queue_delayed_work(system_long_wq, - &dev_priv->gpu_error.hangcheck_work, delay); -} +int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { @@ -2470,29 +1792,32 @@ static inline bool intel_vgpu_active(struct drm_i915_private *dev_priv) return dev_priv->vgpu.active; } +int i915_getparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + /* i915_gem.c */ int i915_gem_init_userptr(struct drm_i915_private *dev_priv); void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); -void i915_gem_sanitize(struct drm_i915_private *i915); -int i915_gem_init_early(struct drm_i915_private *dev_priv); +void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); int i915_gem_freeze(struct drm_i915_private *dev_priv); int i915_gem_freeze_late(struct drm_i915_private *dev_priv); +struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915); + static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) { - if (!atomic_read(&i915->mm.free_count)) - return; - - /* A single pass should suffice to release all the freed objects (along + /* + * A single pass should suffice to release all the freed objects (along * most call paths) , but be a little more paranoid in that freeing * the objects does take a little amount of time, during which the rcu * callbacks could have added new objects into the freed list, and * armed the work again. */ - do { + while (atomic_read(&i915->mm.free_count)) { + flush_work(&i915->mm.free_work); rcu_barrier(); - } while (flush_work(&i915->mm.free_work)); + } } static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915) @@ -2510,6 +1835,7 @@ static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915) */ int pass = 3; do { + flush_workqueue(i915->wq); rcu_barrier(); i915_gem_drain_freed_objects(i915); } while (--pass); @@ -2523,7 +1849,10 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 alignment, u64 flags); -int i915_gem_object_unbind(struct drm_i915_gem_object *obj); +int i915_gem_object_unbind(struct drm_i915_gem_object *obj, + unsigned long flags); +#define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0) +#define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1) void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); @@ -2536,52 +1865,28 @@ i915_mutex_lock_interruptible(struct drm_device *dev) int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); -int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev, - u32 handle, u64 *offset); -int i915_gem_mmap_gtt_version(void); - -void i915_gem_track_fb(struct drm_i915_gem_object *old, - struct drm_i915_gem_object *new, - unsigned frontbuffer_bits); int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); -static inline bool __i915_wedged(struct i915_gpu_error *error) -{ - return unlikely(test_bit(I915_WEDGED, &error->flags)); -} - -static inline bool i915_reset_failed(struct drm_i915_private *i915) -{ - return __i915_wedged(&i915->gpu_error); -} - static inline u32 i915_reset_count(struct i915_gpu_error *error) { - return READ_ONCE(error->reset_count); + return 
atomic_read(&error->reset_count); } static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, - struct intel_engine_cs *engine) + const struct intel_engine_cs *engine) { - return READ_ONCE(error->reset_engine_count[engine->id]); + return atomic_read(&error->reset_engine_count[engine->uabi_class]); } -void i915_gem_set_wedged(struct drm_i915_private *dev_priv); -bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); - -void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); -int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); -void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); -void i915_gem_fini_hw(struct drm_i915_private *dev_priv); -void i915_gem_fini(struct drm_i915_private *dev_priv); -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags, long timeout); +void i915_gem_driver_register(struct drm_i915_private *i915); +void i915_gem_driver_unregister(struct drm_i915_private *i915); +void i915_gem_driver_remove(struct drm_i915_private *dev_priv); +void i915_gem_driver_release(struct drm_i915_private *dev_priv); void i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_suspend_late(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); -vm_fault_t i915_gem_fault(struct vm_fault *vmf); int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file); void i915_gem_release(struct drm_device *dev, struct drm_file *file); @@ -2592,13 +1897,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); -struct dma_buf *i915_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gem_obj, int flags); +struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags); static inline struct i915_gem_context * __i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id) { - return idr_find(&file_priv->context_idr, id); + return xa_load(&file_priv->context_xa, id); } static inline struct i915_gem_context * @@ -2615,20 +1919,10 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } -int i915_perf_open_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -void i915_oa_init_reg_state(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 *reg_state); - /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, u64 min_size, u64 alignment, - unsigned cache_level, + unsigned long color, u64 start, u64 end, unsigned flags); int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, @@ -2636,65 +1930,17 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm); -void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv); - -/* belongs in i915_gem_gtt.h */ -static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) -{ - wmb(); - if (INTEL_GEN(dev_priv) < 6) - intel_gtt_chipset_flush(); -} - -/* i915_gem_stolen.c */ -int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, - struct drm_mm_node *node, u64 size, - unsigned alignment); 
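/*
 * Editorial note — not part of the patch above. The i915_drv.h hunk earlier
 * in this diff changes i915_gem_object_unbind() to take a flags argument
 * (I915_GEM_OBJECT_UNBIND_ACTIVE, I915_GEM_OBJECT_UNBIND_BARRIER). A minimal
 * caller sketch under that assumption; the wrapper name below is hypothetical
 * and used for illustration only:
 */
static int example_evict_object(struct drm_i915_gem_object *obj)
{
	/*
	 * ACTIVE also unbinds VMAs still tracked as active; BARRIER retries
	 * after an RCU barrier if the address space is being released
	 * concurrently (see the i915_gem.c hunk later in this diff).
	 */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}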
-int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, - struct drm_mm_node *node, u64 size, - unsigned alignment, u64 start, - u64 end); -void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, - struct drm_mm_node *node); -int i915_gem_init_stolen(struct drm_i915_private *dev_priv); -void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv); -struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - resource_size_t size); -struct drm_i915_gem_object * -i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - resource_size_t stolen_offset, - resource_size_t gtt_offset, - resource_size_t size); - /* i915_gem_internal.c */ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *dev_priv, phys_addr_t size); -/* i915_gem_shrinker.c */ -unsigned long i915_gem_shrink(struct drm_i915_private *i915, - unsigned long target, - unsigned long *nr_scanned, - unsigned flags); -#define I915_SHRINK_UNBOUND BIT(0) -#define I915_SHRINK_BOUND BIT(1) -#define I915_SHRINK_ACTIVE BIT(2) -#define I915_SHRINK_VMAPS BIT(3) -#define I915_SHRINK_WRITEBACK BIT(4) - -unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); -void i915_gem_shrinker_register(struct drm_i915_private *i915); -void i915_gem_shrinker_unregister(struct drm_i915_private *i915); -void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, - struct mutex *mutex); - /* i915_gem_tiling.c */ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_i915_private *i915 = to_i915(obj->base.dev); - return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && + return i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && i915_gem_object_is_tiled(obj); } @@ -2710,25 +1956,12 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); int intel_engine_cmd_parser(struct intel_engine_cs *engine, - struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, - u32 batch_start_offset, - u32 batch_len, - bool is_master); - -/* i915_perf.c */ -extern void i915_perf_init(struct drm_i915_private *dev_priv); -extern void i915_perf_fini(struct drm_i915_private *dev_priv); -extern void i915_perf_register(struct drm_i915_private *dev_priv); -extern void i915_perf_unregister(struct drm_i915_private *dev_priv); - -/* i915_suspend.c */ -extern int i915_save_state(struct drm_i915_private *dev_priv); -extern int i915_restore_state(struct drm_i915_private *dev_priv); - -/* i915_sysfs.c */ -void i915_setup_sysfs(struct drm_i915_private *dev_priv); -void i915_teardown_sysfs(struct drm_i915_private *dev_priv); + struct i915_vma *batch, + u32 batch_offset, + u32 batch_length, + struct i915_vma *shadow, + bool trampoline); +#define I915_CMD_PARSER_TRAMPOLINE_SIZE 8 /* intel_device_info.c */ static inline struct intel_device_info * @@ -2737,25 +1970,9 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) return (struct intel_device_info *)INTEL_INFO(dev_priv); } -/* modesetting */ -extern void intel_modeset_init_hw(struct drm_device *dev); -extern int intel_modeset_init(struct drm_device *dev); -extern void intel_modeset_cleanup(struct drm_device *dev); -extern int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, - bool 
state); -extern void intel_display_resume(struct drm_device *dev); -extern void i915_redisable_vga(struct drm_i915_private *dev_priv); -extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv); -extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv); - int i915_reg_read_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -extern struct intel_display_error_state * -intel_display_capture_error_state(struct drm_i915_private *dev_priv); -extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, - struct intel_display_error_state *error); - #define __I915_REG_OP(op__, dev_priv__, ...) \ intel_uncore_##op__(&(dev_priv__)->uncore, __VA_ARGS__) @@ -2793,34 +2010,27 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, #define I915_READ_FW(reg__) __I915_REG_OP(read_fw, dev_priv, (reg__)) #define I915_WRITE_FW(reg__, val__) __I915_REG_OP(write_fw, dev_priv, (reg__), (val__)) -/* "Broadcast RGB" property */ -#define INTEL_BROADCAST_RGB_AUTO 0 -#define INTEL_BROADCAST_RGB_FULL 1 -#define INTEL_BROADCAST_RGB_LIMITED 2 - -void i915_memcpy_init_early(struct drm_i915_private *dev_priv); -bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); +/* register wait wrappers for display regs */ +#define intel_de_wait_for_register(dev_priv_, reg_, mask_, value_, timeout_) \ + intel_wait_for_register(&(dev_priv_)->uncore, \ + (reg_), (mask_), (value_), (timeout_)) -/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment, - * as well as SSE4.1 support. i915_memcpy_from_wc() will report if it cannot - * perform the operation. To check beforehand, pass in the parameters to - * to i915_can_memcpy_from_wc() - since we only care about the low 4 bits, - * you only need to pass in the minor offsets, page-aligned pointers are - * always valid. - * - * For just checking for SSE4.1, in the foreknowledge that the future use - * will be correctly aligned, just use i915_has_memcpy_from_wc(). - */ -#define i915_can_memcpy_from_wc(dst, src, len) \ - i915_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0) +#define intel_de_wait_for_set(dev_priv_, reg_, mask_, timeout_) ({ \ + u32 mask__ = (mask_); \ + intel_de_wait_for_register((dev_priv_), (reg_), \ + mask__, mask__, (timeout_)); \ +}) -#define i915_has_memcpy_from_wc() \ - i915_memcpy_from_wc(NULL, NULL, 0) +#define intel_de_wait_for_clear(dev_priv_, reg_, mask_, timeout_) \ + intel_de_wait_for_register((dev_priv_), (reg_), (mask_), 0, (timeout_)) /* i915_mm.c */ int remap_io_mapping(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, struct io_mapping *iomap); +int remap_io_sg(struct vm_area_struct *vma, + unsigned long addr, unsigned long size, + struct scatterlist *sgl, resource_size_t iobase); static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) { @@ -2830,26 +2040,16 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) return I915_HWS_CSB_WRITE_INDEX; } -static inline u32 i915_scratch_offset(const struct drm_i915_private *i915) -{ - return i915_ggtt_offset(i915->gt.scratch); -} - static inline enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915) { return HAS_LLC(i915) ? 
I915_MAP_WB : I915_MAP_WC; } -static inline void add_taint_for_CI(unsigned int taint) +static inline bool intel_guc_submission_is_enabled(struct intel_guc *guc) { - /* - * The system is "ok", just about surviving for the user, but - * CI results are now unreliable as the HW is very suspect. - * CI checks the taint state after every test and will reboot - * the machine if the kernel is tainted. - */ - add_taint(taint, LOCKDEP_STILL_OK); + return intel_guc_is_submission_supported(guc) && + intel_guc_is_running(guc); } #endif diff --git a/drivers/gpu/drm/i915/i915_fixed.h b/drivers/gpu/drm/i915/i915_fixed.h index 6621595fe74c..a327094de2bd 100644 --- a/drivers/gpu/drm/i915/i915_fixed.h +++ b/drivers/gpu/drm/i915/i915_fixed.h @@ -6,6 +6,11 @@ #ifndef _I915_FIXED_H_ #define _I915_FIXED_H_ +#include <linux/bug.h> +#include <linux/kernel.h> +#include <linux/math64.h> +#include <linux/types.h> + typedef struct { u32 val; } uint_fixed_16_16_t; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8a659d3d7435..c2de2f45b459 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,7 +29,7 @@ #include <drm/i915_drm.h> #include <linux/dma-fence-array.h> #include <linux/kthread.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/stop_machine.h> @@ -44,36 +44,45 @@ #include "gem/i915_gem_clflush.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" -#include "gem/i915_gem_pm.h" -#include "gem/i915_gemfs.h" +#include "gem/i915_gem_mman.h" +#include "gem/i915_gem_region.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" -#include "gt/intel_mocs.h" -#include "gt/intel_reset.h" #include "gt/intel_workarounds.h" #include "i915_drv.h" -#include "i915_scatterlist.h" #include "i915_trace.h" #include "i915_vgpu.h" -#include "intel_drv.h" #include "intel_pm.h" static int -insert_mappable_node(struct i915_ggtt *ggtt, - struct drm_mm_node *node, u32 size) +insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size) { + int err; + + err = mutex_lock_interruptible(&ggtt->vm.mutex); + if (err) + return err; + memset(node, 0, sizeof(*node)); - return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, - size, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); + err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node, + size, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + + mutex_unlock(&ggtt->vm.mutex); + + return err; } static void -remove_mappable_node(struct drm_mm_node *node) +remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node) { + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(node); + mutex_unlock(&ggtt->vm.mutex); } int @@ -85,7 +94,8 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct i915_vma *vma; u64 pinned; - mutex_lock(&ggtt->vm.mutex); + if (mutex_lock_interruptible(&ggtt->vm.mutex)) + return -EINTR; pinned = ggtt->vm.reserved; list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) @@ -100,28 +110,68 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, return 0; } -int i915_gem_object_unbind(struct drm_i915_gem_object *obj) +int i915_gem_object_unbind(struct drm_i915_gem_object *obj, + unsigned long flags) { - struct i915_vma *vma; + struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm; LIST_HEAD(still_in_list); - int ret = 0; + intel_wakeref_t wakeref; + struct i915_vma 
*vma; + int ret; + + if (!atomic_read(&obj->bind_count)) + return 0; - lockdep_assert_held(&obj->base.dev->struct_mutex); + /* + * As some machines use ACPI to handle runtime-resume callbacks, and + * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex + * as they are required by the shrinker. Ergo, we wake the device up + * first just in case. + */ + wakeref = intel_runtime_pm_get(rpm); +try_again: + ret = 0; spin_lock(&obj->vma.lock); while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, struct i915_vma, obj_link))) { + struct i915_address_space *vm = vma->vm; + list_move_tail(&vma->obj_link, &still_in_list); + if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) + continue; + + ret = -EAGAIN; + if (!i915_vm_tryopen(vm)) + break; + + /* Prevent vma being freed by i915_vma_parked as we unbind */ + vma = __i915_vma_get(vma); spin_unlock(&obj->vma.lock); - ret = i915_vma_unbind(vma); + if (vma) { + ret = -EBUSY; + if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || + !i915_vma_is_active(vma)) + ret = i915_vma_unbind(vma); + + __i915_vma_put(vma); + } + i915_vm_close(vm); spin_lock(&obj->vma.lock); } - list_splice(&still_in_list, &obj->vma.list); + list_splice_init(&still_in_list, &obj->vma.list); spin_unlock(&obj->vma.lock); + if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) { + rcu_barrier(); /* flush the i915_vm_release() */ + goto try_again; + } + + intel_runtime_pm_put(rpm, wakeref); + return ret; } @@ -133,23 +183,25 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, void *vaddr = obj->phys_handle->vaddr + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); - /* We manually control the domain here and pretend that it + /* + * We manually control the domain here and pretend that it * remains coherent i.e. in the GTT domain, like shmem_pwrite. */ - intel_fb_obj_invalidate(obj, ORIGIN_CPU); + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); + if (copy_from_user(vaddr, user_data, args->size)) return -EFAULT; drm_clflush_virt_range(vaddr, args->size); - i915_gem_chipset_flush(to_i915(obj->base.dev)); + intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); - intel_fb_obj_flush(obj, ORIGIN_CPU); + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); return 0; } static int i915_gem_create(struct drm_file *file, - struct drm_i915_private *dev_priv, + struct intel_memory_region *mr, u64 *size_p, u32 *handle_p) { @@ -158,12 +210,16 @@ i915_gem_create(struct drm_file *file, u64 size; int ret; - size = round_up(*size_p, PAGE_SIZE); + GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); + size = round_up(*size_p, mr->min_page_size); if (size == 0) return -EINVAL; + /* For most of the ABI (e.g. 
mmap) we think in system pages */ + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + /* Allocate the new object */ - obj = i915_gem_object_create_shmem(dev_priv, size); + obj = i915_gem_object_create_region(mr, size, 0); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -183,6 +239,7 @@ i915_gem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args) { + enum intel_memory_type mem_type; int cpp = DIV_ROUND_UP(args->bpp, 8); u32 format; @@ -208,8 +265,18 @@ i915_gem_dumb_create(struct drm_file *file, DRM_FORMAT_MOD_LINEAR)) args->pitch = ALIGN(args->pitch, 4096); - args->size = args->pitch * args->height; - return i915_gem_create(file, to_i915(dev), + if (args->pitch < args->width) + return -EINVAL; + + args->size = mul_u32_u32(args->pitch, args->height); + + mem_type = INTEL_MEMORY_SYSTEM; + if (HAS_LMEM(to_i915(dev))) + mem_type = INTEL_MEMORY_LOCAL; + + return i915_gem_create(file, + intel_memory_region_by_type(to_i915(dev), + mem_type), &args->size, &args->handle); } @@ -223,55 +290,17 @@ int i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_create *args = data; - i915_gem_flush_free_objects(dev_priv); + i915_gem_flush_free_objects(i915); - return i915_gem_create(file, dev_priv, + return i915_gem_create(file, + intel_memory_region_by_type(i915, + INTEL_MEMORY_SYSTEM), &args->size, &args->handle); } -void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - - /* - * No actual flushing is required for the GTT write domain for reads - * from the GTT domain. Writes to it "immediately" go to main memory - * as far as we know, so there's no chipset flush. It also doesn't - * land in the GPU render cache. - * - * However, we do have to enforce the order so that all writes through - * the GTT land before any writes to the device, such as updates to - * the GATT itself. - * - * We also have to wait a bit for the writes to land from the GTT. - * An uncached read (i.e. mmio) seems to be ideal for the round-trip - * timing. This issue has only been observed when switching quickly - * between GTT writes and CPU reads from inside the kernel on recent hw, - * and it appears to only affect discrete GTT blocks (i.e. on LLC - * system agents we cannot reproduce this behaviour, until Cannonlake - * that was!). 
- */ - - wmb(); - - if (INTEL_INFO(dev_priv)->has_coherent_ggtt) - return; - - i915_gem_chipset_flush(dev_priv); - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - struct intel_uncore *uncore = &dev_priv->uncore; - - spin_lock_irq(&uncore->lock); - intel_uncore_posting_read_fw(uncore, - RING_HEAD(RENDER_RING_BASE)); - spin_unlock_irq(&uncore->lock); - } -} - static int shmem_pread(struct page *page, int offset, int len, char __user *user_data, bool needs_clflush) @@ -370,33 +399,23 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, u64 remain, offset; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONFAULT | - PIN_NONBLOCK); + vma = ERR_PTR(-ENODEV); + if (!i915_gem_object_is_tiled(obj)) + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); - node.allocated = false; - ret = i915_vma_put_fence(vma); - if (ret) { - i915_vma_unpin(vma); - vma = ERR_PTR(ret); - } - } - if (IS_ERR(vma)) { + node.flags = 0; + } else { ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) - goto out_unlock; - GEM_BUG_ON(!node.allocated); + goto out_rpm; + GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - mutex_unlock(&i915->drm.struct_mutex); - ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; @@ -429,12 +448,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, unsigned page_offset = offset_in_page(offset); unsigned page_length = PAGE_SIZE - page_offset; page_length = remain < page_length ? remain : page_length; - if (node.allocated) { - wmb(); + if (drm_mm_node_allocated(&node)) { ggtt->vm.insert_page(&ggtt->vm, i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), node.start, I915_CACHE_NONE, 0); - wmb(); } else { page_base += offset & PAGE_MASK; } @@ -452,18 +469,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, i915_gem_object_unlock_fence(obj, fence); out_unpin: - mutex_lock(&i915->drm.struct_mutex); - if (node.allocated) { - wmb(); + if (drm_mm_node_allocated(&node)) { ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(&node); + remove_mappable_node(ggtt, &node); } else { i915_vma_unpin(vma); } -out_unlock: +out_rpm: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return ret; } @@ -570,10 +583,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, void __user *user_data; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - if (i915_gem_object_has_struct_page(obj)) { /* * Avoid waking the device up if we can fallback, as @@ -583,37 +592,29 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * using the cache bypass of indirect GGTT access. 
*/ wakeref = intel_runtime_pm_get_if_in_use(rpm); - if (!wakeref) { - ret = -EFAULT; - goto out_unlock; - } + if (!wakeref) + return -EFAULT; } else { /* No backing pages, no fallback, we must force GGTT access */ wakeref = intel_runtime_pm_get(rpm); } - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONFAULT | - PIN_NONBLOCK); + vma = ERR_PTR(-ENODEV); + if (!i915_gem_object_is_tiled(obj)) + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); - node.allocated = false; - ret = i915_vma_put_fence(vma); - if (ret) { - i915_vma_unpin(vma); - vma = ERR_PTR(ret); - } - } - if (IS_ERR(vma)) { + node.flags = 0; + } else { ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) goto out_rpm; - GEM_BUG_ON(!node.allocated); + GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - mutex_unlock(&i915->drm.struct_mutex); - ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; @@ -631,7 +632,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, goto out_unpin; } - intel_fb_obj_invalidate(obj, ORIGIN_CPU); + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); user_data = u64_to_user_ptr(args->data_ptr); offset = args->offset; @@ -647,8 +648,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, unsigned int page_offset = offset_in_page(offset); unsigned int page_length = PAGE_SIZE - page_offset; page_length = remain < page_length ? remain : page_length; - if (node.allocated) { - wmb(); /* flush the write before we modify the GGTT */ + if (drm_mm_node_allocated(&node)) { + /* flush the write before we modify the GGTT */ + intel_gt_flush_ggtt_writes(ggtt->vm.gt); ggtt->vm.insert_page(&ggtt->vm, i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), node.start, I915_CACHE_NONE, 0); @@ -672,22 +674,20 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, user_data += page_length; offset += page_length; } - intel_fb_obj_flush(obj, ORIGIN_CPU); + + intel_gt_flush_ggtt_writes(ggtt->vm.gt); + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); i915_gem_object_unlock_fence(obj, fence); out_unpin: - mutex_lock(&i915->drm.struct_mutex); - if (node.allocated) { - wmb(); + if (drm_mm_node_allocated(&node)) { ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(&node); + remove_mappable_node(ggtt, &node); } else { i915_vma_unpin(vma); } out_rpm: intel_runtime_pm_put(rpm, wakeref); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -765,7 +765,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, offset = 0; } - intel_fb_obj_flush(obj, ORIGIN_CPU); + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); i915_gem_object_unlock_fence(obj, fence); return ret; @@ -899,7 +899,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) list_for_each_entry_safe(obj, on, &i915->ggtt.userfault_list, userfault_link) - __i915_gem_object_release_mmap(obj); + __i915_gem_object_release_mmap_gtt(obj); /* * The fence will be lost when the device powers down. 
If any were @@ -929,93 +929,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) } } -static int wait_for_engines(struct drm_i915_private *i915) -{ - if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { - dev_err(i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); - return -EIO; - } - - return 0; -} - -static long -wait_for_timelines(struct drm_i915_private *i915, - unsigned int flags, long timeout) -{ - struct i915_gt_timelines *gt = &i915->gt.timelines; - struct i915_timeline *tl; - - mutex_lock(>->mutex); - list_for_each_entry(tl, >->active_list, link) { - struct i915_request *rq; - - rq = i915_active_request_get_unlocked(&tl->last_request); - if (!rq) - continue; - - mutex_unlock(>->mutex); - - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used a synchronous - * step prior to idling, e.g. in suspend for flushing all - * current operations to memory before sleeping. These we - * want to complete as quickly as possible to avoid prolonged - * stalls, so allow the gpu to boost to maximum clocks. - */ - if (flags & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq); - - timeout = i915_request_wait(rq, flags, timeout); - i915_request_put(rq); - if (timeout < 0) - return timeout; - - /* restart after reacquiring the lock */ - mutex_lock(>->mutex); - tl = list_entry(>->active_list, typeof(*tl), link); - } - mutex_unlock(>->mutex); - - return timeout; -} - -int i915_gem_wait_for_idle(struct drm_i915_private *i915, - unsigned int flags, long timeout) -{ - GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n", - flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", - timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "", - yesno(i915->gt.awake)); - - /* If the device is asleep, we have no requests outstanding */ - if (!READ_ONCE(i915->gt.awake)) - return 0; - - timeout = wait_for_timelines(i915, flags, timeout); - if (timeout < 0) - return timeout; - - if (flags & I915_WAIT_LOCKED) { - int err; - - lockdep_assert_held(&i915->drm.struct_mutex); - - err = wait_for_engines(i915); - if (err) - return err; - - i915_retire_requests(i915); - } - - return 0; -} - struct i915_vma * i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, @@ -1023,26 +936,29 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 alignment, u64 flags) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_address_space *vm = &dev_priv->ggtt.vm; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; struct i915_vma *vma; int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); + if (i915_gem_object_never_bind_ggtt(obj)) + return ERR_PTR(-ENODEV); if (flags & PIN_MAPPABLE && (!view || view->type == I915_GGTT_VIEW_NORMAL)) { - /* If the required space is larger than the available + /* + * If the required space is larger than the available * aperture, we will not able to find a slot for the * object and unbinding the object now will be in * vain. Worse, doing so may cause us to ping-pong * the object in and out of the Global GTT and * waste a lot of cycles under the mutex. 
*/ - if (obj->base.size > dev_priv->ggtt.mappable_end) + if (obj->base.size > ggtt->mappable_end) return ERR_PTR(-E2BIG); - /* If NONBLOCK is set the caller is optimistically + /* + * If NONBLOCK is set the caller is optimistically * trying to cache the full object within the mappable * aperture, and *must* have a fallback in place for * situations where we cannot bind the object. We @@ -1058,11 +974,11 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, * we could try to minimise harm to others. */ if (flags & PIN_NONBLOCK && - obj->base.size > dev_priv->ggtt.mappable_end / 2) + obj->base.size > ggtt->mappable_end / 2) return ERR_PTR(-ENOSPC); } - vma = i915_vma_instance(obj, vm, view); + vma = i915_vma_instance(obj, &ggtt->vm, view); if (IS_ERR(vma)) return vma; @@ -1072,22 +988,23 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOSPC); if (flags & PIN_MAPPABLE && - vma->fence_size > dev_priv->ggtt.mappable_end / 2) + vma->fence_size > ggtt->mappable_end / 2) return ERR_PTR(-ENOSPC); } - WARN(i915_vma_is_pinned(vma), - "bo is already pinned in ggtt with incorrect alignment:" - " offset=%08x, req.alignment=%llx," - " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", - i915_ggtt_offset(vma), alignment, - !!(flags & PIN_MAPPABLE), - i915_vma_is_map_and_fenceable(vma)); ret = i915_vma_unbind(vma); if (ret) return ERR_PTR(ret); } + if (vma->fence && !i915_gem_object_is_tiled(obj)) { + mutex_lock(&ggtt->vm.mutex); + ret = i915_vma_revoke_fence(vma); + mutex_unlock(&ggtt->vm.mutex); + if (ret) + return ERR_PTR(ret); + } + ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); if (ret) return ERR_PTR(ret); @@ -1169,333 +1086,6 @@ out: return err; } -void i915_gem_sanitize(struct drm_i915_private *i915) -{ - intel_wakeref_t wakeref; - - GEM_TRACE("\n"); - - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - - /* - * As we have just resumed the machine and woken the device up from - * deep PCI sleep (presumably D3_cold), assume the HW has been reset - * back to defaults, recovering from whatever wedged state we left it - * in and so worth trying to use the device once more. - */ - if (i915_terminally_wedged(i915)) - i915_gem_unset_wedged(i915); - - /* - * If we inherit context state from the BIOS or earlier occupants - * of the GPU, the GPU may be in an inconsistent state when we - * try to take over. The only way to remove the earlier state - * is by resetting. However, resetting on earlier gen is tricky as - * it may impact the display and we are uncertain about the stability - * of the reset, so this could be applied to even earlier gen. 
- */ - intel_gt_sanitize(i915, false); - - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); -} - -void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) -{ - if (INTEL_GEN(dev_priv) < 5 || - dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) - return; - - I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | - DISP_TILE_SURFACE_SWIZZLING); - - if (IS_GEN(dev_priv, 5)) - return; - - I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); - if (IS_GEN(dev_priv, 6)) - I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); - else if (IS_GEN(dev_priv, 7)) - I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); - else if (IS_GEN(dev_priv, 8)) - I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); - else - BUG(); -} - -static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) -{ - I915_WRITE(RING_CTL(base), 0); - I915_WRITE(RING_HEAD(base), 0); - I915_WRITE(RING_TAIL(base), 0); - I915_WRITE(RING_START(base), 0); -} - -static void init_unused_rings(struct drm_i915_private *dev_priv) -{ - if (IS_I830(dev_priv)) { - init_unused_ring(dev_priv, PRB1_BASE); - init_unused_ring(dev_priv, SRB0_BASE); - init_unused_ring(dev_priv, SRB1_BASE); - init_unused_ring(dev_priv, SRB2_BASE); - init_unused_ring(dev_priv, SRB3_BASE); - } else if (IS_GEN(dev_priv, 2)) { - init_unused_ring(dev_priv, SRB0_BASE); - init_unused_ring(dev_priv, SRB1_BASE); - } else if (IS_GEN(dev_priv, 3)) { - init_unused_ring(dev_priv, PRB1_BASE); - init_unused_ring(dev_priv, PRB2_BASE); - } -} - -int i915_gem_init_hw(struct drm_i915_private *dev_priv) -{ - int ret; - - dev_priv->gt.last_init_time = ktime_get(); - - /* Double layer security blanket, see i915_gem_init() */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) - I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); - - if (IS_HASWELL(dev_priv)) - I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? - LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); - - /* Apply the GT workarounds... */ - intel_gt_apply_workarounds(dev_priv); - /* ...and determine whether they are sticking. */ - intel_gt_verify_workarounds(dev_priv, "init"); - - i915_gem_init_swizzling(dev_priv); - - /* - * At least 830 can leave some of the unused rings - * "active" (ie. head != tail) after resume which - * will prevent c3 entry. Makes sure all unused rings - * are totally idle. 
- */ - init_unused_rings(dev_priv); - - BUG_ON(!dev_priv->kernel_context); - ret = i915_terminally_wedged(dev_priv); - if (ret) - goto out; - - ret = i915_ppgtt_init_hw(dev_priv); - if (ret) { - DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); - goto out; - } - - ret = intel_wopcm_init_hw(&dev_priv->wopcm); - if (ret) { - DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); - goto out; - } - - /* We can't enable contexts until all firmware is loaded */ - ret = intel_uc_init_hw(dev_priv); - if (ret) { - DRM_ERROR("Enabling uc failed (%d)\n", ret); - goto out; - } - - intel_mocs_init_l3cc_table(dev_priv); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - - intel_engines_set_scheduler_caps(dev_priv); - return 0; - -out: - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - return ret; -} - -static int __intel_engines_record_defaults(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - struct i915_gem_engines *e; - enum intel_engine_id id; - int err = 0; - - /* - * As we reset the gpu during very early sanitisation, the current - * register state on the GPU should reflect its defaults values. - * We load a context onto the hw (with restore-inhibit), then switch - * over to a second context to save that default register state. We - * can then prime every new context with that state so they all start - * from the same default HW values. - */ - - ctx = i915_gem_context_create_kernel(i915, 0); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - e = i915_gem_context_lock_engines(ctx); - - for_each_engine(engine, i915, id) { - struct intel_context *ce = e->engines[id]; - struct i915_request *rq; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_active; - } - - err = 0; - if (rq->engine->init_context) - err = rq->engine->init_context(rq); - - i915_request_add(rq); - if (err) - goto err_active; - } - - /* Flush the default context image to memory, and enable powersaving. */ - if (!i915_gem_load_power_context(i915)) { - err = -EIO; - goto err_active; - } - - for_each_engine(engine, i915, id) { - struct intel_context *ce = e->engines[id]; - struct i915_vma *state = ce->state; - void *vaddr; - - if (!state) - continue; - - GEM_BUG_ON(intel_context_is_pinned(ce)); - - /* - * As we will hold a reference to the logical state, it will - * not be torn down with the context, and importantly the - * object will hold onto its vma (making it possible for a - * stray GTT write to corrupt our defaults). Unmap the vma - * from the GTT to prevent such accidents and reclaim the - * space. - */ - err = i915_vma_unbind(state); - if (err) - goto err_active; - - i915_gem_object_lock(state->obj); - err = i915_gem_object_set_to_cpu_domain(state->obj, false); - i915_gem_object_unlock(state->obj); - if (err) - goto err_active; - - engine->default_state = i915_gem_object_get(state->obj); - i915_gem_object_set_cache_coherency(engine->default_state, - I915_CACHE_LLC); - - /* Check we can acquire the image of the context state */ - vaddr = i915_gem_object_pin_map(engine->default_state, - I915_MAP_FORCE_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_active; - } - - i915_gem_object_unpin_map(engine->default_state); - } - - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { - unsigned int found = intel_engines_has_context_isolation(i915); - - /* - * Make sure that classes with multiple engine instances all - * share the same basic configuration. 
- */ - for_each_engine(engine, i915, id) { - unsigned int bit = BIT(engine->uabi_class); - unsigned int expected = engine->default_state ? bit : 0; - - if ((found & bit) != expected) { - DRM_ERROR("mismatching default context state for class %d on engine %s\n", - engine->uabi_class, engine->name); - } - } - } - -out_ctx: - i915_gem_context_unlock_engines(ctx); - i915_gem_context_set_closed(ctx); - i915_gem_context_put(ctx); - return err; - -err_active: - /* - * If we have to abandon now, we expect the engines to be idle - * and ready to be torn-down. The quickest way we can accomplish - * this is by declaring ourselves wedged. - */ - i915_gem_set_wedged(i915); - goto out_ctx; -} - -static int -i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int ret; - - obj = i915_gem_object_create_stolen(i915, size); - if (!obj) - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate scratch page\n"); - return PTR_ERR(obj); - } - - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_unref; - } - - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (ret) - goto err_unref; - - i915->gt.scratch = vma; - return 0; - -err_unref: - i915_gem_object_put(obj); - return ret; -} - -static void i915_gem_fini_scratch(struct drm_i915_private *i915) -{ - i915_vma_unpin_and_release(&i915->gt.scratch, 0); -} - -static int intel_engines_verify_workarounds(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - return 0; - - for_each_engine(engine, i915, id) { - if (intel_engine_verify_workarounds(engine, "load")) - err = -EIO; - } - - return err; -} - int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; @@ -1505,77 +1095,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mkwrite_device_info(dev_priv)->page_sizes = I915_GTT_PAGE_SIZE_4K; - dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); - - i915_timelines_init(dev_priv); - ret = i915_gem_init_userptr(dev_priv); if (ret) return ret; - ret = intel_uc_init_misc(dev_priv); - if (ret) - return ret; + intel_uc_fetch_firmwares(&dev_priv->gt.uc); + intel_wopcm_init(&dev_priv->wopcm); - ret = intel_wopcm_init(&dev_priv->wopcm); - if (ret) - goto err_uc_misc; - - /* This is just a security blanket to placate dragons. - * On some systems, we very sporadically observe that the first TLBs - * used by the CS may be stale, despite us poking the TLB reset. If - * we hold the forcewake during initialisation these problems - * just magically go away. - */ - mutex_lock(&dev_priv->drm.struct_mutex); - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - ret = i915_gem_init_ggtt(dev_priv); + ret = i915_init_ggtt(dev_priv); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_unlock; } - ret = i915_gem_init_scratch(dev_priv, - IS_GEN(dev_priv, 2) ? 
SZ_256K : PAGE_SIZE); - if (ret) { - GEM_BUG_ON(ret == -EIO); - goto err_ggtt; - } - - ret = intel_engines_setup(dev_priv); - if (ret) { - GEM_BUG_ON(ret == -EIO); - goto err_unlock; - } - - ret = i915_gem_contexts_init(dev_priv); - if (ret) { - GEM_BUG_ON(ret == -EIO); - goto err_scratch; - } - - ret = intel_engines_init(dev_priv); - if (ret) { - GEM_BUG_ON(ret == -EIO); - goto err_context; - } - - intel_init_gt_powersave(dev_priv); - - ret = intel_uc_init(dev_priv); - if (ret) - goto err_pm; - - ret = i915_gem_init_hw(dev_priv); - if (ret) - goto err_uc_init; - - /* Only when the HW is re-initialised, can we replay the requests */ - ret = intel_gt_resume(dev_priv); - if (ret) - goto err_init_hw; - /* * Despite its name intel_init_clock_gating applies both display * clock gating workarounds; GT mmio workarounds and the occasional @@ -1587,26 +1119,9 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); - ret = intel_engines_verify_workarounds(dev_priv); + ret = intel_gt_init(&dev_priv->gt); if (ret) - goto err_gt; - - ret = __intel_engines_record_defaults(dev_priv); - if (ret) - goto err_gt; - - if (i915_inject_load_failure()) { - ret = -ENODEV; - goto err_gt; - } - - if (i915_inject_load_failure()) { - ret = -EIO; - goto err_gt; - } - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); + goto err_unlock; return 0; @@ -1616,120 +1131,82 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * HW as irrevisibly wedged, but keep enough state around that the * driver doesn't explode during runtime. */ -err_gt: - mutex_unlock(&dev_priv->drm.struct_mutex); - - i915_gem_set_wedged(dev_priv); - i915_gem_suspend(dev_priv); - i915_gem_suspend_late(dev_priv); - - i915_gem_drain_workqueue(dev_priv); - - mutex_lock(&dev_priv->drm.struct_mutex); -err_init_hw: - intel_uc_fini_hw(dev_priv); -err_uc_init: - intel_uc_fini(dev_priv); -err_pm: - if (ret != -EIO) { - intel_cleanup_gt_powersave(dev_priv); - intel_engines_cleanup(dev_priv); - } -err_context: - if (ret != -EIO) - i915_gem_contexts_fini(dev_priv); -err_scratch: - i915_gem_fini_scratch(dev_priv); -err_ggtt: err_unlock: - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); - -err_uc_misc: - intel_uc_fini_misc(dev_priv); + i915_gem_drain_workqueue(dev_priv); if (ret != -EIO) { + intel_uc_cleanup_firmwares(&dev_priv->gt.uc); i915_gem_cleanup_userptr(dev_priv); - i915_timelines_fini(dev_priv); } if (ret == -EIO) { - mutex_lock(&dev_priv->drm.struct_mutex); - /* - * Allow engine initialisation to fail by marking the GPU as - * wedged. But we only want to do this where the GPU is angry, + * Allow engines or uC initialisation to fail by marking the GPU + * as wedged. But we only want to do this when the GPU is angry, * for all other failure, such as an allocation failure, bail. 
*/ - if (!i915_reset_failed(dev_priv)) { - i915_load_error(dev_priv, - "Failed to initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(dev_priv); + if (!intel_gt_is_wedged(&dev_priv->gt)) { + i915_probe_error(dev_priv, + "Failed to initialize GPU, declaring it wedged!\n"); + intel_gt_set_wedged(&dev_priv->gt); } /* Minimal basic recovery for KMS */ ret = i915_ggtt_enable_hw(dev_priv); i915_gem_restore_gtt_mappings(dev_priv); - i915_gem_restore_fences(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); intel_init_clock_gating(dev_priv); - - mutex_unlock(&dev_priv->drm.struct_mutex); } i915_gem_drain_freed_objects(dev_priv); return ret; } -void i915_gem_fini_hw(struct drm_i915_private *dev_priv) +void i915_gem_driver_register(struct drm_i915_private *i915) { - GEM_BUG_ON(dev_priv->gt.awake); + i915_gem_driver_register__shrinker(i915); + + intel_engines_driver_register(i915); +} + +void i915_gem_driver_unregister(struct drm_i915_private *i915) +{ + i915_gem_driver_unregister__shrinker(i915); +} +void i915_gem_driver_remove(struct drm_i915_private *dev_priv) +{ intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref); i915_gem_suspend_late(dev_priv); - intel_disable_gt_powersave(dev_priv); + intel_gt_driver_remove(&dev_priv->gt); + dev_priv->uabi_engines = RB_ROOT; /* Flush any outstanding unpin_work. */ i915_gem_drain_workqueue(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); - intel_uc_fini_hw(dev_priv); - intel_uc_fini(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - i915_gem_drain_freed_objects(dev_priv); } -void i915_gem_fini(struct drm_i915_private *dev_priv) +void i915_gem_driver_release(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->drm.struct_mutex); - intel_engines_cleanup(dev_priv); - i915_gem_contexts_fini(dev_priv); - i915_gem_fini_scratch(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_driver_release__contexts(dev_priv); - intel_wa_list_free(&dev_priv->gt_wa_list); + intel_gt_driver_release(&dev_priv->gt); - intel_cleanup_gt_powersave(dev_priv); + intel_wa_list_free(&dev_priv->gt_wa_list); - intel_uc_fini_misc(dev_priv); + intel_uc_cleanup_firmwares(&dev_priv->gt.uc); i915_gem_cleanup_userptr(dev_priv); - i915_timelines_fini(dev_priv); i915_gem_drain_freed_objects(dev_priv); - WARN_ON(!list_empty(&dev_priv->contexts.list)); -} - -void i915_gem_init_mmio(struct drm_i915_private *i915) -{ - i915_gem_sanitize(i915); + WARN_ON(!list_empty(&dev_priv->gem.contexts.list)); } static void i915_gem_init__mm(struct drm_i915_private *i915) { spin_lock_init(&i915->mm.obj_lock); - spin_lock_init(&i915->mm.free_lock); init_llist_head(&i915->mm.free_list); @@ -1739,33 +1216,12 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) i915_gem_init__objects(i915); } -int i915_gem_init_early(struct drm_i915_private *dev_priv) +void i915_gem_init_early(struct drm_i915_private *dev_priv) { - int err; - - intel_gt_pm_init(dev_priv); - - INIT_LIST_HEAD(&dev_priv->gt.active_rings); - INIT_LIST_HEAD(&dev_priv->gt.closed_vma); - spin_lock_init(&dev_priv->gt.closed_lock); - i915_gem_init__mm(dev_priv); - i915_gem_init__pm(dev_priv); - - init_waitqueue_head(&dev_priv->gpu_error.wait_queue); - init_waitqueue_head(&dev_priv->gpu_error.reset_queue); - mutex_init(&dev_priv->gpu_error.wedge_mutex); - init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); - - atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); + i915_gem_init__contexts(dev_priv); spin_lock_init(&dev_priv->fb_tracking.lock); - - err = i915_gemfs_init(dev_priv); - if 
(err) - DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); - - return 0; } void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) @@ -1774,10 +1230,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.shrink_count); - - cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); - - i915_gemfs_fini(dev_priv); } int i915_gem_freeze(struct drm_i915_private *dev_priv) @@ -1869,39 +1321,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) return ret; } -/** - * i915_gem_track_fb - update frontbuffer tracking - * @old: current GEM buffer for the frontbuffer slots - * @new: new GEM buffer for the frontbuffer slots - * @frontbuffer_bits: bitmask of frontbuffer slots - * - * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them - * from @old and setting them in @new. Both @old and @new can be NULL. - */ -void i915_gem_track_fb(struct drm_i915_gem_object *old, - struct drm_i915_gem_object *new, - unsigned frontbuffer_bits) -{ - /* Control of individual bits within the mask are guarded by - * the owning plane->mutex, i.e. we can never see concurrent - * manipulation of individual bits. But since the bitfield as a whole - * is updated using RMW, we need to use atomics in order to update - * the bits. - */ - BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > - BITS_PER_TYPE(atomic_t)); - - if (old) { - WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); - atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); - } - - if (new) { - WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); - atomic_or(frontbuffer_bits, &new->frontbuffer_bits); - } -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_gem_device.c" #include "selftests/i915_gem.c" diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index fe82d3571072..1753c84d6c0d 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -28,17 +28,20 @@ #include <linux/bug.h> #include <linux/interrupt.h> +#include <drm/drm_drv.h> + +#include "i915_utils.h" + struct drm_i915_private; #ifdef CONFIG_DRM_I915_DEBUG_GEM -#define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER) +#define GEM_SHOW_DEBUG() drm_debug_enabled(DRM_UT_DRIVER) #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ - pr_err("%s:%d GEM_BUG_ON(%s)\n", \ - __func__, __LINE__, __stringify(condition)); \ - GEM_TRACE("%s:%d GEM_BUG_ON(%s)\n", \ - __func__, __LINE__, __stringify(condition)); \ + GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \ + __func__, __LINE__, __stringify(condition)); \ + GEM_TRACE_DUMP(); \ BUG(); \ } \ } while(0) @@ -64,17 +67,34 @@ struct drm_i915_private; #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM) #define GEM_TRACE(...) trace_printk(__VA_ARGS__) -#define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL) +#define GEM_TRACE_ERR(...) do { \ + pr_err(__VA_ARGS__); \ + trace_printk(__VA_ARGS__); \ +} while (0) +#define GEM_TRACE_DUMP() \ + do { ftrace_dump(DUMP_ALL); add_taint_for_CI(TAINT_WARN); } while (0) #define GEM_TRACE_DUMP_ON(expr) \ - do { if (expr) ftrace_dump(DUMP_ALL); } while (0) + do { if (expr) GEM_TRACE_DUMP(); } while (0) #else #define GEM_TRACE(...) do { } while (0) +#define GEM_TRACE_ERR(...) 
do { } while (0) #define GEM_TRACE_DUMP() do { } while (0) #define GEM_TRACE_DUMP_ON(expr) BUILD_BUG_ON_INVALID(expr) #endif #define I915_GEM_IDLE_TIMEOUT (HZ / 5) +static inline void tasklet_lock(struct tasklet_struct *t) +{ + while (!tasklet_trylock(t)) + cpu_relax(); +} + +static inline bool tasklet_is_locked(const struct tasklet_struct *t) +{ + return test_bit(TASKLET_STATE_RUN, &t->state); +} + static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c deleted file mode 100644 index 25a3e4d09a2f..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation - */ - -#include "i915_gem_batch_pool.h" -#include "i915_drv.h" - -/** - * DOC: batch pool - * - * In order to submit batch buffers as 'secure', the software command parser - * must ensure that a batch buffer cannot be modified after parsing. It does - * this by copying the user provided batch buffer contents to a kernel owned - * buffer from which the hardware will actually execute, and by carefully - * managing the address space bindings for such buffers. - * - * The batch pool framework provides a mechanism for the driver to manage a - * set of scratch buffers to use for this purpose. The framework can be - * extended to support other uses cases should they arise. - */ - -/** - * i915_gem_batch_pool_init() - initialize a batch buffer pool - * @pool: the batch buffer pool - * @engine: the associated request submission engine - */ -void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool, - struct intel_engine_cs *engine) -{ - int n; - - pool->engine = engine; - - for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) - INIT_LIST_HEAD(&pool->cache_list[n]); -} - -/** - * i915_gem_batch_pool_fini() - clean up a batch buffer pool - * @pool: the pool to clean up - * - * Note: Callers must hold the struct_mutex. - */ -void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool) -{ - int n; - - lockdep_assert_held(&pool->engine->i915->drm.struct_mutex); - - for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { - struct drm_i915_gem_object *obj, *next; - - list_for_each_entry_safe(obj, next, - &pool->cache_list[n], - batch_pool_link) - i915_gem_object_put(obj); - - INIT_LIST_HEAD(&pool->cache_list[n]); - } -} - -/** - * i915_gem_batch_pool_get() - allocate a buffer from the pool - * @pool: the batch buffer pool - * @size: the minimum desired size of the returned buffer - * - * Returns an inactive buffer from @pool with at least @size bytes, - * with the pages pinned. The caller must i915_gem_object_unpin_pages() - * on the returned object. - * - * Note: Callers must hold the struct_mutex - * - * Return: the buffer object or an error pointer - */ -struct drm_i915_gem_object * -i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, - size_t size) -{ - struct drm_i915_gem_object *obj; - struct list_head *list; - int n, ret; - - lockdep_assert_held(&pool->engine->i915->drm.struct_mutex); - - /* Compute a power-of-two bucket, but throw everything greater than - * 16KiB into the same bucket: i.e. the the buckets hold objects of - * (1 page, 2 pages, 4 pages, 8+ pages). 
- */ - n = fls(size >> PAGE_SHIFT) - 1; - if (n >= ARRAY_SIZE(pool->cache_list)) - n = ARRAY_SIZE(pool->cache_list) - 1; - list = &pool->cache_list[n]; - - list_for_each_entry(obj, list, batch_pool_link) { - /* The batches are strictly LRU ordered */ - if (i915_gem_object_is_active(obj)) { - struct reservation_object *resv = obj->base.resv; - - if (!reservation_object_test_signaled_rcu(resv, true)) - break; - - i915_retire_requests(pool->engine->i915); - GEM_BUG_ON(i915_gem_object_is_active(obj)); - - /* - * The object is now idle, clear the array of shared - * fences before we add a new request. Although, we - * remain on the same engine, we may be on a different - * timeline and so may continually grow the array, - * trapping a reference to all the old fences, rather - * than replace the existing fence. - */ - if (rcu_access_pointer(resv->fence)) { - reservation_object_lock(resv, NULL); - reservation_object_add_excl_fence(resv, NULL); - reservation_object_unlock(resv); - } - } - - GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->base.resv, - true)); - - if (obj->base.size >= size) - goto found; - } - - obj = i915_gem_object_create_internal(pool->engine->i915, size); - if (IS_ERR(obj)) - return obj; - -found: - ret = i915_gem_object_pin_pages(obj); - if (ret) - return ERR_PTR(ret); - - list_move_tail(&obj->batch_pool_link, list); - return obj; -} diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h deleted file mode 100644 index feeeeeaa54d8..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation - */ - -#ifndef I915_GEM_BATCH_POOL_H -#define I915_GEM_BATCH_POOL_H - -#include <linux/types.h> - -struct drm_i915_gem_object; -struct intel_engine_cs; - -struct i915_gem_batch_pool { - struct intel_engine_cs *engine; - struct list_head cache_list[4]; -}; - -void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool, - struct intel_engine_cs *engine); -void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); -struct drm_i915_gem_object * -i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size); - -#endif /* I915_GEM_BATCH_POOL_H */ diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index a5783c4cb98b..0697bedebeef 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -29,16 +29,16 @@ #include <drm/i915_drm.h> #include "gem/i915_gem_context.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" -#include "intel_drv.h" #include "i915_trace.h" I915_SELFTEST_DECLARE(static struct igt_evict_ctl { bool fail_if_busy:1; } igt_evict_ctl;) -static int ggtt_flush(struct drm_i915_private *i915) +static int ggtt_flush(struct intel_gt *gt) { /* * Not everything in the GGTT is tracked via vma (otherwise we @@ -47,10 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915) * the hopes that we can then remove contexts and the like only * bound by their active reference. 
*/ - return i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); + return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); } static bool @@ -62,9 +59,6 @@ mark_free(struct drm_mm_scan *scan, if (i915_vma_is_pinned(vma)) return false; - if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) - return false; - list_add(&vma->evict_link, unwind); return drm_mm_scan_add_block(scan, &vma->node); } @@ -74,7 +68,7 @@ mark_free(struct drm_mm_scan *scan, * @vm: address space to evict from * @min_size: size of the desired free space * @alignment: alignment constraint of the desired free space - * @cache_level: cache_level for the desired space + * @color: color for the desired space * @start: start (inclusive) of the range from which to evict objects * @end: end (exclusive) of the range from which to evict objects * @flags: additional flags to control the eviction algorithm @@ -95,11 +89,10 @@ mark_free(struct drm_mm_scan *scan, int i915_gem_evict_something(struct i915_address_space *vm, u64 min_size, u64 alignment, - unsigned cache_level, + unsigned long color, u64 start, u64 end, unsigned flags) { - struct drm_i915_private *dev_priv = vm->i915; struct drm_mm_scan scan; struct list_head eviction_list; struct i915_vma *vma, *next; @@ -108,7 +101,7 @@ i915_gem_evict_something(struct i915_address_space *vm, struct i915_vma *active; int ret; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); trace_i915_gem_evict(vm, min_size, alignment, flags); /* @@ -128,17 +121,10 @@ i915_gem_evict_something(struct i915_address_space *vm, if (flags & PIN_MAPPABLE) mode = DRM_MM_INSERT_LOW; drm_mm_scan_init_with_range(&scan, &vm->mm, - min_size, alignment, cache_level, + min_size, alignment, color, start, end, mode); - /* - * Retire before we search the active list. Although we have - * reasonable accuracy in our retirement lists, we may have - * a stray pin (preventing eviction) that can only be resolved by - * retiring. - */ - if (!(flags & PIN_NONBLOCK)) - i915_retire_requests(dev_priv); + intel_gt_retire_requests(vm->gt); search_again: active = NULL; @@ -211,7 +197,7 @@ search_again: if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) return -EBUSY; - ret = ggtt_flush(dev_priv); + ret = ggtt_flush(vm->gt); if (ret) return ret; @@ -239,12 +225,12 @@ found: list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) { vma = container_of(node, struct i915_vma, node); - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; @@ -270,25 +256,23 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, u64 start = target->start; u64 end = start + target->size; struct i915_vma *vma, *next; - bool check_color; int ret = 0; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); trace_i915_gem_evict_node(vm, target, flags); - /* Retire before we search the active list. Although we have + /* + * Retire before we search the active list. Although we have * reasonable accuracy in our retirement lists, we may have * a stray pin (preventing eviction) that can only be resolved by * retiring. 
*/ - if (!(flags & PIN_NONBLOCK)) - i915_retire_requests(vm->i915); + intel_gt_retire_requests(vm->gt); - check_color = vm->mm.color_adjust; - if (check_color) { + if (i915_vm_has_cache_coloring(vm)) { /* Expand search to cover neighbouring guard pages (or lack!) */ if (start) start -= I915_GTT_PAGE_SIZE; @@ -305,7 +289,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } - GEM_BUG_ON(!node->allocated); + GEM_BUG_ON(!drm_mm_node_allocated(node)); vma = container_of(node, typeof(*vma), node); /* If we are using coloring to insert guard pages between @@ -314,7 +298,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * abutt and conflict. If they are in conflict, then we evict * those as well to make room for our guard pages. */ - if (check_color) { + if (i915_vm_has_cache_coloring(vm)) { if (node->start + node->size == target->start) { if (node->color == target->color) continue; @@ -331,11 +315,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } - if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) { - ret = -ENOSPC; - break; - } - /* Overlap of objects in the same batch? */ if (i915_vma_is_pinned(vma)) { ret = -ENOSPC; @@ -360,7 +339,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; @@ -380,11 +359,9 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, */ int i915_gem_evict_vm(struct i915_address_space *vm) { - struct list_head eviction_list; - struct i915_vma *vma, *next; - int ret; + int ret = 0; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); trace_i915_gem_evict_vm(vm); /* Switch back to the default context in order to unpin @@ -393,28 +370,35 @@ int i915_gem_evict_vm(struct i915_address_space *vm) * switch otherwise is ineffective. */ if (i915_is_ggtt(vm)) { - ret = ggtt_flush(vm->i915); + ret = ggtt_flush(vm->gt); if (ret) return ret; } - INIT_LIST_HEAD(&eviction_list); - mutex_lock(&vm->mutex); - list_for_each_entry(vma, &vm->bound_list, vm_link) { - if (i915_vma_is_pinned(vma)) - continue; + do { + struct i915_vma *vma, *vn; + LIST_HEAD(eviction_list); - __i915_vma_pin(vma); - list_add(&vma->evict_link, &eviction_list); - } - mutex_unlock(&vm->mutex); + list_for_each_entry(vma, &vm->bound_list, vm_link) { + if (i915_vma_is_pinned(vma)) + continue; + + __i915_vma_pin(vma); + list_add(&vma->evict_link, &eviction_list); + } + if (list_empty(&eviction_list)) + break; + + ret = 0; + list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) { + __i915_vma_unpin(vma); + if (ret == 0) + ret = __i915_vma_unbind(vma); + if (ret != -EINTR) /* "Get me out of here!" 
*/ + ret = 0; + } + } while (ret == 0); - ret = 0; - list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { - __i915_vma_unpin(vma); - if (ret == 0) - ret = i915_vma_unbind(vma); - } return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 0bf53ac1c835..d9c34a23cd67 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -59,6 +59,16 @@ #define pipelined 0 +static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence) +{ + return fence->ggtt->vm.i915; +} + +static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence) +{ + return fence->ggtt->vm.gt->uncore; +} + static void i965_write_fence_reg(struct i915_fence_reg *fence, struct i915_vma *vma) { @@ -66,7 +76,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence, int fence_pitch_shift; u64 val; - if (INTEL_GEN(fence->i915) >= 6) { + if (INTEL_GEN(fence_to_i915(fence)) >= 6) { fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; @@ -95,7 +105,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence, } if (!pipelined) { - struct intel_uncore *uncore = &fence->i915->uncore; + struct intel_uncore *uncore = fence_to_uncore(fence); /* * To w/a incoherency with non-atomic 64-bit register updates, @@ -132,7 +142,7 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence, GEM_BUG_ON(!is_power_of_2(vma->fence_size)); GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size)); - if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) + if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence))) stride /= 128; else stride /= 512; @@ -148,7 +158,7 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence, } if (!pipelined) { - struct intel_uncore *uncore = &fence->i915->uncore; + struct intel_uncore *uncore = fence_to_uncore(fence); i915_reg_t reg = FENCE_REG(fence->id); intel_uncore_write_fw(uncore, reg, val); @@ -180,7 +190,7 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence, } if (!pipelined) { - struct intel_uncore *uncore = &fence->i915->uncore; + struct intel_uncore *uncore = fence_to_uncore(fence); i915_reg_t reg = FENCE_REG(fence->id); intel_uncore_write_fw(uncore, reg, val); @@ -191,15 +201,17 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence, static void fence_write(struct i915_fence_reg *fence, struct i915_vma *vma) { + struct drm_i915_private *i915 = fence_to_i915(fence); + /* * Previous access through the fence register is marshalled by * the mb() inside the fault handlers (i915_gem_release_mmaps) * and explicitly managed for internal users. 
*/ - if (IS_GEN(fence->i915, 2)) + if (IS_GEN(i915, 2)) i830_write_fence_reg(fence, vma); - else if (IS_GEN(fence->i915, 3)) + else if (IS_GEN(i915, 3)) i915_write_fence_reg(fence, vma); else i965_write_fence_reg(fence, vma); @@ -215,6 +227,8 @@ static void fence_write(struct i915_fence_reg *fence, static int fence_update(struct i915_fence_reg *fence, struct i915_vma *vma) { + struct i915_ggtt *ggtt = fence->ggtt; + struct intel_uncore *uncore = fence_to_uncore(fence); intel_wakeref_t wakeref; struct i915_vma *old; int ret; @@ -230,16 +244,15 @@ static int fence_update(struct i915_fence_reg *fence, i915_gem_object_get_tiling(vma->obj))) return -EINVAL; - ret = i915_active_request_retire(&vma->last_fence, - &vma->obj->base.dev->struct_mutex); + ret = i915_vma_sync(vma); if (ret) return ret; } old = xchg(&fence->vma, NULL); if (old) { - ret = i915_active_request_retire(&old->last_fence, - &old->obj->base.dev->struct_mutex); + /* XXX Ideally we would move the waiting to outside the mutex */ + ret = i915_vma_sync(old); if (ret) { fence->vma = old; return ret; @@ -257,7 +270,7 @@ static int fence_update(struct i915_fence_reg *fence, old->fence = NULL; } - list_move(&fence->link, &fence->i915->ggtt.fence_list); + list_move(&fence->link, &ggtt->fence_list); } /* @@ -270,7 +283,7 @@ static int fence_update(struct i915_fence_reg *fence, * be cleared before we can use any other fences to ensure that * the new fences do not overlap the elided clears, confusing HW. */ - wakeref = intel_runtime_pm_get_if_in_use(&fence->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm); if (!wakeref) { GEM_BUG_ON(vma); return 0; @@ -281,15 +294,15 @@ static int fence_update(struct i915_fence_reg *fence, if (vma) { vma->fence = fence; - list_move_tail(&fence->link, &fence->i915->ggtt.fence_list); + list_move_tail(&fence->link, &ggtt->fence_list); } - intel_runtime_pm_put(&fence->i915->runtime_pm, wakeref); + intel_runtime_pm_put(uncore->rpm, wakeref); return 0; } /** - * i915_vma_put_fence - force-remove fence for a VMA + * i915_vma_revoke_fence - force-remove fence for a VMA * @vma: vma to map linearly (not through a fence reg) * * This function force-removes any fence from the given object, which is useful @@ -299,88 +312,68 @@ static int fence_update(struct i915_fence_reg *fence, * * 0 on success, negative error code on failure. 
*/ -int i915_vma_put_fence(struct i915_vma *vma) +int i915_vma_revoke_fence(struct i915_vma *vma) { struct i915_fence_reg *fence = vma->fence; + lockdep_assert_held(&vma->vm->mutex); if (!fence) return 0; - if (fence->pin_count) + if (atomic_read(&fence->pin_count)) return -EBUSY; return fence_update(fence, NULL); } -static struct i915_fence_reg *fence_find(struct drm_i915_private *i915) +static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) { struct i915_fence_reg *fence; - list_for_each_entry(fence, &i915->ggtt.fence_list, link) { + list_for_each_entry(fence, &ggtt->fence_list, link) { GEM_BUG_ON(fence->vma && fence->vma->fence != fence); - if (fence->pin_count) + if (atomic_read(&fence->pin_count)) continue; return fence; } /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(i915)) + if (intel_has_pending_fb_unpin(ggtt->vm.i915)) return ERR_PTR(-EAGAIN); return ERR_PTR(-EDEADLK); } -/** - * i915_vma_pin_fence - set up fencing for a vma - * @vma: vma to map through a fence reg - * - * When mapping objects through the GTT, userspace wants to be able to write - * to them without having to worry about swizzling if the object is tiled. - * This function walks the fence regs looking for a free one for @obj, - * stealing one if it can't find any. - * - * It then sets up the reg based on the object's properties: address, pitch - * and tiling format. - * - * For an untiled surface, this removes any existing fence. - * - * Returns: - * - * 0 on success, negative error code on failure. - */ -int i915_vma_pin_fence(struct i915_vma *vma) +int __i915_vma_pin_fence(struct i915_vma *vma) { + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); struct i915_fence_reg *fence; struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; int err; - /* - * Note that we revoke fences on runtime suspend. Therefore the user - * must keep the device awake whilst using the fence. - */ - assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm); + lockdep_assert_held(&vma->vm->mutex); /* Just update our place in the LRU if our fence is getting reused. */ if (vma->fence) { fence = vma->fence; GEM_BUG_ON(fence->vma != vma); - fence->pin_count++; + atomic_inc(&fence->pin_count); if (!fence->dirty) { - list_move_tail(&fence->link, - &fence->i915->ggtt.fence_list); + list_move_tail(&fence->link, &ggtt->fence_list); return 0; } } else if (set) { - fence = fence_find(vma->vm->i915); + fence = fence_find(ggtt); if (IS_ERR(fence)) return PTR_ERR(fence); - GEM_BUG_ON(fence->pin_count); - fence->pin_count++; - } else + GEM_BUG_ON(atomic_read(&fence->pin_count)); + atomic_inc(&fence->pin_count); + } else { return 0; + } err = fence_update(fence, set); if (err) @@ -393,33 +386,76 @@ int i915_vma_pin_fence(struct i915_vma *vma) return 0; out_unpin: - fence->pin_count--; + atomic_dec(&fence->pin_count); + return err; +} + +/** + * i915_vma_pin_fence - set up fencing for a vma + * @vma: vma to map through a fence reg + * + * When mapping objects through the GTT, userspace wants to be able to write + * to them without having to worry about swizzling if the object is tiled. + * This function walks the fence regs looking for a free one for @obj, + * stealing one if it can't find any. + * + * It then sets up the reg based on the object's properties: address, pitch + * and tiling format. + * + * For an untiled surface, this removes any existing fence. + * + * Returns: + * + * 0 on success, negative error code on failure. 
+ */ +int i915_vma_pin_fence(struct i915_vma *vma) +{ + int err; + + if (!vma->fence && !i915_gem_object_is_tiled(vma->obj)) + return 0; + + /* + * Note that we revoke fences on runtime suspend. Therefore the user + * must keep the device awake whilst using the fence. + */ + assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + + err = mutex_lock_interruptible(&vma->vm->mutex); + if (err) + return err; + + err = __i915_vma_pin_fence(vma); + mutex_unlock(&vma->vm->mutex); + return err; } /** * i915_reserve_fence - Reserve a fence for vGPU - * @i915: i915 device private + * @ggtt: Global GTT * * This function walks the fence regs looking for a free one and remove * it from the fence_list. It is used to reserve fence for vGPU to use. */ -struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915) +struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt) { struct i915_fence_reg *fence; int count; int ret; - lockdep_assert_held(&i915->drm.struct_mutex); + lockdep_assert_held(&ggtt->vm.mutex); /* Keep at least one fence available for the display engine. */ count = 0; - list_for_each_entry(fence, &i915->ggtt.fence_list, link) - count += !fence->pin_count; + list_for_each_entry(fence, &ggtt->fence_list, link) + count += !atomic_read(&fence->pin_count); if (count <= 1) return ERR_PTR(-ENOSPC); - fence = fence_find(i915); + fence = fence_find(ggtt); if (IS_ERR(fence)) return fence; @@ -431,6 +467,7 @@ struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915) } list_del(&fence->link); + return fence; } @@ -442,26 +479,28 @@ struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915) */ void i915_unreserve_fence(struct i915_fence_reg *fence) { - lockdep_assert_held(&fence->i915->drm.struct_mutex); + struct i915_ggtt *ggtt = fence->ggtt; - list_add(&fence->link, &fence->i915->ggtt.fence_list); + lockdep_assert_held(&ggtt->vm.mutex); + + list_add(&fence->link, &ggtt->fence_list); } /** * i915_gem_restore_fences - restore fence state - * @i915: i915 device private + * @ggtt: Global GTT * * Restore the hw fence state to match the software tracking again, to be called * after a gpu reset and on resume. Note that on runtime suspend we only cancel * the fences, to be reacquired by the user later. */ -void i915_gem_restore_fences(struct drm_i915_private *i915) +void i915_gem_restore_fences(struct i915_ggtt *ggtt) { int i; rcu_read_lock(); /* keep obj alive as we dereference */ - for (i = 0; i < i915->ggtt.num_fences; i++) { - struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i]; + for (i = 0; i < ggtt->num_fences; i++) { + struct i915_fence_reg *reg = &ggtt->fence_regs[i]; struct i915_vma *vma = READ_ONCE(reg->vma); GEM_BUG_ON(vma && vma->fence != reg); @@ -527,15 +566,16 @@ void i915_gem_restore_fences(struct drm_i915_private *i915) */ /** - * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern - * @i915: i915 device private + * detect_bit_6_swizzle - detect bit 6 swizzling pattern + * @ggtt: Global GGTT * * Detects bit 6 swizzling of address lookup between IGD access and CPU * access through main memory. 
*/ -static void detect_bit_6_swizzle(struct drm_i915_private *i915) +static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) { - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + struct drm_i915_private *i915 = ggtt->vm.i915; u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; @@ -697,8 +737,8 @@ static void detect_bit_6_swizzle(struct drm_i915_private *i915) swizzle_y = I915_BIT_6_SWIZZLE_NONE; } - i915->mm.bit_6_swizzle_x = swizzle_x; - i915->mm.bit_6_swizzle_y = swizzle_y; + i915->ggtt.bit_6_swizzle_x = swizzle_x; + i915->ggtt.bit_6_swizzle_y = swizzle_y; } /* @@ -799,17 +839,20 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, void i915_ggtt_init_fences(struct i915_ggtt *ggtt) { struct drm_i915_private *i915 = ggtt->vm.i915; + struct intel_uncore *uncore = ggtt->vm.gt->uncore; int num_fences; int i; INIT_LIST_HEAD(&ggtt->fence_list); INIT_LIST_HEAD(&ggtt->userfault_list); - intel_wakeref_auto_init(&ggtt->userfault_wakeref, &i915->runtime_pm); + intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm); - detect_bit_6_swizzle(i915); + detect_bit_6_swizzle(ggtt); - if (INTEL_GEN(i915) >= 7 && - !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) + if (!i915_ggtt_has_aperture(ggtt)) + num_fences = 0; + else if (INTEL_GEN(i915) >= 7 && + !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) num_fences = 32; else if (INTEL_GEN(i915) >= 4 || IS_I945G(i915) || IS_I945GM(i915) || @@ -819,18 +862,50 @@ void i915_ggtt_init_fences(struct i915_ggtt *ggtt) num_fences = 8; if (intel_vgpu_active(i915)) - num_fences = intel_uncore_read(&i915->uncore, + num_fences = intel_uncore_read(uncore, vgtif_reg(avail_rs.fence_num)); /* Initialize fence registers to zero */ for (i = 0; i < num_fences; i++) { struct i915_fence_reg *fence = &ggtt->fence_regs[i]; - fence->i915 = i915; + fence->ggtt = ggtt; fence->id = i; list_add_tail(&fence->link, &ggtt->fence_list); } ggtt->num_fences = num_fences; - i915_gem_restore_fences(i915); + i915_gem_restore_fences(ggtt); +} + +void intel_gt_init_swizzling(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + + if (INTEL_GEN(i915) < 5 || + i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) + return; + + intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING); + + if (IS_GEN(i915, 5)) + return; + + intel_uncore_rmw(uncore, TILECTL, 0, TILECTL_SWZCTL); + + if (IS_GEN(i915, 6)) + intel_uncore_write(uncore, + ARB_MODE, + _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); + else if (IS_GEN(i915, 7)) + intel_uncore_write(uncore, + ARB_MODE, + _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); + else if (IS_GEN(i915, 8)) + intel_uncore_write(uncore, + GAMTARBMODE, + _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); + else + MISSING_CASE(INTEL_GEN(i915)); } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index d2da98828179..7bd521cd7cd7 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -29,18 +29,18 @@ #include <linux/types.h> struct drm_i915_gem_object; -struct drm_i915_private; struct i915_ggtt; struct i915_vma; +struct intel_gt; struct sg_table; #define I965_FENCE_PAGE 4096UL struct i915_fence_reg { struct list_head link; - struct drm_i915_private *i915; + struct i915_ggtt *ggtt; struct i915_vma *vma; - int pin_count; + atomic_t pin_count; int id; /** * Whether the tiling parameters for the currently @@ -54,10 +54,10 
@@ struct i915_fence_reg { }; /* i915_gem_fence_reg.c */ -struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915); +struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt); void i915_unreserve_fence(struct i915_fence_reg *fence); -void i915_gem_restore_fences(struct drm_i915_private *i915); +void i915_gem_restore_fences(struct i915_ggtt *ggtt); void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, struct sg_table *pages); @@ -66,4 +66,6 @@ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, void i915_ggtt_init_fences(struct i915_ggtt *ggtt); +void intel_gt_init_swizzling(struct intel_gt *gt); + #endif diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7015a97b1097..e039eb56900f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1,26 +1,7 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2010 Daniel Vetter - * Copyright © 2011-2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * + * Copyright © 2020 Intel Corporation */ #include <linux/slab.h> /* fault-inject.h is not standalone! */ @@ -32,2291 +13,18 @@ #include <linux/stop_machine.h> #include <asm/set_memory.h> +#include <asm/smp.h> #include <drm/i915_drm.h> #include "display/intel_frontbuffer.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_scatterlist.h" #include "i915_trace.h" #include "i915_vgpu.h" -#include "intel_drv.h" - -#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -/** - * DOC: Global GTT views - * - * Background and previous state - * - * Historically objects could exists (be bound) in global GTT space only as - * singular instances with a view representing all of the object's backing pages - * in a linear fashion. This view will be called a normal view. - * - * To support multiple views of the same object, where the number of mapped - * pages is not equal to the backing store, or where the layout of the pages - * is not linear, concept of a GGTT view was added. - * - * One example of an alternative view is a stereo display driven by a single - * image. In this case we would have a framebuffer looking like this - * (2x2 pages): - * - * 12 - * 34 - * - * Above would represent a normal GGTT view as normally mapped for GPU or CPU - * rendering. 
In contrast, fed to the display engine would be an alternative - * view which could look something like this: - * - * 1212 - * 3434 - * - * In this example both the size and layout of pages in the alternative view is - * different from the normal view. - * - * Implementation and usage - * - * GGTT views are implemented using VMAs and are distinguished via enum - * i915_ggtt_view_type and struct i915_ggtt_view. - * - * A new flavour of core GEM functions which work with GGTT bound objects were - * added with the _ggtt_ infix, and sometimes with _view postfix to avoid - * renaming in large amounts of code. They take the struct i915_ggtt_view - * parameter encapsulating all metadata required to implement a view. - * - * As a helper for callers which are only interested in the normal view, - * globally const i915_ggtt_view_normal singleton instance exists. All old core - * GEM API functions, the ones not taking the view parameter, are operating on, - * or with the normal GGTT view. - * - * Code wanting to add or use a new GGTT view needs to: - * - * 1. Add a new enum with a suitable name. - * 2. Extend the metadata in the i915_ggtt_view structure if required. - * 3. Add support to i915_get_vma_pages(). - * - * New views are required to build a scatter-gather table from within the - * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and - * exists for the lifetime of an VMA. - * - * Core API is designed to have copy semantics which means that passed in - * struct i915_ggtt_view does not need to be persistent (left around after - * calling the core API functions). - * - */ - -static int -i915_get_ggtt_vma_pages(struct i915_vma *vma); - -static void gen6_ggtt_invalidate(struct drm_i915_private *i915) -{ - struct intel_uncore *uncore = &i915->uncore; - - /* - * Note that as an uncached mmio write, this will flush the - * WCB of the writes into the GGTT before it triggers the invalidate. 
- */ - intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); -} - -static void guc_ggtt_invalidate(struct drm_i915_private *i915) -{ - struct intel_uncore *uncore = &i915->uncore; - - gen6_ggtt_invalidate(i915); - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); -} - -static void gmch_ggtt_invalidate(struct drm_i915_private *i915) -{ - intel_gtt_chipset_flush(); -} - -static inline void i915_ggtt_invalidate(struct drm_i915_private *i915) -{ - i915->ggtt.invalidate(i915); -} - -static int ppgtt_bind_vma(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 unused) -{ - u32 pte_flags; - int err; - - if (!(vma->flags & I915_VMA_LOCAL_BIND)) { - err = vma->vm->allocate_va_range(vma->vm, - vma->node.start, vma->size); - if (err) - return err; - } - - /* Applicable to VLV, and gen8+ */ - pte_flags = 0; - if (i915_gem_object_is_readonly(vma->obj)) - pte_flags |= PTE_READ_ONLY; - - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - - return 0; -} - -static void ppgtt_unbind_vma(struct i915_vma *vma) -{ - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); -} - -static int ppgtt_set_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(vma->pages); - - vma->pages = vma->obj->mm.pages; - - vma->page_sizes = vma->obj->mm.page_sizes; - - return 0; -} - -static void clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - - if (vma->pages != vma->obj->mm.pages) { - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - - memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); -} - -static u64 gen8_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; - - if (unlikely(flags & PTE_READ_ONLY)) - pte &= ~_PAGE_RW; - - switch (level) { - case I915_CACHE_NONE: - pte |= PPAT_UNCACHED; - break; - case I915_CACHE_WT: - pte |= PPAT_DISPLAY_ELLC; - break; - default: - pte |= PPAT_CACHED; - break; - } - - return pte; -} - -static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, - const enum i915_cache_level level) -{ - gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; - pde |= addr; - if (level != I915_CACHE_NONE) - pde |= PPAT_CACHED_PDE; - else - pde |= PPAT_UNCACHED; - return pde; -} - -#define gen8_pdpe_encode gen8_pde_encode -#define gen8_pml4e_encode gen8_pde_encode - -static u64 snb_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_VALID; - pte |= GEN6_PTE_ADDR_ENCODE(addr); - - switch (level) { - case I915_CACHE_L3_LLC: - case I915_CACHE_LLC: - pte |= GEN6_PTE_CACHE_LLC; - break; - case I915_CACHE_NONE: - pte |= GEN6_PTE_UNCACHED; - break; - default: - MISSING_CASE(level); - } - - return pte; -} - -static u64 ivb_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_VALID; - pte |= GEN6_PTE_ADDR_ENCODE(addr); - - switch (level) { - case I915_CACHE_L3_LLC: - pte |= GEN7_PTE_CACHE_L3_LLC; - break; - case I915_CACHE_LLC: - pte |= GEN6_PTE_CACHE_LLC; - break; - case I915_CACHE_NONE: - pte |= GEN6_PTE_UNCACHED; - break; - default: - MISSING_CASE(level); - } - - return pte; -} - -static u64 byt_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_VALID; - pte |= GEN6_PTE_ADDR_ENCODE(addr); - - if (!(flags & PTE_READ_ONLY)) - pte |= BYT_PTE_WRITEABLE; - - if (level != I915_CACHE_NONE) - pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; - - return pte; -} - -static u64 hsw_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - 
u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_VALID; - pte |= HSW_PTE_ADDR_ENCODE(addr); - - if (level != I915_CACHE_NONE) - pte |= HSW_WB_LLC_AGE3; - - return pte; -} - -static u64 iris_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_VALID; - pte |= HSW_PTE_ADDR_ENCODE(addr); - - switch (level) { - case I915_CACHE_NONE: - break; - case I915_CACHE_WT: - pte |= HSW_WT_ELLC_LLC_AGE3; - break; - default: - pte |= HSW_WB_ELLC_LLC_AGE3; - break; - } - - return pte; -} - -static void stash_init(struct pagestash *stash) -{ - pagevec_init(&stash->pvec); - spin_lock_init(&stash->lock); -} - -static struct page *stash_pop_page(struct pagestash *stash) -{ - struct page *page = NULL; - - spin_lock(&stash->lock); - if (likely(stash->pvec.nr)) - page = stash->pvec.pages[--stash->pvec.nr]; - spin_unlock(&stash->lock); - - return page; -} - -static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec) -{ - unsigned int nr; - - spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); - - nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec)); - memcpy(stash->pvec.pages + stash->pvec.nr, - pvec->pages + pvec->nr - nr, - sizeof(pvec->pages[0]) * nr); - stash->pvec.nr += nr; - - spin_unlock(&stash->lock); - - pvec->nr -= nr; -} - -static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) -{ - struct pagevec stack; - struct page *page; - - if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) - i915_gem_shrink_all(vm->i915); - - page = stash_pop_page(&vm->free_pages); - if (page) - return page; - - if (!vm->pt_kmap_wc) - return alloc_page(gfp); - - /* Look in our global stash of WC pages... */ - page = stash_pop_page(&vm->i915->mm.wc_stash); - if (page) - return page; - - /* - * Otherwise batch allocate pages to amortize cost of set_pages_wc. - * - * We have to be careful as page allocation may trigger the shrinker - * (via direct reclaim) which will fill up the WC stash underneath us. - * So we add our WB pages into a temporary pvec on the stack and merge - * them into the WC stash after all the allocations are complete. - */ - pagevec_init(&stack); - do { - struct page *page; - - page = alloc_page(gfp); - if (unlikely(!page)) - break; - - stack.pages[stack.nr++] = page; - } while (pagevec_space(&stack)); - - if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) { - page = stack.pages[--stack.nr]; - - /* Merge spare WC pages to the global stash */ - if (stack.nr) - stash_push_pagevec(&vm->i915->mm.wc_stash, &stack); - - /* Push any surplus WC pages onto the local VM stash */ - if (stack.nr) - stash_push_pagevec(&vm->free_pages, &stack); - } - - /* Return unwanted leftovers */ - if (unlikely(stack.nr)) { - WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr)); - __pagevec_release(&stack); - } - - return page; -} - -static void vm_free_pages_release(struct i915_address_space *vm, - bool immediate) -{ - struct pagevec *pvec = &vm->free_pages.pvec; - struct pagevec stack; - - lockdep_assert_held(&vm->free_pages.lock); - GEM_BUG_ON(!pagevec_count(pvec)); - - if (vm->pt_kmap_wc) { - /* - * When we use WC, first fill up the global stash and then - * only if full immediately free the overflow. - */ - stash_push_pagevec(&vm->i915->mm.wc_stash, pvec); - - /* - * As we have made some room in the VM's free_pages, - * we can wait for it to fill again. Unless we are - * inside i915_address_space_fini() and must - * immediately release the pages! - */ - if (pvec->nr <= (immediate ? 
0 : PAGEVEC_SIZE - 1)) - return; - - /* - * We have to drop the lock to allow ourselves to sleep, - * so take a copy of the pvec and clear the stash for - * others to use it as we sleep. - */ - stack = *pvec; - pagevec_reinit(pvec); - spin_unlock(&vm->free_pages.lock); - - pvec = &stack; - set_pages_array_wb(pvec->pages, pvec->nr); - - spin_lock(&vm->free_pages.lock); - } - - __pagevec_release(pvec); -} - -static void vm_free_page(struct i915_address_space *vm, struct page *page) -{ - /* - * On !llc, we need to change the pages back to WB. We only do so - * in bulk, so we rarely need to change the page attributes here, - * but doing so requires a stop_machine() from deep inside arch/x86/mm. - * To make detection of the possible sleep more likely, use an - * unconditional might_sleep() for everybody. - */ - might_sleep(); - spin_lock(&vm->free_pages.lock); - while (!pagevec_space(&vm->free_pages.pvec)) - vm_free_pages_release(vm, false); - GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE); - pagevec_add(&vm->free_pages.pvec, page); - spin_unlock(&vm->free_pages.lock); -} - -static void i915_address_space_init(struct i915_address_space *vm, int subclass) -{ - kref_init(&vm->ref); - - /* - * The vm->mutex must be reclaim safe (for use in the shrinker). - * Do a dummy acquire now under fs_reclaim so that any allocation - * attempt holding the lock is immediately reported by lockdep. - */ - mutex_init(&vm->mutex); - lockdep_set_subclass(&vm->mutex, subclass); - i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); - - GEM_BUG_ON(!vm->total); - drm_mm_init(&vm->mm, 0, vm->total); - vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; - - stash_init(&vm->free_pages); - - INIT_LIST_HEAD(&vm->unbound_list); - INIT_LIST_HEAD(&vm->bound_list); -} - -static void i915_address_space_fini(struct i915_address_space *vm) -{ - spin_lock(&vm->free_pages.lock); - if (pagevec_count(&vm->free_pages.pvec)) - vm_free_pages_release(vm, true); - GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec)); - spin_unlock(&vm->free_pages.lock); - - drm_mm_takedown(&vm->mm); - - mutex_destroy(&vm->mutex); -} - -static int __setup_page_dma(struct i915_address_space *vm, - struct i915_page_dma *p, - gfp_t gfp) -{ - p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL); - if (unlikely(!p->page)) - return -ENOMEM; - - p->daddr = dma_map_page_attrs(vm->dma, - p->page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_NO_WARN); - if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { - vm_free_page(vm, p->page); - return -ENOMEM; - } - - return 0; -} - -static int setup_page_dma(struct i915_address_space *vm, - struct i915_page_dma *p) -{ - return __setup_page_dma(vm, p, __GFP_HIGHMEM); -} - -static void cleanup_page_dma(struct i915_address_space *vm, - struct i915_page_dma *p) -{ - dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - vm_free_page(vm, p->page); -} - -#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) - -#define setup_px(vm, px) setup_page_dma((vm), px_base(px)) -#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px)) -#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v)) -#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v)) - -static void fill_page_dma(struct i915_address_space *vm, - struct i915_page_dma *p, - const u64 val) -{ - u64 * const vaddr = kmap_atomic(p->page); - - memset64(vaddr, val, PAGE_SIZE / sizeof(val)); - - kunmap_atomic(vaddr); -} - -static void fill_page_dma_32(struct i915_address_space *vm, - struct 
i915_page_dma *p, - const u32 v) -{ - fill_page_dma(vm, p, (u64)v << 32 | v); -} - -static int -setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) -{ - unsigned long size; - - /* - * In order to utilize 64K pages for an object with a size < 2M, we will - * need to support a 64K scratch page, given that every 16th entry for a - * page-table operating in 64K mode must point to a properly aligned 64K - * region, including any PTEs which happen to point to scratch. - * - * This is only relevant for the 48b PPGTT where we support - * huge-gtt-pages, see also i915_vma_insert(). However, as we share the - * scratch (read-only) between all vm, we create one 64k scratch page - * for all. - */ - size = I915_GTT_PAGE_SIZE_4K; - if (i915_vm_is_4lvl(vm) && - HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { - size = I915_GTT_PAGE_SIZE_64K; - gfp |= __GFP_NOWARN; - } - gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; - - do { - int order = get_order(size); - struct page *page; - dma_addr_t addr; - - page = alloc_pages(gfp, order); - if (unlikely(!page)) - goto skip; - - addr = dma_map_page_attrs(vm->dma, - page, 0, size, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_NO_WARN); - if (unlikely(dma_mapping_error(vm->dma, addr))) - goto free_page; - - if (unlikely(!IS_ALIGNED(addr, size))) - goto unmap_page; - - vm->scratch_page.page = page; - vm->scratch_page.daddr = addr; - vm->scratch_order = order; - return 0; - -unmap_page: - dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL); -free_page: - __free_pages(page, order); -skip: - if (size == I915_GTT_PAGE_SIZE_4K) - return -ENOMEM; - - size = I915_GTT_PAGE_SIZE_4K; - gfp &= ~__GFP_NOWARN; - } while (1); -} - -static void cleanup_scratch_page(struct i915_address_space *vm) -{ - struct i915_page_dma *p = &vm->scratch_page; - int order = vm->scratch_order; - - dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, - PCI_DMA_BIDIRECTIONAL); - __free_pages(p->page, order); -} - -static struct i915_page_table *alloc_pt(struct i915_address_space *vm) -{ - struct i915_page_table *pt; - - pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL); - if (unlikely(!pt)) - return ERR_PTR(-ENOMEM); - - if (unlikely(setup_px(vm, pt))) { - kfree(pt); - return ERR_PTR(-ENOMEM); - } - - atomic_set(&pt->used, 0); - - return pt; -} - -static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) -{ - cleanup_px(vm, pt); - kfree(pt); -} - -static void gen8_initialize_pt(struct i915_address_space *vm, - struct i915_page_table *pt) -{ - fill_px(vm, pt, vm->scratch_pte); -} - -static void gen6_initialize_pt(struct i915_address_space *vm, - struct i915_page_table *pt) -{ - fill32_px(vm, pt, vm->scratch_pte); -} - -static struct i915_page_directory *__alloc_pd(void) -{ - struct i915_page_directory *pd; - - pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL); - - if (unlikely(!pd)) - return NULL; - - memset(&pd->base, 0, sizeof(pd->base)); - atomic_set(&pd->used, 0); - spin_lock_init(&pd->lock); - - /* for safety */ - pd->entry[0] = NULL; - - return pd; -} - -static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) -{ - struct i915_page_directory *pd; - - pd = __alloc_pd(); - if (unlikely(!pd)) - return ERR_PTR(-ENOMEM); - - if (unlikely(setup_px(vm, pd))) { - kfree(pd); - return ERR_PTR(-ENOMEM); - } - - return pd; -} - -static inline bool pd_has_phys_page(const struct i915_page_directory * const pd) -{ - return pd->base.page; -} - -static void free_pd(struct i915_address_space *vm, - struct i915_page_directory *pd) -{ - if 
(likely(pd_has_phys_page(pd))) - cleanup_px(vm, pd); - - kfree(pd); -} - -static void init_pd_with_page(struct i915_address_space *vm, - struct i915_page_directory * const pd, - struct i915_page_table *pt) -{ - fill_px(vm, pd, gen8_pde_encode(px_dma(pt), I915_CACHE_LLC)); - memset_p(pd->entry, pt, 512); -} - -static void init_pd(struct i915_address_space *vm, - struct i915_page_directory * const pd, - struct i915_page_directory * const to) -{ - GEM_DEBUG_BUG_ON(!pd_has_phys_page(pd)); - - fill_px(vm, pd, gen8_pdpe_encode(px_dma(to), I915_CACHE_LLC)); - memset_p(pd->entry, to, 512); -} - -/* - * PDE TLBs are a pain to invalidate on GEN8+. When we modify - * the page table structures, we mark them dirty so that - * context switching/execlist queuing code takes extra steps - * to ensure that tlbs are flushed. - */ -static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt) -{ - ppgtt->pd_dirty_engines = ALL_ENGINES; -} - -/* Removes entries from a single page table, releasing it if it's empty. - * Caller can use the return value to update higher-level entries. - */ -static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm, - struct i915_page_table *pt, - u64 start, u64 length) -{ - unsigned int num_entries = gen8_pte_count(start, length); - gen8_pte_t *vaddr; - - vaddr = kmap_atomic_px(pt); - memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries); - kunmap_atomic(vaddr); - - GEM_BUG_ON(num_entries > atomic_read(&pt->used)); - return !atomic_sub_return(num_entries, &pt->used); -} - -static void gen8_ppgtt_set_pde(struct i915_address_space *vm, - struct i915_page_directory *pd, - struct i915_page_table *pt, - unsigned int pde) -{ - gen8_pde_t *vaddr; - - vaddr = kmap_atomic_px(pd); - vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC); - kunmap_atomic(vaddr); -} - -static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, - struct i915_page_directory *pd, - u64 start, u64 length) -{ - struct i915_page_table *pt; - u32 pde; - - gen8_for_each_pde(pt, pd, start, length, pde) { - bool free = false; - - GEM_BUG_ON(pt == vm->scratch_pt); - - if (!gen8_ppgtt_clear_pt(vm, pt, start, length)) - continue; - - spin_lock(&pd->lock); - if (!atomic_read(&pt->used)) { - gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); - pd->entry[pde] = vm->scratch_pt; - - GEM_BUG_ON(!atomic_read(&pd->used)); - atomic_dec(&pd->used); - free = true; - } - spin_unlock(&pd->lock); - if (free) - free_pt(vm, pt); - } - - return !atomic_read(&pd->used); -} - -static void gen8_ppgtt_set_pdpe(struct i915_page_directory *pdp, - struct i915_page_directory *pd, - unsigned int pdpe) -{ - gen8_ppgtt_pdpe_t *vaddr; - - if (!pd_has_phys_page(pdp)) - return; - - vaddr = kmap_atomic_px(pdp); - vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); - kunmap_atomic(vaddr); -} - -/* Removes entries from a single page dir pointer, releasing it if it's empty. 
- * Caller can use the return value to update higher-level entries - */ -static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, - struct i915_page_directory * const pdp, - u64 start, u64 length) -{ - struct i915_page_directory *pd; - unsigned int pdpe; - - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - bool free = false; - - GEM_BUG_ON(pd == vm->scratch_pd); - - if (!gen8_ppgtt_clear_pd(vm, pd, start, length)) - continue; - - spin_lock(&pdp->lock); - if (!atomic_read(&pd->used)) { - gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe); - pdp->entry[pdpe] = vm->scratch_pd; - - GEM_BUG_ON(!atomic_read(&pdp->used)); - atomic_dec(&pdp->used); - free = true; - } - spin_unlock(&pdp->lock); - if (free) - free_pd(vm, pd); - } - - return !atomic_read(&pdp->used); -} - -static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, - u64 start, u64 length) -{ - gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length); -} - -static void gen8_ppgtt_set_pml4e(struct i915_page_directory *pml4, - struct i915_page_directory *pdp, - unsigned int pml4e) -{ - gen8_ppgtt_pml4e_t *vaddr; - - vaddr = kmap_atomic_px(pml4); - vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); - kunmap_atomic(vaddr); -} - -/* Removes entries from a single pml4. - * This is the top-level structure in 4-level page tables used on gen8+. - * Empty entries are always scratch pml4e. - */ -static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_page_directory * const pml4 = ppgtt->pd; - struct i915_page_directory *pdp; - unsigned int pml4e; - - GEM_BUG_ON(!i915_vm_is_4lvl(vm)); - - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - bool free = false; - GEM_BUG_ON(pdp == vm->scratch_pdp); - - if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length)) - continue; - - spin_lock(&pml4->lock); - if (!atomic_read(&pdp->used)) { - gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - pml4->entry[pml4e] = vm->scratch_pdp; - free = true; - } - spin_unlock(&pml4->lock); - if (free) - free_pd(vm, pdp); - } -} - -static inline struct sgt_dma { - struct scatterlist *sg; - dma_addr_t dma, max; -} sgt_dma(struct i915_vma *vma) { - struct scatterlist *sg = vma->pages->sgl; - dma_addr_t addr = sg_dma_address(sg); - return (struct sgt_dma) { sg, addr, addr + sg->length }; -} - -struct gen8_insert_pte { - u16 pml4e; - u16 pdpe; - u16 pde; - u16 pte; -}; - -static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) -{ - return (struct gen8_insert_pte) { - gen8_pml4e_index(start), - gen8_pdpe_index(start), - gen8_pde_index(start), - gen8_pte_index(start), - }; -} - -static __always_inline bool -gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, - struct i915_page_directory *pdp, - struct sgt_dma *iter, - struct gen8_insert_pte *idx, - enum i915_cache_level cache_level, - u32 flags) -{ - struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); - gen8_pte_t *vaddr; - bool ret; - - GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); - pd = i915_pd_entry(pdp, idx->pdpe); - vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); - do { - vaddr[idx->pte] = pte_encode | iter->dma; - - iter->dma += I915_GTT_PAGE_SIZE; - if (iter->dma >= iter->max) { - iter->sg = __sg_next(iter->sg); - if (!iter->sg) { - ret = false; - break; - } - - iter->dma = sg_dma_address(iter->sg); - iter->max = iter->dma + iter->sg->length; - } - - if (++idx->pte == GEN8_PTES) { - idx->pte = 0; - 
- if (++idx->pde == I915_PDES) { - idx->pde = 0; - - /* Limited by sg length for 3lvl */ - if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { - idx->pdpe = 0; - ret = true; - break; - } - - GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); - pd = pdp->entry[idx->pdpe]; - } - - kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); - } - } while (1); - kunmap_atomic(vaddr); - - return ret; -} - -static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = sgt_dma(vma); - struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); - - gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx, - cache_level, flags); - - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; -} - -static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, - struct i915_page_directory *pml4, - struct sgt_dma *iter, - enum i915_cache_level cache_level, - u32 flags) -{ - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); - u64 start = vma->node.start; - dma_addr_t rem = iter->sg->length; - - do { - struct gen8_insert_pte idx = gen8_insert_pte(start); - struct i915_page_directory *pdp = - i915_pdp_entry(pml4, idx.pml4e); - struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe); - unsigned int page_size; - bool maybe_64K = false; - gen8_pte_t encode = pte_encode; - gen8_pte_t *vaddr; - u16 index, max; - - if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && - IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && - rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) { - index = idx.pde; - max = I915_PDES; - page_size = I915_GTT_PAGE_SIZE_2M; - - encode |= GEN8_PDE_PS_2M; - - vaddr = kmap_atomic_px(pd); - } else { - struct i915_page_table *pt = i915_pt_entry(pd, idx.pde); - - index = idx.pte; - max = GEN8_PTES; - page_size = I915_GTT_PAGE_SIZE; - - if (!index && - vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && - IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && - (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || - rem >= (max - index) * I915_GTT_PAGE_SIZE)) - maybe_64K = true; - - vaddr = kmap_atomic_px(pt); - } - - do { - GEM_BUG_ON(iter->sg->length < page_size); - vaddr[index++] = encode | iter->dma; - - start += page_size; - iter->dma += page_size; - rem -= page_size; - if (iter->dma >= iter->max) { - iter->sg = __sg_next(iter->sg); - if (!iter->sg) - break; - - rem = iter->sg->length; - iter->dma = sg_dma_address(iter->sg); - iter->max = iter->dma + rem; - - if (maybe_64K && index < max && - !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && - (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || - rem >= (max - index) * I915_GTT_PAGE_SIZE))) - maybe_64K = false; - - if (unlikely(!IS_ALIGNED(iter->dma, page_size))) - break; - } - } while (rem >= page_size && index < max); - - kunmap_atomic(vaddr); - - /* - * Is it safe to mark the 2M block as 64K? -- Either we have - * filled whole page-table with 64K entries, or filled part of - * it and have reached the end of the sg table and we have - * enough padding. - */ - if (maybe_64K && - (index == max || - (i915_vm_has_scratch_64K(vma->vm) && - !iter->sg && IS_ALIGNED(vma->node.start + - vma->node.size, - I915_GTT_PAGE_SIZE_2M)))) { - vaddr = kmap_atomic_px(pd); - vaddr[idx.pde] |= GEN8_PDE_IPS_64K; - kunmap_atomic(vaddr); - page_size = I915_GTT_PAGE_SIZE_64K; - - /* - * We write all 4K page entries, even when using 64K - * pages. 
In order to verify that the HW isn't cheating - * by using the 4K PTE instead of the 64K PTE, we want - * to remove all the surplus entries. If the HW skipped - * the 64K PTE, it will read/write into the scratch page - * instead - which we detect as missing results during - * selftests. - */ - if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { - u16 i; - - encode = vma->vm->scratch_pte; - vaddr = kmap_atomic_px(i915_pt_entry(pd, - idx.pde)); - - for (i = 1; i < index; i += 16) - memset64(vaddr + i, encode, 15); - - kunmap_atomic(vaddr); - } - } - - vma->page_sizes.gtt |= page_size; - } while (iter->sg); -} - -static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = sgt_dma(vma); - struct i915_page_directory * const pml4 = ppgtt->pd; - - if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { - gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level, - flags); - } else { - struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); - - while (gen8_ppgtt_insert_pte_entries(ppgtt, - i915_pdp_entry(pml4, idx.pml4e++), - &iter, &idx, cache_level, - flags)) - GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); - - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; - } -} - -static void gen8_free_page_tables(struct i915_address_space *vm, - struct i915_page_directory *pd) -{ - int i; - - for (i = 0; i < I915_PDES; i++) { - if (pd->entry[i] != vm->scratch_pt) - free_pt(vm, pd->entry[i]); - } -} - -static int gen8_init_scratch(struct i915_address_space *vm) -{ - int ret; - - /* - * If everybody agrees to not to write into the scratch page, - * we can reuse it for all vm, keeping contexts and processes separate. - */ - if (vm->has_read_only && - vm->i915->kernel_context && - vm->i915->kernel_context->vm) { - struct i915_address_space *clone = vm->i915->kernel_context->vm; - - GEM_BUG_ON(!clone->has_read_only); - - vm->scratch_order = clone->scratch_order; - vm->scratch_pte = clone->scratch_pte; - vm->scratch_pt = clone->scratch_pt; - vm->scratch_pd = clone->scratch_pd; - vm->scratch_pdp = clone->scratch_pdp; - return 0; - } - - ret = setup_scratch_page(vm, __GFP_HIGHMEM); - if (ret) - return ret; - - vm->scratch_pte = - gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, - vm->has_read_only); - - vm->scratch_pt = alloc_pt(vm); - if (IS_ERR(vm->scratch_pt)) { - ret = PTR_ERR(vm->scratch_pt); - goto free_scratch_page; - } - - vm->scratch_pd = alloc_pd(vm); - if (IS_ERR(vm->scratch_pd)) { - ret = PTR_ERR(vm->scratch_pd); - goto free_pt; - } - - if (i915_vm_is_4lvl(vm)) { - vm->scratch_pdp = alloc_pd(vm); - if (IS_ERR(vm->scratch_pdp)) { - ret = PTR_ERR(vm->scratch_pdp); - goto free_pd; - } - } - - gen8_initialize_pt(vm, vm->scratch_pt); - init_pd_with_page(vm, vm->scratch_pd, vm->scratch_pt); - if (i915_vm_is_4lvl(vm)) - init_pd(vm, vm->scratch_pdp, vm->scratch_pd); - - return 0; - -free_pd: - free_pd(vm, vm->scratch_pd); -free_pt: - free_pt(vm, vm->scratch_pt); -free_scratch_page: - cleanup_scratch_page(vm); - - return ret; -} - -static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) -{ - struct i915_address_space *vm = &ppgtt->vm; - struct drm_i915_private *dev_priv = vm->i915; - enum vgt_g2v_type msg; - int i; - - if (i915_vm_is_4lvl(vm)) { - const u64 daddr = px_dma(ppgtt->pd); - - I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); - I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); - - msg = (create ? 
VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : - VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); - } else { - for (i = 0; i < GEN8_3LVL_PDPES; i++) { - const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); - - I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); - I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); - } - - msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : - VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); - } - - I915_WRITE(vgtif_reg(g2v_notify), msg); - - return 0; -} - -static void gen8_free_scratch(struct i915_address_space *vm) -{ - if (!vm->scratch_page.daddr) - return; - - if (i915_vm_is_4lvl(vm)) - free_pd(vm, vm->scratch_pdp); - free_pd(vm, vm->scratch_pd); - free_pt(vm, vm->scratch_pt); - cleanup_scratch_page(vm); -} - -static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, - struct i915_page_directory *pdp) -{ - const unsigned int pdpes = i915_pdpes_per_pdp(vm); - int i; - - for (i = 0; i < pdpes; i++) { - if (pdp->entry[i] == vm->scratch_pd) - continue; - - gen8_free_page_tables(vm, pdp->entry[i]); - free_pd(vm, pdp->entry[i]); - } - - free_pd(vm, pdp); -} - -static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) -{ - struct i915_page_directory * const pml4 = ppgtt->pd; - int i; - - for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { - struct i915_page_directory *pdp = i915_pdp_entry(pml4, i); - - if (pdp == ppgtt->vm.scratch_pdp) - continue; - - gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp); - } - - free_pd(&ppgtt->vm, pml4); -} - -static void gen8_ppgtt_cleanup(struct i915_address_space *vm) -{ - struct drm_i915_private *i915 = vm->i915; - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - - if (intel_vgpu_active(i915)) - gen8_ppgtt_notify_vgt(ppgtt, false); - - if (i915_vm_is_4lvl(vm)) - gen8_ppgtt_cleanup_4lvl(ppgtt); - else - gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd); - - gen8_free_scratch(vm); -} - -static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, - struct i915_page_directory *pd, - u64 start, u64 length) -{ - struct i915_page_table *pt, *alloc = NULL; - u64 from = start; - unsigned int pde; - int ret = 0; - - spin_lock(&pd->lock); - gen8_for_each_pde(pt, pd, start, length, pde) { - const int count = gen8_pte_count(start, length); - - if (pt == vm->scratch_pt) { - spin_unlock(&pd->lock); - - pt = fetch_and_zero(&alloc); - if (!pt) - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind; - } - - if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) - gen8_initialize_pt(vm, pt); - - spin_lock(&pd->lock); - if (pd->entry[pde] == vm->scratch_pt) { - gen8_ppgtt_set_pde(vm, pd, pt, pde); - pd->entry[pde] = pt; - atomic_inc(&pd->used); - } else { - alloc = pt; - pt = pd->entry[pde]; - } - } - - atomic_add(count, &pt->used); - } - spin_unlock(&pd->lock); - goto out; - -unwind: - gen8_ppgtt_clear_pd(vm, pd, from, start - from); -out: - if (alloc) - free_pt(vm, alloc); - return ret; -} - -static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, - struct i915_page_directory *pdp, - u64 start, u64 length) -{ - struct i915_page_directory *pd, *alloc = NULL; - u64 from = start; - unsigned int pdpe; - int ret = 0; - - spin_lock(&pdp->lock); - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (pd == vm->scratch_pd) { - spin_unlock(&pdp->lock); - - pd = fetch_and_zero(&alloc); - if (!pd) - pd = alloc_pd(vm); - if (IS_ERR(pd)) { - ret = PTR_ERR(pd); - goto unwind; - } - - init_pd_with_page(vm, pd, vm->scratch_pt); - - spin_lock(&pdp->lock); - if (pdp->entry[pdpe] == vm->scratch_pd) { - gen8_ppgtt_set_pdpe(pdp, pd, pdpe); - pdp->entry[pdpe] 
= pd; - atomic_inc(&pdp->used); - } else { - alloc = pd; - pd = pdp->entry[pdpe]; - } - } - atomic_inc(&pd->used); - spin_unlock(&pdp->lock); - - ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); - if (unlikely(ret)) - goto unwind_pd; - - spin_lock(&pdp->lock); - atomic_dec(&pd->used); - } - spin_unlock(&pdp->lock); - goto out; - -unwind_pd: - spin_lock(&pdp->lock); - if (atomic_dec_and_test(&pd->used)) { - gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe); - pdp->entry[pdpe] = vm->scratch_pd; - GEM_BUG_ON(!atomic_read(&pdp->used)); - atomic_dec(&pdp->used); - GEM_BUG_ON(alloc); - alloc = pd; /* defer the free to after the lock */ - } - spin_unlock(&pdp->lock); -unwind: - gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); -out: - if (alloc) - free_pd(vm, alloc); - return ret; -} - -static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, - u64 start, u64 length) -{ - return gen8_ppgtt_alloc_pdp(vm, - i915_vm_to_ppgtt(vm)->pd, start, length); -} - -static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_page_directory * const pml4 = ppgtt->pd; - struct i915_page_directory *pdp, *alloc = NULL; - u64 from = start; - int ret = 0; - u32 pml4e; - - spin_lock(&pml4->lock); - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (pdp == vm->scratch_pdp) { - spin_unlock(&pml4->lock); - - pdp = fetch_and_zero(&alloc); - if (!pdp) - pdp = alloc_pd(vm); - if (IS_ERR(pdp)) { - ret = PTR_ERR(pdp); - goto unwind; - } - - init_pd(vm, pdp, vm->scratch_pd); - - spin_lock(&pml4->lock); - if (pml4->entry[pml4e] == vm->scratch_pdp) { - gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); - pml4->entry[pml4e] = pdp; - } else { - alloc = pdp; - pdp = pml4->entry[pml4e]; - } - } - atomic_inc(&pdp->used); - spin_unlock(&pml4->lock); - - ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); - if (unlikely(ret)) - goto unwind_pdp; - - spin_lock(&pml4->lock); - atomic_dec(&pdp->used); - } - spin_unlock(&pml4->lock); - goto out; - -unwind_pdp: - spin_lock(&pml4->lock); - if (atomic_dec_and_test(&pdp->used)) { - gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - pml4->entry[pml4e] = vm->scratch_pdp; - GEM_BUG_ON(alloc); - alloc = pdp; /* defer the free until after the lock */ - } - spin_unlock(&pml4->lock); -unwind: - gen8_ppgtt_clear_4lvl(vm, from, start - from); -out: - if (alloc) - free_pd(vm, alloc); - return ret; -} - -static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) -{ - struct i915_address_space *vm = &ppgtt->vm; - struct i915_page_directory *pdp = ppgtt->pd; - struct i915_page_directory *pd; - u64 start = 0, length = ppgtt->vm.total; - u64 from = start; - unsigned int pdpe; - - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - pd = alloc_pd(vm); - if (IS_ERR(pd)) - goto unwind; - - init_pd_with_page(vm, pd, vm->scratch_pt); - gen8_ppgtt_set_pdpe(pdp, pd, pdpe); - - atomic_inc(&pdp->used); - } - - atomic_inc(&pdp->used); /* never remove */ - - return 0; - -unwind: - start -= from; - gen8_for_each_pdpe(pd, pdp, from, start, pdpe) { - gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe); - free_pd(vm, pd); - } - atomic_set(&pdp->used, 0); - return -ENOMEM; -} - -static void ppgtt_init(struct drm_i915_private *i915, - struct i915_ppgtt *ppgtt) -{ - ppgtt->vm.i915 = i915; - ppgtt->vm.dma = &i915->drm.pdev->dev; - ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size); - - i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); - - ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma; - ppgtt->vm.vma_ops.unbind_vma 
= ppgtt_unbind_vma; - ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages; - ppgtt->vm.vma_ops.clear_pages = clear_pages; -} - -/* - * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers - * with a net effect resembling a 2-level page table in normal x86 terms. Each - * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address - * space. - * - */ -static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) -{ - struct i915_ppgtt *ppgtt; - int err; - - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return ERR_PTR(-ENOMEM); - - ppgtt_init(i915, ppgtt); - - /* - * From bdw, there is hw support for read-only pages in the PPGTT. - * - * Gen11 has HSDES#:1807136187 unresolved. Disable ro support - * for now. - */ - ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11; - - /* There are only few exceptions for gen >=6. chv and bxt. - * And we are not sure about the latter so play safe for now. - */ - if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915)) - ppgtt->vm.pt_kmap_wc = true; - - err = gen8_init_scratch(&ppgtt->vm); - if (err) - goto err_free; - - ppgtt->pd = __alloc_pd(); - if (!ppgtt->pd) { - err = -ENOMEM; - goto err_free_scratch; - } - - if (i915_vm_is_4lvl(&ppgtt->vm)) { - err = setup_px(&ppgtt->vm, ppgtt->pd); - if (err) - goto err_free_pdp; - - init_pd(&ppgtt->vm, ppgtt->pd, ppgtt->vm.scratch_pdp); - - ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl; - ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl; - ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl; - } else { - /* - * We don't need to setup dma for top level pdp, only - * for entries. So point entries to scratch. - */ - memset_p(ppgtt->pd->entry, ppgtt->vm.scratch_pd, - GEN8_3LVL_PDPES); - - if (intel_vgpu_active(i915)) { - err = gen8_preallocate_top_level_pdp(ppgtt); - if (err) - goto err_free_pdp; - } - - ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl; - ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl; - ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl; - } - - if (intel_vgpu_active(i915)) - gen8_ppgtt_notify_vgt(ppgtt, true); - - ppgtt->vm.cleanup = gen8_ppgtt_cleanup; - - return ppgtt; - -err_free_pdp: - free_pd(&ppgtt->vm, ppgtt->pd); -err_free_scratch: - gen8_free_scratch(&ppgtt->vm); -err_free: - kfree(ppgtt); - return ERR_PTR(err); -} - -/* Write pde (index) from the page directory @pd to the page table @pt */ -static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt, - const unsigned int pde, - const struct i915_page_table *pt) -{ - /* Caller needs to make sure the write completes if necessary */ - iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, - ppgtt->pd_addr + pde); -} - -static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - u32 ecochk, ecobits; - enum intel_engine_id id; - - ecobits = I915_READ(GAC_ECO_BITS); - I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); - - ecochk = I915_READ(GAM_ECOCHK); - if (IS_HASWELL(dev_priv)) { - ecochk |= ECOCHK_PPGTT_WB_HSW; - } else { - ecochk |= ECOCHK_PPGTT_LLC_IVB; - ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; - } - I915_WRITE(GAM_ECOCHK, ecochk); - - for_each_engine(engine, dev_priv, id) { - /* GFX_MODE is per-ring on gen7+ */ - ENGINE_WRITE(engine, - RING_MODE_GEN7, - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - } -} - -static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) -{ - u32 ecochk, gab_ctl, ecobits; - - ecobits = I915_READ(GAC_ECO_BITS); - I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | - ECOBITS_PPGTT_CACHE64B); - - gab_ctl = 
I915_READ(GAB_CTL); - I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); - - ecochk = I915_READ(GAM_ECOCHK); - I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); - - if (HAS_PPGTT(dev_priv)) /* may be disabled for VT-d */ - I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); -} - -/* PPGTT support for Sandybdrige/Gen6 and later */ -static void gen6_ppgtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - const unsigned int first_entry = start / I915_GTT_PAGE_SIZE; - const gen6_pte_t scratch_pte = vm->scratch_pte; - unsigned int pde = first_entry / GEN6_PTES; - unsigned int pte = first_entry % GEN6_PTES; - unsigned int num_entries = length / I915_GTT_PAGE_SIZE; - - while (num_entries) { - struct i915_page_table * const pt = - i915_pt_entry(ppgtt->base.pd, pde++); - const unsigned int count = min(num_entries, GEN6_PTES - pte); - gen6_pte_t *vaddr; - - GEM_BUG_ON(pt == vm->scratch_pt); - - num_entries -= count; - - GEM_BUG_ON(count > atomic_read(&pt->used)); - if (!atomic_sub_return(count, &pt->used)) - ppgtt->scan_for_unused_pt = true; - - /* - * Note that the hw doesn't support removing PDE on the fly - * (they are cached inside the context with no means to - * invalidate the cache), so we can only reset the PTE - * entries back to scratch. - */ - - vaddr = kmap_atomic_px(pt); - memset32(vaddr + pte, scratch_pte, count); - kunmap_atomic(vaddr); - - pte = 0; - } -} - -static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_page_directory * const pd = ppgtt->pd; - unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE; - unsigned act_pt = first_entry / GEN6_PTES; - unsigned act_pte = first_entry % GEN6_PTES; - const u32 pte_encode = vm->pte_encode(0, cache_level, flags); - struct sgt_dma iter = sgt_dma(vma); - gen6_pte_t *vaddr; - - GEM_BUG_ON(i915_pt_entry(pd, act_pt) == vm->scratch_pt); - - vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); - do { - vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); - - iter.dma += I915_GTT_PAGE_SIZE; - if (iter.dma == iter.max) { - iter.sg = __sg_next(iter.sg); - if (!iter.sg) - break; - - iter.dma = sg_dma_address(iter.sg); - iter.max = iter.dma + iter.sg->length; - } - - if (++act_pte == GEN6_PTES) { - kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt)); - act_pte = 0; - } - } while (1); - kunmap_atomic(vaddr); - - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; -} - -static int gen6_alloc_va_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table *pt, *alloc = NULL; - intel_wakeref_t wakeref; - u64 from = start; - unsigned int pde; - bool flush = false; - int ret = 0; - - wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); - - spin_lock(&pd->lock); - gen6_for_each_pde(pt, pd, start, length, pde) { - const unsigned int count = gen6_pte_count(start, length); - - if (pt == vm->scratch_pt) { - spin_unlock(&pd->lock); - - pt = fetch_and_zero(&alloc); - if (!pt) - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind_out; - } - - gen6_initialize_pt(vm, pt); - - spin_lock(&pd->lock); - if (pd->entry[pde] == vm->scratch_pt) { - pd->entry[pde] = pt; - if 
(i915_vma_is_bound(ppgtt->vma, - I915_VMA_GLOBAL_BIND)) { - gen6_write_pde(ppgtt, pde, pt); - flush = true; - } - } else { - alloc = pt; - pt = pd->entry[pde]; - } - } - - atomic_add(count, &pt->used); - } - spin_unlock(&pd->lock); - - if (flush) { - mark_tlbs_dirty(&ppgtt->base); - gen6_ggtt_invalidate(vm->i915); - } - - goto out; - -unwind_out: - gen6_ppgtt_clear_range(vm, from, start - from); -out: - if (alloc) - free_pt(vm, alloc); - intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); - return ret; -} - -static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) -{ - struct i915_address_space * const vm = &ppgtt->base.vm; - struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table *unused; - u32 pde; - int ret; - - ret = setup_scratch_page(vm, __GFP_HIGHMEM); - if (ret) - return ret; - - vm->scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_NONE, - PTE_READ_ONLY); - - vm->scratch_pt = alloc_pt(vm); - if (IS_ERR(vm->scratch_pt)) { - cleanup_scratch_page(vm); - return PTR_ERR(vm->scratch_pt); - } - - gen6_initialize_pt(vm, vm->scratch_pt); - - gen6_for_all_pdes(unused, pd, pde) - pd->entry[pde] = vm->scratch_pt; - - return 0; -} - -static void gen6_ppgtt_free_scratch(struct i915_address_space *vm) -{ - free_pt(vm, vm->scratch_pt); - cleanup_scratch_page(vm); -} - -static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) -{ - struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table *pt; - u32 pde; - - gen6_for_all_pdes(pt, pd, pde) - if (pt != ppgtt->base.vm.scratch_pt) - free_pt(&ppgtt->base.vm, pt); -} - -struct gen6_ppgtt_cleanup_work { - struct work_struct base; - struct i915_vma *vma; -}; - -static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) -{ - struct gen6_ppgtt_cleanup_work *work = - container_of(wrk, typeof(*work), base); - /* Side note, vma->vm is the GGTT not the ppgtt we just destroyed! 
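[Illustrative sketch, not part of the diff] The allocate_va_range paths above (gen6_alloc_va_range() and the gen8 equivalents) share one locking pattern: drop the page-directory spinlock, allocate a fresh page table, retake the lock, and either publish the new table or, if another thread won the race, stash the loser in "alloc" so it can be reused or freed once the lock is released. A minimal userspace sketch of that pattern, using a pthread mutex and a single made-up shared slot instead of a directory of entries (none of these names are driver API):

#include <pthread.h>
#include <stdlib.h>

struct entry {
	int data;
};

static struct entry *slot;	/* shared pointer, protected by lock */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Return the published entry, allocating one only if the slot is empty. */
static struct entry *get_or_create(void)
{
	struct entry *fresh = NULL;
	struct entry *cur;

	pthread_mutex_lock(&lock);
	while (!(cur = slot)) {
		pthread_mutex_unlock(&lock);

		/* Allocate without holding the lock; we may lose the race. */
		fresh = calloc(1, sizeof(*fresh));
		if (!fresh)
			return NULL;

		pthread_mutex_lock(&lock);
		if (!slot) {
			slot = fresh;	/* we won: publish our allocation */
			fresh = NULL;
		}
		/* else: someone else published first; keep 'fresh' to free */
	}
	pthread_mutex_unlock(&lock);

	free(fresh);	/* defer freeing the loser until the lock is dropped */
	return cur;
}

int main(void)
{
	return get_or_create() ? 0 : 1;
}

The driver applies the same trick at every level of the page-table hierarchy and, as the "defer the free to after the lock" comments note, only frees the losing allocation once the spinlock has been dropped.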
*/ - struct drm_i915_private *i915 = work->vma->vm->i915; - - mutex_lock(&i915->drm.struct_mutex); - i915_vma_destroy(work->vma); - mutex_unlock(&i915->drm.struct_mutex); - - kfree(work); -} - -static int nop_set_pages(struct i915_vma *vma) -{ - return -ENODEV; -} - -static void nop_clear_pages(struct i915_vma *vma) -{ -} - -static int nop_bind(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 unused) -{ - return -ENODEV; -} - -static void nop_unbind(struct i915_vma *vma) -{ -} - -static const struct i915_vma_ops nop_vma_ops = { - .set_pages = nop_set_pages, - .clear_pages = nop_clear_pages, - .bind_vma = nop_bind, - .unbind_vma = nop_unbind, -}; - -static void gen6_ppgtt_cleanup(struct i915_address_space *vm) -{ - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - struct gen6_ppgtt_cleanup_work *work = ppgtt->work; - - /* FIXME remove the struct_mutex to bring the locking under control */ - INIT_WORK(&work->base, gen6_ppgtt_cleanup_work); - work->vma = ppgtt->vma; - work->vma->ops = &nop_vma_ops; - schedule_work(&work->base); - - gen6_ppgtt_free_pd(ppgtt); - gen6_ppgtt_free_scratch(vm); - kfree(ppgtt->base.pd); -} - -static int pd_vma_set_pages(struct i915_vma *vma) -{ - vma->pages = ERR_PTR(-ENODEV); - return 0; -} - -static void pd_vma_clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - - vma->pages = NULL; -} - -static int pd_vma_bind(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 unused) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); - struct gen6_ppgtt *ppgtt = vma->private; - u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE; - struct i915_page_table *pt; - unsigned int pde; - - ppgtt->base.pd->base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); - ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; - - gen6_for_all_pdes(pt, ppgtt->base.pd, pde) - gen6_write_pde(ppgtt, pde, pt); - - mark_tlbs_dirty(&ppgtt->base); - gen6_ggtt_invalidate(ppgtt->base.vm.i915); - - return 0; -} - -static void pd_vma_unbind(struct i915_vma *vma) -{ - struct gen6_ppgtt *ppgtt = vma->private; - struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt; - struct i915_page_table *pt; - unsigned int pde; - - if (!ppgtt->scan_for_unused_pt) - return; - - /* Free all no longer used page tables */ - gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { - if (atomic_read(&pt->used) || pt == scratch_pt) - continue; - - free_pt(&ppgtt->base.vm, pt); - pd->entry[pde] = scratch_pt; - } - - ppgtt->scan_for_unused_pt = false; -} - -static const struct i915_vma_ops pd_vma_ops = { - .set_pages = pd_vma_set_pages, - .clear_pages = pd_vma_clear_pages, - .bind_vma = pd_vma_bind, - .unbind_vma = pd_vma_unbind, -}; - -static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) -{ - struct drm_i915_private *i915 = ppgtt->base.vm.i915; - struct i915_ggtt *ggtt = &i915->ggtt; - struct i915_vma *vma; - - GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); - GEM_BUG_ON(size > ggtt->vm.total); - - vma = i915_vma_alloc(); - if (!vma) - return ERR_PTR(-ENOMEM); - - i915_active_init(i915, &vma->active, NULL); - INIT_ACTIVE_REQUEST(&vma->last_fence); - - vma->vm = &ggtt->vm; - vma->ops = &pd_vma_ops; - vma->private = ppgtt; - - vma->size = size; - vma->fence_size = size; - vma->flags = I915_VMA_GGTT; - vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ - - INIT_LIST_HEAD(&vma->obj_link); - INIT_LIST_HEAD(&vma->closed_link); - - 
mutex_lock(&vma->vm->mutex); - list_add(&vma->vm_link, &vma->vm->unbound_list); - mutex_unlock(&vma->vm->mutex); - - return vma; -} - -int gen6_ppgtt_pin(struct i915_ppgtt *base) -{ - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - int err; - - GEM_BUG_ON(ppgtt->base.vm.closed); - - /* - * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt - * which will be pinned into every active context. - * (When vma->pin_count becomes atomic, I expect we will naturally - * need a larger, unpacked, type and kill this redundancy.) - */ - if (ppgtt->pin_count++) - return 0; - - /* - * PPGTT PDEs reside in the GGTT and consists of 512 entries. The - * allocator works in address space sizes, so it's multiplied by page - * size. We allocate at the top of the GTT to avoid fragmentation. - */ - err = i915_vma_pin(ppgtt->vma, - 0, GEN6_PD_ALIGN, - PIN_GLOBAL | PIN_HIGH); - if (err) - goto unpin; - - return 0; - -unpin: - ppgtt->pin_count = 0; - return err; -} - -void gen6_ppgtt_unpin(struct i915_ppgtt *base) -{ - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - - GEM_BUG_ON(!ppgtt->pin_count); - if (--ppgtt->pin_count) - return; - - i915_vma_unpin(ppgtt->vma); -} - -void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) -{ - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - - if (!ppgtt->pin_count) - return; - - ppgtt->pin_count = 0; - i915_vma_unpin(ppgtt->vma); -} - -static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) -{ - struct i915_ggtt * const ggtt = &i915->ggtt; - struct gen6_ppgtt *ppgtt; - int err; - - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return ERR_PTR(-ENOMEM); - - ppgtt_init(i915, &ppgtt->base); - - ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; - ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; - ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; - ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; - - ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; - - ppgtt->work = kmalloc(sizeof(*ppgtt->work), GFP_KERNEL); - if (!ppgtt->work) { - err = -ENOMEM; - goto err_free; - } - - ppgtt->base.pd = __alloc_pd(); - if (!ppgtt->base.pd) { - err = -ENOMEM; - goto err_work; - } - - err = gen6_ppgtt_init_scratch(ppgtt); - if (err) - goto err_pd; - - ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); - if (IS_ERR(ppgtt->vma)) { - err = PTR_ERR(ppgtt->vma); - goto err_scratch; - } - - return &ppgtt->base; - -err_scratch: - gen6_ppgtt_free_scratch(&ppgtt->base.vm); -err_pd: - kfree(ppgtt->base.pd); -err_work: - kfree(ppgtt->work); -err_free: - kfree(ppgtt); - return ERR_PTR(err); -} - -static void gtt_write_workarounds(struct drm_i915_private *dev_priv) -{ - /* This function is for gtt related workarounds. This function is - * called on driver load and after a GPU reset, so you can place - * workarounds here even if they get overwritten by GPU reset. 
- */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ - if (IS_BROADWELL(dev_priv)) - I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); - else if (IS_CHERRYVIEW(dev_priv)) - I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_GEN9_LP(dev_priv)) - I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); - else if (INTEL_GEN(dev_priv) >= 9) - I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); - - /* - * To support 64K PTEs we need to first enable the use of the - * Intermediate-Page-Size(IPS) bit of the PDE field via some magical - * mmio, otherwise the page-walker will simply ignore the IPS bit. This - * shouldn't be needed after GEN10. - * - * 64K pages were first introduced from BDW+, although technically they - * only *work* from gen9+. For pre-BDW we instead have the option for - * 32K pages, but we don't currently have any support for it in our - * driver. - */ - if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) && - INTEL_GEN(dev_priv) <= 10) - I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA, - I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) | - GAMW_ECO_ENABLE_64K_IPS_FIELD); -} - -int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) -{ - gtt_write_workarounds(dev_priv); - - if (IS_GEN(dev_priv, 6)) - gen6_ppgtt_enable(dev_priv); - else if (IS_GEN(dev_priv, 7)) - gen7_ppgtt_enable(dev_priv); - - return 0; -} - -static struct i915_ppgtt * -__ppgtt_create(struct drm_i915_private *i915) -{ - if (INTEL_GEN(i915) < 8) - return gen6_ppgtt_create(i915); - else - return gen8_ppgtt_create(i915); -} - -struct i915_ppgtt * -i915_ppgtt_create(struct drm_i915_private *i915) -{ - struct i915_ppgtt *ppgtt; - - ppgtt = __ppgtt_create(i915); - if (IS_ERR(ppgtt)) - return ppgtt; - - trace_i915_ppgtt_create(&ppgtt->vm); - - return ppgtt; -} - -static void ppgtt_destroy_vma(struct i915_address_space *vm) -{ - struct list_head *phases[] = { - &vm->bound_list, - &vm->unbound_list, - NULL, - }, **phase; - - vm->closed = true; - for (phase = phases; *phase; phase++) { - struct i915_vma *vma, *vn; - - list_for_each_entry_safe(vma, vn, *phase, vm_link) - i915_vma_destroy(vma); - } -} - -void i915_vm_release(struct kref *kref) -{ - struct i915_address_space *vm = - container_of(kref, struct i915_address_space, ref); - - GEM_BUG_ON(i915_is_ggtt(vm)); - trace_i915_ppgtt_release(vm); - - ppgtt_destroy_vma(vm); - - GEM_BUG_ON(!list_empty(&vm->bound_list)); - GEM_BUG_ON(!list_empty(&vm->unbound_list)); - - vm->cleanup(vm); - i915_address_space_fini(vm); - - kfree(vm); -} - -/* Certain Gen5 chipsets require require idling the GPU before - * unmapping anything from the GTT when VT-d is enabled. - */ -static bool needs_idle_maps(struct drm_i915_private *dev_priv) -{ - /* Query intel_iommu to see if we need the workaround. Presumably that - * was loaded first. - */ - return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active(); -} - -void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - /* Don't bother messing with faults pre GEN6 as we have little - * documentation supporting that it's a good idea. 
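[Illustrative sketch, not part of the diff] i915_vm_release() above is the usual embedded-refcount shape: the kref lives inside the address space, and the release callback recovers the containing object with container_of() before tearing it down. A toy userspace version of that shape, with hand-rolled ref/vm types standing in for the kernel's kref (none of these names come from the driver, and the plain int counter is not thread-safe like the real atomic kref):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ref {
	int count;
	void (*release)(struct ref *ref);
};

struct vm {
	const char *name;
	struct ref ref;		/* refcount embedded in the object */
};

static void ref_put(struct ref *ref)
{
	if (--ref->count == 0)
		ref->release(ref);
}

/* Release callback: recover the container from the embedded ref. */
static void vm_release(struct ref *ref)
{
	struct vm *vm = container_of(ref, struct vm, ref);

	printf("releasing %s\n", vm->name);
	free(vm);
}

int main(void)
{
	struct vm *vm = malloc(sizeof(*vm));

	if (!vm)
		return 1;

	vm->name = "example";
	vm->ref.count = 1;
	vm->ref.release = vm_release;

	ref_put(&vm->ref);	/* last reference: vm_release() runs */
	return 0;
}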
- */ - if (INTEL_GEN(dev_priv) < 6) - return; - - i915_check_and_clear_faults(dev_priv); - - ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - - i915_ggtt_invalidate(dev_priv); -} int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) @@ -2344,364 +52,6 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, return -ENOSPC; } -static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) -{ - writeq(pte, addr); -} - -static void gen8_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 unused) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen8_pte_t __iomem *pte = - (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - - gen8_set_pte(pte, gen8_pte_encode(addr, level, 0)); - - ggtt->invalidate(vm->i915); -} - -static void gen8_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen8_pte_t __iomem *gtt_entries; - const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); - dma_addr_t addr; - - /* - * Note that we ignore PTE_READ_ONLY here. The caller must be careful - * not to allow the user to override access to a read only page. - */ - - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; - gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; - for_each_sgt_dma(addr, sgt_iter, vma->pages) - gen8_set_pte(gtt_entries++, pte_encode | addr); - - /* - * We want to flush the TLBs only after we're certain all the PTE - * updates have finished. - */ - ggtt->invalidate(vm->i915); -} - -static void gen6_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *pte = - (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - - iowrite32(vm->pte_encode(addr, level, flags), pte); - - ggtt->invalidate(vm->i915); -} - -/* - * Binds an object into the global gtt with the specified cache level. The object - * will be accessible to the GPU via commands whose operands reference offsets - * within the global GTT as well as accessible by the GPU through the GMADR - * mapped BAR (dev_priv->mm.gtt->gtt). - */ -static void gen6_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; - unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; - struct sgt_iter iter; - dma_addr_t addr; - for_each_sgt_dma(addr, iter, vma->pages) - iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); - - /* - * We want to flush the TLBs only after we're certain all the PTE - * updates have finished. 
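[Illustrative sketch, not part of the diff] The GGTT insert paths above share two details: the PTE slot for a GGTT address is simply offset / I915_GTT_PAGE_SIZE into the mapped gsm, and the TLB invalidate is issued once, only after every PTE in the batch has been written. A compilable sketch of that shape, with a plain array in place of the iomem mapping (insert_entries/invalidate and the flag value are illustrative, not driver API):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define GTT_PAGE_SIZE 4096u

static void invalidate(void)
{
	puts("TLB invalidated once, after all PTE writes");
}

/* 'gsm' stands in for the mapped PTE array; one PTE per GGTT page. */
static void insert_entries(uint64_t *gsm, uint64_t ggtt_offset,
			   const uint64_t *dma_addrs, size_t count,
			   uint64_t pte_flags)
{
	uint64_t *pte = gsm + ggtt_offset / GTT_PAGE_SIZE;
	size_t i;

	for (i = 0; i < count; i++)
		pte[i] = dma_addrs[i] | pte_flags;

	invalidate();	/* flush only after every update has landed */
}

int main(void)
{
	uint64_t gsm[16] = { 0 };
	uint64_t dma[2] = { 0x10000, 0x20000 };

	insert_entries(gsm, 2 * GTT_PAGE_SIZE, dma, 2, 0x1 /* valid bit */);
	printf("pte[2] = 0x%llx\n", (unsigned long long)gsm[2]);
	return 0;
}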
- */ - ggtt->invalidate(vm->i915); -} - -static void nop_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ -} - -static void gen8_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - unsigned first_entry = start / I915_GTT_PAGE_SIZE; - unsigned num_entries = length / I915_GTT_PAGE_SIZE; - const gen8_pte_t scratch_pte = vm->scratch_pte; - gen8_pte_t __iomem *gtt_base = - (gen8_pte_t __iomem *)ggtt->gsm + first_entry; - const int max_entries = ggtt_total_entries(ggtt) - first_entry; - int i; - - if (WARN(num_entries > max_entries, - "First entry = %d; Num entries = %d (max=%d)\n", - first_entry, num_entries, max_entries)) - num_entries = max_entries; - - for (i = 0; i < num_entries; i++) - gen8_set_pte(>t_base[i], scratch_pte); -} - -static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) -{ - struct drm_i915_private *dev_priv = vm->i915; - - /* - * Make sure the internal GAM fifo has been cleared of all GTT - * writes before exiting stop_machine(). This guarantees that - * any aperture accesses waiting to start in another process - * cannot back up behind the GTT writes causing a hang. - * The register can be any arbitrary GAM register. - */ - POSTING_READ(GFX_FLSH_CNTL_GEN6); -} - -struct insert_page { - struct i915_address_space *vm; - dma_addr_t addr; - u64 offset; - enum i915_cache_level level; -}; - -static int bxt_vtd_ggtt_insert_page__cb(void *_arg) -{ - struct insert_page *arg = _arg; - - gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 unused) -{ - struct insert_page arg = { vm, addr, offset, level }; - - stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); -} - -struct insert_entries { - struct i915_address_space *vm; - struct i915_vma *vma; - enum i915_cache_level level; - u32 flags; -}; - -static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) -{ - struct insert_entries *arg = _arg; - - gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level level, - u32 flags) -{ - struct insert_entries arg = { vm, vma, level, flags }; - - stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); -} - -struct clear_range { - struct i915_address_space *vm; - u64 start; - u64 length; -}; - -static int bxt_vtd_ggtt_clear_range__cb(void *_arg) -{ - struct clear_range *arg = _arg; - - gen8_ggtt_clear_range(arg->vm, arg->start, arg->length); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm, - u64 start, - u64 length) -{ - struct clear_range arg = { vm, start, length }; - - stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL); -} - -static void gen6_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - unsigned first_entry = start / I915_GTT_PAGE_SIZE; - unsigned num_entries = length / I915_GTT_PAGE_SIZE; - gen6_pte_t scratch_pte, __iomem *gtt_base = - (gen6_pte_t __iomem *)ggtt->gsm + first_entry; - const int max_entries = ggtt_total_entries(ggtt) - first_entry; - int i; - - if (WARN(num_entries > max_entries, - "First entry = %d; Num entries = %d (max=%d)\n", - 
first_entry, num_entries, max_entries)) - num_entries = max_entries; - - scratch_pte = vm->scratch_pte; - - for (i = 0; i < num_entries; i++) - iowrite32(scratch_pte, >t_base[i]); -} - -static void i915_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level cache_level, - u32 unused) -{ - unsigned int flags = (cache_level == I915_CACHE_NONE) ? - AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - - intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); -} - -static void i915_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 unused) -{ - unsigned int flags = (cache_level == I915_CACHE_NONE) ? - AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - - intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT, - flags); -} - -static void i915_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); -} - -static int ggtt_bind_vma(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - struct drm_i915_private *i915 = vma->vm->i915; - struct drm_i915_gem_object *obj = vma->obj; - intel_wakeref_t wakeref; - u32 pte_flags; - - /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ - pte_flags = 0; - if (i915_gem_object_is_readonly(obj)) - pte_flags |= PTE_READ_ONLY; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; - - /* - * Without aliasing PPGTT there's no difference between - * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally - * upgrade to both bound if we bind either to avoid double-binding. - */ - vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; - - return 0; -} - -static void ggtt_unbind_vma(struct i915_vma *vma) -{ - struct drm_i915_private *i915 = vma->vm->i915; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); -} - -static int aliasing_gtt_bind_vma(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - struct drm_i915_private *i915 = vma->vm->i915; - u32 pte_flags; - int ret; - - /* Currently applicable only to VLV */ - pte_flags = 0; - if (i915_gem_object_is_readonly(vma->obj)) - pte_flags |= PTE_READ_ONLY; - - if (flags & I915_VMA_LOCAL_BIND) { - struct i915_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - - if (!(vma->flags & I915_VMA_LOCAL_BIND)) { - ret = appgtt->vm.allocate_va_range(&appgtt->vm, - vma->node.start, - vma->size); - if (ret) - return ret; - } - - appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level, - pte_flags); - } - - if (flags & I915_VMA_GLOBAL_BIND) { - intel_wakeref_t wakeref; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - vma->vm->insert_entries(vma->vm, vma, - cache_level, pte_flags); - } - } - - return 0; -} - -static void aliasing_gtt_unbind_vma(struct i915_vma *vma) -{ - struct drm_i915_private *i915 = vma->vm->i915; - - if (vma->flags & I915_VMA_GLOBAL_BIND) { - struct i915_address_space *vm = vma->vm; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - vm->clear_range(vm, vma->node.start, vma->size); - } - - if (vma->flags & I915_VMA_LOCAL_BIND) { - struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm; - - vm->clear_range(vm, vma->node.start, vma->size); - } -} - void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct 
sg_table *pages) { @@ -2710,7 +60,9 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; if (unlikely(ggtt->do_idle_maps)) { - if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) { + /* XXX This does not prevent more requests being submitted! */ + if (intel_gt_retire_requests_timeout(ggtt->vm.gt, + -MAX_SCHEDULE_TIMEOUT)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); @@ -2720,1203 +72,6 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); } -static int ggtt_set_pages(struct i915_vma *vma) -{ - int ret; - - GEM_BUG_ON(vma->pages); - - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - - vma->page_sizes = vma->obj->mm.page_sizes; - - return 0; -} - -static void i915_gtt_color_adjust(const struct drm_mm_node *node, - unsigned long color, - u64 *start, - u64 *end) -{ - if (node->allocated && node->color != color) - *start += I915_GTT_PAGE_SIZE; - - /* Also leave a space between the unallocated reserved node after the - * GTT and any objects within the GTT, i.e. we use the color adjustment - * to insert a guard page to prevent prefetches crossing over the - * GTT boundary. - */ - node = list_next_entry(node, node_list); - if (node->color != color) - *end -= I915_GTT_PAGE_SIZE; -} - -static int init_aliasing_ppgtt(struct drm_i915_private *i915) -{ - struct i915_ggtt *ggtt = &i915->ggtt; - struct i915_ppgtt *ppgtt; - int err; - - ppgtt = i915_ppgtt_create(i915); - if (IS_ERR(ppgtt)) - return PTR_ERR(ppgtt); - - if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) { - err = -ENODEV; - goto err_ppgtt; - } - - /* - * Note we only pre-allocate as far as the end of the global - * GTT. On 48b / 4-level page-tables, the difference is very, - * very significant! We have to preallocate as GVT/vgpu does - * not like the page directory disappearing. - */ - err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total); - if (err) - goto err_ppgtt; - - i915->mm.aliasing_ppgtt = ppgtt; - - GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); - ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; - - GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); - ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; - - return 0; - -err_ppgtt: - i915_vm_put(&ppgtt->vm); - return err; -} - -static void fini_aliasing_ppgtt(struct drm_i915_private *i915) -{ - struct i915_ggtt *ggtt = &i915->ggtt; - struct i915_ppgtt *ppgtt; - - ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt); - if (!ppgtt) - return; - - i915_vm_put(&ppgtt->vm); - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; -} - -static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) -{ - u64 size; - int ret; - - if (!USES_GUC(ggtt->vm.i915)) - return 0; - - GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); - size = ggtt->vm.total - GUC_GGTT_TOP; - - ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size, - GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, - PIN_NOEVICT); - if (ret) - DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n"); - - return ret; -} - -static void ggtt_release_guc_top(struct i915_ggtt *ggtt) -{ - if (drm_mm_node_allocated(&ggtt->uc_fw)) - drm_mm_remove_node(&ggtt->uc_fw); -} - -int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) -{ - /* Let GEM Manage all of the aperture. - * - * However, leave one page at the end still bound to the scratch page. 
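[Illustrative sketch, not part of the diff] i915_gtt_color_adjust() above implements the colour-based guard-page trick: whenever a neighbour of a candidate hole carries a different drm_mm colour, the hole is shrunk by one GTT page on that side so prefetches cannot cross the boundary. Just that adjustment, with an illustrative node type in place of struct drm_mm_node:

#include <stdint.h>
#include <stdbool.h>

#define GUARD_PAGE 4096u

struct node {
	bool allocated;
	unsigned long color;
};

/*
 * Shrink the hole [*start, *end) so one guard page separates it from any
 * neighbour of a different colour, mirroring i915_gtt_color_adjust().
 */
static void color_adjust(const struct node *prev, const struct node *next,
			 unsigned long color, uint64_t *start, uint64_t *end)
{
	if (prev->allocated && prev->color != color)
		*start += GUARD_PAGE;

	if (next->color != color)
		*end -= GUARD_PAGE;
}

int main(void)
{
	struct node prev = { .allocated = true, .color = 1 };
	struct node next = { .allocated = true, .color = 0 };
	uint64_t start = 0x10000, end = 0x20000;

	color_adjust(&prev, &next, 0, &start, &end);
	/* start grows by a page (prev differs); end is untouched */
	return start == 0x11000 && end == 0x20000 ? 0 : 1;
}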
- * There are a number of places where the hardware apparently prefetches - * past the end of the object, and we've seen multiple hangs with the - * GPU head pointer stuck in a batchbuffer bound at the last page of the - * aperture. One page should be enough to keep any prefetching inside - * of the aperture. - */ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - unsigned long hole_start, hole_end; - struct drm_mm_node *entry; - int ret; - - /* - * GuC requires all resources that we're sharing with it to be placed in - * non-WOPCM memory. If GuC is not present or not in use we still need a - * small bias as ring wraparound at offset 0 sometimes hangs. No idea - * why. - */ - ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE, - intel_wopcm_guc_size(&dev_priv->wopcm)); - - ret = intel_vgt_balloon(dev_priv); - if (ret) - return ret; - - /* Reserve a mappable slot for our lockless error capture */ - ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture, - PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); - if (ret) - return ret; - - /* - * The upper portion of the GuC address space has a sizeable hole - * (several MB) that is inaccessible by GuC. Reserve this range within - * GGTT as it can comfortably hold GuC/HuC firmware images. - */ - ret = ggtt_reserve_guc_top(ggtt); - if (ret) - goto err_reserve; - - /* Clear any non-preallocated blocks */ - drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { - DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", - hole_start, hole_end); - ggtt->vm.clear_range(&ggtt->vm, hole_start, - hole_end - hole_start); - } - - /* And finally clear the reserved guard page */ - ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE); - - if (INTEL_PPGTT(dev_priv) == INTEL_PPGTT_ALIASING) { - ret = init_aliasing_ppgtt(dev_priv); - if (ret) - goto err_appgtt; - } - - return 0; - -err_appgtt: - ggtt_release_guc_top(ggtt); -err_reserve: - drm_mm_remove_node(&ggtt->error_capture); - return ret; -} - -/** - * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization - * @dev_priv: i915 device - */ -void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct i915_vma *vma, *vn; - struct pagevec *pvec; - - ggtt->vm.closed = true; - - mutex_lock(&dev_priv->drm.struct_mutex); - fini_aliasing_ppgtt(dev_priv); - - list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) - WARN_ON(i915_vma_unbind(vma)); - - if (drm_mm_node_allocated(&ggtt->error_capture)) - drm_mm_remove_node(&ggtt->error_capture); - - ggtt_release_guc_top(ggtt); - - if (drm_mm_initialized(&ggtt->vm.mm)) { - intel_vgt_deballoon(dev_priv); - i915_address_space_fini(&ggtt->vm); - } - - ggtt->vm.cleanup(&ggtt->vm); - - pvec = &dev_priv->mm.wc_stash.pvec; - if (pvec->nr) { - set_pages_array_wb(pvec->pages, pvec->nr); - __pagevec_release(pvec); - } - - mutex_unlock(&dev_priv->drm.struct_mutex); - - arch_phys_wc_del(ggtt->mtrr); - io_mapping_fini(&ggtt->iomap); - - i915_gem_cleanup_stolen(dev_priv); -} - -static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) -{ - snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; - snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; - return snb_gmch_ctl << 20; -} - -static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) -{ - bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; - bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; - if (bdw_gmch_ctl) - bdw_gmch_ctl = 1 << bdw_gmch_ctl; - -#ifdef CONFIG_X86_32 - /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE 
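[Illustrative sketch, not part of the diff] The GMCH decode helpers here differ by generation: on gen6 the GGMS field is the GTT size in MiB directly, while the gen8 and CHV variants treat it as an exponent ((1 << GGMS) MiB, with 0 meaning no GTT). A worked decode under those rules; the field position and mask below are placeholders, not the real SNB_GMCH_* definitions:

#include <stdio.h>

#define GGMS_SHIFT 8	/* placeholder field position */
#define GGMS_MASK  0x3	/* placeholder field mask */

/* gen6: the GGMS field is the GTT size in MiB. */
static unsigned int gen6_gtt_size(unsigned int gmch_ctl)
{
	return ((gmch_ctl >> GGMS_SHIFT) & GGMS_MASK) << 20;
}

/* gen8+: the field is an exponent, (1 << GGMS) MiB, with 0 meaning none. */
static unsigned int gen8_gtt_size(unsigned int gmch_ctl)
{
	unsigned int ggms = (gmch_ctl >> GGMS_SHIFT) & GGMS_MASK;

	return ggms ? (1u << ggms) << 20 : 0;
}

int main(void)
{
	unsigned int ctl = 2u << GGMS_SHIFT;	/* GGMS field == 2 */

	printf("gen6: %u MiB, gen8: %u MiB\n",
	       gen6_gtt_size(ctl) >> 20, gen8_gtt_size(ctl) >> 20);
	return 0;	/* prints "gen6: 2 MiB, gen8: 4 MiB" */
}

Once the table size is known, the probe code derives the addressable range as (size / sizeof(pte)) * I915_GTT_PAGE_SIZE, as done in the gen6/gen8 gmch_probe functions.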
*/ - if (bdw_gmch_ctl > 4) - bdw_gmch_ctl = 4; -#endif - - return bdw_gmch_ctl << 20; -} - -static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) -{ - gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; - gmch_ctrl &= SNB_GMCH_GGMS_MASK; - - if (gmch_ctrl) - return 1 << (20 + gmch_ctrl); - - return 0; -} - -static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) -{ - struct drm_i915_private *dev_priv = ggtt->vm.i915; - struct pci_dev *pdev = dev_priv->drm.pdev; - phys_addr_t phys_addr; - int ret; - - /* For Modern GENs the PTEs and register space are split in the BAR */ - phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; - - /* - * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range - * will be dropped. For WC mappings in general we have 64 byte burst - * writes when the WC buffer is flushed, so we can't use it, but have to - * resort to an uncached mapping. The WC issue is easily caught by the - * readback check when writing GTT PTE entries. - */ - if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10) - ggtt->gsm = ioremap_nocache(phys_addr, size); - else - ggtt->gsm = ioremap_wc(phys_addr, size); - if (!ggtt->gsm) { - DRM_ERROR("Failed to map the ggtt page table\n"); - return -ENOMEM; - } - - ret = setup_scratch_page(&ggtt->vm, GFP_DMA32); - if (ret) { - DRM_ERROR("Scratch setup failed\n"); - /* iounmap will also get called at remove, but meh */ - iounmap(ggtt->gsm); - return ret; - } - - ggtt->vm.scratch_pte = - ggtt->vm.pte_encode(ggtt->vm.scratch_page.daddr, - I915_CACHE_NONE, 0); - - return 0; -} - -static struct intel_ppat_entry * -__alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value) -{ - struct intel_ppat_entry *entry = &ppat->entries[index]; - - GEM_BUG_ON(index >= ppat->max_entries); - GEM_BUG_ON(test_bit(index, ppat->used)); - - entry->ppat = ppat; - entry->value = value; - kref_init(&entry->ref); - set_bit(index, ppat->used); - set_bit(index, ppat->dirty); - - return entry; -} - -static void __free_ppat_entry(struct intel_ppat_entry *entry) -{ - struct intel_ppat *ppat = entry->ppat; - unsigned int index = entry - ppat->entries; - - GEM_BUG_ON(index >= ppat->max_entries); - GEM_BUG_ON(!test_bit(index, ppat->used)); - - entry->value = ppat->clear_value; - clear_bit(index, ppat->used); - set_bit(index, ppat->dirty); -} - -/** - * intel_ppat_get - get a usable PPAT entry - * @i915: i915 device instance - * @value: the PPAT value required by the caller - * - * The function tries to search if there is an existing PPAT entry which - * matches with the required value. If perfectly matched, the existing PPAT - * entry will be used. If only partially matched, it will try to check if - * there is any available PPAT index. If yes, it will allocate a new PPAT - * index for the required entry and update the HW. If not, the partially - * matched entry will be used. 
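[Illustrative sketch, not part of the diff] The kernel-doc above describes intel_ppat_get()'s strategy: prefer a perfect match, otherwise remember the best partial match, and only fall back to it once no free PPAT index remains. The search half of that strategy in isolation (the entry/best_match names and the toy bit-count scorer are illustrative):

#include <stddef.h>
#include <stdio.h>

#define PERFECT_MATCH (~0u)	/* stand-in for INTEL_PPAT_PERFECT_MATCH */

struct entry {
	unsigned char value;
	int used;
};

/* Toy scorer: one point per matching bit, perfect if all 8 bits match. */
static unsigned int score_bits(unsigned char have, unsigned char want)
{
	unsigned int score = 0, bit;

	for (bit = 0; bit < 8; bit++)
		if (((have ^ want) & (1u << bit)) == 0)
			score++;

	return score == 8 ? PERFECT_MATCH : score;
}

/* Return the index of the best-scoring used entry, or -1 if none match. */
static int best_match(const struct entry *table, size_t n, unsigned char want,
		      unsigned int (*match)(unsigned char, unsigned char))
{
	unsigned int best_score = 0;
	int best = -1;
	size_t i;

	for (i = 0; i < n; i++) {
		unsigned int score;

		if (!table[i].used)
			continue;

		score = match(table[i].value, want);
		if (score == PERFECT_MATCH)
			return (int)i;	/* early out, like intel_ppat_get() */

		if (score > best_score) {
			best_score = score;
			best = (int)i;
		}
	}

	return best;
}

int main(void)
{
	struct entry table[] = {
		{ 0x03, 1 }, { 0x0f, 1 }, { 0x00, 0 },
	};

	printf("best index: %d\n", best_match(table, 3, 0x0b, score_bits));
	return 0;
}

The real helper additionally takes a reference on the chosen entry and, when a free slot still exists, allocates a new PPAT index and reprograms the hardware PAT instead of settling for a partial match.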
- */ -const struct intel_ppat_entry * -intel_ppat_get(struct drm_i915_private *i915, u8 value) -{ - struct intel_ppat *ppat = &i915->ppat; - struct intel_ppat_entry *entry = NULL; - unsigned int scanned, best_score; - int i; - - GEM_BUG_ON(!ppat->max_entries); - - scanned = best_score = 0; - for_each_set_bit(i, ppat->used, ppat->max_entries) { - unsigned int score; - - score = ppat->match(ppat->entries[i].value, value); - if (score > best_score) { - entry = &ppat->entries[i]; - if (score == INTEL_PPAT_PERFECT_MATCH) { - kref_get(&entry->ref); - return entry; - } - best_score = score; - } - scanned++; - } - - if (scanned == ppat->max_entries) { - if (!entry) - return ERR_PTR(-ENOSPC); - - kref_get(&entry->ref); - return entry; - } - - i = find_first_zero_bit(ppat->used, ppat->max_entries); - entry = __alloc_ppat_entry(ppat, i, value); - ppat->update_hw(i915); - return entry; -} - -static void release_ppat(struct kref *kref) -{ - struct intel_ppat_entry *entry = - container_of(kref, struct intel_ppat_entry, ref); - struct drm_i915_private *i915 = entry->ppat->i915; - - __free_ppat_entry(entry); - entry->ppat->update_hw(i915); -} - -/** - * intel_ppat_put - put back the PPAT entry got from intel_ppat_get() - * @entry: an intel PPAT entry - * - * Put back the PPAT entry got from intel_ppat_get(). If the PPAT index of the - * entry is dynamically allocated, its reference count will be decreased. Once - * the reference count becomes into zero, the PPAT index becomes free again. - */ -void intel_ppat_put(const struct intel_ppat_entry *entry) -{ - struct intel_ppat *ppat = entry->ppat; - unsigned int index = entry - ppat->entries; - - GEM_BUG_ON(!ppat->max_entries); - - kref_put(&ppat->entries[index].ref, release_ppat); -} - -static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv) -{ - struct intel_ppat *ppat = &dev_priv->ppat; - int i; - - for_each_set_bit(i, ppat->dirty, ppat->max_entries) { - I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value); - clear_bit(i, ppat->dirty); - } -} - -static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv) -{ - struct intel_ppat *ppat = &dev_priv->ppat; - u64 pat = 0; - int i; - - for (i = 0; i < ppat->max_entries; i++) - pat |= GEN8_PPAT(i, ppat->entries[i].value); - - bitmap_clear(ppat->dirty, 0, ppat->max_entries); - - I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); - I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); -} - -static unsigned int bdw_private_pat_match(u8 src, u8 dst) -{ - unsigned int score = 0; - enum { - AGE_MATCH = BIT(0), - TC_MATCH = BIT(1), - CA_MATCH = BIT(2), - }; - - /* Cache attribute has to be matched. */ - if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst)) - return 0; - - score |= CA_MATCH; - - if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst)) - score |= TC_MATCH; - - if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst)) - score |= AGE_MATCH; - - if (score == (AGE_MATCH | TC_MATCH | CA_MATCH)) - return INTEL_PPAT_PERFECT_MATCH; - - return score; -} - -static unsigned int chv_private_pat_match(u8 src, u8 dst) -{ - return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ? 
- INTEL_PPAT_PERFECT_MATCH : 0; -} - -static void cnl_setup_private_ppat(struct intel_ppat *ppat) -{ - ppat->max_entries = 8; - ppat->update_hw = cnl_private_pat_update_hw; - ppat->match = bdw_private_pat_match; - ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3); - - __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); - __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); - __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); - __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); - __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); - __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); - __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); - __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); -} - -/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability - * bits. When using advanced contexts each context stores its own PAT, but - * writing this data shouldn't be harmful even in those cases. */ -static void bdw_setup_private_ppat(struct intel_ppat *ppat) -{ - ppat->max_entries = 8; - ppat->update_hw = bdw_private_pat_update_hw; - ppat->match = bdw_private_pat_match; - ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3); - - if (!HAS_PPGTT(ppat->i915)) { - /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, - * so RTL will always use the value corresponding to - * pat_sel = 000". - * So let's disable cache for GGTT to avoid screen corruptions. - * MOCS still can be used though. - * - System agent ggtt writes (i.e. cpu gtt mmaps) already work - * before this patch, i.e. the same uncached + snooping access - * like on gen6/7 seems to be in effect. - * - So this just fixes blitter/render access. Again it looks - * like it's not just uncached access, but uncached + snooping. - * So we can still hold onto all our assumptions wrt cpu - * clflushing on LLC machines. - */ - __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC); - return; - } - - __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); /* for normal objects, no eLLC */ - __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); /* for something pointing to ptes? */ - __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */ - __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); /* Uncached objects, mostly for scanout */ - __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); - __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); - __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); - __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); -} - -static void chv_setup_private_ppat(struct intel_ppat *ppat) -{ - ppat->max_entries = 8; - ppat->update_hw = bdw_private_pat_update_hw; - ppat->match = chv_private_pat_match; - ppat->clear_value = CHV_PPAT_SNOOP; - - /* - * Map WB on BDW to snooped on CHV. - * - * Only the snoop bit has meaning for CHV, the rest is - * ignored. - * - * The hardware will never snoop for certain types of accesses: - * - CPU GTT (GMADR->GGTT->no snoop->memory) - * - PPGTT page tables - * - some other special cycles - * - * As with BDW, we also need to consider the following for GT accesses: - * "For GGTT, there is NO pat_sel[2:0] from the entry, - * so RTL will always use the value corresponding to - * pat_sel = 000". 
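[Illustrative sketch, not part of the diff] bdw_private_pat_update_hw() above packs all eight PPAT entries into one 64-bit value, one byte per index, and writes it to the hardware as two 32-bit halves. The packing arithmetic on its own; PPAT_FIELD approximates what GEN8_PPAT() expands to, and the entry values are arbitrary:

#include <stdint.h>
#include <stdio.h>

/* Each PPAT entry occupies one byte of the 64-bit private PAT value. */
#define PPAT_FIELD(i, v) ((uint64_t)(v) << ((i) * 8))

int main(void)
{
	uint8_t entries[8] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
	uint64_t pat = 0;
	int i;

	for (i = 0; i < 8; i++)
		pat |= PPAT_FIELD(i, entries[i]);

	/* The hardware register is written as two 32-bit halves. */
	printf("PAT_LO = 0x%08x\n", (unsigned)(pat & 0xffffffffu));
	printf("PAT_HI = 0x%08x\n", (unsigned)(pat >> 32));
	return 0;
}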
- * Which means we must set the snoop bit in PAT entry 0 - * in order to keep the global status page working. - */ - - __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP); - __alloc_ppat_entry(ppat, 1, 0); - __alloc_ppat_entry(ppat, 2, 0); - __alloc_ppat_entry(ppat, 3, 0); - __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP); - __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP); - __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP); - __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP); -} - -static void gen6_gmch_remove(struct i915_address_space *vm) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - - iounmap(ggtt->gsm); - cleanup_scratch_page(vm); -} - -static void setup_private_pat(struct drm_i915_private *dev_priv) -{ - struct intel_ppat *ppat = &dev_priv->ppat; - int i; - - ppat->i915 = dev_priv; - - if (INTEL_GEN(dev_priv) >= 10) - cnl_setup_private_ppat(ppat); - else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) - chv_setup_private_ppat(ppat); - else - bdw_setup_private_ppat(ppat); - - GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES); - - for_each_clear_bit(i, ppat->used, ppat->max_entries) { - ppat->entries[i].value = ppat->clear_value; - ppat->entries[i].ppat = ppat; - set_bit(i, ppat->dirty); - } - - ppat->update_hw(dev_priv); -} - -static int gen8_gmch_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *dev_priv = ggtt->vm.i915; - struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int size; - u16 snb_gmch_ctl; - int err; - - /* TODO: We're not aware of mappable constraints on gen8 yet */ - ggtt->gmadr = - (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), - pci_resource_len(pdev, 2)); - ggtt->mappable_end = resource_size(&ggtt->gmadr); - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); - if (err) - DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); - - pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - if (IS_CHERRYVIEW(dev_priv)) - size = chv_get_total_gtt_size(snb_gmch_ctl); - else - size = gen8_get_total_gtt_size(snb_gmch_ctl); - - ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; - ggtt->vm.cleanup = gen6_gmch_remove; - ggtt->vm.insert_page = gen8_ggtt_insert_page; - ggtt->vm.clear_range = nop_clear_range; - if (intel_scanout_needs_vtd_wa(dev_priv)) - ggtt->vm.clear_range = gen8_ggtt_clear_range; - - ggtt->vm.insert_entries = gen8_ggtt_insert_entries; - - /* Serialize GTT updates with aperture access on BXT if VT-d is on. */ - if (intel_ggtt_update_needs_vtd_wa(dev_priv) || - IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) { - ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; - ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; - if (ggtt->vm.clear_range != nop_clear_range) - ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; - - /* Prevent recursively calling stop_machine() and deadlocks. 
*/ - dev_info(dev_priv->drm.dev, - "Disabling error capture for VT-d workaround\n"); - i915_disable_error_state(dev_priv, -ENODEV); - } - - ggtt->invalidate = gen6_ggtt_invalidate; - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; - - ggtt->vm.pte_encode = gen8_pte_encode; - - setup_private_pat(dev_priv); - - return ggtt_probe_common(ggtt, size); -} - -static int gen6_gmch_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *dev_priv = ggtt->vm.i915; - struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int size; - u16 snb_gmch_ctl; - int err; - - ggtt->gmadr = - (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), - pci_resource_len(pdev, 2)); - ggtt->mappable_end = resource_size(&ggtt->gmadr); - - /* 64/512MB is the current min/max we actually know of, but this is just - * a coarse sanity check. - */ - if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { - DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end); - return -ENXIO; - } - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); - if (err) - DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); - pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - - size = gen6_get_total_gtt_size(snb_gmch_ctl); - ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; - - ggtt->vm.clear_range = nop_clear_range; - if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) - ggtt->vm.clear_range = gen6_ggtt_clear_range; - ggtt->vm.insert_page = gen6_ggtt_insert_page; - ggtt->vm.insert_entries = gen6_ggtt_insert_entries; - ggtt->vm.cleanup = gen6_gmch_remove; - - ggtt->invalidate = gen6_ggtt_invalidate; - - if (HAS_EDRAM(dev_priv)) - ggtt->vm.pte_encode = iris_pte_encode; - else if (IS_HASWELL(dev_priv)) - ggtt->vm.pte_encode = hsw_pte_encode; - else if (IS_VALLEYVIEW(dev_priv)) - ggtt->vm.pte_encode = byt_pte_encode; - else if (INTEL_GEN(dev_priv) >= 7) - ggtt->vm.pte_encode = ivb_pte_encode; - else - ggtt->vm.pte_encode = snb_pte_encode; - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; - - return ggtt_probe_common(ggtt, size); -} - -static void i915_gmch_remove(struct i915_address_space *vm) -{ - intel_gmch_remove(); -} - -static int i915_gmch_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *dev_priv = ggtt->vm.i915; - phys_addr_t gmadr_base; - int ret; - - ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); - if (!ret) { - DRM_ERROR("failed to set up gmch\n"); - return -EIO; - } - - intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); - - ggtt->gmadr = - (struct resource) DEFINE_RES_MEM(gmadr_base, - ggtt->mappable_end); - - ggtt->do_idle_maps = needs_idle_maps(dev_priv); - ggtt->vm.insert_page = i915_ggtt_insert_page; - ggtt->vm.insert_entries = i915_ggtt_insert_entries; - ggtt->vm.clear_range = i915_ggtt_clear_range; - ggtt->vm.cleanup = i915_gmch_remove; - - ggtt->invalidate = gmch_ggtt_invalidate; - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; - - if (unlikely(ggtt->do_idle_maps)) - DRM_INFO("applying Ironlake quirks for intel_iommu\n"); - 
- return 0; -} - -/** - * i915_ggtt_probe_hw - Probe GGTT hardware location - * @dev_priv: i915 device - */ -int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - int ret; - - ggtt->vm.i915 = dev_priv; - ggtt->vm.dma = &dev_priv->drm.pdev->dev; - - if (INTEL_GEN(dev_priv) <= 5) - ret = i915_gmch_probe(ggtt); - else if (INTEL_GEN(dev_priv) < 8) - ret = gen6_gmch_probe(ggtt); - else - ret = gen8_gmch_probe(ggtt); - if (ret) - return ret; - - if ((ggtt->vm.total - 1) >> 32) { - DRM_ERROR("We never expected a Global GTT with more than 32bits" - " of address space! Found %lldM!\n", - ggtt->vm.total >> 20); - ggtt->vm.total = 1ULL << 32; - ggtt->mappable_end = - min_t(u64, ggtt->mappable_end, ggtt->vm.total); - } - - if (ggtt->mappable_end > ggtt->vm.total) { - DRM_ERROR("mappable aperture extends past end of GGTT," - " aperture=%pa, total=%llx\n", - &ggtt->mappable_end, ggtt->vm.total); - ggtt->mappable_end = ggtt->vm.total; - } - - /* GMADR is the PCI mmio aperture into the global GTT. */ - DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20); - DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); - DRM_DEBUG_DRIVER("DSM size = %lluM\n", - (u64)resource_size(&intel_graphics_stolen_res) >> 20); - if (intel_vtd_active()) - DRM_INFO("VT-d active for gfx access\n"); - - return 0; -} - -/** - * i915_ggtt_init_hw - Initialize GGTT hardware - * @dev_priv: i915 device - */ -int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - int ret; - - stash_init(&dev_priv->mm.wc_stash); - - /* Note that we use page colouring to enforce a guard page at the - * end of the address space. This is required as the CS may prefetch - * beyond the end of the batch buffer, across the page boundary, - * and beyond the end of the GTT if we do not provide a guard. - */ - mutex_lock(&dev_priv->drm.struct_mutex); - i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); - - ggtt->vm.is_ggtt = true; - - /* Only VLV supports read-only GGTT mappings */ - ggtt->vm.has_read_only = IS_VALLEYVIEW(dev_priv); - - if (!HAS_LLC(dev_priv) && !HAS_PPGTT(dev_priv)) - ggtt->vm.mm.color_adjust = i915_gtt_color_adjust; - mutex_unlock(&dev_priv->drm.struct_mutex); - - if (!io_mapping_init_wc(&dev_priv->ggtt.iomap, - dev_priv->ggtt.gmadr.start, - dev_priv->ggtt.mappable_end)) { - ret = -EIO; - goto out_gtt_cleanup; - } - - ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end); - - i915_ggtt_init_fences(ggtt); - - /* - * Initialise stolen early so that we may reserve preallocated - * objects for the BIOS to KMS transition. 
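[Illustrative sketch, not part of the diff] i915_ggtt_probe_hw() above also applies two sanity clamps after probing: the global GTT is never allowed to cover more than 32 bits of address space, and the mappable aperture may never extend past the end of the GTT. With the warning messages dropped, the clamping reduces to:

#include <stdint.h>

static void clamp_ggtt(uint64_t *total, uint64_t *mappable_end)
{
	/* More than 32 bits of GGTT was never expected; cap it at 4 GiB. */
	if ((*total - 1) >> 32)
		*total = 1ull << 32;

	/* The CPU-visible aperture cannot reach beyond the GGTT itself. */
	if (*mappable_end > *total)
		*mappable_end = *total;
}

int main(void)
{
	uint64_t total = 1ull << 36, mappable = 1ull << 33;

	clamp_ggtt(&total, &mappable);
	return total == (1ull << 32) && mappable == total ? 0 : 1;
}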
- */ - ret = i915_gem_init_stolen(dev_priv); - if (ret) - goto out_gtt_cleanup; - - return 0; - -out_gtt_cleanup: - ggtt->vm.cleanup(&ggtt->vm); - return ret; -} - -int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) -{ - if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) - return -EIO; - - return 0; -} - -void i915_ggtt_enable_guc(struct drm_i915_private *i915) -{ - GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate); - - i915->ggtt.invalidate = guc_ggtt_invalidate; - - i915_ggtt_invalidate(i915); -} - -void i915_ggtt_disable_guc(struct drm_i915_private *i915) -{ - /* XXX Temporary pardon for error unload */ - if (i915->ggtt.invalidate == gen6_ggtt_invalidate) - return; - - /* We should only be called after i915_ggtt_enable_guc() */ - GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate); - - i915->ggtt.invalidate = gen6_ggtt_invalidate; - - i915_ggtt_invalidate(i915); -} - -void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct i915_vma *vma, *vn; - - i915_check_and_clear_faults(dev_priv); - - mutex_lock(&ggtt->vm.mutex); - - /* First fill our portion of the GTT with scratch pages */ - ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ - - /* clflush objects bound into the GGTT and rebind them. */ - list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { - struct drm_i915_gem_object *obj = vma->obj; - - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) - continue; - - mutex_unlock(&ggtt->vm.mutex); - - if (!i915_vma_unbind(vma)) - goto lock; - - WARN_ON(i915_vma_bind(vma, - obj ? obj->cache_level : 0, - PIN_UPDATE)); - if (obj) { - i915_gem_object_lock(obj); - WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); - i915_gem_object_unlock(obj); - } - -lock: - mutex_lock(&ggtt->vm.mutex); - } - - ggtt->vm.closed = false; - i915_ggtt_invalidate(dev_priv); - - mutex_unlock(&ggtt->vm.mutex); - - if (INTEL_GEN(dev_priv) >= 8) { - struct intel_ppat *ppat = &dev_priv->ppat; - - bitmap_set(ppat->dirty, 0, ppat->max_entries); - dev_priv->ppat.update_hw(dev_priv); - return; - } -} - -static struct scatterlist * -rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, - unsigned int width, unsigned int height, - unsigned int stride, - struct sg_table *st, struct scatterlist *sg) -{ - unsigned int column, row; - unsigned int src_idx; - - for (column = 0; column < width; column++) { - src_idx = stride * (height - 1) + column + offset; - for (row = 0; row < height; row++) { - st->nents++; - /* We don't need the pages, but need to initialize - * the entries so the sg list can be happily traversed. - * The only thing we need are DMA addresses. - */ - sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0); - sg_dma_address(sg) = - i915_gem_object_get_dma_address(obj, src_idx); - sg_dma_len(sg) = I915_GTT_PAGE_SIZE; - sg = sg_next(sg); - src_idx -= stride; - } - } - - return sg; -} - -static noinline struct sg_table * -intel_rotate_pages(struct intel_rotation_info *rot_info, - struct drm_i915_gem_object *obj) -{ - unsigned int size = intel_rotation_info_size(rot_info); - struct sg_table *st; - struct scatterlist *sg; - int ret = -ENOMEM; - int i; - - /* Allocate target SG list. 
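[Illustrative sketch, not part of the diff] rotate_pages() above builds the rotated GGTT view purely by index arithmetic: for each destination column it starts at the bottom of the source column (stride * (height - 1) + column + offset) and walks upwards by subtracting the stride, filling in nothing but DMA addresses. The walk on its own, printing which source tile lands in which destination slot (tile-sized pages and a tightly packed 2x3 grid are assumed):

#include <stdio.h>

/*
 * Print the source page index visited for each destination slot when
 * rotating a width x height tile grid, following the same walk as
 * rotate_pages(): start at the bottom of a column and step up by
 * 'stride' pages, one column at a time.
 */
static void show_rotation(unsigned int width, unsigned int height,
			  unsigned int stride, unsigned int offset)
{
	unsigned int column, row, src_idx;

	for (column = 0; column < width; column++) {
		src_idx = stride * (height - 1) + column + offset;
		for (row = 0; row < height; row++) {
			printf("dst %u <- src %u\n",
			       column * height + row, src_idx);
			src_idx -= stride;
		}
	}
}

int main(void)
{
	show_rotation(2, 3, 2, 0);	/* a 2x3 grid, tightly packed */
	return 0;
}

remap_pages() is the simpler cousin: it copies rows in source order, chunking each row by the available extent length and skipping stride - width pages between rows.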
*/ - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, size, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - st->nents = 0; - sg = st->sgl; - - for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { - sg = rotate_pages(obj, rot_info->plane[i].offset, - rot_info->plane[i].width, rot_info->plane[i].height, - rot_info->plane[i].stride, st, sg); - } - - return st; - -err_sg_alloc: - kfree(st); -err_st_alloc: - - DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); - - return ERR_PTR(ret); -} - -static struct scatterlist * -remap_pages(struct drm_i915_gem_object *obj, unsigned int offset, - unsigned int width, unsigned int height, - unsigned int stride, - struct sg_table *st, struct scatterlist *sg) -{ - unsigned int row; - - for (row = 0; row < height; row++) { - unsigned int left = width * I915_GTT_PAGE_SIZE; - - while (left) { - dma_addr_t addr; - unsigned int length; - - /* We don't need the pages, but need to initialize - * the entries so the sg list can be happily traversed. - * The only thing we need are DMA addresses. - */ - - addr = i915_gem_object_get_dma_address_len(obj, offset, &length); - - length = min(left, length); - - st->nents++; - - sg_set_page(sg, NULL, length, 0); - sg_dma_address(sg) = addr; - sg_dma_len(sg) = length; - sg = sg_next(sg); - - offset += length / I915_GTT_PAGE_SIZE; - left -= length; - } - - offset += stride - width; - } - - return sg; -} - -static noinline struct sg_table * -intel_remap_pages(struct intel_remapped_info *rem_info, - struct drm_i915_gem_object *obj) -{ - unsigned int size = intel_remapped_info_size(rem_info); - struct sg_table *st; - struct scatterlist *sg; - int ret = -ENOMEM; - int i; - - /* Allocate target SG list. */ - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, size, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - st->nents = 0; - sg = st->sgl; - - for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) { - sg = remap_pages(obj, rem_info->plane[i].offset, - rem_info->plane[i].width, rem_info->plane[i].height, - rem_info->plane[i].stride, st, sg); - } - - i915_sg_trim(st); - - return st; - -err_sg_alloc: - kfree(st); -err_st_alloc: - - DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size); - - return ERR_PTR(ret); -} - -static noinline struct sg_table * -intel_partial_pages(const struct i915_ggtt_view *view, - struct drm_i915_gem_object *obj) -{ - struct sg_table *st; - struct scatterlist *sg, *iter; - unsigned int count = view->partial.size; - unsigned int offset; - int ret = -ENOMEM; - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, count, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); - GEM_BUG_ON(!iter); - - sg = st->sgl; - st->nents = 0; - do { - unsigned int len; - - len = min(iter->length - (offset << PAGE_SHIFT), - count << PAGE_SHIFT); - sg_set_page(sg, NULL, len, 0); - sg_dma_address(sg) = - sg_dma_address(iter) + (offset << PAGE_SHIFT); - sg_dma_len(sg) = len; - - st->nents++; - count -= len >> PAGE_SHIFT; - if (count == 0) { - sg_mark_end(sg); - i915_sg_trim(st); /* Drop any unused tail entries. 
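remap_pages() above walks the remapped view row by row: it asks the object for a DMA address plus the length of the contiguous run starting there, clamps that run to what is left of the current row, and skips (stride - width) pages between rows. The sketch below models that walk in plain C; fake_get_dma_address_len() is a made-up stand-in for i915_gem_object_get_dma_address_len(), assuming 2-page contiguous runs.

/* Standalone sketch: row-by-row walk with run coalescing, as in remap_pages(). */
#include <stdio.h>

#define PAGE_SZ 4096u

/* hypothetical stand-in for i915_gem_object_get_dma_address_len() */
static unsigned long fake_get_dma_address_len(unsigned int page, unsigned int *len)
{
        *len = 2 * PAGE_SZ;                 /* pretend runs are 2 pages long */
        return 0x100000ul + page * PAGE_SZ; /* made-up dma address */
}

int main(void)
{
        const unsigned int width = 3, height = 2, stride = 5;
        unsigned int row, offset = 0;

        for (row = 0; row < height; row++) {
                unsigned int left = width * PAGE_SZ;

                while (left) {
                        unsigned int len;
                        unsigned long addr = fake_get_dma_address_len(offset, &len);

                        if (len > left)
                                len = left;  /* clamp to the rest of this row */
                        printf("sg entry: dma 0x%lx, %u bytes\n", addr, len);
                        offset += len / PAGE_SZ;
                        left -= len;
                }
                offset += stride - width;        /* skip padding between rows */
        }
        return 0;
}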
*/ - - return st; - } - - sg = __sg_next(sg); - iter = __sg_next(iter); - offset = 0; - } while (1); - -err_sg_alloc: - kfree(st); -err_st_alloc: - return ERR_PTR(ret); -} - -static int -i915_get_ggtt_vma_pages(struct i915_vma *vma) -{ - int ret; - - /* The vma->pages are only valid within the lifespan of the borrowed - * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so - * must be the vma->pages. A simple rule is that vma->pages must only - * be accessed when the obj->mm.pages are pinned. - */ - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); - - switch (vma->ggtt_view.type) { - default: - GEM_BUG_ON(vma->ggtt_view.type); - /* fall through */ - case I915_GGTT_VIEW_NORMAL: - vma->pages = vma->obj->mm.pages; - return 0; - - case I915_GGTT_VIEW_ROTATED: - vma->pages = - intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); - break; - - case I915_GGTT_VIEW_REMAPPED: - vma->pages = - intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); - break; - - case I915_GGTT_VIEW_PARTIAL: - vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); - break; - } - - ret = 0; - if (IS_ERR(vma->pages)) { - ret = PTR_ERR(vma->pages); - vma->pages = NULL; - DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", - vma->ggtt_view.type, ret); - } - return ret; -} - /** * i915_gem_gtt_reserve - reserve a node in an address_space (GTT) * @vm: the &struct i915_address_space @@ -3953,7 +108,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm, GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT)); GEM_BUG_ON(range_overflows(offset, size, vm->total)); - GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm); + GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm); GEM_BUG_ON(drm_mm_node_allocated(node)); node->size = size; @@ -4042,7 +197,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 offset; int err; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); + GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(alignment && !is_power_of_2(alignment)); @@ -4050,7 +206,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, GEM_BUG_ON(start >= end); GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); - GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm); + GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm); GEM_BUG_ON(drm_mm_node_allocated(node)); if (unlikely(range_overflows(start, size, end))) @@ -4093,7 +249,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, if (flags & PIN_NOEVICT) return -ENOSPC; - /* No free space, pick a slot at random. + /* + * No free space, pick a slot at random. * * There is a pathological case here using a GTT shared between * mmap and GPU (i.e. 
ggtt/aliasing_ppgtt but not full-ppgtt): @@ -4121,6 +278,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, if (err != -ENOSPC) return err; + if (flags & PIN_NOSEARCH) + return -ENOSPC; + /* Randomly selected placement is pinned, do a search */ err = i915_gem_evict_something(vm, size, alignment, color, start, end, flags); @@ -4133,6 +293,5 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_gtt.c" #include "selftests/i915_gem_gtt.c" #endif diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 812717ccc69b..f6226df9f972 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -1,682 +1,21 @@ +/* SPDX-License-Identifier: MIT */ /* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Please try to maintain the following order within this file unless it makes - * sense to do otherwise. From top to bottom: - * 1. typedefs - * 2. #defines, and macros - * 3. structure definitions - * 4. function prototypes - * - * Within each section, please try to order by generation in ascending order, - * from top to bottom (ie. gen6 on the top, gen8 on the bottom). 
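Taken together with the hunk above, i915_gem_gtt_insert() now has a three-step fallback: a plain top-down insert, then (unless PIN_NOEVICT) evicting one randomly chosen neighbour, then (unless the new PIN_NOSEARCH) a full eviction search. The sketch below is only a control-flow model with hypothetical stub helpers, not the real drm_mm/eviction code:

/* Standalone sketch: the fallback ladder of i915_gem_gtt_insert(), modelled
 * with stubs that pretend the GTT is full and the random slot is pinned. */
#include <errno.h>
#include <stdio.h>

static int try_simple_insert(void)  { return -ENOSPC; }
static int try_random_replace(void) { return -ENOSPC; }
static int evict_and_insert(void)   { return 0; }

static int gtt_insert(int noevict, int nosearch)
{
        int err = try_simple_insert();
        if (err != -ENOSPC)
                return err;
        if (noevict)
                return -ENOSPC;         /* never disturb other nodes */

        err = try_random_replace();     /* cheap: kick out one random neighbour */
        if (err != -ENOSPC)
                return err;
        if (nosearch)
                return -ENOSPC;         /* new in this patch: skip the full scan */

        return evict_and_insert();      /* expensive: full eviction search */
}

int main(void)
{
        printf("PIN_NOEVICT: %d\n", gtt_insert(1, 0));
        printf("PIN_NOSEARCH: %d\n", gtt_insert(0, 1));
        printf("default: %d\n", gtt_insert(0, 0));
        return 0;
}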
+ * Copyright © 2020 Intel Corporation */ #ifndef __I915_GEM_GTT_H__ #define __I915_GEM_GTT_H__ #include <linux/io-mapping.h> -#include <linux/mm.h> -#include <linux/pagevec.h> +#include <linux/types.h> -#include "gt/intel_reset.h" -#include "i915_gem_fence_reg.h" -#include "i915_request.h" -#include "i915_scatterlist.h" -#include "i915_selftest.h" -#include "i915_timeline.h" - -#define I915_GTT_PAGE_SIZE_4K BIT_ULL(12) -#define I915_GTT_PAGE_SIZE_64K BIT_ULL(16) -#define I915_GTT_PAGE_SIZE_2M BIT_ULL(21) - -#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K -#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M - -#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE +#include <drm/drm_mm.h> -#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE - -#define I915_FENCE_REG_NONE -1 -#define I915_MAX_NUM_FENCES 32 -/* 32 fences + sign bit for FENCE_REG_NONE */ -#define I915_MAX_NUM_FENCE_BITS 6 +#include "gt/intel_gtt.h" +#include "i915_scatterlist.h" -struct drm_i915_file_private; struct drm_i915_gem_object; -struct i915_vma; - -typedef u32 gen6_pte_t; -typedef u64 gen8_pte_t; -typedef u64 gen8_pde_t; -typedef u64 gen8_ppgtt_pdpe_t; -typedef u64 gen8_ppgtt_pml4e_t; - -#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT) - -/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */ -#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) -#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) -#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) -#define GEN6_PTE_CACHE_LLC (2 << 1) -#define GEN6_PTE_UNCACHED (1 << 1) -#define GEN6_PTE_VALID (1 << 0) - -#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len))) -#define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) -#define I915_PDES 512 -#define I915_PDE_MASK (I915_PDES - 1) -#define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT)) - -#define GEN6_PTES I915_PTES(sizeof(gen6_pte_t)) -#define GEN6_PD_SIZE (I915_PDES * PAGE_SIZE) -#define GEN6_PD_ALIGN (PAGE_SIZE * 16) -#define GEN6_PDE_SHIFT 22 -#define GEN6_PDE_VALID (1 << 0) - -#define GEN7_PTE_CACHE_L3_LLC (3 << 1) - -#define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2) -#define BYT_PTE_WRITEABLE (1 << 1) - -/* Cacheability Control is a 4-bit value. The low three bits are stored in bits - * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE. - */ -#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \ - (((bits) & 0x8) << (11 - 3))) -#define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2) -#define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3) -#define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8) -#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) -#define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7) -#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) -#define HSW_PTE_UNCACHED (0) -#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) -#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) - -/* GEN8 32b style address is defined as a 3 level page table: - * 31:30 | 29:21 | 20:12 | 11:0 - * PDPE | PDE | PTE | offset - * The difference as compared to normal x86 3 level page table is the PDPEs are - * programmed via register. 
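Among the definitions removed above (presumably relocated under gt/, given the new gt/intel_gtt.h include), HSW_CACHEABILITY_CONTROL() packs a 4-bit Haswell cacheability value into PTE bits 3:1 plus bit 11. A standalone sketch reproducing just that macro and a few of the named values:

/* Standalone sketch: Haswell cacheability bits split across the PTE. */
#include <stdio.h>

#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \
                                        (((bits) & 0x8) << (11 - 3)))

int main(void)
{
        printf("WB LLC age 3     (0x2) -> PTE bits 0x%03x\n", HSW_CACHEABILITY_CONTROL(0x2));
        printf("WB eLLC+LLC age3 (0x8) -> PTE bits 0x%03x\n", HSW_CACHEABILITY_CONTROL(0x8));
        printf("WB eLLC+LLC age0 (0xb) -> PTE bits 0x%03x\n", HSW_CACHEABILITY_CONTROL(0xb));
        return 0;
}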
- */ -#define GEN8_3LVL_PDPES 4 -#define GEN8_PDE_SHIFT 21 -#define GEN8_PDE_MASK 0x1ff -#define GEN8_PTE_SHIFT 12 -#define GEN8_PTE_MASK 0x1ff -#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) - -/* GEN8 48b style address is defined as a 4 level page table: - * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 - * PML4E | PDPE | PDE | PTE | offset - */ -#define GEN8_PML4ES_PER_PML4 512 -#define GEN8_PML4E_SHIFT 39 -#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1) -#define GEN8_PDPE_SHIFT 30 -/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page - * tables */ -#define GEN8_PDPE_MASK 0x1ff - -#define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD) -#define PPAT_CACHED_PDE 0 /* WB LLC */ -#define PPAT_CACHED _PAGE_PAT /* WB LLCeLLC */ -#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT eLLC */ - -#define CHV_PPAT_SNOOP (1<<6) -#define GEN8_PPAT_AGE(x) ((x)<<4) -#define GEN8_PPAT_LLCeLLC (3<<2) -#define GEN8_PPAT_LLCELLC (2<<2) -#define GEN8_PPAT_LLC (1<<2) -#define GEN8_PPAT_WB (3<<0) -#define GEN8_PPAT_WT (2<<0) -#define GEN8_PPAT_WC (1<<0) -#define GEN8_PPAT_UC (0<<0) -#define GEN8_PPAT_ELLC_OVERRIDE (0<<2) -#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) - -#define GEN8_PPAT_GET_CA(x) ((x) & 3) -#define GEN8_PPAT_GET_TC(x) ((x) & (3 << 2)) -#define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4)) -#define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6)) - -#define GEN8_PDE_IPS_64K BIT(11) -#define GEN8_PDE_PS_2M BIT(7) - -#define for_each_sgt_dma(__dmap, __iter, __sgt) \ - __for_each_sgt_dma(__dmap, __iter, __sgt, I915_GTT_PAGE_SIZE) - -struct intel_remapped_plane_info { - /* in gtt pages */ - unsigned int width, height, stride, offset; -} __packed; - -struct intel_remapped_info { - struct intel_remapped_plane_info plane[2]; - unsigned int unused_mbz; -} __packed; - -struct intel_rotation_info { - struct intel_remapped_plane_info plane[2]; -} __packed; - -struct intel_partial_info { - u64 offset; - unsigned int size; -} __packed; - -enum i915_ggtt_view_type { - I915_GGTT_VIEW_NORMAL = 0, - I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info), - I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info), - I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info), -}; - -static inline void assert_i915_gem_gtt_types(void) -{ - BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int)); - BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int)); - BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int)); - - /* Check that rotation/remapped shares offsets for simplicity */ - BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) != - offsetof(struct intel_rotation_info, plane[0])); - BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) != - offsetofend(struct intel_rotation_info, plane[1])); - - /* As we encode the size of each branch inside the union into its type, - * we have to be careful that each branch has a unique size. 
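The enum i915_ggtt_view_type trick removed above encodes each view's union-member size as the enum value itself, which is why assert_i915_gem_gtt_types() insists the sizes stay distinct. A standalone sketch (struct layouts copied from the header, names shortened) showing the resulting values:

/* Standalone sketch: view-type enum values derived from sizeof(). */
#include <stdint.h>
#include <stdio.h>

struct plane_info    { unsigned int width, height, stride, offset; } __attribute__((packed));
struct rotation_info { struct plane_info plane[2]; } __attribute__((packed));
struct remapped_info { struct plane_info plane[2]; unsigned int unused_mbz; } __attribute__((packed));
struct partial_info  { uint64_t offset; unsigned int size; } __attribute__((packed));

enum view_type {
        VIEW_NORMAL   = 0,
        VIEW_ROTATED  = sizeof(struct rotation_info),
        VIEW_PARTIAL  = sizeof(struct partial_info),
        VIEW_REMAPPED = sizeof(struct remapped_info),
};

int main(void)
{
        printf("NORMAL=%d ROTATED=%d PARTIAL=%d REMAPPED=%d\n",
               VIEW_NORMAL, VIEW_ROTATED, VIEW_PARTIAL, VIEW_REMAPPED);
        return 0;
}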
- */ - switch ((enum i915_ggtt_view_type)0) { - case I915_GGTT_VIEW_NORMAL: - case I915_GGTT_VIEW_PARTIAL: - case I915_GGTT_VIEW_ROTATED: - case I915_GGTT_VIEW_REMAPPED: - /* gcc complains if these are identical cases */ - break; - } -} - -struct i915_ggtt_view { - enum i915_ggtt_view_type type; - union { - /* Members need to contain no holes/padding */ - struct intel_partial_info partial; - struct intel_rotation_info rotated; - struct intel_remapped_info remapped; - }; -}; - -enum i915_cache_level; - -struct i915_vma; - -struct i915_page_dma { - struct page *page; - union { - dma_addr_t daddr; - - /* For gen6/gen7 only. This is the offset in the GGTT - * where the page directory entries for PPGTT begin - */ - u32 ggtt_offset; - }; -}; - -#define px_base(px) (&(px)->base) -#define px_dma(px) (px_base(px)->daddr) - -struct i915_page_table { - struct i915_page_dma base; - atomic_t used; -}; - -struct i915_page_directory { - struct i915_page_dma base; - atomic_t used; - spinlock_t lock; - void *entry[512]; -}; - -struct i915_vma_ops { - /* Map an object into an address space with the given cache flags. */ - int (*bind_vma)(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags); - /* - * Unmap an object from an address space. This usually consists of - * setting the valid PTE entries to a reserved scratch page. - */ - void (*unbind_vma)(struct i915_vma *vma); - - int (*set_pages)(struct i915_vma *vma); - void (*clear_pages)(struct i915_vma *vma); -}; - -struct pagestash { - spinlock_t lock; - struct pagevec pvec; -}; - -struct i915_address_space { - struct kref ref; - - struct drm_mm mm; - struct drm_i915_private *i915; - struct device *dma; - /* Every address space belongs to a struct file - except for the global - * GTT that is owned by the driver (and so @file is set to NULL). In - * principle, no information should leak from one context to another - * (or between files/processes etc) unless explicitly shared by the - * owner. Tracking the owner is important in order to free up per-file - * objects along with the file, to aide resource tracking, and to - * assign blame. - */ - struct drm_i915_file_private *file; - u64 total; /* size addr space maps (ex. 2GB for ggtt) */ - u64 reserved; /* size addr space reserved */ - - bool closed; - - struct mutex mutex; /* protects vma and our lists */ -#define VM_CLASS_GGTT 0 -#define VM_CLASS_PPGTT 1 - - u64 scratch_pte; - int scratch_order; - struct i915_page_dma scratch_page; - struct i915_page_table *scratch_pt; - struct i915_page_directory *scratch_pd; - struct i915_page_directory *scratch_pdp; /* GEN8+ & 48b PPGTT */ - - /** - * List of vma currently bound. - */ - struct list_head bound_list; - - /** - * List of vma that are not unbound. 
- */ - struct list_head unbound_list; - - struct pagestash free_pages; - - /* Global GTT */ - bool is_ggtt:1; - - /* Some systems require uncached updates of the page directories */ - bool pt_kmap_wc:1; - - /* Some systems support read-only mappings for GGTT and/or PPGTT */ - bool has_read_only:1; - - u64 (*pte_encode)(dma_addr_t addr, - enum i915_cache_level level, - u32 flags); /* Create a valid PTE */ -#define PTE_READ_ONLY (1<<0) - - int (*allocate_va_range)(struct i915_address_space *vm, - u64 start, u64 length); - void (*clear_range)(struct i915_address_space *vm, - u64 start, u64 length); - void (*insert_page)(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level cache_level, - u32 flags); - void (*insert_entries)(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags); - void (*cleanup)(struct i915_address_space *vm); - - struct i915_vma_ops vma_ops; - - I915_SELFTEST_DECLARE(struct fault_attr fault_attr); - I915_SELFTEST_DECLARE(bool scrub_64K); -}; - -#define i915_is_ggtt(vm) ((vm)->is_ggtt) - -static inline bool -i915_vm_is_4lvl(const struct i915_address_space *vm) -{ - return (vm->total - 1) >> 32; -} - -static inline bool -i915_vm_has_scratch_64K(struct i915_address_space *vm) -{ - return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K); -} - -/* The Graphics Translation Table is the way in which GEN hardware translates a - * Graphics Virtual Address into a Physical Address. In addition to the normal - * collateral associated with any va->pa translations GEN hardware also has a - * portion of the GTT which can be mapped by the CPU and remain both coherent - * and correct (in cases like swizzling). That region is referred to as GMADR in - * the spec. - */ -struct i915_ggtt { - struct i915_address_space vm; - - struct io_mapping iomap; /* Mapping to our CPU mappable region */ - struct resource gmadr; /* GMADR resource */ - resource_size_t mappable_end; /* End offset that we can CPU map */ - - /** "Graphics Stolen Memory" holds the global PTEs */ - void __iomem *gsm; - void (*invalidate)(struct drm_i915_private *dev_priv); - - bool do_idle_maps; - - int mtrr; - - u32 pin_bias; - - unsigned int num_fences; - struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; - struct list_head fence_list; - - /** List of all objects in gtt_space, currently mmaped by userspace. - * All objects within this list must also be on bound_list. - */ - struct list_head userfault_list; - - /* Manual runtime pm autosuspend delay for user GGTT mmaps */ - struct intel_wakeref_auto userfault_wakeref; - - struct drm_mm_node error_capture; - struct drm_mm_node uc_fw; -}; - -struct i915_ppgtt { - struct i915_address_space vm; - - intel_engine_mask_t pd_dirty_engines; - struct i915_page_directory *pd; -}; - -struct gen6_ppgtt { - struct i915_ppgtt base; - - struct i915_vma *vma; - gen6_pte_t __iomem *pd_addr; - - unsigned int pin_count; - bool scan_for_unused_pt; - - struct gen6_ppgtt_cleanup_work *work; -}; - -#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base) - -static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) -{ - BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base)); - return __to_gen6_ppgtt(base); -} - -/* - * gen6_for_each_pde() iterates over every pde from start until start+length. - * If start and start+length are not perfectly divisible, the macro will round - * down and up as needed. Start=0 and length=2G effectively iterates over - * every PDE in the system. 
The macro modifies ALL its parameters except 'pd', - * so each of the other parameters should preferably be a simple variable, or - * at most an lvalue with no side-effects! - */ -#define gen6_for_each_pde(pt, pd, start, length, iter) \ - for (iter = gen6_pde_index(start); \ - length > 0 && iter < I915_PDES && \ - (pt = i915_pt_entry(pd, iter), true); \ - ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT); \ - temp = min(temp - start, length); \ - start += temp, length -= temp; }), ++iter) - -#define gen6_for_all_pdes(pt, pd, iter) \ - for (iter = 0; \ - iter < I915_PDES && \ - (pt = i915_pt_entry(pd, iter), true); \ - ++iter) - -static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) -{ - const u32 mask = NUM_PTE(pde_shift) - 1; - - return (address >> PAGE_SHIFT) & mask; -} - -/* Helper to counts the number of PTEs within the given length. This count - * does not cross a page table boundary, so the max value would be - * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. -*/ -static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift) -{ - const u64 mask = ~((1ULL << pde_shift) - 1); - u64 end; - - GEM_BUG_ON(length == 0); - GEM_BUG_ON(offset_in_page(addr | length)); - - end = addr + length; - - if ((addr & mask) != (end & mask)) - return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift); - - return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); -} - -static inline u32 i915_pde_index(u64 addr, u32 shift) -{ - return (addr >> shift) & I915_PDE_MASK; -} - -static inline u32 gen6_pte_index(u32 addr) -{ - return i915_pte_index(addr, GEN6_PDE_SHIFT); -} - -static inline u32 gen6_pte_count(u32 addr, u32 length) -{ - return i915_pte_count(addr, length, GEN6_PDE_SHIFT); -} - -static inline u32 gen6_pde_index(u32 addr) -{ - return i915_pde_index(addr, GEN6_PDE_SHIFT); -} - -static inline unsigned int -i915_pdpes_per_pdp(const struct i915_address_space *vm) -{ - if (i915_vm_is_4lvl(vm)) - return GEN8_PML4ES_PER_PML4; - - return GEN8_3LVL_PDPES; -} - -static inline struct i915_page_table * -i915_pt_entry(const struct i915_page_directory * const pd, - const unsigned short n) -{ - return pd->entry[n]; -} - -static inline struct i915_page_directory * -i915_pd_entry(const struct i915_page_directory * const pdp, - const unsigned short n) -{ - return pdp->entry[n]; -} - -static inline struct i915_page_directory * -i915_pdp_entry(const struct i915_page_directory * const pml4, - const unsigned short n) -{ - return pml4->entry[n]; -} - -/* Equivalent to the gen6 version, For each pde iterates over every pde - * between from start until start + length. On gen8+ it simply iterates - * over every page directory entry in a page directory. 
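i915_pte_count() above deliberately never counts past the end of a single page table: if the requested range crosses a PDE boundary, it only returns the PTEs up to that boundary. Standalone sketch, assuming the gen6 layout (4 KiB pages, 22-bit PDE shift, 1024 PTEs per table):

/* Standalone sketch: PTE count clamped at a page-table boundary. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PDE_SHIFT  22
#define NUM_PTE    (1u << (PDE_SHIFT - PAGE_SHIFT))     /* 1024 */

static uint32_t pte_index(uint64_t addr)
{
        return (addr >> PAGE_SHIFT) & (NUM_PTE - 1);
}

static uint32_t pte_count(uint64_t addr, uint64_t length)
{
        const uint64_t mask = ~((1ULL << PDE_SHIFT) - 1);
        uint64_t end = addr + length;

        if ((addr & mask) != (end & mask))          /* crosses into the next PDE? */
                return NUM_PTE - pte_index(addr);   /* only count to the table end */

        return pte_index(end) - pte_index(addr);
}

int main(void)
{
        /* wholly inside one page table: 16 pages -> 16 PTEs */
        printf("%u\n", pte_count(0x000000, 16 << PAGE_SHIFT));
        /* starts 8 pages before the 4 MiB boundary, asks for 16 -> clamped to 8 */
        printf("%u\n", pte_count(0x400000 - (8 << PAGE_SHIFT), 16 << PAGE_SHIFT));
        return 0;
}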
- */ -#define gen8_for_each_pde(pt, pd, start, length, iter) \ - for (iter = gen8_pde_index(start); \ - length > 0 && iter < I915_PDES && \ - (pt = i915_pt_entry(pd, iter), true); \ - ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT); \ - temp = min(temp - start, length); \ - start += temp, length -= temp; }), ++iter) - -#define gen8_for_each_pdpe(pd, pdp, start, length, iter) \ - for (iter = gen8_pdpe_index(start); \ - length > 0 && iter < i915_pdpes_per_pdp(vm) && \ - (pd = i915_pd_entry(pdp, iter), true); \ - ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT); \ - temp = min(temp - start, length); \ - start += temp, length -= temp; }), ++iter) - -#define gen8_for_each_pml4e(pdp, pml4, start, length, iter) \ - for (iter = gen8_pml4e_index(start); \ - length > 0 && iter < GEN8_PML4ES_PER_PML4 && \ - (pdp = i915_pdp_entry(pml4, iter), true); \ - ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT); \ - temp = min(temp - start, length); \ - start += temp, length -= temp; }), ++iter) - -static inline u32 gen8_pte_index(u64 address) -{ - return i915_pte_index(address, GEN8_PDE_SHIFT); -} - -static inline u32 gen8_pde_index(u64 address) -{ - return i915_pde_index(address, GEN8_PDE_SHIFT); -} - -static inline u32 gen8_pdpe_index(u64 address) -{ - return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; -} - -static inline u32 gen8_pml4e_index(u64 address) -{ - return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; -} - -static inline u64 gen8_pte_count(u64 address, u64 length) -{ - return i915_pte_count(address, length, GEN8_PDE_SHIFT); -} - -static inline dma_addr_t -i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) -{ - struct i915_page_directory *pd; - - pd = i915_pdp_entry(ppgtt->pd, n); - return px_dma(pd); -} - -static inline struct i915_ggtt * -i915_vm_to_ggtt(struct i915_address_space *vm) -{ - BUILD_BUG_ON(offsetof(struct i915_ggtt, vm)); - GEM_BUG_ON(!i915_is_ggtt(vm)); - return container_of(vm, struct i915_ggtt, vm); -} - -static inline struct i915_ppgtt * -i915_vm_to_ppgtt(struct i915_address_space *vm) -{ - BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm)); - GEM_BUG_ON(i915_is_ggtt(vm)); - return container_of(vm, struct i915_ppgtt, vm); -} - -#define INTEL_MAX_PPAT_ENTRIES 8 -#define INTEL_PPAT_PERFECT_MATCH (~0U) - -struct intel_ppat; - -struct intel_ppat_entry { - struct intel_ppat *ppat; - struct kref ref; - u8 value; -}; - -struct intel_ppat { - struct intel_ppat_entry entries[INTEL_MAX_PPAT_ENTRIES]; - DECLARE_BITMAP(used, INTEL_MAX_PPAT_ENTRIES); - DECLARE_BITMAP(dirty, INTEL_MAX_PPAT_ENTRIES); - unsigned int max_entries; - u8 clear_value; - /* - * Return a score to show how two PPAT values match, - * a INTEL_PPAT_PERFECT_MATCH indicates a perfect match - */ - unsigned int (*match)(u8 src, u8 dst); - void (*update_hw)(struct drm_i915_private *i915); - - struct drm_i915_private *i915; -}; - -const struct intel_ppat_entry * -intel_ppat_get(struct drm_i915_private *i915, u8 value); -void intel_ppat_put(const struct intel_ppat_entry *entry); - -int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); -int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); -int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); -void i915_ggtt_enable_guc(struct drm_i915_private *i915); -void i915_ggtt_disable_guc(struct drm_i915_private *i915); -int i915_gem_init_ggtt(struct drm_i915_private *dev_priv); -void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); - -int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv); - -struct i915_ppgtt 
*i915_ppgtt_create(struct drm_i915_private *dev_priv); - -static inline struct i915_address_space * -i915_vm_get(struct i915_address_space *vm) -{ - kref_get(&vm->ref); - return vm; -} - -void i915_vm_release(struct kref *kref); - -static inline void i915_vm_put(struct i915_address_space *vm) -{ - kref_put(&vm->ref, i915_vm_release); -} - -int gen6_ppgtt_pin(struct i915_ppgtt *base); -void gen6_ppgtt_unpin(struct i915_ppgtt *base); -void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); - -void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv); -void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv); +struct i915_address_space; int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, struct sg_table *pages); @@ -694,20 +33,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 start, u64 end, unsigned int flags); /* Flags used by pin/bind&friends. */ -#define PIN_NONBLOCK BIT_ULL(0) -#define PIN_NONFAULT BIT_ULL(1) -#define PIN_NOEVICT BIT_ULL(2) +#define PIN_NOEVICT BIT_ULL(0) +#define PIN_NOSEARCH BIT_ULL(1) +#define PIN_NONBLOCK BIT_ULL(2) #define PIN_MAPPABLE BIT_ULL(3) #define PIN_ZONE_4G BIT_ULL(4) #define PIN_HIGH BIT_ULL(5) #define PIN_OFFSET_BIAS BIT_ULL(6) #define PIN_OFFSET_FIXED BIT_ULL(7) -#define PIN_MBZ BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT_ULL(11) +#define PIN_UPDATE BIT_ULL(9) +#define PIN_GLOBAL BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(11) /* I915_VMA_LOCAL_BIND */ -#define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) +#define PIN_OFFSET_MASK I915_GTT_PAGE_MASK #endif diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h deleted file mode 100644 index 112cda8fa1a8..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
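The last hunk above respells PIN_OFFSET_MASK as I915_GTT_PAGE_MASK; per the definitions removed earlier in this header, both expand to -I915_GTT_PAGE_SIZE, i.e. an all-ones mask above the page bits, so pinned offsets are simply truncated to page alignment. Tiny standalone illustration with a made-up offset:

/* Standalone sketch: -PAGE_SIZE as an alignment mask. */
#include <stdint.h>
#include <stdio.h>

#define GTT_PAGE_SIZE   (1ULL << 12)
#define GTT_PAGE_MASK   (-GTT_PAGE_SIZE)        /* 0xfffffffffffff000 */
#define PIN_OFFSET_MASK GTT_PAGE_MASK

int main(void)
{
        uint64_t offset = 0x12345678;

        printf("mask          = 0x%016llx\n", (unsigned long long)PIN_OFFSET_MASK);
        printf("offset & mask = 0x%016llx\n", (unsigned long long)(offset & PIN_OFFSET_MASK));
        return 0;
}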
- */ - -#ifndef _I915_GEM_RENDER_STATE_H_ -#define _I915_GEM_RENDER_STATE_H_ - -struct i915_request; - -int i915_gem_render_state_emit(struct i915_request *rq); - -#endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c new file mode 100644 index 000000000000..54fce81d5724 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -0,0 +1,173 @@ +/* + * SPDX-License-Identifier: MIT + */ + +#include "gem/i915_gem_mman.h" +#include "gt/intel_engine_user.h" + +#include "i915_drv.h" +#include "i915_perf.h" + +int i915_getparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_private *i915 = to_i915(dev); + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + drm_i915_getparam_t *param = data; + int value; + + switch (param->param) { + case I915_PARAM_IRQ_ACTIVE: + case I915_PARAM_ALLOW_BATCHBUFFER: + case I915_PARAM_LAST_DISPATCH: + case I915_PARAM_HAS_EXEC_CONSTANTS: + /* Reject all old ums/dri params. */ + return -ENODEV; + case I915_PARAM_CHIPSET_ID: + value = i915->drm.pdev->device; + break; + case I915_PARAM_REVISION: + value = i915->drm.pdev->revision; + break; + case I915_PARAM_NUM_FENCES_AVAIL: + value = i915->ggtt.num_fences; + break; + case I915_PARAM_HAS_OVERLAY: + value = !!i915->overlay; + break; + case I915_PARAM_HAS_BSD: + value = !!intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_VIDEO, 0); + break; + case I915_PARAM_HAS_BLT: + value = !!intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_COPY, 0); + break; + case I915_PARAM_HAS_VEBOX: + value = !!intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_VIDEO_ENHANCE, 0); + break; + case I915_PARAM_HAS_BSD2: + value = !!intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_VIDEO, 1); + break; + case I915_PARAM_HAS_LLC: + value = HAS_LLC(i915); + break; + case I915_PARAM_HAS_WT: + value = HAS_WT(i915); + break; + case I915_PARAM_HAS_ALIASING_PPGTT: + value = INTEL_PPGTT(i915); + break; + case I915_PARAM_HAS_SEMAPHORES: + value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES); + break; + case I915_PARAM_HAS_SECURE_BATCHES: + value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN); + break; + case I915_PARAM_CMD_PARSER_VERSION: + value = i915_cmd_parser_get_version(i915); + break; + case I915_PARAM_SUBSLICE_TOTAL: + value = intel_sseu_subslice_total(sseu); + if (!value) + return -ENODEV; + break; + case I915_PARAM_EU_TOTAL: + value = sseu->eu_total; + if (!value) + return -ENODEV; + break; + case I915_PARAM_HAS_GPU_RESET: + value = i915_modparams.enable_hangcheck && + intel_has_gpu_reset(&i915->gt); + if (value && intel_has_reset_engine(&i915->gt)) + value = 2; + break; + case I915_PARAM_HAS_RESOURCE_STREAMER: + value = 0; + break; + case I915_PARAM_HAS_POOLED_EU: + value = HAS_POOLED_EU(i915); + break; + case I915_PARAM_MIN_EU_IN_POOL: + value = sseu->min_eu_in_pool; + break; + case I915_PARAM_HUC_STATUS: + value = intel_huc_check_status(&i915->gt.uc.huc); + if (value < 0) + return value; + break; + case I915_PARAM_MMAP_GTT_VERSION: + /* Though we've started our numbering from 1, and so class all + * earlier versions as 0, in effect their value is undefined as + * the ioctl will report EINVAL for the unknown param! + */ + value = i915_gem_mmap_gtt_version(); + break; + case I915_PARAM_HAS_SCHEDULER: + value = i915->caps.scheduler; + break; + + case I915_PARAM_MMAP_VERSION: + /* Remember to bump this if the version changes! 
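The new i915_getparam.c above is the kernel side of the long-standing GETPARAM ioctl: switch on param, fill in an int, put_user() it back. A hedged userspace sketch of how a caller exercises it, assuming the DRM uapi headers are installed and that /dev/dri/card0 is an i915 node (both may differ on a given system):

/* Standalone userspace sketch, not part of the patch. */
#include <drm/i915_drm.h>   /* may be <libdrm/i915_drm.h> on some systems */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        drm_i915_getparam_t gp = {0};
        int value = 0;
        int fd = open("/dev/dri/card0", O_RDWR);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        gp.param = I915_PARAM_CHIPSET_ID;   /* handled by the switch above */
        gp.value = &value;
        if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
                printf("PCI device id: 0x%04x\n", value);
        else
                perror("DRM_IOCTL_I915_GETPARAM");

        close(fd);
        return 0;
}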
*/ + case I915_PARAM_HAS_GEM: + case I915_PARAM_HAS_PAGEFLIPPING: + case I915_PARAM_HAS_EXECBUF2: /* depends on GEM */ + case I915_PARAM_HAS_RELAXED_FENCING: + case I915_PARAM_HAS_COHERENT_RINGS: + case I915_PARAM_HAS_RELAXED_DELTA: + case I915_PARAM_HAS_GEN7_SOL_RESET: + case I915_PARAM_HAS_WAIT_TIMEOUT: + case I915_PARAM_HAS_PRIME_VMAP_FLUSH: + case I915_PARAM_HAS_PINNED_BATCHES: + case I915_PARAM_HAS_EXEC_NO_RELOC: + case I915_PARAM_HAS_EXEC_HANDLE_LUT: + case I915_PARAM_HAS_COHERENT_PHYS_GTT: + case I915_PARAM_HAS_EXEC_SOFTPIN: + case I915_PARAM_HAS_EXEC_ASYNC: + case I915_PARAM_HAS_EXEC_FENCE: + case I915_PARAM_HAS_EXEC_CAPTURE: + case I915_PARAM_HAS_EXEC_BATCH_FIRST: + case I915_PARAM_HAS_EXEC_FENCE_ARRAY: + case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: + /* For the time being all of these are always true; + * if some supported hardware does not have one of these + * features this value needs to be provided from + * INTEL_INFO(), a feature macro, or similar. + */ + value = 1; + break; + case I915_PARAM_HAS_CONTEXT_ISOLATION: + value = intel_engines_has_context_isolation(i915); + break; + case I915_PARAM_SLICE_MASK: + value = sseu->slice_mask; + if (!value) + return -ENODEV; + break; + case I915_PARAM_SUBSLICE_MASK: + value = sseu->subslice_mask[0]; + if (!value) + return -ENODEV; + break; + case I915_PARAM_CS_TIMESTAMP_FREQUENCY: + value = 1000 * RUNTIME_INFO(i915)->cs_timestamp_frequency_khz; + break; + case I915_PARAM_MMAP_GTT_COHERENT: + value = INTEL_INFO(i915)->has_coherent_ggtt; + break; + case I915_PARAM_PERF_REVISION: + value = i915_perf_ioctl_version(); + break; + default: + DRM_DEBUG("Unknown parameter %d\n", param->param); + return -EINVAL; + } + + if (put_user(value, param->value)) + return -EFAULT; + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 2d5fcba98841..3aa213684293 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -20,7 +20,10 @@ static LIST_HEAD(globals); static atomic_t active; static atomic_t epoch; static struct park_work { - struct rcu_work work; + struct delayed_work work; + struct rcu_head rcu; + unsigned long flags; +#define PENDING 0 int epoch; } park; @@ -37,11 +40,33 @@ static void i915_globals_shrink(void) global->shrink(); } +static void __i915_globals_grace(struct rcu_head *rcu) +{ + /* Ratelimit parking as shrinking is quite slow */ + schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ)); +} + +static void __i915_globals_queue_rcu(void) +{ + park.epoch = atomic_inc_return(&epoch); + if (!atomic_read(&active)) { + init_rcu_head(&park.rcu); + call_rcu(&park.rcu, __i915_globals_grace); + } +} + static void __i915_globals_park(struct work_struct *work) { + destroy_rcu_head(&park.rcu); + /* Confirm nothing woke up in the last grace period */ - if (park.epoch == atomic_read(&epoch)) - i915_globals_shrink(); + if (park.epoch != atomic_read(&epoch)) { + __i915_globals_queue_rcu(); + return; + } + + clear_bit(PENDING, &park.flags); + i915_globals_shrink(); } void __init i915_global_register(struct i915_global *global) @@ -62,6 +87,7 @@ static void __i915_globals_cleanup(void) static __initconst int (* const initfn[])(void) = { i915_global_active_init, + i915_global_buddy_init, i915_global_context_init, i915_global_gem_context_init, i915_global_objects_init, @@ -84,7 +110,7 @@ int __init i915_globals_init(void) } } - INIT_RCU_WORK(&park.work, __i915_globals_park); + INIT_DELAYED_WORK(&park.work, __i915_globals_park); return 0; } @@ -102,8 +128,9 
@@ void i915_globals_park(void) if (!atomic_dec_and_test(&active)) return; - park.epoch = atomic_inc_return(&epoch); - queue_rcu_work(system_wq, &park.work); + /* Queue cleanup after the next RCU grace period has freed slabs */ + if (!test_and_set_bit(PENDING, &park.flags)) + __i915_globals_queue_rcu(); } void i915_globals_unpark(void) @@ -112,12 +139,21 @@ void i915_globals_unpark(void) atomic_inc(&active); } +static void __exit __i915_globals_flush(void) +{ + atomic_inc(&active); /* skip shrinking */ + + rcu_barrier(); /* wait for the work to be queued */ + flush_delayed_work(&park.work); + + atomic_dec(&active); +} + void __exit i915_globals_exit(void) { - /* Flush any residual park_work */ - atomic_inc(&epoch); - flush_rcu_work(&park.work); + GEM_BUG_ON(atomic_read(&active)); + __i915_globals_flush(); __i915_globals_cleanup(); /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h index 04c1ce107fc0..b2f5cd9b9b1a 100644 --- a/drivers/gpu/drm/i915/i915_globals.h +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -7,6 +7,8 @@ #ifndef _I915_GLOBALS_H_ #define _I915_GLOBALS_H_ +#include <linux/types.h> + typedef void (*i915_global_func_t)(void); struct i915_global { @@ -25,6 +27,7 @@ void i915_globals_exit(void); /* constructors */ int i915_global_active_init(void); +int i915_global_buddy_init(void); int i915_global_context_init(void); int i915_global_gem_context_init(void); int i915_global_objects_init(void); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8bc76fcff70d..594341e27a47 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -29,8 +29,8 @@ #include <linux/ascii85.h> #include <linux/nmi.h> +#include <linux/pagevec.h> #include <linux/scatterlist.h> -#include <linux/stop_machine.h> #include <linux/utsname.h> #include <linux/zlib.h> @@ -40,52 +40,17 @@ #include "display/intel_overlay.h" #include "gem/i915_gem_context.h" +#include "gem/i915_gem_lmem.h" +#include "gt/intel_gt_pm.h" #include "i915_drv.h" #include "i915_gpu_error.h" +#include "i915_memcpy.h" #include "i915_scatterlist.h" #include "intel_csr.h" -static inline const struct intel_engine_cs * -engine_lookup(const struct drm_i915_private *i915, unsigned int id) -{ - if (id >= I915_NUM_ENGINES) - return NULL; - - return i915->engine[id]; -} - -static inline const char * -__engine_name(const struct intel_engine_cs *engine) -{ - return engine ? engine->name : ""; -} - -static const char * -engine_name(const struct drm_i915_private *i915, unsigned int id) -{ - return __engine_name(engine_lookup(i915, id)); -} - -static const char *tiling_flag(int tiling) -{ - switch (tiling) { - default: - case I915_TILING_NONE: return ""; - case I915_TILING_X: return " X"; - case I915_TILING_Y: return " Y"; - } -} - -static const char *dirty_flag(int dirty) -{ - return dirty ? " dirty" : ""; -} - -static const char *purgeable_flag(int purgeable) -{ - return purgeable ? 
" purgeable" : ""; -} +#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) +#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN) static void __sg_set_buf(struct scatterlist *sg, void *addr, unsigned int len, loff_t it) @@ -114,7 +79,7 @@ static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len) if (e->cur == e->end) { struct scatterlist *sgl; - sgl = (typeof(sgl))__get_free_page(GFP_KERNEL); + sgl = (typeof(sgl))__get_free_page(ALLOW_FAIL); if (!sgl) { e->err = -ENOMEM; return false; @@ -134,7 +99,7 @@ static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len) } e->size = ALIGN(len + 1, SZ_64K); - e->buf = kmalloc(e->size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + e->buf = kmalloc(e->size, ALLOW_FAIL); if (!e->buf) { e->size = PAGE_ALIGN(len + 1); e->buf = kmalloc(e->size, GFP_KERNEL); @@ -211,63 +176,132 @@ i915_error_printer(struct drm_i915_error_state_buf *e) return p; } +/* single threaded page allocator with a reserved stash for emergencies */ +static void pool_fini(struct pagevec *pv) +{ + pagevec_release(pv); +} + +static int pool_refill(struct pagevec *pv, gfp_t gfp) +{ + while (pagevec_space(pv)) { + struct page *p; + + p = alloc_page(gfp); + if (!p) + return -ENOMEM; + + pagevec_add(pv, p); + } + + return 0; +} + +static int pool_init(struct pagevec *pv, gfp_t gfp) +{ + int err; + + pagevec_init(pv); + + err = pool_refill(pv, gfp); + if (err) + pool_fini(pv); + + return err; +} + +static void *pool_alloc(struct pagevec *pv, gfp_t gfp) +{ + struct page *p; + + p = alloc_page(gfp); + if (!p && pagevec_count(pv)) + p = pv->pages[--pv->nr]; + + return p ? page_address(p) : NULL; +} + +static void pool_free(struct pagevec *pv, void *addr) +{ + struct page *p = virt_to_page(addr); + + if (pagevec_space(pv)) + pagevec_add(pv, p); + else + __free_page(p); +} + #ifdef CONFIG_DRM_I915_COMPRESS_ERROR -struct compress { +struct i915_vma_compress { + struct pagevec pool; struct z_stream_s zstream; void *tmp; }; -static bool compress_init(struct compress *c) +static bool compress_init(struct i915_vma_compress *c) { - struct z_stream_s *zstream = memset(&c->zstream, 0, sizeof(c->zstream)); + struct z_stream_s *zstream = &c->zstream; - zstream->workspace = - kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), - GFP_ATOMIC | __GFP_NOWARN); - if (!zstream->workspace) + if (pool_init(&c->pool, ALLOW_FAIL)) return false; - if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) { - kfree(zstream->workspace); + zstream->workspace = + kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), + ALLOW_FAIL); + if (!zstream->workspace) { + pool_fini(&c->pool); return false; } c->tmp = NULL; if (i915_has_memcpy_from_wc()) - c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN); + c->tmp = pool_alloc(&c->pool, ALLOW_FAIL); return true; } -static void *compress_next_page(struct drm_i915_error_object *dst) +static bool compress_start(struct i915_vma_compress *c) +{ + struct z_stream_s *zstream = &c->zstream; + void *workspace = zstream->workspace; + + memset(zstream, 0, sizeof(*zstream)); + zstream->workspace = workspace; + + return zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) == Z_OK; +} + +static void *compress_next_page(struct i915_vma_compress *c, + struct i915_vma_coredump *dst) { - unsigned long page; + void *page; if (dst->page_count >= dst->num_pages) return ERR_PTR(-ENOSPC); - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); + page = pool_alloc(&c->pool, ALLOW_FAIL); if (!page) return ERR_PTR(-ENOMEM); - return 
dst->pages[dst->page_count++] = (void *)page; + return dst->pages[dst->page_count++] = page; } -static int compress_page(struct compress *c, +static int compress_page(struct i915_vma_compress *c, void *src, - struct drm_i915_error_object *dst) + struct i915_vma_coredump *dst, + bool wc) { struct z_stream_s *zstream = &c->zstream; zstream->next_in = src; - if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) + if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) zstream->next_in = c->tmp; zstream->avail_in = PAGE_SIZE; do { if (zstream->avail_out == 0) { - zstream->next_out = compress_next_page(dst); + zstream->next_out = compress_next_page(c, dst); if (IS_ERR(zstream->next_out)) return PTR_ERR(zstream->next_out); @@ -276,8 +310,6 @@ static int compress_page(struct compress *c, if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO; - - touch_nmi_watchdog(); } while (zstream->avail_in); /* Fallback to uncompressed if we increase size? */ @@ -287,15 +319,15 @@ static int compress_page(struct compress *c, return 0; } -static int compress_flush(struct compress *c, - struct drm_i915_error_object *dst) +static int compress_flush(struct i915_vma_compress *c, + struct i915_vma_coredump *dst) { struct z_stream_s *zstream = &c->zstream; do { switch (zlib_deflate(zstream, Z_FINISH)) { case Z_OK: /* more space requested */ - zstream->next_out = compress_next_page(dst); + zstream->next_out = compress_next_page(c, dst); if (IS_ERR(zstream->next_out)) return PTR_ERR(zstream->next_out); @@ -316,15 +348,17 @@ end: return 0; } -static void compress_fini(struct compress *c, - struct drm_i915_error_object *dst) +static void compress_finish(struct i915_vma_compress *c) { - struct z_stream_s *zstream = &c->zstream; + zlib_deflateEnd(&c->zstream); +} - zlib_deflateEnd(zstream); - kfree(zstream->workspace); +static void compress_fini(struct i915_vma_compress *c) +{ + kfree(c->zstream.workspace); if (c->tmp) - free_page((unsigned long)c->tmp); + pool_free(&c->pool, c->tmp); + pool_fini(&c->pool); } static void err_compression_marker(struct drm_i915_error_state_buf *m) @@ -334,44 +368,53 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m) #else -struct compress { +struct i915_vma_compress { + struct pagevec pool; }; -static bool compress_init(struct compress *c) +static bool compress_init(struct i915_vma_compress *c) +{ + return pool_init(&c->pool, ALLOW_FAIL) == 0; +} + +static bool compress_start(struct i915_vma_compress *c) { return true; } -static int compress_page(struct compress *c, +static int compress_page(struct i915_vma_compress *c, void *src, - struct drm_i915_error_object *dst) + struct i915_vma_coredump *dst, + bool wc) { - unsigned long page; void *ptr; - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); - if (!page) + ptr = pool_alloc(&c->pool, ALLOW_FAIL); + if (!ptr) return -ENOMEM; - ptr = (void *)page; - if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE)) + if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) memcpy(ptr, src, PAGE_SIZE); dst->pages[dst->page_count++] = ptr; return 0; } -static int compress_flush(struct compress *c, - struct drm_i915_error_object *dst) +static int compress_flush(struct i915_vma_compress *c, + struct i915_vma_coredump *dst) { return 0; } -static void compress_fini(struct compress *c, - struct drm_i915_error_object *dst) +static void compress_finish(struct i915_vma_compress *c) { } +static void compress_fini(struct i915_vma_compress *c) +{ + pool_fini(&c->pool); +} + static void err_compression_marker(struct 
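compress_page()/compress_flush() above stream each source page into zlib with Z_NO_FLUSH, grabbing a fresh page-sized output buffer from the pool whenever avail_out reaches zero, and finish the stream with Z_FINISH. The same shape in ordinary userspace zlib (not kernel code; build with -lz):

/* Standalone sketch: streaming deflate into page-sized output chunks. */
#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define CHUNK 4096

int main(void)
{
        static unsigned char src[2 * CHUNK];    /* pretend: two "pages" of input */
        static unsigned char out[4][CHUNK];     /* page-sized output chunks */
        int used = 0;
        int ret;

        z_stream zs;
        memset(&zs, 0, sizeof(zs));
        if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK)
                return 1;

        zs.next_in = src;
        zs.avail_in = sizeof(src);
        zs.next_out = out[used];
        zs.avail_out = CHUNK;

        do {
                if (zs.avail_out == 0) {        /* current "page" full: take another */
                        used++;
                        zs.next_out = out[used];
                        zs.avail_out = CHUNK;
                }
                ret = deflate(&zs, zs.avail_in ? Z_NO_FLUSH : Z_FINISH);
        } while (ret == Z_OK);

        printf("compressed %zu bytes into %d chunk(s), %u bytes in the last\n",
               sizeof(src), used + 1, CHUNK - zs.avail_out);
        deflateEnd(&zs);
        return ret == Z_STREAM_END ? 0 : 1;
}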
drm_i915_error_state_buf *m) { err_puts(m, "~"); @@ -379,46 +422,17 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m) #endif -static void print_error_buffers(struct drm_i915_error_state_buf *m, - const char *name, - struct drm_i915_error_buffer *err, - int count) -{ - err_printf(m, "%s [%d]:\n", name, count); - - while (count--) { - err_printf(m, " %08x_%08x %8u %02x %02x", - upper_32_bits(err->gtt_offset), - lower_32_bits(err->gtt_offset), - err->size, - err->read_domains, - err->write_domain); - err_puts(m, tiling_flag(err->tiling)); - err_puts(m, dirty_flag(err->dirty)); - err_puts(m, purgeable_flag(err->purgeable)); - err_puts(m, err->userptr ? " userptr" : ""); - err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); - - if (err->name) - err_printf(m, " (name: %d)", err->name); - if (err->fence_reg != I915_FENCE_REG_NONE) - err_printf(m, " (fence: %d)", err->fence_reg); - - err_puts(m, "\n"); - err++; - } -} - static void error_print_instdone(struct drm_i915_error_state_buf *m, - const struct drm_i915_error_engine *ee) + const struct intel_engine_coredump *ee) { + const struct sseu_dev_info *sseu = &RUNTIME_INFO(m->i915)->sseu; int slice; int subslice; err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone.instdone); - if (ee->engine_id != RCS0 || INTEL_GEN(m->i915) <= 3) + if (ee->engine->class != RENDER_CLASS || INTEL_GEN(m->i915) <= 3) return; err_printf(m, " SC_INSTDONE: 0x%08x\n", @@ -427,12 +441,12 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (INTEL_GEN(m->i915) <= 6) return; - for_each_instdone_slice_subslice(m->i915, slice, subslice) + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, ee->instdone.sampler[slice][subslice]); - for_each_instdone_slice_subslice(m->i915, slice, subslice) + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, ee->instdone.row[slice][subslice]); @@ -440,41 +454,56 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - const struct drm_i915_error_request *erq, - const unsigned long epoch) + const struct i915_request_coredump *erq) { if (!erq->seqno) return; - err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, start %08x, head %08x, tail %08x\n", prefix, erq->pid, erq->context, erq->seqno, test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &erq->flags) ? "!" : "", test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &erq->flags) ? 
"+" : "", erq->sched_attr.priority, - jiffies_to_msecs(erq->jiffies - epoch), erq->start, erq->head, erq->tail); } static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, - const struct drm_i915_error_context *ctx) + const struct i915_gem_context_coredump *ctx) +{ + err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n", + header, ctx->comm, ctx->pid, ctx->sched_attr.priority, + ctx->guilty, ctx->active); +} + +static struct i915_vma_coredump * +__find_vma(struct i915_vma_coredump *vma, const char *name) +{ + while (vma) { + if (strcmp(vma->name, name) == 0) + return vma; + vma = vma->next; + } + + return NULL; +} + +static struct i915_vma_coredump * +find_batch(const struct intel_engine_coredump *ee) { - err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n", - header, ctx->comm, ctx->pid, ctx->hw_id, - ctx->sched_attr.priority, ctx->guilty, ctx->active); + return __find_vma(ee->vma, "batch"); } static void error_print_engine(struct drm_i915_error_state_buf *m, - const struct drm_i915_error_engine *ee, - const unsigned long epoch) + const struct intel_engine_coredump *ee) { + struct i915_vma_coredump *batch; int n; - err_printf(m, "%s command stream:\n", - engine_name(m->i915, ee->engine_id)); - err_printf(m, " IDLE?: %s\n", yesno(ee->idle)); + err_printf(m, "%s command stream:\n", ee->engine->name); + err_printf(m, " CCID: 0x%08x\n", ee->ccid); err_printf(m, " START: 0x%08x\n", ee->start); err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head); err_printf(m, " TAIL: 0x%08x [0x%08x, 0x%08x]\n", @@ -489,9 +518,10 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, error_print_instdone(m, ee); - if (ee->batchbuffer) { - u64 start = ee->batchbuffer->gtt_offset; - u64 end = start + ee->batchbuffer->gtt_size; + batch = find_batch(ee); + if (batch) { + u64 start = batch->gtt_offset; + u64 end = start + batch->gtt_size; err_printf(m, " batch: [0x%08x_%08x, 0x%08x_%08x]\n", upper_32_bits(start), lower_32_bits(start), @@ -523,17 +553,11 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, ee->vm_info.pp_dir_base); } } - err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); - err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); - err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", - jiffies_to_msecs(ee->hangcheck_timestamp - epoch), - ee->hangcheck_timestamp, - ee->hangcheck_timestamp == epoch ? "; epoch" : ""); err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { err_printf(m, " ELSP[%d]:", n); - error_print_request(m, " ", &ee->execlist[n], epoch); + error_print_request(m, " ", &ee->execlist[n]); } error_print_context(m, " Active context: ", &ee->context); @@ -548,35 +572,35 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) va_end(args); } -static void print_error_obj(struct drm_i915_error_state_buf *m, - struct intel_engine_cs *engine, - const char *name, - struct drm_i915_error_object *obj) +static void print_error_vma(struct drm_i915_error_state_buf *m, + const struct intel_engine_cs *engine, + const struct i915_vma_coredump *vma) { char out[ASCII85_BUFSZ]; int page; - if (!obj) + if (!vma) return; - if (name) { - err_printf(m, "%s --- %s = 0x%08x %08x\n", - engine ? engine->name : "global", name, - upper_32_bits(obj->gtt_offset), - lower_32_bits(obj->gtt_offset)); - } + err_printf(m, "%s --- %s = 0x%08x %08x\n", + engine ? 
engine->name : "global", vma->name, + upper_32_bits(vma->gtt_offset), + lower_32_bits(vma->gtt_offset)); + + if (vma->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K) + err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes); err_compression_marker(m); - for (page = 0; page < obj->page_count; page++) { + for (page = 0; page < vma->page_count; page++) { int i, len; len = PAGE_SIZE; - if (page == obj->page_count - 1) - len -= obj->unused; + if (page == vma->page_count - 1) + len -= vma->unused; len = ascii85_encode_len(len); for (i = 0; i < len; i++) - err_puts(m, ascii85_encode(obj->pages[page][i], out)); + err_puts(m, ascii85_encode(vma->pages[page][i], out)); } err_puts(m, "\n"); } @@ -588,9 +612,10 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m, { struct drm_printer p = i915_error_printer(m); - intel_device_info_dump_flags(info, &p); + intel_device_info_print_static(info, &p); + intel_device_info_print_runtime(runtime, &p); + intel_device_info_print_topology(&runtime->sseu, &p); intel_driver_caps_print(caps, &p); - intel_device_info_dump_topology(&runtime->sseu, &p); } static void err_print_params(struct drm_i915_error_state_buf *m, @@ -614,18 +639,13 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m, } static void err_print_uc(struct drm_i915_error_state_buf *m, - const struct i915_error_uc *error_uc) + const struct intel_uc_coredump *error_uc) { struct drm_printer p = i915_error_printer(m); - const struct i915_gpu_state *error = - container_of(error_uc, typeof(*error), uc); - - if (!error->device_info.has_guc) - return; intel_uc_fw_dump(&error_uc->guc_fw, &p); intel_uc_fw_dump(&error_uc->huc_fw, &p); - print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log); + print_error_vma(m, NULL, error_uc->guc_log); } static void err_free_sgl(struct scatterlist *sgl) @@ -645,18 +665,76 @@ static void err_free_sgl(struct scatterlist *sgl) } } +static void err_print_gt(struct drm_i915_error_state_buf *m, + struct intel_gt_coredump *gt) +{ + const struct intel_engine_coredump *ee; + int i; + + err_printf(m, "GT awake: %s\n", yesno(gt->awake)); + err_printf(m, "EIR: 0x%08x\n", gt->eir); + err_printf(m, "IER: 0x%08x\n", gt->ier); + for (i = 0; i < gt->ngtier; i++) + err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]); + err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er); + err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake); + err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr); + + for (i = 0; i < gt->nfence; i++) + err_printf(m, " fence[%d] = %08llx\n", i, gt->fence[i]); + + if (IS_GEN_RANGE(m->i915, 6, 11)) { + err_printf(m, "ERROR: 0x%08x\n", gt->error); + err_printf(m, "DONE_REG: 0x%08x\n", gt->done_reg); + } + + if (INTEL_GEN(m->i915) >= 8) + err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n", + gt->fault_data1, gt->fault_data0); + + if (IS_GEN(m->i915, 7)) + err_printf(m, "ERR_INT: 0x%08x\n", gt->err_int); + + if (IS_GEN_RANGE(m->i915, 8, 11)) + err_printf(m, "GTT_CACHE_EN: 0x%08x\n", gt->gtt_cache); + + if (IS_GEN(m->i915, 12)) + err_printf(m, "AUX_ERR_DBG: 0x%08x\n", gt->aux_err); + + if (INTEL_GEN(m->i915) >= 12) { + int i; + + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) + err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, + gt->sfc_done[i]); + + err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done); + } + + for (ee = gt->engine; ee; ee = ee->next) { + const struct i915_vma_coredump *vma; + + error_print_engine(m, ee); + for (vma = ee->vma; vma; vma = vma->next) + print_error_vma(m, ee->engine, vma); + } + + if (gt->uc) + err_print_uc(m, gt->uc); +} + static void 
__err_print_to_sgl(struct drm_i915_error_state_buf *m, - struct i915_gpu_state *error) + struct i915_gpu_coredump *error) { - struct drm_i915_error_object *obj; + const struct intel_engine_coredump *ee; struct timespec64 ts; - int i, j; if (*error->error_msg) err_printf(m, "%s\n", error->error_msg); err_printf(m, "Kernel: %s %s\n", init_utsname()->release, init_utsname()->machine); + err_printf(m, "Driver: %s\n", DRIVER_DATE); ts = ktime_to_timespec64(error->time); err_printf(m, "Time: %lld s %ld us\n", (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); @@ -666,21 +744,15 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, ts = ktime_to_timespec64(error->uptime); err_printf(m, "Uptime: %lld s %ld us\n", (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); - err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); - err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", - error->capture, - jiffies_to_msecs(jiffies - error->capture), - jiffies_to_msecs(error->capture - error->epoch)); - - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - if (!error->engine[i].context.pid) - continue; + err_printf(m, "Capture: %lu jiffies; %d ms ago\n", + error->capture, jiffies_to_msecs(jiffies - error->capture)); + for (ee = error->gt ? error->gt->engine : NULL; ee; ee = ee->next) err_printf(m, "Active process (on ring %s): %s [%d]\n", - engine_name(m->i915, i), - error->engine[i].context.comm, - error->engine[i].context.pid); - } + ee->engine->name, + ee->context.comm, + ee->context.pid); + err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform)); @@ -701,114 +773,11 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, CSR_VERSION_MINOR(csr->version)); } - err_printf(m, "GT awake: %s\n", yesno(error->awake)); err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock)); err_printf(m, "PM suspended: %s\n", yesno(error->suspended)); - err_printf(m, "EIR: 0x%08x\n", error->eir); - err_printf(m, "IER: 0x%08x\n", error->ier); - for (i = 0; i < error->ngtier; i++) - err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]); - err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); - err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); - err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); - err_printf(m, "CCID: 0x%08x\n", error->ccid); - - for (i = 0; i < error->nfence; i++) - err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); - - if (INTEL_GEN(m->i915) >= 6) { - err_printf(m, "ERROR: 0x%08x\n", error->error); - - if (INTEL_GEN(m->i915) >= 8) - err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n", - error->fault_data1, error->fault_data0); - - err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg); - } - - if (IS_GEN(m->i915, 7)) - err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); - - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - if (error->engine[i].engine_id != -1) - error_print_engine(m, &error->engine[i], error->epoch); - } - - for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) { - char buf[128]; - int len, first = 1; - - if (!error->active_vm[i]) - break; - - len = scnprintf(buf, sizeof(buf), "Active ("); - for (j = 0; j < ARRAY_SIZE(error->engine); j++) { - if (error->engine[j].vm != error->active_vm[i]) - continue; - - len += scnprintf(buf + len, sizeof(buf), "%s%s", - first ? 
"" : ", ", - m->i915->engine[j]->name); - first = 0; - } - scnprintf(buf + len, sizeof(buf), ")"); - print_error_buffers(m, buf, - error->active_bo[i], - error->active_bo_count[i]); - } - - print_error_buffers(m, "Pinned (global)", - error->pinned_bo, - error->pinned_bo_count); - - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - const struct drm_i915_error_engine *ee = &error->engine[i]; - - obj = ee->batchbuffer; - if (obj) { - err_puts(m, m->i915->engine[i]->name); - if (ee->context.pid) - err_printf(m, " (submitted by %s [%d])", - ee->context.comm, - ee->context.pid); - err_printf(m, " --- gtt_offset = 0x%08x %08x\n", - upper_32_bits(obj->gtt_offset), - lower_32_bits(obj->gtt_offset)); - print_error_obj(m, m->i915->engine[i], NULL, obj); - } - - for (j = 0; j < ee->user_bo_count; j++) - print_error_obj(m, m->i915->engine[i], - "user", ee->user_bo[j]); - - if (ee->num_requests) { - err_printf(m, "%s --- %d requests\n", - m->i915->engine[i]->name, - ee->num_requests); - for (j = 0; j < ee->num_requests; j++) - error_print_request(m, " ", - &ee->requests[j], - error->epoch); - } - - print_error_obj(m, m->i915->engine[i], - "ringbuffer", ee->ringbuffer); - - print_error_obj(m, m->i915->engine[i], - "HW Status", ee->hws_page); - print_error_obj(m, m->i915->engine[i], - "HW context", ee->ctx); - - print_error_obj(m, m->i915->engine[i], - "WA context", ee->wa_ctx); - - print_error_obj(m, m->i915->engine[i], - "WA batchbuffer", ee->wa_batchbuffer); - - print_error_obj(m, m->i915->engine[i], - "NULL context", ee->default_state); - } + if (error->gt) + err_print_gt(m, error->gt); if (error->overlay) intel_overlay_print_error_state(m, error->overlay); @@ -819,10 +788,9 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_print_capabilities(m, &error->device_info, &error->runtime_info, &error->driver_caps); err_print_params(m, &error->params); - err_print_uc(m, &error->uc); } -static int err_print_to_sgl(struct i915_gpu_state *error) +static int err_print_to_sgl(struct i915_gpu_coredump *error) { struct drm_i915_error_state_buf m; @@ -859,8 +827,8 @@ static int err_print_to_sgl(struct i915_gpu_state *error) return 0; } -ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error, - char *buf, loff_t off, size_t rem) +ssize_t i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error, + char *buf, loff_t off, size_t rem) { struct scatterlist *sg; size_t count; @@ -923,249 +891,224 @@ ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error, return count; } -static void i915_error_object_free(struct drm_i915_error_object *obj) +static void i915_vma_coredump_free(struct i915_vma_coredump *vma) { - int page; - - if (obj == NULL) - return; + while (vma) { + struct i915_vma_coredump *next = vma->next; + int page; - for (page = 0; page < obj->page_count; page++) - free_page((unsigned long)obj->pages[page]); + for (page = 0; page < vma->page_count; page++) + free_page((unsigned long)vma->pages[page]); - kfree(obj); + kfree(vma); + vma = next; + } } - -static void cleanup_params(struct i915_gpu_state *error) +static void cleanup_params(struct i915_gpu_coredump *error) { i915_params_free(&error->params); } -static void cleanup_uc_state(struct i915_gpu_state *error) +static void cleanup_uc(struct intel_uc_coredump *uc) { - struct i915_error_uc *error_uc = &error->uc; + kfree(uc->guc_fw.path); + kfree(uc->huc_fw.path); + i915_vma_coredump_free(uc->guc_log); - kfree(error_uc->guc_fw.path); - kfree(error_uc->huc_fw.path); - 
i915_error_object_free(error_uc->guc_log); + kfree(uc); } -void __i915_gpu_state_free(struct kref *error_ref) +static void cleanup_gt(struct intel_gt_coredump *gt) { - struct i915_gpu_state *error = - container_of(error_ref, typeof(*error), ref); - long i, j; + while (gt->engine) { + struct intel_engine_coredump *ee = gt->engine; - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - struct drm_i915_error_engine *ee = &error->engine[i]; + gt->engine = ee->next; - for (j = 0; j < ee->user_bo_count; j++) - i915_error_object_free(ee->user_bo[j]); - kfree(ee->user_bo); + i915_vma_coredump_free(ee->vma); + kfree(ee); + } - i915_error_object_free(ee->batchbuffer); - i915_error_object_free(ee->wa_batchbuffer); - i915_error_object_free(ee->ringbuffer); - i915_error_object_free(ee->hws_page); - i915_error_object_free(ee->ctx); - i915_error_object_free(ee->wa_ctx); + if (gt->uc) + cleanup_uc(gt->uc); - kfree(ee->requests); - } + kfree(gt); +} - for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) - kfree(error->active_bo[i]); - kfree(error->pinned_bo); +void __i915_gpu_coredump_free(struct kref *error_ref) +{ + struct i915_gpu_coredump *error = + container_of(error_ref, typeof(*error), ref); + + while (error->gt) { + struct intel_gt_coredump *gt = error->gt; + + error->gt = gt->next; + cleanup_gt(gt); + } kfree(error->overlay); kfree(error->display); cleanup_params(error); - cleanup_uc_state(error); err_free_sgl(error->sgl); kfree(error); } -static struct drm_i915_error_object * -i915_error_object_create(struct drm_i915_private *i915, - struct i915_vma *vma) +static struct i915_vma_coredump * +i915_vma_coredump_create(const struct intel_gt *gt, + const struct i915_vma *vma, + const char *name, + struct i915_vma_compress *compress) { - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = gt->ggtt; const u64 slot = ggtt->error_capture.start; - struct drm_i915_error_object *dst; - struct compress compress; + struct i915_vma_coredump *dst; unsigned long num_pages; struct sgt_iter iter; - dma_addr_t dma; int ret; - if (!vma || !vma->pages) + might_sleep(); + + if (!vma || !vma->pages || !compress) return NULL; num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT; num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */ - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), - GFP_ATOMIC | __GFP_NOWARN); + dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ALLOW_FAIL); if (!dst) return NULL; + if (!compress_start(compress)) { + kfree(dst); + return NULL; + } + + strcpy(dst->name, name); + dst->next = NULL; + dst->gtt_offset = vma->node.start; dst->gtt_size = vma->node.size; + dst->gtt_page_sizes = vma->page_sizes.gtt; dst->num_pages = num_pages; dst->page_count = 0; dst->unused = 0; - if (!compress_init(&compress)) { - kfree(dst); - return NULL; - } - ret = -EINVAL; - for_each_sgt_dma(dma, iter, vma->pages) { + if (drm_mm_node_allocated(&ggtt->error_capture)) { void __iomem *s; + dma_addr_t dma; + + for_each_sgt_daddr(dma, iter, vma->pages) { + ggtt->vm.insert_page(&ggtt->vm, dma, slot, + I915_CACHE_NONE, 0); + mb(); + + s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE); + ret = compress_page(compress, + (void __force *)s, dst, + true); + io_mapping_unmap(s); + if (ret) + break; + } + } else if (i915_gem_object_is_lmem(vma->obj)) { + struct intel_memory_region *mem = vma->obj->mm.region; + dma_addr_t dma; + + for_each_sgt_daddr(dma, iter, vma->pages) { + void __iomem *s; + + s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE); + ret = 
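/*
 * Illustrative sketch, not part of this patch: every coredump chain (vma
 * list, engine list, gt list) is torn down iteratively, detaching the head
 * before freeing it so ->next is never read after the node is gone, exactly
 * as i915_vma_coredump_free() and cleanup_gt() above do. A minimal
 * standalone model with simplified types:
 */
#include <stdlib.h>

struct dump_node {
	struct dump_node *next;
};

static void dump_list_free(struct dump_node *node)
{
	while (node) {
		struct dump_node *next = node->next;	/* save before freeing */

		free(node);				/* kfree() in the driver */
		node = next;
	}
}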
compress_page(compress, + (void __force *)s, dst, + true); + io_mapping_unmap(s); + if (ret) + break; + } + } else { + struct page *page; - ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); + for_each_sgt_page(page, iter, vma->pages) { + void *s; - s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); - ret = compress_page(&compress, (void __force *)s, dst); - io_mapping_unmap_atomic(s); - if (ret) - break; + drm_clflush_pages(&page, 1); + + s = kmap(page); + ret = compress_page(compress, s, dst, false); + kunmap(page); + + drm_clflush_pages(&page, 1); + + if (ret) + break; + } } - if (ret || compress_flush(&compress, dst)) { + if (ret || compress_flush(compress, dst)) { while (dst->page_count--) - free_page((unsigned long)dst->pages[dst->page_count]); + pool_free(&compress->pool, dst->pages[dst->page_count]); kfree(dst); dst = NULL; } + compress_finish(compress); - compress_fini(&compress, dst); return dst; } -static void capture_bo(struct drm_i915_error_buffer *err, - struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - - err->size = obj->base.size; - err->name = obj->base.name; - - err->gtt_offset = vma->node.start; - err->read_domains = obj->read_domains; - err->write_domain = obj->write_domain; - err->fence_reg = vma->fence ? vma->fence->id : -1; - err->tiling = i915_gem_object_get_tiling(obj); - err->dirty = obj->mm.dirty; - err->purgeable = obj->mm.madv != I915_MADV_WILLNEED; - err->userptr = obj->userptr.mm != NULL; - err->cache_level = obj->cache_level; -} - -static u32 capture_error_bo(struct drm_i915_error_buffer *err, - int count, struct list_head *head, - unsigned int flags) -#define ACTIVE_ONLY BIT(0) -#define PINNED_ONLY BIT(1) -{ - struct i915_vma *vma; - int i = 0; - - list_for_each_entry(vma, head, vm_link) { - if (!vma->obj) - continue; - - if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma)) - continue; - - if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma)) - continue; - - capture_bo(err++, vma); - if (++i == count) - break; - } - - return i; -} - -/* - * Generate a semi-unique error code. The code is not meant to have meaning, The - * code's only purpose is to try to prevent false duplicated bug reports by - * grossly estimating a GPU error state. - * - * TODO Ideally, hashing the batchbuffer would be a very nice way to determine - * the hang if we could strip the GTT offset information from it. - * - * It's only a small step better than a random number in its current form. - */ -static u32 i915_error_generate_code(struct i915_gpu_state *error, - intel_engine_mask_t engine_mask) +static void gt_record_fences(struct intel_gt_coredump *gt) { - /* - * IPEHR would be an ideal way to detect errors, as it's the gross - * measure of "the command that hung." However, has some very common - * synchronization commands which almost always appear in the case - * strictly a client bug. Use instdone to differentiate those some. 
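/*
 * Illustrative sketch, not part of this patch: i915_vma_coredump_create()
 * above picks one of three ways of reading the object's pages back before
 * handing them to compress_page():
 *   1. if the reserved GGTT error-capture node is allocated, each dma page
 *      is bound at that fixed slot and read through a WC io-mapping;
 *   2. otherwise, local-memory (lmem) objects are read through their memory
 *      region's io-mapping;
 *   3. otherwise, ordinary system pages are clflushed and kmap()ed.
 * A simplified selector for that decision (names are illustrative only):
 */
enum capture_path {
	CAPTURE_VIA_GGTT_SLOT,	/* bounce through the reserved error-capture PTE */
	CAPTURE_VIA_LMEM_IOMAP,	/* device memory is only CPU-visible via its iomap */
	CAPTURE_VIA_KMAP,	/* plain system pages */
};

static enum capture_path
pick_capture_path(int have_error_capture_node, int object_is_lmem)
{
	if (have_error_capture_node)
		return CAPTURE_VIA_GGTT_SLOT;
	if (object_is_lmem)
		return CAPTURE_VIA_LMEM_IOMAP;
	return CAPTURE_VIA_KMAP;
}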
- */ - if (engine_mask) { - struct drm_i915_error_engine *ee = - &error->engine[ffs(engine_mask)]; - - return ee->ipehr ^ ee->instdone.instdone; - } - - return 0; -} - -static void gem_record_fences(struct i915_gpu_state *error) -{ - struct drm_i915_private *dev_priv = error->i915; - struct intel_uncore *uncore = &dev_priv->uncore; + struct i915_ggtt *ggtt = gt->_gt->ggtt; + struct intel_uncore *uncore = gt->_gt->uncore; int i; - if (INTEL_GEN(dev_priv) >= 6) { - for (i = 0; i < dev_priv->ggtt.num_fences; i++) - error->fence[i] = + if (INTEL_GEN(uncore->i915) >= 6) { + for (i = 0; i < ggtt->num_fences; i++) + gt->fence[i] = intel_uncore_read64(uncore, FENCE_REG_GEN6_LO(i)); - } else if (INTEL_GEN(dev_priv) >= 4) { - for (i = 0; i < dev_priv->ggtt.num_fences; i++) - error->fence[i] = + } else if (INTEL_GEN(uncore->i915) >= 4) { + for (i = 0; i < ggtt->num_fences; i++) + gt->fence[i] = intel_uncore_read64(uncore, FENCE_REG_965_LO(i)); } else { - for (i = 0; i < dev_priv->ggtt.num_fences; i++) - error->fence[i] = + for (i = 0; i < ggtt->num_fences; i++) + gt->fence[i] = intel_uncore_read(uncore, FENCE_REG(i)); } - error->nfence = i; + gt->nfence = i; } -static void error_record_engine_registers(struct i915_gpu_state *error, - struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) +static void engine_record_registers(struct intel_engine_coredump *ee) { - struct drm_i915_private *dev_priv = engine->i915; + const struct intel_engine_cs *engine = ee->engine; + struct drm_i915_private *i915 = engine->i915; - if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(i915) >= 6) { ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL); - if (INTEL_GEN(dev_priv) >= 8) - ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG); + + if (INTEL_GEN(i915) >= 12) + ee->fault_reg = intel_uncore_read(engine->uncore, + GEN12_RING_FAULT_REG); + else if (INTEL_GEN(i915) >= 8) + ee->fault_reg = intel_uncore_read(engine->uncore, + GEN8_RING_FAULT_REG); else ee->fault_reg = GEN6_RING_FAULT_REG_READ(engine); } - if (INTEL_GEN(dev_priv) >= 4) { + if (INTEL_GEN(i915) >= 4) { ee->faddr = ENGINE_READ(engine, RING_DMA_FADD); ee->ipeir = ENGINE_READ(engine, RING_IPEIR); ee->ipehr = ENGINE_READ(engine, RING_IPEHR); ee->instps = ENGINE_READ(engine, RING_INSTPS); ee->bbaddr = ENGINE_READ(engine, RING_BBADDR); - if (INTEL_GEN(dev_priv) >= 8) { + ee->ccid = ENGINE_READ(engine, CCID); + if (INTEL_GEN(i915) >= 8) { ee->faddr |= (u64)ENGINE_READ(engine, RING_DMA_FADD_UDW) << 32; ee->bbaddr |= (u64)ENGINE_READ(engine, RING_BBADDR_UDW) << 32; } @@ -1184,13 +1127,13 @@ static void error_record_engine_registers(struct i915_gpu_state *error, ee->head = ENGINE_READ(engine, RING_HEAD); ee->tail = ENGINE_READ(engine, RING_TAIL); ee->ctl = ENGINE_READ(engine, RING_CTL); - if (INTEL_GEN(dev_priv) > 2) + if (INTEL_GEN(i915) > 2) ee->mode = ENGINE_READ(engine, RING_MI_MODE); - if (!HWS_NEEDS_PHYSICAL(dev_priv)) { + if (!HWS_NEEDS_PHYSICAL(i915)) { i915_reg_t mmio; - if (IS_GEN(dev_priv, 7)) { + if (IS_GEN(i915, 7)) { switch (engine->id) { default: MISSING_CASE(engine->id); @@ -1215,184 +1158,155 @@ static void error_record_engine_registers(struct i915_gpu_state *error, mmio = RING_HWS_PGA(engine->mmio_base); } - ee->hws = I915_READ(mmio); + ee->hws = intel_uncore_read(engine->uncore, mmio); } - ee->idle = intel_engine_is_idle(engine); - if (!ee->idle) - ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; - ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, - engine); + ee->reset_count = i915_reset_engine_count(&i915->gpu_error, 
engine); - if (HAS_PPGTT(dev_priv)) { + if (HAS_PPGTT(i915)) { int i; ee->vm_info.gfx_mode = ENGINE_READ(engine, RING_MODE_GEN7); - if (IS_GEN(dev_priv, 6)) { + if (IS_GEN(i915, 6)) { ee->vm_info.pp_dir_base = ENGINE_READ(engine, RING_PP_DIR_BASE_READ); - } else if (IS_GEN(dev_priv, 7)) { + } else if (IS_GEN(i915, 7)) { ee->vm_info.pp_dir_base = ENGINE_READ(engine, RING_PP_DIR_BASE); - } else if (INTEL_GEN(dev_priv) >= 8) { + } else if (INTEL_GEN(i915) >= 8) { u32 base = engine->mmio_base; for (i = 0; i < 4; i++) { ee->vm_info.pdp[i] = - I915_READ(GEN8_RING_PDP_UDW(base, i)); + intel_uncore_read(engine->uncore, + GEN8_RING_PDP_UDW(base, i)); ee->vm_info.pdp[i] <<= 32; ee->vm_info.pdp[i] |= - I915_READ(GEN8_RING_PDP_LDW(base, i)); + intel_uncore_read(engine->uncore, + GEN8_RING_PDP_LDW(base, i)); } } } } -static void record_request(struct i915_request *request, - struct drm_i915_error_request *erq) +static void record_request(const struct i915_request *request, + struct i915_request_coredump *erq) { - struct i915_gem_context *ctx = request->gem_context; + const struct i915_gem_context *ctx; erq->flags = request->fence.flags; erq->context = request->fence.context; erq->seqno = request->fence.seqno; erq->sched_attr = request->sched.attr; - erq->jiffies = request->emitted_jiffies; erq->start = i915_ggtt_offset(request->ring->vma); erq->head = request->head; erq->tail = request->tail; + erq->pid = 0; rcu_read_lock(); - erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0; + ctx = rcu_dereference(request->context->gem_context); + if (ctx) + erq->pid = pid_nr(ctx->pid); rcu_read_unlock(); } -static void engine_record_requests(struct intel_engine_cs *engine, - struct i915_request *first, - struct drm_i915_error_engine *ee) +static void engine_record_execlists(struct intel_engine_coredump *ee) { - struct i915_request *request; - int count; + const struct intel_engine_execlists * const el = &ee->engine->execlists; + struct i915_request * const *port = el->active; + unsigned int n = 0; - count = 0; - request = first; - list_for_each_entry_from(request, &engine->active.requests, sched.link) - count++; - if (!count) - return; - - ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC); - if (!ee->requests) - return; - - ee->num_requests = count; - - count = 0; - request = first; - list_for_each_entry_from(request, - &engine->active.requests, sched.link) { - if (count >= ee->num_requests) { - /* - * If the ring request list was changed in - * between the point where the error request - * list was created and dimensioned and this - * point then just exit early to avoid crashes. - * - * We don't need to communicate that the - * request list changed state during error - * state capture and that the error state is - * slightly incorrect as a consequence since we - * are typically only interested in the request - * list state at the point of error state - * capture, not in any changes happening during - * the capture. 
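/*
 * Illustrative sketch, not part of this patch: record_request() above no
 * longer dereferences a GEM context owned by the request; the context is
 * reached through an RCU-protected pointer and may vanish at any time, so
 * the pid lookup follows the usual rcu_read_lock()/rcu_dereference()
 * pattern. Just that lookup in isolation, assuming the i915 internal
 * headers for the request/context types:
 */
static pid_t request_owner_pid(const struct i915_request *rq)
{
	const struct i915_gem_context *ctx;
	pid_t pid = 0;

	rcu_read_lock();
	ctx = rcu_dereference(rq->context->gem_context);
	if (ctx)
		pid = pid_nr(ctx->pid);		/* 0 if the owner is already gone */
	rcu_read_unlock();

	return pid;
}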
- */ - break; - } + while (*port) + record_request(*port++, &ee->execlist[n++]); - record_request(request, &ee->requests[count++]); - } - ee->num_requests = count; + ee->num_ports = n; } -static void error_record_engine_execlists(struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) +static bool record_context(struct i915_gem_context_coredump *e, + const struct i915_request *rq) { - const struct intel_engine_execlists * const execlists = &engine->execlists; - unsigned int n; - - for (n = 0; n < execlists_num_ports(execlists); n++) { - struct i915_request *rq = port_request(&execlists->port[n]); + struct i915_gem_context *ctx; + struct task_struct *task; + bool capture; - if (!rq) - break; + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); + if (!ctx) + return false; - record_request(rq, &ee->execlist[n]); + rcu_read_lock(); + task = pid_task(ctx->pid, PIDTYPE_PID); + if (task) { + strcpy(e->comm, task->comm); + e->pid = task->pid; } + rcu_read_unlock(); - ee->num_ports = n; + e->sched_attr = ctx->sched; + e->guilty = atomic_read(&ctx->guilty_count); + e->active = atomic_read(&ctx->active_count); + + capture = i915_gem_context_no_error_capture(ctx); + + i915_gem_context_put(ctx); + return capture; } -static void record_context(struct drm_i915_error_context *e, - struct i915_gem_context *ctx) +struct intel_engine_capture_vma { + struct intel_engine_capture_vma *next; + struct i915_vma *vma; + char name[16]; +}; + +static struct intel_engine_capture_vma * +capture_vma(struct intel_engine_capture_vma *next, + struct i915_vma *vma, + const char *name, + gfp_t gfp) { - if (ctx->pid) { - struct task_struct *task; + struct intel_engine_capture_vma *c; - rcu_read_lock(); - task = pid_task(ctx->pid, PIDTYPE_PID); - if (task) { - strcpy(e->comm, task->comm); - e->pid = task->pid; - } - rcu_read_unlock(); + if (!vma) + return next; + + c = kmalloc(sizeof(*c), gfp); + if (!c) + return next; + + if (!i915_active_acquire_if_busy(&vma->active)) { + kfree(c); + return next; } - e->hw_id = ctx->hw_id; - e->sched_attr = ctx->sched; - e->guilty = atomic_read(&ctx->guilty_count); - e->active = atomic_read(&ctx->active_count); + strcpy(c->name, name); + c->vma = i915_vma_get(vma); + + c->next = next; + return c; } -static void request_record_user_bo(struct i915_request *request, - struct drm_i915_error_engine *ee) +static struct intel_engine_capture_vma * +capture_user(struct intel_engine_capture_vma *capture, + const struct i915_request *rq, + gfp_t gfp) { struct i915_capture_list *c; - struct drm_i915_error_object **bo; - long count, max; - max = 0; - for (c = request->capture_list; c; c = c->next) - max++; - if (!max) - return; - - bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC); - if (!bo) { - /* If we can't capture everything, try to capture something. 
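/*
 * Illustrative sketch, not part of this patch: capture_vma() above is
 * written in a "prepend and return the new head" style so callers can chain
 * it, and on any failure (no vma, allocation failure, vma no longer active)
 * it simply returns the old head -- the capture degrades gracefully instead
 * of aborting. A minimal model of that idiom with simplified types:
 */
struct capture_node {
	struct capture_node *next;
	void *payload;
};

static struct capture_node *
capture_prepend(struct capture_node *head, void *payload,
		struct capture_node *(*alloc_node)(void))
{
	struct capture_node *c;

	if (!payload)
		return head;		/* nothing to capture */

	c = alloc_node();
	if (!c)
		return head;		/* keep whatever was captured so far */

	c->payload = payload;
	c->next = head;
	return c;			/* new head of the chain */
}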
*/ - max = min_t(long, max, PAGE_SIZE / sizeof(*bo)); - bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC); - } - if (!bo) - return; + for (c = rq->capture_list; c; c = c->next) + capture = capture_vma(capture, c->vma, "user", gfp); - count = 0; - for (c = request->capture_list; c; c = c->next) { - bo[count] = i915_error_object_create(request->i915, c->vma); - if (!bo[count]) - break; - if (++count == max) - break; - } - - ee->user_bo = bo; - ee->user_bo_count = count; + return capture; } -static struct drm_i915_error_object * -capture_object(struct drm_i915_private *dev_priv, - struct drm_i915_gem_object *obj) +static struct i915_vma_coredump * +capture_object(const struct intel_gt *gt, + struct drm_i915_gem_object *obj, + const char *name, + struct i915_vma_compress *compress) { if (obj && i915_gem_object_has_pages(obj)) { struct i915_vma fake = { @@ -1402,190 +1316,221 @@ capture_object(struct drm_i915_private *dev_priv, .obj = obj, }; - return i915_error_object_create(dev_priv, &fake); + return i915_vma_coredump_create(gt, &fake, name, compress); } else { return NULL; } } -static void gem_record_rings(struct i915_gpu_state *error) +static void add_vma(struct intel_engine_coredump *ee, + struct i915_vma_coredump *vma) { - struct drm_i915_private *i915 = error->i915; - struct i915_ggtt *ggtt = &i915->ggtt; - int i; + if (vma) { + vma->next = ee->vma; + ee->vma = vma; + } +} - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct intel_engine_cs *engine = i915->engine[i]; - struct drm_i915_error_engine *ee = &error->engine[i]; - struct i915_request *request; - unsigned long flags; +struct intel_engine_coredump * +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) +{ + struct intel_engine_coredump *ee; - ee->engine_id = -1; + ee = kzalloc(sizeof(*ee), gfp); + if (!ee) + return NULL; - if (!engine) - continue; + ee->engine = engine; - ee->engine_id = i; + engine_record_registers(ee); + engine_record_execlists(ee); - error_record_engine_registers(error, engine, ee); - error_record_engine_execlists(engine, ee); + return ee; +} - spin_lock_irqsave(&engine->active.lock, flags); - request = intel_engine_find_active_request(engine); - if (request) { - struct i915_gem_context *ctx = request->gem_context; - struct intel_ring *ring = request->ring; +struct intel_engine_capture_vma * +intel_engine_coredump_add_request(struct intel_engine_coredump *ee, + struct i915_request *rq, + gfp_t gfp) +{ + struct intel_engine_capture_vma *vma = NULL; - ee->vm = ctx->vm ?: &ggtt->vm; + ee->simulated |= record_context(&ee->context, rq); + if (ee->simulated) + return NULL; - record_context(&ee->context, ctx); + /* + * We need to copy these to an anonymous buffer + * as the simplest method to avoid being overwritten + * by userspace. + */ + vma = capture_vma(vma, rq->batch, "batch", gfp); + vma = capture_user(vma, rq, gfp); + vma = capture_vma(vma, rq->ring->vma, "ring", gfp); + vma = capture_vma(vma, rq->context->state, "HW context", gfp); - /* We need to copy these to an anonymous buffer - * as the simplest method to avoid being overwritten - * by userspace. 
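/*
 * Illustrative sketch, not part of this patch: engine capture is split into
 * three steps so only the cheap part has to run under the engine lock --
 * intel_engine_coredump_alloc() snapshots the registers,
 * intel_engine_coredump_add_request() records the context and takes
 * references on the interesting vma, and intel_engine_coredump_add_vma()
 * later copies and compresses the referenced pages. Usage mirroring
 * capture_engine() further below, wrapped in a hypothetical caller:
 */
static struct intel_engine_coredump *
capture_one_engine(struct intel_engine_cs *engine,
		   struct i915_request *rq,
		   struct i915_vma_compress *compress)
{
	struct intel_engine_capture_vma *vmas;
	struct intel_engine_coredump *ee;

	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
	if (!ee)
		return NULL;

	/* cheap: context info + vma references, fits under a spinlock */
	vmas = intel_engine_coredump_add_request(ee, rq, GFP_ATOMIC);
	if (!vmas) {
		kfree(ee);
		return NULL;
	}

	/* expensive: copy and compress the referenced pages, may sleep */
	intel_engine_coredump_add_vma(ee, vmas, compress);

	return ee;
}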
- */ - ee->batchbuffer = - i915_error_object_create(i915, request->batch); + ee->rq_head = rq->head; + ee->rq_post = rq->postfix; + ee->rq_tail = rq->tail; - if (HAS_BROKEN_CS_TLB(i915)) - ee->wa_batchbuffer = - i915_error_object_create(i915, - i915->gt.scratch); - request_record_user_bo(request, ee); + return vma; +} - ee->ctx = - i915_error_object_create(i915, - request->hw_context->state); +void +intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, + struct intel_engine_capture_vma *capture, + struct i915_vma_compress *compress) +{ + const struct intel_engine_cs *engine = ee->engine; - error->simulated |= - i915_gem_context_no_error_capture(ctx); + while (capture) { + struct intel_engine_capture_vma *this = capture; + struct i915_vma *vma = this->vma; - ee->rq_head = request->head; - ee->rq_post = request->postfix; - ee->rq_tail = request->tail; + add_vma(ee, + i915_vma_coredump_create(engine->gt, + vma, this->name, + compress)); - ee->cpu_ring_head = ring->head; - ee->cpu_ring_tail = ring->tail; - ee->ringbuffer = - i915_error_object_create(i915, ring->vma); + i915_active_release(&vma->active); + i915_vma_put(vma); - engine_record_requests(engine, request, ee); - } - spin_unlock_irqrestore(&engine->active.lock, flags); + capture = this->next; + kfree(this); + } - ee->hws_page = - i915_error_object_create(i915, - engine->status_page.vma); + add_vma(ee, + i915_vma_coredump_create(engine->gt, + engine->status_page.vma, + "HW Status", + compress)); - ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma); + add_vma(ee, + i915_vma_coredump_create(engine->gt, + engine->wa_ctx.vma, + "WA context", + compress)); - ee->default_state = capture_object(i915, engine->default_state); - } + add_vma(ee, + capture_object(engine->gt, + engine->default_state, + "NULL context", + compress)); } -static void gem_capture_vm(struct i915_gpu_state *error, - struct i915_address_space *vm, - int idx) +static struct intel_engine_coredump * +capture_engine(struct intel_engine_cs *engine, + struct i915_vma_compress *compress) { - struct drm_i915_error_buffer *active_bo; - struct i915_vma *vma; - int count; + struct intel_engine_capture_vma *capture = NULL; + struct intel_engine_coredump *ee; + struct i915_request *rq; + unsigned long flags; - count = 0; - list_for_each_entry(vma, &vm->bound_list, vm_link) - if (i915_vma_is_active(vma)) - count++; - - active_bo = NULL; - if (count) - active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); - if (active_bo) - count = capture_error_bo(active_bo, - count, &vm->bound_list, - ACTIVE_ONLY); - else - count = 0; + ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); + if (!ee) + return NULL; - error->active_vm[idx] = vm; - error->active_bo[idx] = active_bo; - error->active_bo_count[idx] = count; + spin_lock_irqsave(&engine->active.lock, flags); + rq = intel_engine_find_active_request(engine); + if (rq) + capture = intel_engine_coredump_add_request(ee, rq, + ATOMIC_MAYFAIL); + spin_unlock_irqrestore(&engine->active.lock, flags); + if (!capture) { + kfree(ee); + return NULL; + } + + intel_engine_coredump_add_vma(ee, capture, compress); + + return ee; } -static void capture_active_buffers(struct i915_gpu_state *error) +static void +gt_record_engines(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress) { - int cnt = 0, i, j; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt->_gt, id) { + struct intel_engine_coredump *ee; - BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo)); - 
BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm)); - BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count)); + /* Refill our page pool before entering atomic section */ + pool_refill(&compress->pool, ALLOW_FAIL); - /* Scan each engine looking for unique active contexts/vm */ - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - struct drm_i915_error_engine *ee = &error->engine[i]; - bool found; + ee = capture_engine(engine, compress); + if (!ee) + continue; - if (!ee->vm) + gt->simulated |= ee->simulated; + if (ee->simulated) { + kfree(ee); continue; + } - found = false; - for (j = 0; j < i && !found; j++) - found = error->engine[j].vm == ee->vm; - if (!found) - gem_capture_vm(error, ee->vm, cnt++); + ee->next = gt->engine; + gt->engine = ee; } } -static void capture_pinned_buffers(struct i915_gpu_state *error) +static struct intel_uc_coredump * +gt_record_uc(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress) { - struct i915_address_space *vm = &error->i915->ggtt.vm; - struct drm_i915_error_buffer *bo; - struct i915_vma *vma; - int count; + const struct intel_uc *uc = >->_gt->uc; + struct intel_uc_coredump *error_uc; - count = 0; - list_for_each_entry(vma, &vm->bound_list, vm_link) - count++; + error_uc = kzalloc(sizeof(*error_uc), ALLOW_FAIL); + if (!error_uc) + return NULL; - bo = NULL; - if (count) - bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); - if (!bo) - return; + memcpy(&error_uc->guc_fw, &uc->guc.fw, sizeof(uc->guc.fw)); + memcpy(&error_uc->huc_fw, &uc->huc.fw, sizeof(uc->huc.fw)); - error->pinned_bo_count = - capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY); - error->pinned_bo = bo; + /* Non-default firmware paths will be specified by the modparam. + * As modparams are generally accesible from the userspace make + * explicit copies of the firmware paths. + */ + error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL); + error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL); + error_uc->guc_log = + i915_vma_coredump_create(gt->_gt, + uc->guc.log.vma, "GuC log buffer", + compress); + + return error_uc; } -static void capture_uc_state(struct i915_gpu_state *error) +static void gt_capture_prepare(struct intel_gt_coredump *gt) { - struct drm_i915_private *i915 = error->i915; - struct i915_error_uc *error_uc = &error->uc; + struct i915_ggtt *ggtt = gt->_gt->ggtt; - /* Capturing uC state won't be useful if there is no GuC */ - if (!error->device_info.has_guc) - return; + mutex_lock(&ggtt->error_mutex); +} - error_uc->guc_fw = i915->guc.fw; - error_uc->huc_fw = i915->huc.fw; +static void gt_capture_finish(struct intel_gt_coredump *gt) +{ + struct i915_ggtt *ggtt = gt->_gt->ggtt; - /* Non-default firmware paths will be specified by the modparam. - * As modparams are generally accesible from the userspace make - * explicit copies of the firmware paths. - */ - error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC); - error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC); - error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma); + if (drm_mm_node_allocated(&ggtt->error_capture)) + ggtt->vm.clear_range(&ggtt->vm, + ggtt->error_capture.start, + PAGE_SIZE); + + mutex_unlock(&ggtt->error_mutex); } /* Capture all registers which don't fit into another category. 
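/*
 * Illustrative sketch, not part of this patch: the page copies are
 * bracketed by i915_vma_capture_prepare() / i915_vma_capture_finish().
 * prepare() sets up the compressor state and takes ggtt->error_mutex so
 * only one capture at a time borrows the reserved error-capture PTE;
 * finish() clears that PTE range again and drops the mutex (see
 * gt_capture_prepare()/gt_capture_finish() above). Typical bracketed use,
 * as in i915_gpu_coredump() further below, in a hypothetical helper:
 */
static int capture_gt_vmas(struct intel_gt_coredump *gt_dump)
{
	struct i915_vma_compress *compress;

	compress = i915_vma_capture_prepare(gt_dump);
	if (!compress)
		return -ENOMEM;

	gt_record_engines(gt_dump, compress);	/* copies + compresses pages */

	i915_vma_capture_finish(gt_dump, compress);
	return 0;
}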
*/ -static void capture_reg_state(struct i915_gpu_state *error) +static void gt_record_regs(struct intel_gt_coredump *gt) { - struct drm_i915_private *i915 = error->i915; - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = gt->_gt->uncore; + struct drm_i915_private *i915 = uncore->i915; int i; - /* General organization + /* + * General organization * 1. Registers specific to a single generation * 2. Registers which belong to multiple generations * 3. Feature specific registers. @@ -1595,122 +1540,162 @@ static void capture_reg_state(struct i915_gpu_state *error) /* 1: Registers specific to a single generation */ if (IS_VALLEYVIEW(i915)) { - error->gtier[0] = intel_uncore_read(uncore, GTIER); - error->ier = intel_uncore_read(uncore, VLV_IER); - error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV); + gt->gtier[0] = intel_uncore_read(uncore, GTIER); + gt->ier = intel_uncore_read(uncore, VLV_IER); + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV); } if (IS_GEN(i915, 7)) - error->err_int = intel_uncore_read(uncore, GEN7_ERR_INT); + gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT); - if (INTEL_GEN(i915) >= 8) { - error->fault_data0 = intel_uncore_read(uncore, - GEN8_FAULT_TLB_DATA0); - error->fault_data1 = intel_uncore_read(uncore, - GEN8_FAULT_TLB_DATA1); + if (INTEL_GEN(i915) >= 12) { + gt->fault_data0 = intel_uncore_read(uncore, + GEN12_FAULT_TLB_DATA0); + gt->fault_data1 = intel_uncore_read(uncore, + GEN12_FAULT_TLB_DATA1); + } else if (INTEL_GEN(i915) >= 8) { + gt->fault_data0 = intel_uncore_read(uncore, + GEN8_FAULT_TLB_DATA0); + gt->fault_data1 = intel_uncore_read(uncore, + GEN8_FAULT_TLB_DATA1); } if (IS_GEN(i915, 6)) { - error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE); - error->gab_ctl = intel_uncore_read(uncore, GAB_CTL); - error->gfx_mode = intel_uncore_read(uncore, GFX_MODE); + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE); + gt->gab_ctl = intel_uncore_read(uncore, GAB_CTL); + gt->gfx_mode = intel_uncore_read(uncore, GFX_MODE); } /* 2: Registers which belong to multiple generations */ if (INTEL_GEN(i915) >= 7) - error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT); + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT); if (INTEL_GEN(i915) >= 6) { - error->derrmr = intel_uncore_read(uncore, DERRMR); - error->error = intel_uncore_read(uncore, ERROR_GEN6); - error->done_reg = intel_uncore_read(uncore, DONE_REG); + gt->derrmr = intel_uncore_read(uncore, DERRMR); + if (INTEL_GEN(i915) < 12) { + gt->error = intel_uncore_read(uncore, ERROR_GEN6); + gt->done_reg = intel_uncore_read(uncore, DONE_REG); + } } - if (INTEL_GEN(i915) >= 5) - error->ccid = intel_uncore_read(uncore, CCID(RENDER_RING_BASE)); - /* 3: Feature specific registers */ if (IS_GEN_RANGE(i915, 6, 7)) { - error->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK); - error->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS); + gt->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK); + gt->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS); + } + + if (IS_GEN_RANGE(i915, 8, 11)) + gt->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN); + + if (IS_GEN(i915, 12)) + gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG); + + if (INTEL_GEN(i915) >= 12) { + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { + gt->sfc_done[i] = + intel_uncore_read(uncore, GEN12_SFC_DONE(i)); + } + + gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE); } /* 4: Everything else */ if (INTEL_GEN(i915) >= 11) { - error->ier = intel_uncore_read(uncore, 
GEN8_DE_MISC_IER); - error->gtier[0] = + gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); + gt->gtier[0] = intel_uncore_read(uncore, GEN11_RENDER_COPY_INTR_ENABLE); - error->gtier[1] = + gt->gtier[1] = intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE); - error->gtier[2] = + gt->gtier[2] = intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE); - error->gtier[3] = + gt->gtier[3] = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); - error->gtier[4] = + gt->gtier[4] = intel_uncore_read(uncore, GEN11_CRYPTO_RSVD_INTR_ENABLE); - error->gtier[5] = + gt->gtier[5] = intel_uncore_read(uncore, GEN11_GUNIT_CSME_INTR_ENABLE); - error->ngtier = 6; + gt->ngtier = 6; } else if (INTEL_GEN(i915) >= 8) { - error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); + gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); for (i = 0; i < 4; i++) - error->gtier[i] = intel_uncore_read(uncore, - GEN8_GT_IER(i)); - error->ngtier = 4; + gt->gtier[i] = + intel_uncore_read(uncore, GEN8_GT_IER(i)); + gt->ngtier = 4; } else if (HAS_PCH_SPLIT(i915)) { - error->ier = intel_uncore_read(uncore, DEIER); - error->gtier[0] = intel_uncore_read(uncore, GTIER); - error->ngtier = 1; + gt->ier = intel_uncore_read(uncore, DEIER); + gt->gtier[0] = intel_uncore_read(uncore, GTIER); + gt->ngtier = 1; } else if (IS_GEN(i915, 2)) { - error->ier = intel_uncore_read16(uncore, GEN2_IER); + gt->ier = intel_uncore_read16(uncore, GEN2_IER); } else if (!IS_VALLEYVIEW(i915)) { - error->ier = intel_uncore_read(uncore, GEN2_IER); + gt->ier = intel_uncore_read(uncore, GEN2_IER); } - error->eir = intel_uncore_read(uncore, EIR); - error->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER); + gt->eir = intel_uncore_read(uncore, EIR); + gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER); +} + +/* + * Generate a semi-unique error code. The code is not meant to have meaning, The + * code's only purpose is to try to prevent false duplicated bug reports by + * grossly estimating a GPU error state. + * + * TODO Ideally, hashing the batchbuffer would be a very nice way to determine + * the hang if we could strip the GTT offset information from it. + * + * It's only a small step better than a random number in its current form. + */ +static u32 generate_ecode(const struct intel_engine_coredump *ee) +{ + /* + * IPEHR would be an ideal way to detect errors, as it's the gross + * measure of "the command that hung." However, has some very common + * synchronization commands which almost always appear in the case + * strictly a client bug. Use instdone to differentiate those some. + */ + return ee ? 
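/*
 * Illustrative sketch, not part of this patch: the "ecode" that appears in
 * the "GPU HANG: ecode gen:engines:xxxxxxxx" message is simply IPEHR XORed
 * with the aggregate INSTDONE of the first hung engine, so identical hangs
 * tend to produce the same code while unrelated ones usually do not. With
 * purely hypothetical register values:
 */
static u32 ecode_example(void)
{
	u32 ipehr = 0x7a000004;		/* hypothetical: a PIPE_CONTROL header */
	u32 instdone = 0xfffffffe;	/* hypothetical: most units still busy */

	return ipehr ^ instdone;	/* 0x85fffffa */
}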
ee->ipehr ^ ee->instdone.instdone : 0; } -static const char * -error_msg(struct i915_gpu_state *error, - intel_engine_mask_t engines, const char *msg) +static const char *error_msg(struct i915_gpu_coredump *error) { + struct intel_engine_coredump *first = NULL; + struct intel_gt_coredump *gt; + intel_engine_mask_t engines; int len; - int i; - for (i = 0; i < ARRAY_SIZE(error->engine); i++) - if (!error->engine[i].context.pid) - engines &= ~BIT(i); + engines = 0; + for (gt = error->gt; gt; gt = gt->next) { + struct intel_engine_coredump *cs; + + if (gt->engine && !first) + first = gt->engine; + + for (cs = gt->engine; cs; cs = cs->next) + engines |= cs->engine->mask; + } len = scnprintf(error->error_msg, sizeof(error->error_msg), - "GPU HANG: ecode %d:%x:0x%08x", + "GPU HANG: ecode %d:%x:%08x", INTEL_GEN(error->i915), engines, - i915_error_generate_code(error, engines)); - if (engines) { + generate_ecode(first)); + if (first && first->context.pid) { /* Just show the first executing process, more is confusing */ - i = __ffs(engines); len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine[i].context.comm, - error->engine[i].context.pid); + first->context.comm, first->context.pid); } - if (msg) - len += scnprintf(error->error_msg + len, - sizeof(error->error_msg) - len, - ", %s", msg); return error->error_msg; } -static void capture_gen_state(struct i915_gpu_state *error) +static void capture_gen(struct i915_gpu_coredump *error) { struct drm_i915_private *i915 = error->i915; - error->awake = i915->gt.awake; error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count); error->suspended = i915->runtime_pm.suspended; @@ -1721,6 +1706,7 @@ static void capture_gen_state(struct i915_gpu_state *error) error->reset_count = i915_reset_count(&i915->gpu_error); error->suspend_count = i915->suspend_count; + i915_params_copy(&error->params, &i915_modparams); memcpy(&error->device_info, INTEL_INFO(i915), sizeof(error->device_info)); @@ -1730,155 +1716,182 @@ static void capture_gen_state(struct i915_gpu_state *error) error->driver_caps = i915->caps; } -static void capture_params(struct i915_gpu_state *error) +struct i915_gpu_coredump * +i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) { - i915_params_copy(&error->params, &i915_modparams); -} + struct i915_gpu_coredump *error; -static unsigned long capture_find_epoch(const struct i915_gpu_state *error) -{ - unsigned long epoch = error->capture; - int i; + if (!i915_modparams.error_capture) + return NULL; - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - const struct drm_i915_error_engine *ee = &error->engine[i]; + error = kzalloc(sizeof(*error), gfp); + if (!error) + return NULL; - if (ee->hangcheck_timestamp && - time_before(ee->hangcheck_timestamp, epoch)) - epoch = ee->hangcheck_timestamp; - } + kref_init(&error->ref); + error->i915 = i915; + + error->time = ktime_get_real(); + error->boottime = ktime_get_boottime(); + error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time); + error->capture = jiffies; + + capture_gen(error); - return epoch; + return error; } -static void capture_finish(struct i915_gpu_state *error) +#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) + +struct intel_gt_coredump * +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) { - struct i915_ggtt *ggtt = &error->i915->ggtt; - const u64 slot = ggtt->error_capture.start; + struct intel_gt_coredump *gc; + + gc = kzalloc(sizeof(*gc), gfp); + if (!gc) + return NULL; - ggtt->vm.clear_range(&ggtt->vm, slot, 
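/*
 * Illustrative sketch, not part of this patch: with the per-GT engine lists
 * there is no fixed engine array left to index, so the engine mask in the
 * hang message is rebuilt by OR-ing engine->mask over every captured engine
 * of every GT, as error_msg() above does. That aggregation in isolation:
 */
static intel_engine_mask_t hung_engine_mask(const struct i915_gpu_coredump *error)
{
	const struct intel_gt_coredump *gt;
	intel_engine_mask_t engines = 0;

	for (gt = error->gt; gt; gt = gt->next) {
		const struct intel_engine_coredump *ee;

		for (ee = gt->engine; ee; ee = ee->next)
			engines |= ee->engine->mask;
	}

	return engines;
}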
PAGE_SIZE); + gc->_gt = gt; + gc->awake = intel_gt_pm_is_awake(gt); + + gt_record_regs(gc); + gt_record_fences(gc); + + return gc; } -static int capture(void *data) +struct i915_vma_compress * +i915_vma_capture_prepare(struct intel_gt_coredump *gt) { - struct i915_gpu_state *error = data; - - error->time = ktime_get_real(); - error->boottime = ktime_get_boottime(); - error->uptime = ktime_sub(ktime_get(), - error->i915->gt.last_init_time); - error->capture = jiffies; + struct i915_vma_compress *compress; - capture_params(error); - capture_gen_state(error); - capture_uc_state(error); - capture_reg_state(error); - gem_record_fences(error); - gem_record_rings(error); - capture_active_buffers(error); - capture_pinned_buffers(error); + compress = kmalloc(sizeof(*compress), ALLOW_FAIL); + if (!compress) + return NULL; - error->overlay = intel_overlay_capture_error_state(error->i915); - error->display = intel_display_capture_error_state(error->i915); + if (!compress_init(compress)) { + kfree(compress); + return NULL; + } - error->epoch = capture_find_epoch(error); + gt_capture_prepare(gt); - capture_finish(error); - return 0; + return compress; } -#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) +void i915_vma_capture_finish(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress) +{ + if (!compress) + return; + + gt_capture_finish(gt); + + compress_fini(compress); + kfree(compress); +} -struct i915_gpu_state * -i915_capture_gpu_state(struct drm_i915_private *i915) +struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915) { - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; /* Check if GPU capture has been disabled */ error = READ_ONCE(i915->gpu_error.first_error); if (IS_ERR(error)) return error; - error = kzalloc(sizeof(*error), GFP_ATOMIC); - if (!error) { - i915_disable_error_state(i915, -ENOMEM); + error = i915_gpu_coredump_alloc(i915, ALLOW_FAIL); + if (!error) return ERR_PTR(-ENOMEM); - } - kref_init(&error->ref); - error->i915 = i915; + error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL); + if (error->gt) { + struct i915_vma_compress *compress; + + compress = i915_vma_capture_prepare(error->gt); + if (!compress) { + kfree(error->gt); + kfree(error); + return ERR_PTR(-ENOMEM); + } + + gt_record_engines(error->gt, compress); + + if (INTEL_INFO(i915)->has_gt_uc) + error->gt->uc = gt_record_uc(error->gt, compress); - stop_machine(capture, error, NULL); + i915_vma_capture_finish(error->gt, compress); + + error->simulated |= error->gt->simulated; + } + + error->overlay = intel_overlay_capture_error_state(i915); + error->display = intel_display_capture_error_state(i915); return error; } -/** - * i915_capture_error_state - capture an error record for later analysis - * @i915: i915 device - * @engine_mask: the mask of engines triggering the hang - * @msg: a message to insert into the error capture header - * - * Should be called when an error is detected (either a hang or an error - * interrupt) to capture error state from the time of the error. Fills - * out a structure which becomes available in debugfs for user level tools - * to pick up. 
- */ -void i915_capture_error_state(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - const char *msg) +void i915_error_state_store(struct i915_gpu_coredump *error) { + struct drm_i915_private *i915; static bool warned; - struct i915_gpu_state *error; - unsigned long flags; - if (!i915_modparams.error_capture) + if (IS_ERR_OR_NULL(error)) return; - if (READ_ONCE(i915->gpu_error.first_error)) - return; + i915 = error->i915; + dev_info(i915->drm.dev, "%s\n", error_msg(error)); - error = i915_capture_gpu_state(i915); - if (IS_ERR(error)) + if (error->simulated || + cmpxchg(&i915->gpu_error.first_error, NULL, error)) return; - dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg)); + i915_gpu_coredump_get(error); - if (!error->simulated) { - spin_lock_irqsave(&i915->gpu_error.lock, flags); - if (!i915->gpu_error.first_error) { - i915->gpu_error.first_error = error; - error = NULL; - } - spin_unlock_irqrestore(&i915->gpu_error.lock, flags); + if (!xchg(&warned, true) && + ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { + pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); + pr_info("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); + pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); + pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n"); + pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n", + i915->drm.primary->index); } +} - if (error) { - __i915_gpu_state_free(&error->ref); +/** + * i915_capture_error_state - capture an error record for later analysis + * @i915: i915 device + * + * Should be called when an error is detected (either a hang or an error + * interrupt) to capture error state from the time of the error. Fills + * out a structure which becomes available in debugfs for user level tools + * to pick up. 
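/*
 * Illustrative sketch, not part of this patch: i915_error_state_store()
 * above publishes the dump without taking gpu_error.lock -- cmpxchg()
 * installs it only if first_error is still NULL (the first hang wins), and
 * xchg(&warned, true) makes sure the "please file a bug" banner is printed
 * at most once. The publish step in isolation:
 */
static bool publish_first_error(struct i915_gpu_coredump **slot,
				struct i915_gpu_coredump *error)
{
	/* cmpxchg() returns the old value: NULL means we won the race */
	return cmpxchg(slot, NULL, error) == NULL;
}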
+ */ +void i915_capture_error_state(struct drm_i915_private *i915) +{ + struct i915_gpu_coredump *error; + + error = i915_gpu_coredump(i915); + if (IS_ERR(error)) { + cmpxchg(&i915->gpu_error.first_error, NULL, error); return; } - if (!warned && - ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { - DRM_INFO("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); - DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); - DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); - DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n"); - DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n", - i915->drm.primary->index); - warned = true; - } + i915_error_state_store(error); + i915_gpu_coredump_put(error); } -struct i915_gpu_state * +struct i915_gpu_coredump * i915_first_error_state(struct drm_i915_private *i915) { - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; spin_lock_irq(&i915->gpu_error.lock); error = i915->gpu_error.first_error; if (!IS_ERR_OR_NULL(error)) - i915_gpu_state_get(error); + i915_gpu_coredump_get(error); spin_unlock_irq(&i915->gpu_error.lock); return error; @@ -1886,7 +1899,7 @@ i915_first_error_state(struct drm_i915_private *i915) void i915_reset_error_state(struct drm_i915_private *i915) { - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; spin_lock_irq(&i915->gpu_error.lock); error = i915->gpu_error.first_error; @@ -1895,7 +1908,7 @@ void i915_reset_error_state(struct drm_i915_private *i915) spin_unlock_irq(&i915->gpu_error.lock); if (!IS_ERR_OR_NULL(error)) - i915_gpu_state_put(error); + i915_gpu_coredump_put(error); } void i915_disable_error_state(struct drm_i915_private *i915, int err) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 2ecd0c6a1c94..e4a6afed3bbf 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -7,6 +7,7 @@ #ifndef _I915_GPU_ERROR_H_ #define _I915_GPU_ERROR_H_ +#include <linux/atomic.h> #include <linux/kref.h> #include <linux/ktime.h> #include <linux/sched.h> @@ -14,9 +15,9 @@ #include <drm/drm_mm.h> #include "gt/intel_engine.h" +#include "gt/uc/intel_uc_fw.h" #include "intel_device_info.h" -#include "intel_uc_fw.h" #include "i915_gem.h" #include "i915_gem_gtt.h" @@ -24,44 +25,100 @@ #include "i915_scheduler.h" struct drm_i915_private; +struct i915_vma_compress; +struct intel_engine_capture_vma; struct intel_overlay_error_state; struct intel_display_error_state; -struct i915_gpu_state { - struct kref ref; - ktime_t time; - ktime_t boottime; - ktime_t uptime; - unsigned long capture; - unsigned long epoch; +struct i915_vma_coredump { + struct i915_vma_coredump *next; - struct drm_i915_private *i915; + char name[20]; + + u64 gtt_offset; + u64 gtt_size; + u32 gtt_page_sizes; + + int num_pages; + int page_count; + int unused; + u32 *pages[0]; +}; + +struct i915_request_coredump { + unsigned long flags; + pid_t pid; + u32 context; + u32 seqno; + u32 start; + u32 head; + u32 tail; + struct i915_sched_attr sched_attr; +}; + +struct intel_engine_coredump { + const struct intel_engine_cs *engine; - char error_msg[128]; bool simulated; - bool awake; - bool wakelock; - bool suspended; - int iommu; u32 reset_count; - u32 suspend_count; - struct intel_device_info device_info; - struct intel_runtime_info runtime_info; - struct intel_driver_caps 
driver_caps; - struct i915_params params; - struct i915_error_uc { - struct intel_uc_fw guc_fw; - struct intel_uc_fw huc_fw; - struct drm_i915_error_object *guc_log; - } uc; + /* position of active request inside the ring */ + u32 rq_head, rq_post, rq_tail; + + /* Register state */ + u32 ccid; + u32 start; + u32 tail; + u32 head; + u32 ctl; + u32 mode; + u32 hws; + u32 ipeir; + u32 ipehr; + u32 bbstate; + u32 instpm; + u32 instps; + u64 bbaddr; + u64 acthd; + u32 fault_reg; + u64 faddr; + u32 rc_psmi; /* sleep state */ + struct intel_instdone instdone; + + struct i915_gem_context_coredump { + char comm[TASK_COMM_LEN]; + pid_t pid; + int active; + int guilty; + struct i915_sched_attr sched_attr; + } context; + + struct i915_vma_coredump *vma; + + struct i915_request_coredump execlist[EXECLIST_MAX_PORTS]; + unsigned int num_ports; + + struct { + u32 gfx_mode; + union { + u64 pdp[4]; + u32 pp_dir_base; + }; + } vm_info; + + struct intel_engine_coredump *next; +}; + +struct intel_gt_coredump { + const struct intel_gt *_gt; + bool awake; + bool simulated; /* Generic register state */ u32 eir; u32 pgtbl_er; u32 ier; u32 gtier[6], ngtier; - u32 ccid; u32 derrmr; u32 forcewake; u32 error; /* gen6+ */ @@ -73,172 +130,68 @@ struct i915_gpu_state { u32 gam_ecochk; u32 gab_ctl; u32 gfx_mode; + u32 gtt_cache; + u32 aux_err; /* gen12 */ + u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */ + u32 gam_done; /* gen12 */ u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; + + struct intel_engine_coredump *engine; + + struct intel_uc_coredump { + struct intel_uc_fw guc_fw; + struct intel_uc_fw huc_fw; + struct i915_vma_coredump *guc_log; + } *uc; + + struct intel_gt_coredump *next; +}; + +struct i915_gpu_coredump { + struct kref ref; + ktime_t time; + ktime_t boottime; + ktime_t uptime; + unsigned long capture; + + struct drm_i915_private *i915; + + struct intel_gt_coredump *gt; + + char error_msg[128]; + bool simulated; + bool wakelock; + bool suspended; + int iommu; + u32 reset_count; + u32 suspend_count; + + struct intel_device_info device_info; + struct intel_runtime_info runtime_info; + struct intel_driver_caps driver_caps; + struct i915_params params; + struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; - struct drm_i915_error_engine { - int engine_id; - /* Software tracked state */ - bool idle; - unsigned long hangcheck_timestamp; - struct i915_address_space *vm; - int num_requests; - u32 reset_count; - - /* position of active request inside the ring */ - u32 rq_head, rq_post, rq_tail; - - /* our own tracking of ring head and tail */ - u32 cpu_ring_head; - u32 cpu_ring_tail; - - /* Register state */ - u32 start; - u32 tail; - u32 head; - u32 ctl; - u32 mode; - u32 hws; - u32 ipeir; - u32 ipehr; - u32 bbstate; - u32 instpm; - u32 instps; - u64 bbaddr; - u64 acthd; - u32 fault_reg; - u64 faddr; - u32 rc_psmi; /* sleep state */ - struct intel_instdone instdone; - - struct drm_i915_error_context { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 hw_id; - int active; - int guilty; - struct i915_sched_attr sched_attr; - } context; - - struct drm_i915_error_object { - u64 gtt_offset; - u64 gtt_size; - int num_pages; - int page_count; - int unused; - u32 *pages[0]; - } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; - - struct drm_i915_error_object **user_bo; - long user_bo_count; - - struct drm_i915_error_object *wa_ctx; - struct drm_i915_error_object *default_state; - - struct drm_i915_error_request { - unsigned long flags; - long jiffies; - pid_t pid; - u32 context; - u32 
seqno; - u32 start; - u32 head; - u32 tail; - struct i915_sched_attr sched_attr; - } *requests, execlist[EXECLIST_MAX_PORTS]; - unsigned int num_ports; - - struct { - u32 gfx_mode; - union { - u64 pdp[4]; - u32 pp_dir_base; - }; - } vm_info; - } engine[I915_NUM_ENGINES]; - - struct drm_i915_error_buffer { - u32 size; - u32 name; - u64 gtt_offset; - u32 read_domains; - u32 write_domain; - s32 fence_reg:I915_MAX_NUM_FENCE_BITS; - u32 tiling:2; - u32 dirty:1; - u32 purgeable:1; - u32 userptr:1; - u32 cache_level:3; - } *active_bo[I915_NUM_ENGINES], *pinned_bo; - u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; - struct i915_address_space *active_vm[I915_NUM_ENGINES]; - struct scatterlist *sgl, *fit; }; struct i915_gpu_error { - /* For hangcheck timer */ -#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ -#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) - - struct delayed_work hangcheck_work; - /* For reset and error_state handling. */ spinlock_t lock; /* Protected by the above dev->gpu_error.lock. */ - struct i915_gpu_state *first_error; + struct i915_gpu_coredump *first_error; atomic_t pending_fb_pin; - /** - * flags: Control various stages of the GPU reset - * - * #I915_RESET_BACKOFF - When we start a global reset, we need to - * serialise with any other users attempting to do the same, and - * any global resources that may be clobber by the reset (such as - * FENCE registers). - * - * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to - * acquire the struct_mutex to reset an engine, we need an explicit - * flag to prevent two concurrent reset attempts in the same engine. - * As the number of engines continues to grow, allocate the flags from - * the most significant bits. - * - * #I915_WEDGED - If reset fails and we can no longer use the GPU, - * we set the #I915_WEDGED bit. Prior to command submission, e.g. - * i915_request_alloc(), this bit is checked and the sequence - * aborted (with -EIO reported to userspace) if set. - */ - unsigned long flags; -#define I915_RESET_BACKOFF 0 -#define I915_RESET_MODESET 1 -#define I915_RESET_ENGINE 2 -#define I915_WEDGED (BITS_PER_LONG - 1) - /** Number of times the device has been reset (global) */ - u32 reset_count; + atomic_t reset_count; /** Number of times an engine has been reset */ - u32 reset_engine_count[I915_NUM_ENGINES]; - - struct mutex wedge_mutex; /* serialises wedging/unwedging */ - - /** - * Waitqueue to signal when a hang is detected. Used to for waiters - * to release the struct_mutex for the reset to procede. - */ - wait_queue_head_t wait_queue; - - /** - * Waitqueue to signal when the reset has completed. Used by clients - * that wait for dev_priv->mm.wedged to settle. 
- */ - wait_queue_head_t reset_queue; - - struct srcu_struct reset_backoff_srcu; + atomic_t reset_engine_count[I915_NUM_ENGINES]; }; struct drm_i915_error_state_buf { @@ -258,41 +211,118 @@ struct drm_i915_error_state_buf { __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); -struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); -void i915_capture_error_state(struct drm_i915_private *dev_priv, - intel_engine_mask_t engine_mask, - const char *error_msg); +struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915); +void i915_capture_error_state(struct drm_i915_private *i915); + +struct i915_gpu_coredump * +i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp); + +struct intel_gt_coredump * +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp); + +struct intel_engine_coredump * +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp); -static inline struct i915_gpu_state * -i915_gpu_state_get(struct i915_gpu_state *gpu) +struct intel_engine_capture_vma * +intel_engine_coredump_add_request(struct intel_engine_coredump *ee, + struct i915_request *rq, + gfp_t gfp); + +void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, + struct intel_engine_capture_vma *capture, + struct i915_vma_compress *compress); + +struct i915_vma_compress * +i915_vma_capture_prepare(struct intel_gt_coredump *gt); + +void i915_vma_capture_finish(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress); + +void i915_error_state_store(struct i915_gpu_coredump *error); + +static inline struct i915_gpu_coredump * +i915_gpu_coredump_get(struct i915_gpu_coredump *gpu) { kref_get(&gpu->ref); return gpu; } -ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error, - char *buf, loff_t offset, size_t count); +ssize_t +i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error, + char *buf, loff_t offset, size_t count); -void __i915_gpu_state_free(struct kref *kref); -static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) +void __i915_gpu_coredump_free(struct kref *kref); +static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) { if (gpu) - kref_put(&gpu->ref, __i915_gpu_state_free); + kref_put(&gpu->ref, __i915_gpu_coredump_free); } -struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); +struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915); void i915_reset_error_state(struct drm_i915_private *i915); void i915_disable_error_state(struct drm_i915_private *i915, int err); #else -static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *error_msg) +static inline void i915_capture_error_state(struct drm_i915_private *i915) +{ +} + +static inline struct i915_gpu_coredump * +i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) +{ + return NULL; +} + +static inline struct intel_gt_coredump * +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) +{ + return NULL; +} + +static inline struct intel_engine_coredump * +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) +{ + return NULL; +} + +static inline struct intel_engine_capture_vma * +intel_engine_coredump_add_request(struct intel_engine_coredump *ee, + struct i915_request *rq, + gfp_t gfp) +{ + return NULL; +} + +static inline void +intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, + struct intel_engine_capture_vma *capture, + struct i915_vma_compress 
*compress) +{ +} + +static inline struct i915_vma_compress * +i915_vma_capture_prepare(struct intel_gt_coredump *gt) +{ + return NULL; +} + +static inline void +i915_vma_capture_finish(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress) +{ +} + +static inline void +i915_error_state_store(struct i915_gpu_coredump *error) +{ +} + +static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) { } -static inline struct i915_gpu_state * +static inline struct i915_gpu_coredump * i915_first_error_state(struct drm_i915_private *i915) { return ERR_PTR(-ENODEV); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b2e27b5b0df9..afc6aad9bf8c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -29,7 +29,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/circ_buf.h> -#include <linux/cpuidle.h> #include <linux/slab.h> #include <linux/sysrq.h> @@ -37,15 +36,20 @@ #include <drm/drm_irq.h> #include <drm/i915_drm.h> +#include "display/intel_display_types.h" #include "display/intel_fifo_underrun.h" #include "display/intel_hotplug.h" #include "display/intel_lpe_audio.h" #include "display/intel_psr.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_irq.h" +#include "gt/intel_gt_pm_irq.h" +#include "gt/intel_rps.h" + #include "i915_drv.h" #include "i915_irq.h" #include "i915_trace.h" -#include "intel_drv.h" #include "intel_pm.h" /** @@ -56,6 +60,8 @@ * and related files, but that will be described in separate chapters. */ +typedef bool (*long_pulse_detect_func)(enum hpd_pin pin, u32 val); + static const u32 hpd_ilk[HPD_NUM_PINS] = { [HPD_PORT_A] = DE_DP_A_HOTPLUG, }; @@ -133,23 +139,38 @@ static const u32 hpd_gen11[HPD_NUM_PINS] = { [HPD_PORT_F] = GEN11_TC4_HOTPLUG | GEN11_TBT4_HOTPLUG }; +static const u32 hpd_gen12[HPD_NUM_PINS] = { + [HPD_PORT_D] = GEN11_TC1_HOTPLUG | GEN11_TBT1_HOTPLUG, + [HPD_PORT_E] = GEN11_TC2_HOTPLUG | GEN11_TBT2_HOTPLUG, + [HPD_PORT_F] = GEN11_TC3_HOTPLUG | GEN11_TBT3_HOTPLUG, + [HPD_PORT_G] = GEN11_TC4_HOTPLUG | GEN11_TBT4_HOTPLUG, + [HPD_PORT_H] = GEN12_TC5_HOTPLUG | GEN12_TBT5_HOTPLUG, + [HPD_PORT_I] = GEN12_TC6_HOTPLUG | GEN12_TBT6_HOTPLUG +}; + static const u32 hpd_icp[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP, - [HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP, - [HPD_PORT_C] = SDE_TC1_HOTPLUG_ICP, - [HPD_PORT_D] = SDE_TC2_HOTPLUG_ICP, - [HPD_PORT_E] = SDE_TC3_HOTPLUG_ICP, - [HPD_PORT_F] = SDE_TC4_HOTPLUG_ICP + [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), + [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), + [HPD_PORT_C] = SDE_TC_HOTPLUG_ICP(PORT_TC1), + [HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC2), + [HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC3), + [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC4), }; -static const u32 hpd_mcc[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP, - [HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP, - [HPD_PORT_C] = SDE_TC1_HOTPLUG_ICP +static const u32 hpd_tgp[HPD_NUM_PINS] = { + [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), + [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), + [HPD_PORT_C] = SDE_DDI_HOTPLUG_ICP(PORT_C), + [HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC1), + [HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC2), + [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC3), + [HPD_PORT_G] = SDE_TC_HOTPLUG_ICP(PORT_TC4), + [HPD_PORT_H] = SDE_TC_HOTPLUG_ICP(PORT_TC5), + [HPD_PORT_I] = SDE_TC_HOTPLUG_ICP(PORT_TC6), }; -static void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr, - i915_reg_t iir, i915_reg_t ier) +void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr, + 
i915_reg_t iir, i915_reg_t ier) { intel_uncore_write(uncore, imr, 0xffffffff); intel_uncore_posting_read(uncore, imr); @@ -163,7 +184,7 @@ static void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr, intel_uncore_posting_read(uncore, iir); } -static void gen2_irq_reset(struct intel_uncore *uncore) +void gen2_irq_reset(struct intel_uncore *uncore) { intel_uncore_write16(uncore, GEN2_IMR, 0xffff); intel_uncore_posting_read16(uncore, GEN2_IMR); @@ -177,19 +198,6 @@ static void gen2_irq_reset(struct intel_uncore *uncore) intel_uncore_posting_read16(uncore, GEN2_IIR); } -#define GEN8_IRQ_RESET_NDX(uncore, type, which) \ -({ \ - unsigned int which_ = which; \ - gen3_irq_reset((uncore), GEN8_##type##_IMR(which_), \ - GEN8_##type##_IIR(which_), GEN8_##type##_IER(which_)); \ -}) - -#define GEN3_IRQ_RESET(uncore, type) \ - gen3_irq_reset((uncore), type##IMR, type##IIR, type##IER) - -#define GEN2_IRQ_RESET(uncore) \ - gen2_irq_reset(uncore) - /* * We should clear IMR at preinstall/uninstall, and just check at postinstall. */ @@ -223,10 +231,10 @@ static void gen2_assert_iir_is_zero(struct intel_uncore *uncore) intel_uncore_posting_read16(uncore, GEN2_IIR); } -static void gen3_irq_init(struct intel_uncore *uncore, - i915_reg_t imr, u32 imr_val, - i915_reg_t ier, u32 ier_val, - i915_reg_t iir) +void gen3_irq_init(struct intel_uncore *uncore, + i915_reg_t imr, u32 imr_val, + i915_reg_t ier, u32 ier_val, + i915_reg_t iir) { gen3_assert_iir_is_zero(uncore, iir); @@ -235,8 +243,8 @@ static void gen3_irq_init(struct intel_uncore *uncore, intel_uncore_posting_read(uncore, imr); } -static void gen2_irq_init(struct intel_uncore *uncore, - u32 imr_val, u32 ier_val) +void gen2_irq_init(struct intel_uncore *uncore, + u32 imr_val, u32 ier_val) { gen2_assert_iir_is_zero(uncore); @@ -245,27 +253,6 @@ static void gen2_irq_init(struct intel_uncore *uncore, intel_uncore_posting_read16(uncore, GEN2_IMR); } -#define GEN8_IRQ_INIT_NDX(uncore, type, which, imr_val, ier_val) \ -({ \ - unsigned int which_ = which; \ - gen3_irq_init((uncore), \ - GEN8_##type##_IMR(which_), imr_val, \ - GEN8_##type##_IER(which_), ier_val, \ - GEN8_##type##_IIR(which_)); \ -}) - -#define GEN3_IRQ_INIT(uncore, type, imr_val, ier_val) \ - gen3_irq_init((uncore), \ - type##IMR, imr_val, \ - type##IER, ier_val, \ - type##IIR) - -#define GEN2_IRQ_INIT(uncore, imr_val, ier_val) \ - gen2_irq_init((uncore), imr_val, ier_val) - -static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); -static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); - /* For display hotplug interrupt */ static inline void i915_hotplug_interrupt_update_locked(struct drm_i915_private *dev_priv, @@ -304,41 +291,6 @@ void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, spin_unlock_irq(&dev_priv->irq_lock); } -static u32 -gen11_gt_engine_identity(struct drm_i915_private * const i915, - const unsigned int bank, const unsigned int bit); - -static bool gen11_reset_one_iir(struct drm_i915_private * const i915, - const unsigned int bank, - const unsigned int bit) -{ - void __iomem * const regs = i915->uncore.regs; - u32 dw; - - lockdep_assert_held(&i915->irq_lock); - - dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); - if (dw & BIT(bit)) { - /* - * According to the BSpec, DW_IIR bits cannot be cleared without - * first servicing the Selector & Shared IIR registers. - */ - gen11_gt_engine_identity(i915, bank, bit); - - /* - * We locked GT INT DW by reading it. 
If we want to (try - * to) recover from this succesfully, we need to clear - * our bit, otherwise we are locking the register for - * everybody. - */ - raw_reg_write(regs, GEN11_GT_INTR_DW(bank), BIT(bit)); - - return true; - } - - return false; -} - /** * ilk_update_display_irq - update DEIMR * @dev_priv: driver private @@ -370,305 +322,6 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv, } /** - * ilk_update_gt_irq - update GTIMR - * @dev_priv: driver private - * @interrupt_mask: mask of interrupt bits to update - * @enabled_irq_mask: mask of interrupt bits to enable - */ -static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, - u32 interrupt_mask, - u32 enabled_irq_mask) -{ - lockdep_assert_held(&dev_priv->irq_lock); - - WARN_ON(enabled_irq_mask & ~interrupt_mask); - - if (WARN_ON(!intel_irqs_enabled(dev_priv))) - return; - - dev_priv->gt_irq_mask &= ~interrupt_mask; - dev_priv->gt_irq_mask |= (~enabled_irq_mask & interrupt_mask); - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); -} - -void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask) -{ - ilk_update_gt_irq(dev_priv, mask, mask); - intel_uncore_posting_read_fw(&dev_priv->uncore, GTIMR); -} - -void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask) -{ - ilk_update_gt_irq(dev_priv, mask, 0); -} - -static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv) -{ - WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11); - - return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; -} - -static void write_pm_imr(struct drm_i915_private *dev_priv) -{ - i915_reg_t reg; - u32 mask = dev_priv->pm_imr; - - if (INTEL_GEN(dev_priv) >= 11) { - reg = GEN11_GPM_WGBOXPERF_INTR_MASK; - /* pm is in upper half */ - mask = mask << 16; - } else if (INTEL_GEN(dev_priv) >= 8) { - reg = GEN8_GT_IMR(2); - } else { - reg = GEN6_PMIMR; - } - - I915_WRITE(reg, mask); - POSTING_READ(reg); -} - -static void write_pm_ier(struct drm_i915_private *dev_priv) -{ - i915_reg_t reg; - u32 mask = dev_priv->pm_ier; - - if (INTEL_GEN(dev_priv) >= 11) { - reg = GEN11_GPM_WGBOXPERF_INTR_ENABLE; - /* pm is in upper half */ - mask = mask << 16; - } else if (INTEL_GEN(dev_priv) >= 8) { - reg = GEN8_GT_IER(2); - } else { - reg = GEN6_PMIER; - } - - I915_WRITE(reg, mask); -} - -/** - * snb_update_pm_irq - update GEN6_PMIMR - * @dev_priv: driver private - * @interrupt_mask: mask of interrupt bits to update - * @enabled_irq_mask: mask of interrupt bits to enable - */ -static void snb_update_pm_irq(struct drm_i915_private *dev_priv, - u32 interrupt_mask, - u32 enabled_irq_mask) -{ - u32 new_val; - - WARN_ON(enabled_irq_mask & ~interrupt_mask); - - lockdep_assert_held(&dev_priv->irq_lock); - - new_val = dev_priv->pm_imr; - new_val &= ~interrupt_mask; - new_val |= (~enabled_irq_mask & interrupt_mask); - - if (new_val != dev_priv->pm_imr) { - dev_priv->pm_imr = new_val; - write_pm_imr(dev_priv); - } -} - -void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask) -{ - if (WARN_ON(!intel_irqs_enabled(dev_priv))) - return; - - snb_update_pm_irq(dev_priv, mask, mask); -} - -static void __gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask) -{ - snb_update_pm_irq(dev_priv, mask, 0); -} - -void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask) -{ - if (WARN_ON(!intel_irqs_enabled(dev_priv))) - return; - - __gen6_mask_pm_irq(dev_priv, mask); -} - -static void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) -{ - i915_reg_t reg = gen6_pm_iir(dev_priv); - - lockdep_assert_held(&dev_priv->irq_lock); - - 
I915_WRITE(reg, reset_mask); - I915_WRITE(reg, reset_mask); - POSTING_READ(reg); -} - -static void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) -{ - lockdep_assert_held(&dev_priv->irq_lock); - - dev_priv->pm_ier |= enable_mask; - write_pm_ier(dev_priv); - gen6_unmask_pm_irq(dev_priv, enable_mask); - /* unmask_pm_irq provides an implicit barrier (POSTING_READ) */ -} - -static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask) -{ - lockdep_assert_held(&dev_priv->irq_lock); - - dev_priv->pm_ier &= ~disable_mask; - __gen6_mask_pm_irq(dev_priv, disable_mask); - write_pm_ier(dev_priv); - /* though a barrier is missing here, but don't really need a one */ -} - -void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv) -{ - spin_lock_irq(&dev_priv->irq_lock); - - while (gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM)) - ; - - dev_priv->gt_pm.rps.pm_iir = 0; - - spin_unlock_irq(&dev_priv->irq_lock); -} - -void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) -{ - spin_lock_irq(&dev_priv->irq_lock); - gen6_reset_pm_iir(dev_priv, GEN6_PM_RPS_EVENTS); - dev_priv->gt_pm.rps.pm_iir = 0; - spin_unlock_irq(&dev_priv->irq_lock); -} - -void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - if (READ_ONCE(rps->interrupts_enabled)) - return; - - spin_lock_irq(&dev_priv->irq_lock); - WARN_ON_ONCE(rps->pm_iir); - - if (INTEL_GEN(dev_priv) >= 11) - WARN_ON_ONCE(gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM)); - else - WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); - - rps->interrupts_enabled = true; - gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); - - spin_unlock_irq(&dev_priv->irq_lock); -} - -void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - if (!READ_ONCE(rps->interrupts_enabled)) - return; - - spin_lock_irq(&dev_priv->irq_lock); - rps->interrupts_enabled = false; - - I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); - - gen6_disable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); - - spin_unlock_irq(&dev_priv->irq_lock); - synchronize_irq(dev_priv->drm.irq); - - /* Now that we will not be generating any more work, flush any - * outstanding tasks. As we are called on the RPS idle path, - * we will reset the GPU to minimum frequencies, so the current - * state of the worker can be discarded. 
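/*
 * [Editor's note, not part of the patch] The removed helper below follows a
 * common shutdown ordering for interrupt-driven workers, which the new
 * gt/intel_rps.c code presumably keeps in some form: mask the source under
 * the IRQ lock, wait out any handler already running, flush the worker it may
 * have queued, and only then discard stale IIR state. A minimal, generic
 * sketch of that ordering (names are hypothetical, not i915 API):
 */
static void example_quiesce_irq_worker(spinlock_t *irq_lock, unsigned int irq,
				       struct work_struct *work,
				       void (*mask_source)(void),
				       void (*reset_iir)(void))
{
	spin_lock_irq(irq_lock);
	mask_source();		/* no new events will be raised */
	spin_unlock_irq(irq_lock);

	synchronize_irq(irq);	/* wait for an in-flight handler to finish */
	cancel_work_sync(work);	/* flush work that handler may have queued */
	reset_iir();		/* now safe to discard stale interrupt state */
}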
- */ - cancel_work_sync(&rps->work); - if (INTEL_GEN(dev_priv) >= 11) - gen11_reset_rps_interrupts(dev_priv); - else - gen6_reset_rps_interrupts(dev_priv); -} - -void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv) -{ - assert_rpm_wakelock_held(&dev_priv->runtime_pm); - - spin_lock_irq(&dev_priv->irq_lock); - gen6_reset_pm_iir(dev_priv, dev_priv->pm_guc_events); - spin_unlock_irq(&dev_priv->irq_lock); -} - -void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv) -{ - assert_rpm_wakelock_held(&dev_priv->runtime_pm); - - spin_lock_irq(&dev_priv->irq_lock); - if (!dev_priv->guc.interrupts.enabled) { - WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & - dev_priv->pm_guc_events); - dev_priv->guc.interrupts.enabled = true; - gen6_enable_pm_irq(dev_priv, dev_priv->pm_guc_events); - } - spin_unlock_irq(&dev_priv->irq_lock); -} - -void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv) -{ - assert_rpm_wakelock_held(&dev_priv->runtime_pm); - - spin_lock_irq(&dev_priv->irq_lock); - dev_priv->guc.interrupts.enabled = false; - - gen6_disable_pm_irq(dev_priv, dev_priv->pm_guc_events); - - spin_unlock_irq(&dev_priv->irq_lock); - synchronize_irq(dev_priv->drm.irq); - - gen9_reset_guc_interrupts(dev_priv); -} - -void gen11_reset_guc_interrupts(struct drm_i915_private *i915) -{ - spin_lock_irq(&i915->irq_lock); - gen11_reset_one_iir(i915, 0, GEN11_GUC); - spin_unlock_irq(&i915->irq_lock); -} - -void gen11_enable_guc_interrupts(struct drm_i915_private *dev_priv) -{ - spin_lock_irq(&dev_priv->irq_lock); - if (!dev_priv->guc.interrupts.enabled) { - u32 events = REG_FIELD_PREP(ENGINE1_MASK, - GEN11_GUC_INTR_GUC2HOST); - - WARN_ON_ONCE(gen11_reset_one_iir(dev_priv, 0, GEN11_GUC)); - I915_WRITE(GEN11_GUC_SG_INTR_ENABLE, events); - I915_WRITE(GEN11_GUC_SG_INTR_MASK, ~events); - dev_priv->guc.interrupts.enabled = true; - } - spin_unlock_irq(&dev_priv->irq_lock); -} - -void gen11_disable_guc_interrupts(struct drm_i915_private *dev_priv) -{ - spin_lock_irq(&dev_priv->irq_lock); - dev_priv->guc.interrupts.enabled = false; - - I915_WRITE(GEN11_GUC_SG_INTR_MASK, ~0); - I915_WRITE(GEN11_GUC_SG_INTR_ENABLE, 0); - - spin_unlock_irq(&dev_priv->irq_lock); - synchronize_irq(dev_priv->drm.irq); - - gen11_reset_guc_interrupts(dev_priv); -} - -/** * bdw_update_port_irq - update DE port interrupt * @dev_priv: driver private * @interrupt_mask: mask of interrupt bits to update @@ -924,11 +577,12 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv) /* Called from drm generic code, passed a 'crtc', which * we use as a pipe index */ -static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) +u32 i915_get_vblank_counter(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_vblank_crtc *vblank = &dev_priv->drm.vblank[drm_crtc_index(crtc)]; const struct drm_display_mode *mode = &vblank->hwmode; + enum pipe pipe = to_intel_crtc(crtc)->pipe; i915_reg_t high_frame, low_frame; u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; unsigned long irqflags; @@ -989,9 +643,10 @@ static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) return (((high1 << 8) | low) + (pixel >= vbl_start)) & 0xffffff; } -static u32 g4x_get_vblank_counter(struct drm_device *dev, unsigned int pipe) +u32 g4x_get_vblank_counter(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct 
drm_i915_private *dev_priv = to_i915(crtc->dev); + enum pipe pipe = to_intel_crtc(crtc)->pipe; return I915_READ(PIPE_FRMCOUNT_G4X(pipe)); } @@ -1107,14 +762,14 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) return (position + crtc->scanline_offset) % vtotal; } -static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, - bool in_vblank_irq, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode) +bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int index, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = intel_get_crtc_for_pipe(dev_priv, - pipe); + struct intel_crtc *crtc = to_intel_crtc(drm_crtc_from_index(dev, index)); + enum pipe pipe = crtc->pipe; int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; unsigned long irqflags; @@ -1157,7 +812,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, /* No obvious pixelcount register. Only query vertical * scanout position from Display scan line register. */ - position = __intel_get_crtc_scanline(intel_crtc); + position = __intel_get_crtc_scanline(crtc); } else { /* Have access to pixelcount since start of frame. * We can split this into vertical and horizontal @@ -1237,200 +892,8 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) return position; } -static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u32 busy_up, busy_down, max_avg, min_avg; - u8 new_delay; - - spin_lock(&mchdev_lock); - - intel_uncore_write16(uncore, - MEMINTRSTS, - intel_uncore_read(uncore, MEMINTRSTS)); - - new_delay = dev_priv->ips.cur_delay; - - intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); - busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); - busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); - max_avg = intel_uncore_read(uncore, RCBMAXAVG); - min_avg = intel_uncore_read(uncore, RCBMINAVG); - - /* Handle RCS change request from hw */ - if (busy_up > max_avg) { - if (dev_priv->ips.cur_delay != dev_priv->ips.max_delay) - new_delay = dev_priv->ips.cur_delay - 1; - if (new_delay < dev_priv->ips.max_delay) - new_delay = dev_priv->ips.max_delay; - } else if (busy_down < min_avg) { - if (dev_priv->ips.cur_delay != dev_priv->ips.min_delay) - new_delay = dev_priv->ips.cur_delay + 1; - if (new_delay > dev_priv->ips.min_delay) - new_delay = dev_priv->ips.min_delay; - } - - if (ironlake_set_drps(dev_priv, new_delay)) - dev_priv->ips.cur_delay = new_delay; - - spin_unlock(&mchdev_lock); - - return; -} - -static void vlv_c0_read(struct drm_i915_private *dev_priv, - struct intel_rps_ei *ei) -{ - ei->ktime = ktime_get_raw(); - ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT); - ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); -} - -void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) -{ - memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei)); -} - -static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - const struct intel_rps_ei *prev = &rps->ei; - struct intel_rps_ei now; - u32 events = 0; - - if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) - return 0; - - vlv_c0_read(dev_priv, &now); - - if (prev->ktime) { - u64 time, c0; - u32 render, media; - - time = ktime_us_delta(now.ktime, prev->ktime); - - time *= 
dev_priv->czclk_freq; - - /* Workload can be split between render + media, - * e.g. SwapBuffers being blitted in X after being rendered in - * mesa. To account for this we need to combine both engines - * into our activity counter. - */ - render = now.render_c0 - prev->render_c0; - media = now.media_c0 - prev->media_c0; - c0 = max(render, media); - c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ - - if (c0 > time * rps->power.up_threshold) - events = GEN6_PM_RP_UP_THRESHOLD; - else if (c0 < time * rps->power.down_threshold) - events = GEN6_PM_RP_DOWN_THRESHOLD; - } - - rps->ei = now; - return events; -} - -static void gen6_pm_rps_work(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, gt_pm.rps.work); - struct intel_rps *rps = &dev_priv->gt_pm.rps; - bool client_boost = false; - int new_delay, adj, min, max; - u32 pm_iir = 0; - - spin_lock_irq(&dev_priv->irq_lock); - if (rps->interrupts_enabled) { - pm_iir = fetch_and_zero(&rps->pm_iir); - client_boost = atomic_read(&rps->num_waiters); - } - spin_unlock_irq(&dev_priv->irq_lock); - - /* Make sure we didn't queue anything we're not going to process. */ - WARN_ON(pm_iir & ~dev_priv->pm_rps_events); - if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) - goto out; - - mutex_lock(&rps->lock); - - pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); - - adj = rps->last_adj; - new_delay = rps->cur_freq; - min = rps->min_freq_softlimit; - max = rps->max_freq_softlimit; - if (client_boost) - max = rps->max_freq; - if (client_boost && new_delay < rps->boost_freq) { - new_delay = rps->boost_freq; - adj = 0; - } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { - if (adj > 0) - adj *= 2; - else /* CHV needs even encode values */ - adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1; - - if (new_delay >= rps->max_freq_softlimit) - adj = 0; - } else if (client_boost) { - adj = 0; - } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { - if (rps->cur_freq > rps->efficient_freq) - new_delay = rps->efficient_freq; - else if (rps->cur_freq > rps->min_freq_softlimit) - new_delay = rps->min_freq_softlimit; - adj = 0; - } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { - if (adj < 0) - adj *= 2; - else /* CHV needs even encode values */ - adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1; - - if (new_delay <= rps->min_freq_softlimit) - adj = 0; - } else { /* unknown event */ - adj = 0; - } - - rps->last_adj = adj; - - /* - * Limit deboosting and boosting to keep ourselves at the extremes - * when in the respective power modes (i.e. slowly decrease frequencies - * while in the HIGH_POWER zone and slowly increase frequencies while - * in the LOW_POWER zone). On idle, we will hit the timeout and drop - * to the next level quickly, and conversely if busy we expect to - * hit a waitboost and rapidly switch into max power. 
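/*
 * [Editor's note, not part of the patch] The removed vlv_wa_c0_ei() above
 * decides up/down events by comparing scaled C0 residency against elapsed
 * wall time: both sides end up in the same units, so the check is effectively
 * "busy share above up_threshold" / "below down_threshold". A simplified
 * restatement of just that comparison, keeping the scaling constants from the
 * removed code (function and parameter names are illustrative):
 */
static u32 example_c0_to_rps_events(u64 busy_ticks, u64 elapsed_us,
				    u64 czclk_freq,
				    u32 up_threshold, u32 down_threshold)
{
	u64 time = elapsed_us * czclk_freq;
	u64 c0 = busy_ticks * (1000 * 100 << 8); /* to usecs, scaled to % */

	if (c0 > time * up_threshold)
		return GEN6_PM_RP_UP_THRESHOLD;
	if (c0 < time * down_threshold)
		return GEN6_PM_RP_DOWN_THRESHOLD;
	return 0;
}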
- */ - if ((adj < 0 && rps->power.mode == HIGH_POWER) || - (adj > 0 && rps->power.mode == LOW_POWER)) - rps->last_adj = 0; - - /* sysfs frequency interfaces may have snuck in while servicing the - * interrupt - */ - new_delay += adj; - new_delay = clamp_t(int, new_delay, min, max); - - if (intel_set_rps(dev_priv, new_delay)) { - DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - rps->last_adj = 0; - } - - mutex_unlock(&rps->lock); - -out: - /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ - spin_lock_irq(&dev_priv->irq_lock); - if (rps->interrupts_enabled) - gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events); - spin_unlock_irq(&dev_priv->irq_lock); -} - - /** - * ivybridge_parity_work - Workqueue called when a parity error interrupt + * ivb_parity_work - Workqueue called when a parity error interrupt * occurred. * @work: workqueue struct * @@ -1438,10 +901,11 @@ out: * this event, userspace should try to remap the bad rows since statistically * it is likely the same row is more likely to go bad again. */ -static void ivybridge_parity_work(struct work_struct *work) +static void ivb_parity_work(struct work_struct *work) { struct drm_i915_private *dev_priv = container_of(work, typeof(*dev_priv), l3_parity.error_work); + struct intel_gt *gt = &dev_priv->gt; u32 error_status, row, bank, subbank; char *parity_event[6]; u32 misccpctl; @@ -1503,144 +967,13 @@ static void ivybridge_parity_work(struct work_struct *work) out: WARN_ON(dev_priv->l3_parity.which_slice); - spin_lock_irq(&dev_priv->irq_lock); - gen5_enable_gt_irq(dev_priv, GT_PARITY_ERROR(dev_priv)); - spin_unlock_irq(&dev_priv->irq_lock); + spin_lock_irq(>->irq_lock); + gen5_gt_enable_irq(gt, GT_PARITY_ERROR(dev_priv)); + spin_unlock_irq(>->irq_lock); mutex_unlock(&dev_priv->drm.struct_mutex); } -static void ivybridge_parity_error_irq_handler(struct drm_i915_private *dev_priv, - u32 iir) -{ - if (!HAS_L3_DPF(dev_priv)) - return; - - spin_lock(&dev_priv->irq_lock); - gen5_disable_gt_irq(dev_priv, GT_PARITY_ERROR(dev_priv)); - spin_unlock(&dev_priv->irq_lock); - - iir &= GT_PARITY_ERROR(dev_priv); - if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1) - dev_priv->l3_parity.which_slice |= 1 << 1; - - if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT) - dev_priv->l3_parity.which_slice |= 1 << 0; - - queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work); -} - -static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, - u32 gt_iir) -{ - if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); - if (gt_iir & ILK_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VCS0]); -} - -static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, - u32 gt_iir) -{ - if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); - if (gt_iir & GT_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VCS0]); - if (gt_iir & GT_BLT_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[BCS0]); - - if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | - GT_BSD_CS_ERROR_INTERRUPT | - GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) - DRM_DEBUG("Command parser error, gt_iir 0x%08x\n", gt_iir); - - if (gt_iir & GT_PARITY_ERROR(dev_priv)) - ivybridge_parity_error_irq_handler(dev_priv, gt_iir); -} - -static void -gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) -{ - bool tasklet = false; - - if (iir & GT_CONTEXT_SWITCH_INTERRUPT) - tasklet = true; - - if (iir & GT_RENDER_USER_INTERRUPT) { - intel_engine_breadcrumbs_irq(engine); - 
tasklet |= intel_engine_needs_breadcrumb_tasklet(engine); - } - - if (tasklet) - tasklet_hi_schedule(&engine->execlists.tasklet); -} - -static void gen8_gt_irq_ack(struct drm_i915_private *i915, - u32 master_ctl, u32 gt_iir[4]) -{ - void __iomem * const regs = i915->uncore.regs; - -#define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \ - GEN8_GT_BCS_IRQ | \ - GEN8_GT_VCS0_IRQ | \ - GEN8_GT_VCS1_IRQ | \ - GEN8_GT_VECS_IRQ | \ - GEN8_GT_PM_IRQ | \ - GEN8_GT_GUC_IRQ) - - if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { - gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0)); - if (likely(gt_iir[0])) - raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]); - } - - if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { - gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1)); - if (likely(gt_iir[1])) - raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]); - } - - if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { - gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2)); - if (likely(gt_iir[2])) - raw_reg_write(regs, GEN8_GT_IIR(2), gt_iir[2]); - } - - if (master_ctl & GEN8_GT_VECS_IRQ) { - gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3)); - if (likely(gt_iir[3])) - raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]); - } -} - -static void gen8_gt_irq_handler(struct drm_i915_private *i915, - u32 master_ctl, u32 gt_iir[4]) -{ - if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { - gen8_cs_irq_handler(i915->engine[RCS0], - gt_iir[0] >> GEN8_RCS_IRQ_SHIFT); - gen8_cs_irq_handler(i915->engine[BCS0], - gt_iir[0] >> GEN8_BCS_IRQ_SHIFT); - } - - if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { - gen8_cs_irq_handler(i915->engine[VCS0], - gt_iir[1] >> GEN8_VCS0_IRQ_SHIFT); - gen8_cs_irq_handler(i915->engine[VCS1], - gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT); - } - - if (master_ctl & GEN8_GT_VECS_IRQ) { - gen8_cs_irq_handler(i915->engine[VECS0], - gt_iir[3] >> GEN8_VECS_IRQ_SHIFT); - } - - if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { - gen6_rps_irq_handler(i915, gt_iir[2]); - gen9_guc_irq_handler(i915, gt_iir[2]); - } -} - static bool gen11_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { @@ -1657,6 +990,26 @@ static bool gen11_port_hotplug_long_detect(enum hpd_pin pin, u32 val) } } +static bool gen12_port_hotplug_long_detect(enum hpd_pin pin, u32 val) +{ + switch (pin) { + case HPD_PORT_D: + return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC1); + case HPD_PORT_E: + return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC2); + case HPD_PORT_F: + return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC3); + case HPD_PORT_G: + return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC4); + case HPD_PORT_H: + return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC5); + case HPD_PORT_I: + return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC6); + default: + return false; + } +} + static bool bxt_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { @@ -1675,9 +1028,11 @@ static bool icp_ddi_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { case HPD_PORT_A: - return val & ICP_DDIA_HPD_LONG_DETECT; + return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_A); case HPD_PORT_B: - return val & ICP_DDIB_HPD_LONG_DETECT; + return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_B); + case HPD_PORT_C: + return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_C); default: return false; } @@ -1699,6 +1054,26 @@ static bool icp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val) } } +static bool tgp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val) +{ + switch (pin) { + case HPD_PORT_D: + return val & 
ICP_TC_HPD_LONG_DETECT(PORT_TC1); + case HPD_PORT_E: + return val & ICP_TC_HPD_LONG_DETECT(PORT_TC2); + case HPD_PORT_F: + return val & ICP_TC_HPD_LONG_DETECT(PORT_TC3); + case HPD_PORT_G: + return val & ICP_TC_HPD_LONG_DETECT(PORT_TC4); + case HPD_PORT_H: + return val & ICP_TC_HPD_LONG_DETECT(PORT_TC5); + case HPD_PORT_I: + return val & ICP_TC_HPD_LONG_DETECT(PORT_TC6); + default: + return false; + } +} + static bool spt_port_hotplug2_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { @@ -1778,6 +1153,8 @@ static void intel_get_hpd_pins(struct drm_i915_private *dev_priv, { enum hpd_pin pin; + BUILD_BUG_ON(BITS_PER_TYPE(*pin_mask) < HPD_NUM_PINS); + for_each_hpd_pin(pin) { if ((hpd[pin] & hotplug_trigger) == 0) continue; @@ -1888,64 +1265,6 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, res1, res2); } -/* The RPS events need forcewake, so we add them to a work queue and mask their - * IMR bits until the work is done. Other interrupts can be processed without - * the work queue. */ -static void gen11_rps_irq_handler(struct drm_i915_private *i915, u32 pm_iir) -{ - struct intel_rps *rps = &i915->gt_pm.rps; - const u32 events = i915->pm_rps_events & pm_iir; - - lockdep_assert_held(&i915->irq_lock); - - if (unlikely(!events)) - return; - - gen6_mask_pm_irq(i915, events); - - if (!rps->interrupts_enabled) - return; - - rps->pm_iir |= events; - schedule_work(&rps->work); -} - -static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - if (pm_iir & dev_priv->pm_rps_events) { - spin_lock(&dev_priv->irq_lock); - gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); - if (rps->interrupts_enabled) { - rps->pm_iir |= pm_iir & dev_priv->pm_rps_events; - schedule_work(&rps->work); - } - spin_unlock(&dev_priv->irq_lock); - } - - if (INTEL_GEN(dev_priv) >= 8) - return; - - if (pm_iir & PM_VEBOX_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VECS0]); - - if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) - DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); -} - -static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) -{ - if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) - intel_guc_to_host_event_handler(&dev_priv->guc); -} - -static void gen11_guc_irq_handler(struct drm_i915_private *i915, u16 iir) -{ - if (iir & GEN11_GUC_INTR_GUC2HOST) - intel_guc_to_host_event_handler(&i915->guc); -} - static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv) { enum pipe pipe; @@ -1962,7 +1281,7 @@ static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv) static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv, u32 iir, u32 pipe_stats[I915_MAX_PIPES]) { - int pipe; + enum pipe pipe; spin_lock(&dev_priv->irq_lock); @@ -1987,6 +1306,7 @@ static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv, status_mask = PIPE_FIFO_UNDERRUN_STATUS; switch (pipe) { + default: case PIPE_A: iir_bit = I915_DISPLAY_PIPE_A_EVENT_INTERRUPT; break; @@ -2185,8 +1505,7 @@ static void i9xx_hpd_irq_handler(struct drm_i915_private *dev_priv, static irqreturn_t valleyview_irq_handler(int irq, void *arg) { - struct drm_device *dev = arg; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = arg; irqreturn_t ret = IRQ_NONE; if (!intel_irqs_enabled(dev_priv)) @@ -2254,9 +1573,9 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) I915_WRITE(VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE); if (gt_iir) - 
snb_gt_irq_handler(dev_priv, gt_iir); + gen6_gt_irq_handler(&dev_priv->gt, gt_iir); if (pm_iir) - gen6_rps_irq_handler(dev_priv, pm_iir); + gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir); if (hotplug_status) i9xx_hpd_irq_handler(dev_priv, hotplug_status); @@ -2271,8 +1590,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) static irqreturn_t cherryview_irq_handler(int irq, void *arg) { - struct drm_device *dev = arg; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = arg; irqreturn_t ret = IRQ_NONE; if (!intel_irqs_enabled(dev_priv)) @@ -2313,7 +1631,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) ier = I915_READ(VLV_IER); I915_WRITE(VLV_IER, 0); - gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir); + gen8_gt_irq_ack(&dev_priv->gt, master_ctl, gt_iir); if (iir & I915_DISPLAY_PORT_INTERRUPT) hotplug_status = i9xx_hpd_irq_ack(dev_priv); @@ -2337,7 +1655,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) I915_WRITE(VLV_IER, ier); I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); - gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir); + gen8_gt_irq_handler(&dev_priv->gt, master_ctl, gt_iir); if (hotplug_status) i9xx_hpd_irq_handler(dev_priv, hotplug_status); @@ -2384,7 +1702,7 @@ static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv, static void ibx_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { - int pipe; + enum pipe pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK; ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ibx); @@ -2470,7 +1788,7 @@ static void cpt_serr_int_handler(struct drm_i915_private *dev_priv) static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { - int pipe; + enum pipe pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_CPT; ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_cpt); @@ -2504,12 +1822,36 @@ static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) cpt_serr_int_handler(dev_priv); } -static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir, - const u32 *pins) +static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { - u32 ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; - u32 tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; + u32 ddi_hotplug_trigger, tc_hotplug_trigger; u32 pin_mask = 0, long_mask = 0; + bool (*tc_port_hotplug_long_detect)(enum hpd_pin pin, u32 val); + const u32 *pins; + + if (HAS_PCH_TGP(dev_priv)) { + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; + tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP; + tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; + pins = hpd_tgp; + } else if (HAS_PCH_JSP(dev_priv)) { + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; + tc_hotplug_trigger = 0; + pins = hpd_tgp; + } else if (HAS_PCH_MCC(dev_priv)) { + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; + tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); + tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; + pins = hpd_icp; + } else { + WARN(!HAS_PCH_ICP(dev_priv), + "Unrecognized PCH type 0x%x\n", INTEL_PCH_TYPE(dev_priv)); + + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; + tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; + tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; + pins = hpd_icp; + } if (ddi_hotplug_trigger) { u32 dig_hotplug_reg; @@ -2532,7 +1874,7 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir, intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, 
tc_hotplug_trigger, dig_hotplug_reg, pins, - icp_tc_port_hotplug_long_detect); + tc_port_hotplug_long_detect); } if (pin_mask) @@ -2637,7 +1979,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv, } if (IS_GEN(dev_priv, 5) && de_iir & DE_PCU_EVENT) - ironlake_rps_change_irq_handler(dev_priv); + gen5_rps_irq_handler(&dev_priv->gt.rps); } static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, @@ -2689,10 +2031,9 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, * 4 - Process the interrupt(s) that had bits set in the IIRs. * 5 - Re-enable Master Interrupt Control. */ -static irqreturn_t ironlake_irq_handler(int irq, void *arg) +static irqreturn_t ilk_irq_handler(int irq, void *arg) { - struct drm_device *dev = arg; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = arg; u32 de_iir, gt_iir, de_ier, sde_ier = 0; irqreturn_t ret = IRQ_NONE; @@ -2723,9 +2064,9 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) I915_WRITE(GTIIR, gt_iir); ret = IRQ_HANDLED; if (INTEL_GEN(dev_priv) >= 6) - snb_gt_irq_handler(dev_priv, gt_iir); + gen6_gt_irq_handler(&dev_priv->gt, gt_iir); else - ilk_gt_irq_handler(dev_priv, gt_iir); + gen5_gt_irq_handler(&dev_priv->gt, gt_iir); } de_iir = I915_READ(DEIIR); @@ -2743,7 +2084,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) if (pm_iir) { I915_WRITE(GEN6_PMIIR, pm_iir); ret = IRQ_HANDLED; - gen6_rps_irq_handler(dev_priv, pm_iir); + gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir); } } @@ -2778,6 +2119,16 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) u32 pin_mask = 0, long_mask = 0; u32 trigger_tc = iir & GEN11_DE_TC_HOTPLUG_MASK; u32 trigger_tbt = iir & GEN11_DE_TBT_HOTPLUG_MASK; + long_pulse_detect_func long_pulse_detect; + const u32 *hpd; + + if (INTEL_GEN(dev_priv) >= 12) { + long_pulse_detect = gen12_port_hotplug_long_detect; + hpd = hpd_gen12; + } else { + long_pulse_detect = gen11_port_hotplug_long_detect; + hpd = hpd_gen11; + } if (trigger_tc) { u32 dig_hotplug_reg; @@ -2786,8 +2137,7 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) I915_WRITE(GEN11_TC_HOTPLUG_CTL, dig_hotplug_reg); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tc, - dig_hotplug_reg, hpd_gen11, - gen11_port_hotplug_long_detect); + dig_hotplug_reg, hpd, long_pulse_detect); } if (trigger_tbt) { @@ -2797,8 +2147,7 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) I915_WRITE(GEN11_TBT_HOTPLUG_CTL, dig_hotplug_reg); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tbt, - dig_hotplug_reg, hpd_gen11, - gen11_port_hotplug_long_detect); + dig_hotplug_reg, hpd, long_pulse_detect); } if (pin_mask) @@ -2809,23 +2158,77 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv) { - u32 mask = GEN8_AUX_CHANNEL_A; + u32 mask; + + if (INTEL_GEN(dev_priv) >= 12) + return TGL_DE_PORT_AUX_DDIA | + TGL_DE_PORT_AUX_DDIB | + TGL_DE_PORT_AUX_DDIC | + TGL_DE_PORT_AUX_USBC1 | + TGL_DE_PORT_AUX_USBC2 | + TGL_DE_PORT_AUX_USBC3 | + TGL_DE_PORT_AUX_USBC4 | + TGL_DE_PORT_AUX_USBC5 | + TGL_DE_PORT_AUX_USBC6; + + mask = GEN8_AUX_CHANNEL_A; if (INTEL_GEN(dev_priv) >= 9) mask |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C | GEN9_AUX_CHANNEL_D; - if (IS_CNL_WITH_PORT_F(dev_priv)) + if (IS_CNL_WITH_PORT_F(dev_priv) || IS_GEN(dev_priv, 11)) mask |= CNL_AUX_CHANNEL_F; - if (INTEL_GEN(dev_priv) >= 11) - 
mask |= ICL_AUX_CHANNEL_E | - CNL_AUX_CHANNEL_F; + if (IS_GEN(dev_priv, 11)) + mask |= ICL_AUX_CHANNEL_E; return mask; } +static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) >= 11) + return GEN11_DE_PIPE_IRQ_FAULT_ERRORS; + else if (INTEL_GEN(dev_priv) >= 9) + return GEN9_DE_PIPE_IRQ_FAULT_ERRORS; + else + return GEN8_DE_PIPE_IRQ_FAULT_ERRORS; +} + +static void +gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) +{ + bool found = false; + + if (iir & GEN8_DE_MISC_GSE) { + intel_opregion_asle_intr(dev_priv); + found = true; + } + + if (iir & GEN8_DE_EDP_PSR) { + u32 psr_iir; + i915_reg_t iir_reg; + + if (INTEL_GEN(dev_priv) >= 12) + iir_reg = TRANS_PSR_IIR(dev_priv->psr.transcoder); + else + iir_reg = EDP_PSR_IIR; + + psr_iir = I915_READ(iir_reg); + I915_WRITE(iir_reg, psr_iir); + + if (psr_iir) + found = true; + + intel_psr_irq_handler(dev_priv, psr_iir); + } + + if (!found) + DRM_ERROR("Unexpected DE Misc interrupt\n"); +} + static irqreturn_t gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) { @@ -2836,29 +2239,12 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) if (master_ctl & GEN8_DE_MISC_IRQ) { iir = I915_READ(GEN8_DE_MISC_IIR); if (iir) { - bool found = false; - I915_WRITE(GEN8_DE_MISC_IIR, iir); ret = IRQ_HANDLED; - - if (iir & GEN8_DE_MISC_GSE) { - intel_opregion_asle_intr(dev_priv); - found = true; - } - - if (iir & GEN8_DE_EDP_PSR) { - u32 psr_iir = I915_READ(EDP_PSR_IIR); - - intel_psr_irq_handler(dev_priv, psr_iir); - I915_WRITE(EDP_PSR_IIR, psr_iir); - found = true; - } - - if (!found) - DRM_ERROR("Unexpected DE Misc interrupt\n"); - } - else + gen8_de_misc_irq_handler(dev_priv, iir); + } else { DRM_ERROR("The master control interrupt lied (DE MISC)!\n"); + } } if (INTEL_GEN(dev_priv) >= 11 && (master_ctl & GEN11_DE_HPD_IRQ)) { @@ -2938,12 +2324,7 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) if (iir & GEN8_PIPE_FIFO_UNDERRUN) intel_cpu_fifo_underrun_irq_handler(dev_priv, pipe); - fault_errors = iir; - if (INTEL_GEN(dev_priv) >= 9) - fault_errors &= GEN9_DE_PIPE_IRQ_FAULT_ERRORS; - else - fault_errors &= GEN8_DE_PIPE_IRQ_FAULT_ERRORS; - + fault_errors = iir & gen8_de_pipe_fault_mask(dev_priv); if (fault_errors) DRM_ERROR("Fault errors on pipe %c: 0x%08x\n", pipe_name(pipe), @@ -2962,10 +2343,8 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) I915_WRITE(SDEIIR, iir); ret = IRQ_HANDLED; - if (INTEL_PCH_TYPE(dev_priv) >= PCH_MCC) - icp_irq_handler(dev_priv, iir, hpd_mcc); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - icp_irq_handler(dev_priv, iir, hpd_icp); + if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + icp_irq_handler(dev_priv, iir); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) spt_irq_handler(dev_priv, iir); else @@ -3002,7 +2381,7 @@ static inline void gen8_master_intr_enable(void __iomem * const regs) static irqreturn_t gen8_irq_handler(int irq, void *arg) { - struct drm_i915_private *dev_priv = to_i915(arg); + struct drm_i915_private *dev_priv = arg; void __iomem * const regs = dev_priv->uncore.regs; u32 master_ctl; u32 gt_iir[4]; @@ -3017,7 +2396,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) } /* Find, clear, then process each source of interrupt */ - gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir); + gen8_gt_irq_ack(&dev_priv->gt, master_ctl, gt_iir); /* IRQs are synced during runtime_suspend, we don't require a wakeref */ if (master_ctl & ~GEN8_GT_IRQS) { @@ -3028,140 +2407,15 @@ static 
irqreturn_t gen8_irq_handler(int irq, void *arg) gen8_master_intr_enable(regs); - gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir); + gen8_gt_irq_handler(&dev_priv->gt, master_ctl, gt_iir); return IRQ_HANDLED; } static u32 -gen11_gt_engine_identity(struct drm_i915_private * const i915, - const unsigned int bank, const unsigned int bit) -{ - void __iomem * const regs = i915->uncore.regs; - u32 timeout_ts; - u32 ident; - - lockdep_assert_held(&i915->irq_lock); - - raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit)); - - /* - * NB: Specs do not specify how long to spin wait, - * so we do ~100us as an educated guess. - */ - timeout_ts = (local_clock() >> 10) + 100; - do { - ident = raw_reg_read(regs, GEN11_INTR_IDENTITY_REG(bank)); - } while (!(ident & GEN11_INTR_DATA_VALID) && - !time_after32(local_clock() >> 10, timeout_ts)); - - if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) { - DRM_ERROR("INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", - bank, bit, ident); - return 0; - } - - raw_reg_write(regs, GEN11_INTR_IDENTITY_REG(bank), - GEN11_INTR_DATA_VALID); - - return ident; -} - -static void -gen11_other_irq_handler(struct drm_i915_private * const i915, - const u8 instance, const u16 iir) -{ - if (instance == OTHER_GUC_INSTANCE) - return gen11_guc_irq_handler(i915, iir); - - if (instance == OTHER_GTPM_INSTANCE) - return gen11_rps_irq_handler(i915, iir); - - WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", - instance, iir); -} - -static void -gen11_engine_irq_handler(struct drm_i915_private * const i915, - const u8 class, const u8 instance, const u16 iir) -{ - struct intel_engine_cs *engine; - - if (instance <= MAX_ENGINE_INSTANCE) - engine = i915->engine_class[class][instance]; - else - engine = NULL; - - if (likely(engine)) - return gen8_cs_irq_handler(engine, iir); - - WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", - class, instance); -} - -static void -gen11_gt_identity_handler(struct drm_i915_private * const i915, - const u32 identity) +gen11_gu_misc_irq_ack(struct intel_gt *gt, const u32 master_ctl) { - const u8 class = GEN11_INTR_ENGINE_CLASS(identity); - const u8 instance = GEN11_INTR_ENGINE_INSTANCE(identity); - const u16 intr = GEN11_INTR_ENGINE_INTR(identity); - - if (unlikely(!intr)) - return; - - if (class <= COPY_ENGINE_CLASS) - return gen11_engine_irq_handler(i915, class, instance, intr); - - if (class == OTHER_CLASS) - return gen11_other_irq_handler(i915, instance, intr); - - WARN_ONCE(1, "unknown interrupt class=0x%x, instance=0x%x, intr=0x%x\n", - class, instance, intr); -} - -static void -gen11_gt_bank_handler(struct drm_i915_private * const i915, - const unsigned int bank) -{ - void __iomem * const regs = i915->uncore.regs; - unsigned long intr_dw; - unsigned int bit; - - lockdep_assert_held(&i915->irq_lock); - - intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); - - for_each_set_bit(bit, &intr_dw, 32) { - const u32 ident = gen11_gt_engine_identity(i915, bank, bit); - - gen11_gt_identity_handler(i915, ident); - } - - /* Clear must be after shared has been served for engine */ - raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw); -} - -static void -gen11_gt_irq_handler(struct drm_i915_private * const i915, - const u32 master_ctl) -{ - unsigned int bank; - - spin_lock(&i915->irq_lock); - - for (bank = 0; bank < 2; bank++) { - if (master_ctl & GEN11_GT_DW_IRQ(bank)) - gen11_gt_bank_handler(i915, bank); - } - - spin_unlock(&i915->irq_lock); -} - -static u32 -gen11_gu_misc_irq_ack(struct drm_i915_private *dev_priv, const u32 
master_ctl) -{ - void __iomem * const regs = dev_priv->uncore.regs; + void __iomem * const regs = gt->uncore->regs; u32 iir; if (!(master_ctl & GEN11_GU_MISC_IRQ)) @@ -3175,10 +2429,10 @@ gen11_gu_misc_irq_ack(struct drm_i915_private *dev_priv, const u32 master_ctl) } static void -gen11_gu_misc_irq_handler(struct drm_i915_private *dev_priv, const u32 iir) +gen11_gu_misc_irq_handler(struct intel_gt *gt, const u32 iir) { if (iir & GEN11_GU_MISC_GSE) - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(gt->i915); } static inline u32 gen11_master_intr_disable(void __iomem * const regs) @@ -3199,53 +2453,74 @@ static inline void gen11_master_intr_enable(void __iomem * const regs) raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ); } -static irqreturn_t gen11_irq_handler(int irq, void *arg) +static void +gen11_display_irq_handler(struct drm_i915_private *i915) +{ + void __iomem * const regs = i915->uncore.regs; + const u32 disp_ctl = raw_reg_read(regs, GEN11_DISPLAY_INT_CTL); + + disable_rpm_wakeref_asserts(&i915->runtime_pm); + /* + * GEN11_DISPLAY_INT_CTL has same format as GEN8_MASTER_IRQ + * for the display related bits. + */ + raw_reg_write(regs, GEN11_DISPLAY_INT_CTL, 0x0); + gen8_de_irq_handler(i915, disp_ctl); + raw_reg_write(regs, GEN11_DISPLAY_INT_CTL, + GEN11_DISPLAY_IRQ_ENABLE); + + enable_rpm_wakeref_asserts(&i915->runtime_pm); +} + +static __always_inline irqreturn_t +__gen11_irq_handler(struct drm_i915_private * const i915, + u32 (*intr_disable)(void __iomem * const regs), + void (*intr_enable)(void __iomem * const regs)) { - struct drm_i915_private * const i915 = to_i915(arg); void __iomem * const regs = i915->uncore.regs; + struct intel_gt *gt = &i915->gt; u32 master_ctl; u32 gu_misc_iir; if (!intel_irqs_enabled(i915)) return IRQ_NONE; - master_ctl = gen11_master_intr_disable(regs); + master_ctl = intr_disable(regs); if (!master_ctl) { - gen11_master_intr_enable(regs); + intr_enable(regs); return IRQ_NONE; } /* Find, clear, then process each source of interrupt. */ - gen11_gt_irq_handler(i915, master_ctl); + gen11_gt_irq_handler(gt, master_ctl); /* IRQs are synced during runtime_suspend, we don't require a wakeref */ - if (master_ctl & GEN11_DISPLAY_IRQ) { - const u32 disp_ctl = raw_reg_read(regs, GEN11_DISPLAY_INT_CTL); - - disable_rpm_wakeref_asserts(&i915->runtime_pm); - /* - * GEN11_DISPLAY_INT_CTL has same format as GEN8_MASTER_IRQ - * for the display related bits. 
- */ - gen8_de_irq_handler(i915, disp_ctl); - enable_rpm_wakeref_asserts(&i915->runtime_pm); - } + if (master_ctl & GEN11_DISPLAY_IRQ) + gen11_display_irq_handler(i915); - gu_misc_iir = gen11_gu_misc_irq_ack(i915, master_ctl); + gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); - gen11_master_intr_enable(regs); + intr_enable(regs); - gen11_gu_misc_irq_handler(i915, gu_misc_iir); + gen11_gu_misc_irq_handler(gt, gu_misc_iir); return IRQ_HANDLED; } +static irqreturn_t gen11_irq_handler(int irq, void *arg) +{ + return __gen11_irq_handler(arg, + gen11_master_intr_disable, + gen11_master_intr_enable); +} + /* Called from drm generic code, passed 'crtc' which * we use as a pipe index */ -static int i8xx_enable_vblank(struct drm_device *dev, unsigned int pipe) +int i8xx_enable_vblank(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3255,19 +2530,26 @@ static int i8xx_enable_vblank(struct drm_device *dev, unsigned int pipe) return 0; } -static int i945gm_enable_vblank(struct drm_device *dev, unsigned int pipe) +int i915gm_enable_vblank(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); - if (dev_priv->i945gm_vblank.enabled++ == 0) - schedule_work(&dev_priv->i945gm_vblank.work); + /* + * Vblank interrupts fail to wake the device up from C2+. + * Disabling render clock gating during C-states avoids + * the problem. There is a small power cost so we do this + * only when vblank interrupts are actually enabled. + */ + if (dev_priv->vblank_enabled++ == 0) + I915_WRITE(SCPD0, _MASKED_BIT_ENABLE(CSTATE_RENDER_CLOCK_GATE_DISABLE)); - return i8xx_enable_vblank(dev, pipe); + return i8xx_enable_vblank(crtc); } -static int i965_enable_vblank(struct drm_device *dev, unsigned int pipe) +int i965_enable_vblank(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3278,9 +2560,10 @@ static int i965_enable_vblank(struct drm_device *dev, unsigned int pipe) return 0; } -static int ironlake_enable_vblank(struct drm_device *dev, unsigned int pipe) +int ilk_enable_vblank(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; u32 bit = INTEL_GEN(dev_priv) >= 7 ? DE_PIPE_VBLANK_IVB(pipe) : DE_PIPE_VBLANK(pipe); @@ -3293,14 +2576,15 @@ static int ironlake_enable_vblank(struct drm_device *dev, unsigned int pipe) * PSR is active as no frames are generated. */ if (HAS_PSR(dev_priv)) - drm_vblank_restore(dev, pipe); + drm_crtc_vblank_restore(crtc); return 0; } -static int gen8_enable_vblank(struct drm_device *dev, unsigned int pipe) +int bdw_enable_vblank(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3311,7 +2595,7 @@ static int gen8_enable_vblank(struct drm_device *dev, unsigned int pipe) * PSR is active as no frames are generated, so check only for PSR. 
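/*
 * [Editor's note, not part of the patch] The SCPD0 writes in
 * i915gm_enable_vblank()/i915gm_disable_vblank() above use i915's
 * masked-register convention: the upper 16 bits of the written value select
 * which of the lower 16 bits actually get updated, so a single bit can be
 * flipped without a read-modify-write. From memory (verify against
 * i915_reg.h), the helpers reduce to roughly:
 */
#define EXAMPLE_MASKED_BIT_ENABLE(a)	(((a) << 16) | (a))
#define EXAMPLE_MASKED_BIT_DISABLE(a)	((a) << 16)
/*
 * e.g. with CSTATE_RENDER_CLOCK_GATE_DISABLE being a single bit, "enable"
 * writes mask|bit and "disable" writes mask|0, leaving every other bit of
 * SCPD0 untouched.
 */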
*/ if (HAS_PSR(dev_priv)) - drm_vblank_restore(dev, pipe); + drm_crtc_vblank_restore(crtc); return 0; } @@ -3319,9 +2603,10 @@ static int gen8_enable_vblank(struct drm_device *dev, unsigned int pipe) /* Called from drm generic code, passed 'crtc' which * we use as a pipe index */ -static void i8xx_disable_vblank(struct drm_device *dev, unsigned int pipe) +void i8xx_disable_vblank(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3329,19 +2614,20 @@ static void i8xx_disable_vblank(struct drm_device *dev, unsigned int pipe) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -static void i945gm_disable_vblank(struct drm_device *dev, unsigned int pipe) +void i915gm_disable_vblank(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); - i8xx_disable_vblank(dev, pipe); + i8xx_disable_vblank(crtc); - if (--dev_priv->i945gm_vblank.enabled == 0) - schedule_work(&dev_priv->i945gm_vblank.work); + if (--dev_priv->vblank_enabled == 0) + I915_WRITE(SCPD0, _MASKED_BIT_DISABLE(CSTATE_RENDER_CLOCK_GATE_DISABLE)); } -static void i965_disable_vblank(struct drm_device *dev, unsigned int pipe) +void i965_disable_vblank(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3350,9 +2636,10 @@ static void i965_disable_vblank(struct drm_device *dev, unsigned int pipe) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -static void ironlake_disable_vblank(struct drm_device *dev, unsigned int pipe) +void ilk_disable_vblank(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; u32 bit = INTEL_GEN(dev_priv) >= 7 ? DE_PIPE_VBLANK_IVB(pipe) : DE_PIPE_VBLANK(pipe); @@ -3362,9 +2649,10 @@ static void ironlake_disable_vblank(struct drm_device *dev, unsigned int pipe) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -static void gen8_disable_vblank(struct drm_device *dev, unsigned int pipe) +void bdw_disable_vblank(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3372,60 +2660,6 @@ static void gen8_disable_vblank(struct drm_device *dev, unsigned int pipe) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -static void i945gm_vblank_work_func(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, i945gm_vblank.work); - - /* - * Vblank interrupts fail to wake up the device from C3, - * hence we want to prevent C3 usage while vblank interrupts - * are enabled. - */ - pm_qos_update_request(&dev_priv->i945gm_vblank.pm_qos, - READ_ONCE(dev_priv->i945gm_vblank.enabled) ? 
- dev_priv->i945gm_vblank.c3_disable_latency : - PM_QOS_DEFAULT_VALUE); -} - -static int cstate_disable_latency(const char *name) -{ - const struct cpuidle_driver *drv; - int i; - - drv = cpuidle_get_driver(); - if (!drv) - return 0; - - for (i = 0; i < drv->state_count; i++) { - const struct cpuidle_state *state = &drv->states[i]; - - if (!strcmp(state->name, name)) - return state->exit_latency ? - state->exit_latency - 1 : 0; - } - - return 0; -} - -static void i945gm_vblank_work_init(struct drm_i915_private *dev_priv) -{ - INIT_WORK(&dev_priv->i945gm_vblank.work, - i945gm_vblank_work_func); - - dev_priv->i945gm_vblank.c3_disable_latency = - cstate_disable_latency("C3"); - pm_qos_add_request(&dev_priv->i945gm_vblank.pm_qos, - PM_QOS_CPU_DMA_LATENCY, - PM_QOS_DEFAULT_VALUE); -} - -static void i945gm_vblank_work_fini(struct drm_i915_private *dev_priv) -{ - cancel_work_sync(&dev_priv->i945gm_vblank.work); - pm_qos_remove_request(&dev_priv->i945gm_vblank.pm_qos); -} - static void ibx_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; @@ -3447,10 +2681,8 @@ static void ibx_irq_reset(struct drm_i915_private *dev_priv) * * This function needs to be called before interrupts are enabled. */ -static void ibx_irq_pre_postinstall(struct drm_device *dev) +static void ibx_irq_pre_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - if (HAS_PCH_NOP(dev_priv)) return; @@ -3459,26 +2691,17 @@ static void ibx_irq_pre_postinstall(struct drm_device *dev) POSTING_READ(SDEIER); } -static void gen5_gt_irq_reset(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - - GEN3_IRQ_RESET(uncore, GT); - if (INTEL_GEN(dev_priv) >= 6) - GEN3_IRQ_RESET(uncore, GEN6_PM); -} - static void vlv_display_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; if (IS_CHERRYVIEW(dev_priv)) - I915_WRITE(DPINVGTT, DPINVGTT_STATUS_MASK_CHV); + intel_uncore_write(uncore, DPINVGTT, DPINVGTT_STATUS_MASK_CHV); else - I915_WRITE(DPINVGTT, DPINVGTT_STATUS_MASK); + intel_uncore_write(uncore, DPINVGTT, DPINVGTT_STATUS_MASK); i915_hotplug_interrupt_update_locked(dev_priv, 0xffffffff, 0); - I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); + intel_uncore_write(uncore, PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); i9xx_pipestat_irq_reset(dev_priv); @@ -3519,33 +2742,30 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) /* drm_dma.h hooks */ -static void ironlake_irq_reset(struct drm_device *dev) +static void ilk_irq_reset(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_uncore *uncore = &dev_priv->uncore; GEN3_IRQ_RESET(uncore, DE); if (IS_GEN(dev_priv, 7)) - I915_WRITE(GEN7_ERR_INT, 0xffffffff); + intel_uncore_write(uncore, GEN7_ERR_INT, 0xffffffff); if (IS_HASWELL(dev_priv)) { - I915_WRITE(EDP_PSR_IMR, 0xffffffff); - I915_WRITE(EDP_PSR_IIR, 0xffffffff); + intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff); + intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff); } - gen5_gt_irq_reset(dev_priv); + gen5_gt_irq_reset(&dev_priv->gt); ibx_irq_reset(dev_priv); } -static void valleyview_irq_reset(struct drm_device *dev) +static void valleyview_irq_reset(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - I915_WRITE(VLV_MASTER_IER, 0); POSTING_READ(VLV_MASTER_IER); - gen5_gt_irq_reset(dev_priv); + gen5_gt_irq_reset(&dev_priv->gt); 
spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) @@ -3553,28 +2773,17 @@ static void valleyview_irq_reset(struct drm_device *dev) spin_unlock_irq(&dev_priv->irq_lock); } -static void gen8_gt_irq_reset(struct drm_i915_private *dev_priv) +static void gen8_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; - - GEN8_IRQ_RESET_NDX(uncore, GT, 0); - GEN8_IRQ_RESET_NDX(uncore, GT, 1); - GEN8_IRQ_RESET_NDX(uncore, GT, 2); - GEN8_IRQ_RESET_NDX(uncore, GT, 3); -} - -static void gen8_irq_reset(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_uncore *uncore = &dev_priv->uncore; - int pipe; + enum pipe pipe; gen8_master_intr_disable(dev_priv->uncore.regs); - gen8_gt_irq_reset(dev_priv); + gen8_gt_irq_reset(&dev_priv->gt); - I915_WRITE(EDP_PSR_IMR, 0xffffffff); - I915_WRITE(EDP_PSR_IIR, 0xffffffff); + intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff); + intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff); for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, @@ -3589,39 +2798,30 @@ static void gen8_irq_reset(struct drm_device *dev) ibx_irq_reset(dev_priv); } -static void gen11_gt_irq_reset(struct drm_i915_private *dev_priv) +static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) { - /* Disable RCS, BCS, VCS and VECS class engines. */ - I915_WRITE(GEN11_RENDER_COPY_INTR_ENABLE, 0); - I915_WRITE(GEN11_VCS_VECS_INTR_ENABLE, 0); - - /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ - I915_WRITE(GEN11_RCS0_RSVD_INTR_MASK, ~0); - I915_WRITE(GEN11_BCS_RSVD_INTR_MASK, ~0); - I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK, ~0); - I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~0); - I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~0); - - I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); - I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); - I915_WRITE(GEN11_GUC_SG_INTR_ENABLE, 0); - I915_WRITE(GEN11_GUC_SG_INTR_MASK, ~0); -} - -static void gen11_irq_reset(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_uncore *uncore = &dev_priv->uncore; - int pipe; + enum pipe pipe; - gen11_master_intr_disable(dev_priv->uncore.regs); + intel_uncore_write(uncore, GEN11_DISPLAY_INT_CTL, 0); + + if (INTEL_GEN(dev_priv) >= 12) { + enum transcoder trans; - gen11_gt_irq_reset(dev_priv); + for (trans = TRANSCODER_A; trans <= TRANSCODER_D; trans++) { + enum intel_display_power_domain domain; - I915_WRITE(GEN11_DISPLAY_INT_CTL, 0); + domain = POWER_DOMAIN_TRANSCODER(trans); + if (!intel_display_power_is_enabled(dev_priv, domain)) + continue; - I915_WRITE(EDP_PSR_IMR, 0xffffffff); - I915_WRITE(EDP_PSR_IIR, 0xffffffff); + intel_uncore_write(uncore, TRANS_PSR_IMR(trans), 0xffffffff); + intel_uncore_write(uncore, TRANS_PSR_IIR(trans), 0xffffffff); + } + } else { + intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff); + intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff); + } for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, @@ -3631,13 +2831,24 @@ static void gen11_irq_reset(struct drm_device *dev) GEN3_IRQ_RESET(uncore, GEN8_DE_PORT_); GEN3_IRQ_RESET(uncore, GEN8_DE_MISC_); GEN3_IRQ_RESET(uncore, GEN11_DE_HPD_); - GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_); - GEN3_IRQ_RESET(uncore, GEN8_PCU_); if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) GEN3_IRQ_RESET(uncore, SDE); } +static void gen11_irq_reset(struct drm_i915_private *dev_priv) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + + gen11_master_intr_disable(dev_priv->uncore.regs); + + 
gen11_gt_irq_reset(&dev_priv->gt); + gen11_display_irq_reset(dev_priv); + + GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_); + GEN3_IRQ_RESET(uncore, GEN8_PCU_); +} + void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, u8 pipe_mask) { @@ -3680,18 +2891,17 @@ void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, spin_unlock_irq(&dev_priv->irq_lock); /* make sure we're done processing display irqs */ - synchronize_irq(dev_priv->drm.irq); + intel_synchronize_irq(dev_priv); } -static void cherryview_irq_reset(struct drm_device *dev) +static void cherryview_irq_reset(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_uncore *uncore = &dev_priv->uncore; I915_WRITE(GEN8_MASTER_IRQ, 0); POSTING_READ(GEN8_MASTER_IRQ); - gen8_gt_irq_reset(dev_priv); + gen8_gt_irq_reset(&dev_priv->gt); GEN3_IRQ_RESET(uncore, GEN8_PCU_); @@ -3756,33 +2966,63 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) ibx_hpd_detection_setup(dev_priv); } -static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv) +static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv, + u32 ddi_hotplug_enable_mask, + u32 tc_hotplug_enable_mask) { u32 hotplug; hotplug = I915_READ(SHOTPLUG_CTL_DDI); - hotplug |= ICP_DDIA_HPD_ENABLE | - ICP_DDIB_HPD_ENABLE; + hotplug |= ddi_hotplug_enable_mask; I915_WRITE(SHOTPLUG_CTL_DDI, hotplug); - hotplug = I915_READ(SHOTPLUG_CTL_TC); - hotplug |= ICP_TC_HPD_ENABLE(PORT_TC1) | - ICP_TC_HPD_ENABLE(PORT_TC2) | - ICP_TC_HPD_ENABLE(PORT_TC3) | - ICP_TC_HPD_ENABLE(PORT_TC4); - I915_WRITE(SHOTPLUG_CTL_TC, hotplug); + if (tc_hotplug_enable_mask) { + hotplug = I915_READ(SHOTPLUG_CTL_TC); + hotplug |= tc_hotplug_enable_mask; + I915_WRITE(SHOTPLUG_CTL_TC, hotplug); + } } -static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv) +static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, + u32 sde_ddi_mask, u32 sde_tc_mask, + u32 ddi_enable_mask, u32 tc_enable_mask, + const u32 *pins) { u32 hotplug_irqs, enabled_irqs; - hotplug_irqs = SDE_DDI_MASK_ICP | SDE_TC_MASK_ICP; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_icp); + hotplug_irqs = sde_ddi_mask | sde_tc_mask; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, pins); + + I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - icp_hpd_detection_setup(dev_priv); + icp_hpd_detection_setup(dev_priv, ddi_enable_mask, tc_enable_mask); +} + +/* + * EHL doesn't need most of gen11_hpd_irq_setup, it's handling only the + * equivalent of SDE. + */ +static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + icp_hpd_irq_setup(dev_priv, + SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1), + ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1), + hpd_icp); +} + +/* + * JSP behaves exactly the same as MCC above except that port C is mapped to + * the DDI-C pins instead of the TC1 pins. This means we should follow TGP's + * masks & tables rather than ICP's masks & tables. 
+ */ +static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + icp_hpd_irq_setup(dev_priv, + SDE_DDI_MASK_TGP, 0, + TGP_DDI_HPD_ENABLE_MASK, 0, + hpd_tgp); } static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -3807,9 +3047,11 @@ static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; + const u32 *hpd; u32 val; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_gen11); + hpd = INTEL_GEN(dev_priv) >= 12 ? hpd_gen12 : hpd_gen11; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd); hotplug_irqs = GEN11_DE_TC_HOTPLUG_MASK | GEN11_DE_TBT_HOTPLUG_MASK; val = I915_READ(GEN11_DE_HPD_IMR); @@ -3819,8 +3061,14 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) gen11_hpd_detection_setup(dev_priv); - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - icp_hpd_irq_setup(dev_priv); + if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP) + icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, SDE_TC_MASK_TGP, + TGP_DDI_HPD_ENABLE_MASK, + TGP_TC_HPD_ENABLE_MASK, hpd_tgp); + else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_MASK_ICP, + ICP_DDI_HPD_ENABLE_MASK, + ICP_TC_HPD_ENABLE_MASK, hpd_icp); } static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -3852,6 +3100,9 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; + if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) + I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); + hotplug_irqs = SDE_HOTPLUG_MASK_SPT; enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt); @@ -3950,9 +3201,8 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) __bxt_hpd_detection_setup(dev_priv, enabled_irqs); } -static void ibx_irq_postinstall(struct drm_device *dev) +static void ibx_irq_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 mask; if (HAS_PCH_NOP(dev_priv)) @@ -3975,48 +3225,8 @@ static void ibx_irq_postinstall(struct drm_device *dev) spt_hpd_detection_setup(dev_priv); } -static void gen5_gt_irq_postinstall(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_uncore *uncore = &dev_priv->uncore; - u32 pm_irqs, gt_irqs; - - pm_irqs = gt_irqs = 0; - - dev_priv->gt_irq_mask = ~0; - if (HAS_L3_DPF(dev_priv)) { - /* L3 parity interrupt is always unmasked. */ - dev_priv->gt_irq_mask = ~GT_PARITY_ERROR(dev_priv); - gt_irqs |= GT_PARITY_ERROR(dev_priv); - } - - gt_irqs |= GT_RENDER_USER_INTERRUPT; - if (IS_GEN(dev_priv, 5)) { - gt_irqs |= ILK_BSD_USER_INTERRUPT; - } else { - gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT; - } - - GEN3_IRQ_INIT(uncore, GT, dev_priv->gt_irq_mask, gt_irqs); - - if (INTEL_GEN(dev_priv) >= 6) { - /* - * RPS interrupts will get enabled/disabled on demand when RPS - * itself is enabled/disabled. 
- */ - if (HAS_ENGINE(dev_priv, VECS0)) { - pm_irqs |= PM_VEBOX_USER_INTERRUPT; - dev_priv->pm_ier |= PM_VEBOX_USER_INTERRUPT; - } - - dev_priv->pm_imr = 0xffffffff; - GEN3_IRQ_INIT(uncore, GEN6_PM, dev_priv->pm_imr, pm_irqs); - } -} - -static int ironlake_irq_postinstall(struct drm_device *dev) +static void ilk_irq_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_uncore *uncore = &dev_priv->uncore; u32 display_mask, extra_mask; @@ -4037,22 +3247,21 @@ static int ironlake_irq_postinstall(struct drm_device *dev) if (IS_HASWELL(dev_priv)) { gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR); - intel_psr_irq_control(dev_priv, dev_priv->psr.debug); display_mask |= DE_EDP_PSR_INT_HSW; } dev_priv->irq_mask = ~display_mask; - ibx_irq_pre_postinstall(dev); + ibx_irq_pre_postinstall(dev_priv); GEN3_IRQ_INIT(uncore, DE, dev_priv->irq_mask, display_mask | extra_mask); - gen5_gt_irq_postinstall(dev); + gen5_gt_irq_postinstall(&dev_priv->gt); ilk_hpd_detection_setup(dev_priv); - ibx_irq_postinstall(dev); + ibx_irq_postinstall(dev_priv); if (IS_IRONLAKE_M(dev_priv)) { /* Enable PCU event interrupts @@ -4064,8 +3273,6 @@ static int ironlake_irq_postinstall(struct drm_device *dev) ilk_enable_display_irq(dev_priv, DE_PCU_EVENT); spin_unlock_irq(&dev_priv->irq_lock); } - - return 0; } void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) @@ -4097,11 +3304,9 @@ void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv) } -static int valleyview_irq_postinstall(struct drm_device *dev) +static void valleyview_irq_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - - gen5_gt_irq_postinstall(dev); + gen5_gt_irq_postinstall(&dev_priv->gt); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) @@ -4110,42 +3315,6 @@ static int valleyview_irq_postinstall(struct drm_device *dev) I915_WRITE(VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE); POSTING_READ(VLV_MASTER_IER); - - return 0; -} - -static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - - /* These are interrupts we'll toggle with the ring mask register */ - u32 gt_interrupts[] = { - (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT), - - (GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT), - - 0, - - (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT) - }; - - dev_priv->pm_ier = 0x0; - dev_priv->pm_imr = ~dev_priv->pm_ier; - GEN8_IRQ_INIT_NDX(uncore, GT, 0, ~gt_interrupts[0], gt_interrupts[0]); - GEN8_IRQ_INIT_NDX(uncore, GT, 1, ~gt_interrupts[1], gt_interrupts[1]); - /* - * RPS interrupts will get enabled/disabled on demand when RPS itself - * is enabled/disabled. Same wil be the case for GuC interrupts. 
- */ - GEN8_IRQ_INIT_NDX(uncore, GT, 2, dev_priv->pm_imr, dev_priv->pm_ier); - GEN8_IRQ_INIT_NDX(uncore, GT, 3, ~gt_interrupts[3], gt_interrupts[3]); } static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) @@ -4187,8 +3356,21 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; - gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR); - intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + if (INTEL_GEN(dev_priv) >= 12) { + enum transcoder trans; + + for (trans = TRANSCODER_A; trans <= TRANSCODER_D; trans++) { + enum intel_display_power_domain domain; + + domain = POWER_DOMAIN_TRANSCODER(trans); + if (!intel_display_power_is_enabled(dev_priv, domain)) + continue; + + gen3_assert_iir_is_zero(uncore, TRANS_PSR_IIR(trans)); + } + } else { + gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR); + } for_each_pipe(dev_priv, pipe) { dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; @@ -4218,58 +3400,22 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) } } -static int gen8_irq_postinstall(struct drm_device *dev) +static void gen8_irq_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - if (HAS_PCH_SPLIT(dev_priv)) - ibx_irq_pre_postinstall(dev); + ibx_irq_pre_postinstall(dev_priv); - gen8_gt_irq_postinstall(dev_priv); + gen8_gt_irq_postinstall(&dev_priv->gt); gen8_de_irq_postinstall(dev_priv); if (HAS_PCH_SPLIT(dev_priv)) - ibx_irq_postinstall(dev); + ibx_irq_postinstall(dev_priv); gen8_master_intr_enable(dev_priv->uncore.regs); - - return 0; -} - -static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv) -{ - const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT; - - BUILD_BUG_ON(irqs & 0xffff0000); - - /* Enable RCS, BCS, VCS and VECS class interrupts. */ - I915_WRITE(GEN11_RENDER_COPY_INTR_ENABLE, irqs << 16 | irqs); - I915_WRITE(GEN11_VCS_VECS_INTR_ENABLE, irqs << 16 | irqs); - - /* Unmask irqs on RCS, BCS, VCS and VECS engines. */ - I915_WRITE(GEN11_RCS0_RSVD_INTR_MASK, ~(irqs << 16)); - I915_WRITE(GEN11_BCS_RSVD_INTR_MASK, ~(irqs << 16)); - I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK, ~(irqs | irqs << 16)); - I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~(irqs | irqs << 16)); - I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~(irqs | irqs << 16)); - - /* - * RPS interrupts will get enabled/disabled on demand when RPS itself - * is enabled/disabled. 
- */ - dev_priv->pm_ier = 0x0; - dev_priv->pm_imr = ~dev_priv->pm_ier; - I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); - I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); - - /* Same thing for GuC interrupts */ - I915_WRITE(GEN11_GUC_SG_INTR_ENABLE, 0); - I915_WRITE(GEN11_GUC_SG_INTR_MASK, ~0); } -static void icp_irq_postinstall(struct drm_device *dev) +static void icp_irq_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 mask = SDE_GMBUS_ICP; WARN_ON(I915_READ(SDEIER) != 0); @@ -4279,36 +3425,41 @@ static void icp_irq_postinstall(struct drm_device *dev) gen3_assert_iir_is_zero(&dev_priv->uncore, SDEIIR); I915_WRITE(SDEIMR, ~mask); - icp_hpd_detection_setup(dev_priv); + if (HAS_PCH_TGP(dev_priv)) + icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, + TGP_TC_HPD_ENABLE_MASK); + else if (HAS_PCH_JSP(dev_priv)) + icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0); + else if (HAS_PCH_MCC(dev_priv)) + icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, + ICP_TC_HPD_ENABLE(PORT_TC1)); + else + icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, + ICP_TC_HPD_ENABLE_MASK); } -static int gen11_irq_postinstall(struct drm_device *dev) +static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_uncore *uncore = &dev_priv->uncore; u32 gu_misc_masked = GEN11_GU_MISC_GSE; if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - icp_irq_postinstall(dev); + icp_irq_postinstall(dev_priv); - gen11_gt_irq_postinstall(dev_priv); + gen11_gt_irq_postinstall(&dev_priv->gt); gen8_de_irq_postinstall(dev_priv); GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); I915_WRITE(GEN11_DISPLAY_INT_CTL, GEN11_DISPLAY_IRQ_ENABLE); - gen11_master_intr_enable(dev_priv->uncore.regs); + gen11_master_intr_enable(uncore->regs); POSTING_READ(GEN11_GFX_MSTR_IRQ); - - return 0; } -static int cherryview_irq_postinstall(struct drm_device *dev) +static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - - gen8_gt_irq_postinstall(dev_priv); + gen8_gt_irq_postinstall(&dev_priv->gt); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) @@ -4317,13 +3468,10 @@ static int cherryview_irq_postinstall(struct drm_device *dev) I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); POSTING_READ(GEN8_MASTER_IRQ); - - return 0; } -static void i8xx_irq_reset(struct drm_device *dev) +static void i8xx_irq_reset(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_uncore *uncore = &dev_priv->uncore; i9xx_pipestat_irq_reset(dev_priv); @@ -4331,9 +3479,8 @@ static void i8xx_irq_reset(struct drm_device *dev) GEN2_IRQ_RESET(uncore); } -static int i8xx_irq_postinstall(struct drm_device *dev) +static void i8xx_irq_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_uncore *uncore = &dev_priv->uncore; u16 enable_mask; @@ -4362,8 +3509,6 @@ static int i8xx_irq_postinstall(struct drm_device *dev) i915_enable_pipestat(dev_priv, PIPE_A, PIPE_CRC_DONE_INTERRUPT_STATUS); i915_enable_pipestat(dev_priv, PIPE_B, PIPE_CRC_DONE_INTERRUPT_STATUS); spin_unlock_irq(&dev_priv->irq_lock); - - return 0; } static void i8xx_error_irq_ack(struct drm_i915_private *i915, @@ -4444,8 +3589,7 @@ static void i9xx_error_irq_handler(struct drm_i915_private *dev_priv, static irqreturn_t i8xx_irq_handler(int irq, void 
*arg) { - struct drm_device *dev = arg; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = arg; irqreturn_t ret = IRQ_NONE; if (!intel_irqs_enabled(dev_priv)) @@ -4475,7 +3619,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); + intel_engine_signal_breadcrumbs(dev_priv->engine[RCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4488,9 +3632,8 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) return ret; } -static void i915_irq_reset(struct drm_device *dev) +static void i915_irq_reset(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_uncore *uncore = &dev_priv->uncore; if (I915_HAS_HOTPLUG(dev_priv)) { @@ -4503,9 +3646,8 @@ static void i915_irq_reset(struct drm_device *dev) GEN3_IRQ_RESET(uncore, GEN2_); } -static int i915_irq_postinstall(struct drm_device *dev) +static void i915_irq_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_uncore *uncore = &dev_priv->uncore; u32 enable_mask; @@ -4543,14 +3685,11 @@ static int i915_irq_postinstall(struct drm_device *dev) spin_unlock_irq(&dev_priv->irq_lock); i915_enable_asle_pipestat(dev_priv); - - return 0; } static irqreturn_t i915_irq_handler(int irq, void *arg) { - struct drm_device *dev = arg; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = arg; irqreturn_t ret = IRQ_NONE; if (!intel_irqs_enabled(dev_priv)) @@ -4585,7 +3724,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) I915_WRITE(GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); + intel_engine_signal_breadcrumbs(dev_priv->engine[RCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4601,9 +3740,8 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) return ret; } -static void i965_irq_reset(struct drm_device *dev) +static void i965_irq_reset(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_uncore *uncore = &dev_priv->uncore; i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0); @@ -4614,9 +3752,8 @@ static void i965_irq_reset(struct drm_device *dev) GEN3_IRQ_RESET(uncore, GEN2_); } -static int i965_irq_postinstall(struct drm_device *dev) +static void i965_irq_postinstall(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_uncore *uncore = &dev_priv->uncore; u32 enable_mask; u32 error_mask; @@ -4666,8 +3803,6 @@ static int i965_irq_postinstall(struct drm_device *dev) spin_unlock_irq(&dev_priv->irq_lock); i915_enable_asle_pipestat(dev_priv); - - return 0; } static void i915_hpd_irq_setup(struct drm_i915_private *dev_priv) @@ -4697,8 +3832,7 @@ static void i915_hpd_irq_setup(struct drm_i915_private *dev_priv) static irqreturn_t i965_irq_handler(int irq, void *arg) { - struct drm_device *dev = arg; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = arg; irqreturn_t ret = IRQ_NONE; if (!intel_irqs_enabled(dev_priv)) @@ -4732,10 +3866,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) I915_WRITE(GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); + 
intel_engine_signal_breadcrumbs(dev_priv->engine[RCS0]); if (iir & I915_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VCS0]); + intel_engine_signal_breadcrumbs(dev_priv->engine[VCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4761,54 +3895,17 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - struct intel_rps *rps = &dev_priv->gt_pm.rps; int i; - if (IS_I945GM(dev_priv)) - i945gm_vblank_work_init(dev_priv); - intel_hpd_init_work(dev_priv); - INIT_WORK(&rps->work, gen6_pm_rps_work); - - INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); + INIT_WORK(&dev_priv->l3_parity.error_work, ivb_parity_work); for (i = 0; i < MAX_L3_SLICES; ++i) dev_priv->l3_parity.remap_info[i] = NULL; - if (HAS_GUC_SCHED(dev_priv) && INTEL_GEN(dev_priv) < 11) - dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT; - - /* Let's track the enabled rps events */ - if (IS_VALLEYVIEW(dev_priv)) - /* WaGsvRC0ResidencyMethod:vlv */ - dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED; - else - dev_priv->pm_rps_events = (GEN6_PM_RP_UP_THRESHOLD | - GEN6_PM_RP_DOWN_THRESHOLD | - GEN6_PM_RP_DOWN_TIMEOUT); - - /* We share the register with other engine */ - if (INTEL_GEN(dev_priv) > 9) - GEM_WARN_ON(dev_priv->pm_rps_events & 0xffff0000); - - rps->pm_intrmsk_mbz = 0; - - /* - * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer - * if GEN6_PM_UP_EI_EXPIRED is masked. - * - * TODO: verify if this can be reproduced on VLV,CHV. - */ - if (INTEL_GEN(dev_priv) <= 7) - rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; - - if (INTEL_GEN(dev_priv) >= 8) - rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - - if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) - dev->driver->get_vblank_counter = g4x_get_vblank_counter; - else if (INTEL_GEN(dev_priv) >= 3) - dev->driver->get_vblank_counter = i915_get_vblank_counter; + /* pre-gen11 the guc irqs bits are in the upper 16 bits of the pm reg */ + if (HAS_GT_UC(dev_priv) && INTEL_GEN(dev_priv) < 11) + dev_priv->gt.pm_guc_events = GUC_INTR_GUC2HOST << 16; dev->vblank_disable_immediate = true; @@ -4831,86 +3928,22 @@ void intel_irq_init(struct drm_i915_private *dev_priv) */ dev_priv->hotplug.hpd_short_storm_enabled = !HAS_DP_MST(dev_priv); - dev->driver->get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos; - dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; - - if (IS_CHERRYVIEW(dev_priv)) { - dev->driver->irq_handler = cherryview_irq_handler; - dev->driver->irq_preinstall = cherryview_irq_reset; - dev->driver->irq_postinstall = cherryview_irq_postinstall; - dev->driver->irq_uninstall = cherryview_irq_reset; - dev->driver->enable_vblank = i965_enable_vblank; - dev->driver->disable_vblank = i965_disable_vblank; - dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; - } else if (IS_VALLEYVIEW(dev_priv)) { - dev->driver->irq_handler = valleyview_irq_handler; - dev->driver->irq_preinstall = valleyview_irq_reset; - dev->driver->irq_postinstall = valleyview_irq_postinstall; - dev->driver->irq_uninstall = valleyview_irq_reset; - dev->driver->enable_vblank = i965_enable_vblank; - dev->driver->disable_vblank = i965_disable_vblank; - dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; - } else if (INTEL_GEN(dev_priv) >= 11) { - dev->driver->irq_handler = gen11_irq_handler; - dev->driver->irq_preinstall = gen11_irq_reset; - dev->driver->irq_postinstall = 
gen11_irq_postinstall; - dev->driver->irq_uninstall = gen11_irq_reset; - dev->driver->enable_vblank = gen8_enable_vblank; - dev->driver->disable_vblank = gen8_disable_vblank; - dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup; - } else if (INTEL_GEN(dev_priv) >= 8) { - dev->driver->irq_handler = gen8_irq_handler; - dev->driver->irq_preinstall = gen8_irq_reset; - dev->driver->irq_postinstall = gen8_irq_postinstall; - dev->driver->irq_uninstall = gen8_irq_reset; - dev->driver->enable_vblank = gen8_enable_vblank; - dev->driver->disable_vblank = gen8_disable_vblank; - if (IS_GEN9_LP(dev_priv)) + if (HAS_GMCH(dev_priv)) { + if (I915_HAS_HOTPLUG(dev_priv)) + dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; + } else { + if (HAS_PCH_JSP(dev_priv)) + dev_priv->display.hpd_irq_setup = jsp_hpd_irq_setup; + else if (HAS_PCH_MCC(dev_priv)) + dev_priv->display.hpd_irq_setup = mcc_hpd_irq_setup; + else if (INTEL_GEN(dev_priv) >= 11) + dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup; + else if (IS_GEN9_LP(dev_priv)) dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup; else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup; else dev_priv->display.hpd_irq_setup = ilk_hpd_irq_setup; - } else if (HAS_PCH_SPLIT(dev_priv)) { - dev->driver->irq_handler = ironlake_irq_handler; - dev->driver->irq_preinstall = ironlake_irq_reset; - dev->driver->irq_postinstall = ironlake_irq_postinstall; - dev->driver->irq_uninstall = ironlake_irq_reset; - dev->driver->enable_vblank = ironlake_enable_vblank; - dev->driver->disable_vblank = ironlake_disable_vblank; - dev_priv->display.hpd_irq_setup = ilk_hpd_irq_setup; - } else { - if (IS_GEN(dev_priv, 2)) { - dev->driver->irq_preinstall = i8xx_irq_reset; - dev->driver->irq_postinstall = i8xx_irq_postinstall; - dev->driver->irq_handler = i8xx_irq_handler; - dev->driver->irq_uninstall = i8xx_irq_reset; - dev->driver->enable_vblank = i8xx_enable_vblank; - dev->driver->disable_vblank = i8xx_disable_vblank; - } else if (IS_I945GM(dev_priv)) { - dev->driver->irq_preinstall = i915_irq_reset; - dev->driver->irq_postinstall = i915_irq_postinstall; - dev->driver->irq_uninstall = i915_irq_reset; - dev->driver->irq_handler = i915_irq_handler; - dev->driver->enable_vblank = i945gm_enable_vblank; - dev->driver->disable_vblank = i945gm_disable_vblank; - } else if (IS_GEN(dev_priv, 3)) { - dev->driver->irq_preinstall = i915_irq_reset; - dev->driver->irq_postinstall = i915_irq_postinstall; - dev->driver->irq_uninstall = i915_irq_reset; - dev->driver->irq_handler = i915_irq_handler; - dev->driver->enable_vblank = i8xx_enable_vblank; - dev->driver->disable_vblank = i8xx_disable_vblank; - } else { - dev->driver->irq_preinstall = i965_irq_reset; - dev->driver->irq_postinstall = i965_irq_postinstall; - dev->driver->irq_uninstall = i965_irq_reset; - dev->driver->irq_handler = i965_irq_handler; - dev->driver->enable_vblank = i965_enable_vblank; - dev->driver->disable_vblank = i965_disable_vblank; - } - if (I915_HAS_HOTPLUG(dev_priv)) - dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; } } @@ -4924,13 +3957,79 @@ void intel_irq_fini(struct drm_i915_private *i915) { int i; - if (IS_I945GM(i915)) - i945gm_vblank_work_fini(i915); - for (i = 0; i < MAX_L3_SLICES; ++i) kfree(i915->l3_parity.remap_info[i]); } +static irq_handler_t intel_irq_handler(struct drm_i915_private *dev_priv) +{ + if (HAS_GMCH(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv)) + return cherryview_irq_handler; + else if (IS_VALLEYVIEW(dev_priv)) + return valleyview_irq_handler; + else 
if (IS_GEN(dev_priv, 4)) + return i965_irq_handler; + else if (IS_GEN(dev_priv, 3)) + return i915_irq_handler; + else + return i8xx_irq_handler; + } else { + if (INTEL_GEN(dev_priv) >= 11) + return gen11_irq_handler; + else if (INTEL_GEN(dev_priv) >= 8) + return gen8_irq_handler; + else + return ilk_irq_handler; + } +} + +static void intel_irq_reset(struct drm_i915_private *dev_priv) +{ + if (HAS_GMCH(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv)) + cherryview_irq_reset(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_irq_reset(dev_priv); + else if (IS_GEN(dev_priv, 4)) + i965_irq_reset(dev_priv); + else if (IS_GEN(dev_priv, 3)) + i915_irq_reset(dev_priv); + else + i8xx_irq_reset(dev_priv); + } else { + if (INTEL_GEN(dev_priv) >= 11) + gen11_irq_reset(dev_priv); + else if (INTEL_GEN(dev_priv) >= 8) + gen8_irq_reset(dev_priv); + else + ilk_irq_reset(dev_priv); + } +} + +static void intel_irq_postinstall(struct drm_i915_private *dev_priv) +{ + if (HAS_GMCH(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv)) + cherryview_irq_postinstall(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_irq_postinstall(dev_priv); + else if (IS_GEN(dev_priv, 4)) + i965_irq_postinstall(dev_priv); + else if (IS_GEN(dev_priv, 3)) + i915_irq_postinstall(dev_priv); + else + i8xx_irq_postinstall(dev_priv); + } else { + if (INTEL_GEN(dev_priv) >= 11) + gen11_irq_postinstall(dev_priv); + else if (INTEL_GEN(dev_priv) >= 8) + gen8_irq_postinstall(dev_priv); + else + ilk_irq_postinstall(dev_priv); + } +} + /** * intel_irq_install - enables the hardware interrupt * @dev_priv: i915 device instance @@ -4944,6 +4043,9 @@ void intel_irq_fini(struct drm_i915_private *i915) */ int intel_irq_install(struct drm_i915_private *dev_priv) { + int irq = dev_priv->drm.pdev->irq; + int ret; + /* * We enable some interrupt sources in our postinstall hooks, so mark * interrupts as enabled _before_ actually enabling them to avoid @@ -4951,7 +4053,20 @@ int intel_irq_install(struct drm_i915_private *dev_priv) */ dev_priv->runtime_pm.irqs_enabled = true; - return drm_irq_install(&dev_priv->drm, dev_priv->drm.pdev->irq); + dev_priv->drm.irq_enabled = true; + + intel_irq_reset(dev_priv); + + ret = request_irq(irq, intel_irq_handler(dev_priv), + IRQF_SHARED, DRIVER_NAME, dev_priv); + if (ret < 0) { + dev_priv->drm.irq_enabled = false; + return ret; + } + + intel_irq_postinstall(dev_priv); + + return ret; } /** @@ -4963,7 +4078,23 @@ int intel_irq_install(struct drm_i915_private *dev_priv) */ void intel_irq_uninstall(struct drm_i915_private *dev_priv) { - drm_irq_uninstall(&dev_priv->drm); + int irq = dev_priv->drm.pdev->irq; + + /* + * FIXME we can get called twice during driver probe + * error handling as well as during driver remove due to + * intel_modeset_driver_remove() calling us out of sequence. + * Would be nice if it didn't do that... 
+ */ + if (!dev_priv->drm.irq_enabled) + return; + + dev_priv->drm.irq_enabled = false; + + intel_irq_reset(dev_priv); + + free_irq(irq, dev_priv); + intel_hpd_cancel_work(dev_priv); dev_priv->runtime_pm.irqs_enabled = false; } @@ -4977,9 +4108,9 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) */ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) { - dev_priv->drm.driver->irq_uninstall(&dev_priv->drm); + intel_irq_reset(dev_priv); dev_priv->runtime_pm.irqs_enabled = false; - synchronize_irq(dev_priv->drm.irq); + intel_synchronize_irq(dev_priv); } /** @@ -4992,6 +4123,20 @@ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv) { dev_priv->runtime_pm.irqs_enabled = true; - dev_priv->drm.driver->irq_preinstall(&dev_priv->drm); - dev_priv->drm.driver->irq_postinstall(&dev_priv->drm); + intel_irq_reset(dev_priv); + intel_irq_postinstall(dev_priv); +} + +bool intel_irqs_enabled(struct drm_i915_private *dev_priv) +{ + /* + * We only use drm_irq_uninstall() at unload and VT switch, so + * this is the only thing we need to check. + */ + return dev_priv->runtime_pm.irqs_enabled; +} + +void intel_synchronize_irq(struct drm_i915_private *i915) +{ + synchronize_irq(i915->drm.pdev->irq); } diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h index cb25dd213308..812c47a9c2d6 100644 --- a/drivers/gpu/drm/i915/i915_irq.h +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -6,15 +6,21 @@ #ifndef __I915_IRQ_H__ #define __I915_IRQ_H__ +#include <linux/ktime.h> #include <linux/types.h> -#include "i915_drv.h" +#include "display/intel_display.h" +#include "i915_reg.h" +struct drm_crtc; +struct drm_device; +struct drm_display_mode; struct drm_i915_private; struct intel_crtc; +struct intel_uncore; -extern void intel_irq_init(struct drm_i915_private *dev_priv); -extern void intel_irq_fini(struct drm_i915_private *dev_priv); +void intel_irq_init(struct drm_i915_private *dev_priv); +void intel_irq_fini(struct drm_i915_private *dev_priv); int intel_irq_install(struct drm_i915_private *dev_priv); void intel_irq_uninstall(struct drm_i915_private *dev_priv); @@ -77,41 +83,83 @@ ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits) void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask); void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask); -void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); -void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); - -static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, - u32 mask) -{ - return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; -} +u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, u32 mask); void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv); -static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) -{ - /* - * We only use drm_irq_uninstall() at unload and VT switch, so - * this is the only thing we need to check. 
- */ - return dev_priv->runtime_pm.irqs_enabled; -} +bool intel_irqs_enabled(struct drm_i915_private *dev_priv); +void intel_synchronize_irq(struct drm_i915_private *i915); int intel_get_crtc_scanline(struct intel_crtc *crtc); void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, u8 pipe_mask); void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, u8 pipe_mask); -void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv); -void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv); -void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv); -void gen11_reset_guc_interrupts(struct drm_i915_private *i915); -void gen11_enable_guc_interrupts(struct drm_i915_private *i915); -void gen11_disable_guc_interrupts(struct drm_i915_private *i915); + +bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode); + +u32 i915_get_vblank_counter(struct drm_crtc *crtc); +u32 g4x_get_vblank_counter(struct drm_crtc *crtc); + +int i8xx_enable_vblank(struct drm_crtc *crtc); +int i915gm_enable_vblank(struct drm_crtc *crtc); +int i965_enable_vblank(struct drm_crtc *crtc); +int ilk_enable_vblank(struct drm_crtc *crtc); +int bdw_enable_vblank(struct drm_crtc *crtc); +void i8xx_disable_vblank(struct drm_crtc *crtc); +void i915gm_disable_vblank(struct drm_crtc *crtc); +void i965_disable_vblank(struct drm_crtc *crtc); +void ilk_disable_vblank(struct drm_crtc *crtc); +void bdw_disable_vblank(struct drm_crtc *crtc); + +void gen2_irq_reset(struct intel_uncore *uncore); +void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr, + i915_reg_t iir, i915_reg_t ier); + +void gen2_irq_init(struct intel_uncore *uncore, + u32 imr_val, u32 ier_val); +void gen3_irq_init(struct intel_uncore *uncore, + i915_reg_t imr, u32 imr_val, + i915_reg_t ier, u32 ier_val, + i915_reg_t iir); + +#define GEN8_IRQ_RESET_NDX(uncore, type, which) \ +({ \ + unsigned int which_ = which; \ + gen3_irq_reset((uncore), GEN8_##type##_IMR(which_), \ + GEN8_##type##_IIR(which_), GEN8_##type##_IER(which_)); \ +}) + +#define GEN3_IRQ_RESET(uncore, type) \ + gen3_irq_reset((uncore), type##IMR, type##IIR, type##IER) + +#define GEN2_IRQ_RESET(uncore) \ + gen2_irq_reset(uncore) + +#define GEN8_IRQ_INIT_NDX(uncore, type, which, imr_val, ier_val) \ +({ \ + unsigned int which_ = which; \ + gen3_irq_init((uncore), \ + GEN8_##type##_IMR(which_), imr_val, \ + GEN8_##type##_IER(which_), ier_val, \ + GEN8_##type##_IIR(which_)); \ +}) + +#define GEN3_IRQ_INIT(uncore, type, imr_val, ier_val) \ + gen3_irq_init((uncore), \ + type##IMR, imr_val, \ + type##IER, ier_val, \ + type##IIR) + +#define GEN2_IRQ_INIT(uncore, imr_val, ier_val) \ + gen2_irq_init((uncore), imr_val, ier_val) #endif /* __I915_IRQ_H__ */ diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c index 79f8ec756362..fdd550405fd3 100644 --- a/drivers/gpu/drm/i915/i915_memcpy.c +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -25,7 +25,13 @@ #include <linux/kernel.h> #include <asm/fpu/api.h> -#include "i915_drv.h" +#include "i915_memcpy.h" + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) +#define CI_BUG_ON(expr) BUG_ON(expr) +#else +#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) +#endif static DEFINE_STATIC_KEY_FALSE(has_movntdqa); @@ -34,7 +40,6 @@ static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) { kernel_fpu_begin(); - len >>= 4; while (len >= 4) { asm("movntdqa (%0), 
%%xmm0\n" "movntdqa 16(%0), %%xmm1\n" @@ -59,6 +64,38 @@ static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) kernel_fpu_end(); } + +static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len) +{ + kernel_fpu_begin(); + + while (len >= 4) { + asm("movntdqa (%0), %%xmm0\n" + "movntdqa 16(%0), %%xmm1\n" + "movntdqa 32(%0), %%xmm2\n" + "movntdqa 48(%0), %%xmm3\n" + "movups %%xmm0, (%1)\n" + "movups %%xmm1, 16(%1)\n" + "movups %%xmm2, 32(%1)\n" + "movups %%xmm3, 48(%1)\n" + :: "r" (src), "r" (dst) : "memory"); + src += 64; + dst += 64; + len -= 4; + } + while (len--) { + asm("movntdqa (%0), %%xmm0\n" + "movups %%xmm0, (%1)\n" + :: "r" (src), "r" (dst) : "memory"); + src += 16; + dst += 16; + } + + kernel_fpu_end(); +} +#else +static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) {} +static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len) {} #endif /** @@ -83,17 +120,47 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15)) return false; -#ifdef CONFIG_AS_MOVNTDQA if (static_branch_likely(&has_movntdqa)) { if (likely(len)) - __memcpy_ntdqa(dst, src, len); + __memcpy_ntdqa(dst, src, len >> 4); return true; } -#endif return false; } +/** + * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC + * @dst: destination pointer + * @src: source pointer + * @len: how many bytes to copy + * + * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from + * @src to @dst using * non-temporal instructions where available, but + * accepts that its arguments may not be aligned, but are valid for the + * potential 16-byte read past the end. + */ +void i915_unaligned_memcpy_from_wc(void *dst, void *src, unsigned long len) +{ + unsigned long addr; + + CI_BUG_ON(!i915_has_memcpy_from_wc()); + + addr = (unsigned long)src; + if (!IS_ALIGNED(addr, 16)) { + unsigned long x = min(ALIGN(addr, 16) - addr, len); + + memcpy(dst, src, x); + + len -= x; + dst += x; + src += x; + } + + if (likely(len)) + __memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16)); +} + void i915_memcpy_init_early(struct drm_i915_private *dev_priv) { /* diff --git a/drivers/gpu/drm/i915/i915_memcpy.h b/drivers/gpu/drm/i915/i915_memcpy.h new file mode 100644 index 000000000000..e36d30edd987 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_memcpy.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_MEMCPY_H__ +#define __I915_MEMCPY_H__ + +#include <linux/types.h> + +struct drm_i915_private; + +void i915_memcpy_init_early(struct drm_i915_private *i915); + +bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); +void i915_unaligned_memcpy_from_wc(void *dst, void *src, unsigned long len); + +/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment, + * as well as SSE4.1 support. i915_memcpy_from_wc() will report if it cannot + * perform the operation. To check beforehand, pass in the parameters to + * to i915_can_memcpy_from_wc() - since we only care about the low 4 bits, + * you only need to pass in the minor offsets, page-aligned pointers are + * always valid. + * + * For just checking for SSE4.1, in the foreknowledge that the future use + * will be correctly aligned, just use i915_has_memcpy_from_wc(). 
+ */ +#define i915_can_memcpy_from_wc(dst, src, len) \ + i915_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0) + +#define i915_has_memcpy_from_wc() \ + i915_memcpy_from_wc(NULL, NULL, 0) + +#endif /* __I915_MEMCPY_H__ */ diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index c23bb29e6d3e..b6376b25ef63 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -33,6 +33,9 @@ struct remap_pfn { struct mm_struct *mm; unsigned long pfn; pgprot_t prot; + + struct sgt_iter sgt; + resource_size_t iobase; }; static int remap_pfn(pte_t *pte, unsigned long addr, void *data) @@ -46,6 +49,35 @@ static int remap_pfn(pte_t *pte, unsigned long addr, void *data) return 0; } +#define use_dma(io) ((io) != -1) + +static inline unsigned long sgt_pfn(const struct remap_pfn *r) +{ + if (use_dma(r->iobase)) + return (r->sgt.dma + r->sgt.curr + r->iobase) >> PAGE_SHIFT; + else + return r->sgt.pfn + (r->sgt.curr >> PAGE_SHIFT); +} + +static int remap_sg(pte_t *pte, unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + + if (GEM_WARN_ON(!r->sgt.pfn)) + return -EINVAL; + + /* Special PTE are not associated with any struct page */ + set_pte_at(r->mm, addr, pte, + pte_mkspecial(pfn_pte(sgt_pfn(r), r->prot))); + r->pfn++; /* track insertions in case we need to unwind later */ + + r->sgt.curr += PAGE_SIZE; + if (r->sgt.curr >= r->sgt.max) + r->sgt = __sgt_iter(__sg_next(r->sgt.sgp), use_dma(r->iobase)); + + return 0; +} + /** * remap_io_mapping - remap an IO mapping to userspace * @vma: user vma to map to @@ -63,9 +95,8 @@ int remap_io_mapping(struct vm_area_struct *vma, struct remap_pfn r; int err; - GEM_BUG_ON((vma->vm_flags & - (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)) != - (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)); +#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) + GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ r.mm = vma->vm_mm; @@ -81,3 +112,40 @@ int remap_io_mapping(struct vm_area_struct *vma, return 0; } + +/** + * remap_io_sg - remap an IO mapping to userspace + * @vma: user vma to map to + * @addr: target user address to start at + * @size: size of map area + * @sgl: Start sg entry + * @iobase: Use stored dma address offset by this address or pfn if -1 + * + * Note: this is only safe if the mm semaphore is held when called. + */ +int remap_io_sg(struct vm_area_struct *vma, + unsigned long addr, unsigned long size, + struct scatterlist *sgl, resource_size_t iobase) +{ + struct remap_pfn r = { + .mm = vma->vm_mm, + .prot = vma->vm_page_prot, + .sgt = __sgt_iter(sgl, use_dma(iobase)), + .iobase = iobase, + }; + int err; + + /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ + GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); + + if (!use_dma(iobase)) + flush_cache_range(vma, addr, size); + + err = apply_to_page_range(r.mm, addr, size, remap_sg, &r); + if (unlikely(err)) { + zap_vma_ptes(vma, addr, r.pfn << PAGE_SHIFT); + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_oa_bdw.h b/drivers/gpu/drm/i915/i915_oa_bdw.h deleted file mode 100644 index 0e667f1a8aa1..000000000000 --- a/drivers/gpu/drm/i915/i915_oa_bdw.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation - * - * Autogenerated file by GPU Top : https://github.com/rib/gputop - * DO NOT EDIT manually! 
- */
-
-#ifndef __I915_OA_BDW_H__
-#define __I915_OA_BDW_H__
-
-extern void i915_perf_load_test_config_bdw(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_bxt.h b/drivers/gpu/drm/i915/i915_oa_bxt.h
deleted file mode 100644
index 679e92cf4f1d..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_bxt.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_BXT_H__
-#define __I915_OA_BXT_H__
-
-extern void i915_perf_load_test_config_bxt(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt2.h b/drivers/gpu/drm/i915/i915_oa_cflgt2.h
deleted file mode 100644
index 4d6025559bbe..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_cflgt2.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_CFLGT2_H__
-#define __I915_OA_CFLGT2_H__
-
-extern void i915_perf_load_test_config_cflgt2(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt3.h b/drivers/gpu/drm/i915/i915_oa_cflgt3.h
deleted file mode 100644
index 0697f4077402..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_cflgt3.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_CFLGT3_H__
-#define __I915_OA_CFLGT3_H__
-
-extern void i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_chv.h b/drivers/gpu/drm/i915/i915_oa_chv.h
deleted file mode 100644
index 0986eae3135f..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_chv.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_CHV_H__
-#define __I915_OA_CHV_H__
-
-extern void i915_perf_load_test_config_chv(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_cnl.h b/drivers/gpu/drm/i915/i915_oa_cnl.h
deleted file mode 100644
index e830a406aff2..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_cnl.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_CNL_H__
-#define __I915_OA_CNL_H__
-
-extern void i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_glk.h b/drivers/gpu/drm/i915/i915_oa_glk.h
deleted file mode 100644
index 06dedf991edb..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_glk.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_GLK_H__
-#define __I915_OA_GLK_H__
-
-extern void i915_perf_load_test_config_glk(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.h b/drivers/gpu/drm/i915/i915_oa_hsw.h
deleted file mode 100644
index 3d0c870cd0bd..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_hsw.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_HSW_H__
-#define __I915_OA_HSW_H__
-
-extern void i915_perf_load_test_config_hsw(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_icl.h b/drivers/gpu/drm/i915/i915_oa_icl.h
deleted file mode 100644
index 24eaa97d61ba..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_icl.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_ICL_H__
-#define __I915_OA_ICL_H__
-
-extern void i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt2.h b/drivers/gpu/drm/i915/i915_oa_kblgt2.h
deleted file mode 100644
index a55398a904de..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_kblgt2.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_KBLGT2_H__
-#define __I915_OA_KBLGT2_H__
-
-extern void i915_perf_load_test_config_kblgt2(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt3.h b/drivers/gpu/drm/i915/i915_oa_kblgt3.h
deleted file mode 100644
index 3ddd3483b7cc..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_kblgt3.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_KBLGT3_H__
-#define __I915_OA_KBLGT3_H__
-
-extern void i915_perf_load_test_config_kblgt3(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt2.h b/drivers/gpu/drm/i915/i915_oa_sklgt2.h
deleted file mode 100644
index be6256037239..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_sklgt2.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_SKLGT2_H__
-#define __I915_OA_SKLGT2_H__
-
-extern void i915_perf_load_test_config_sklgt2(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt3.h b/drivers/gpu/drm/i915/i915_oa_sklgt3.h
deleted file mode 100644
index 650beb068e56..000000000000
--- a/drivers/gpu/drm/i915/i915_oa_sklgt3.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */ - -#ifndef __I915_OA_SKLGT3_H__ -#define __I915_OA_SKLGT3_H__ - -extern void i915_perf_load_test_config_sklgt3(struct drm_i915_private *dev_priv); - -#endif diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt4.h b/drivers/gpu/drm/i915/i915_oa_sklgt4.h deleted file mode 100644 index 8dcf849d131e..000000000000 --- a/drivers/gpu/drm/i915/i915_oa_sklgt4.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation - * - * Autogenerated file by GPU Top : https://github.com/rib/gputop - * DO NOT EDIT manually! - */ - -#ifndef __I915_OA_SKLGT4_H__ -#define __I915_OA_SKLGT4_H__ - -extern void i915_perf_load_test_config_sklgt4(struct drm_i915_private *dev_priv); - -#endif diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 5b07766a1c26..1dd1f3652795 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -46,7 +46,8 @@ i915_param_named(modeset, int, 0400, i915_param_named_unsafe(enable_dc, int, 0400, "Enable power-saving display C-states. " - "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)"); + "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; " + "3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)"); i915_param_named_unsafe(enable_fbc, int, 0600, "Enable frame buffer compression for power savings " @@ -165,18 +166,24 @@ i915_param_named_unsafe(enable_dp_mst, bool, 0600, "Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)"); #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) -i915_param_named_unsafe(inject_load_failure, uint, 0400, +i915_param_named_unsafe(inject_probe_failure, uint, 0400, "Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)"); #endif -i915_param_named(enable_dpcd_backlight, bool, 0600, - "Enable support for DPCD backlight control (default:false)"); +i915_param_named(enable_dpcd_backlight, int, 0600, + "Enable support for DPCD backlight control" + "(-1=use per-VBT LFP backlight type setting, 0=disabled [default], 1=enabled)"); #if IS_ENABLED(CONFIG_DRM_I915_GVT) i915_param_named(enable_gvt, bool, 0400, "Enable support for Intel GVT-g graphics virtualization host support(default:false)"); #endif +#if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) +i915_param_named_unsafe(fake_lmem_start, ulong, 0600, + "Fake LMEM start offset (default: 0)"); +#endif + static __always_inline void _print_param(struct drm_printer *p, const char *name, const char *type, @@ -188,6 +195,8 @@ static __always_inline void _print_param(struct drm_printer *p, drm_printf(p, "i915.%s=%d\n", name, *(const int *)x); else if (!__builtin_strcmp(type, "unsigned int")) drm_printf(p, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "unsigned long")) + drm_printf(p, "i915.%s=%lu\n", name, *(const unsigned long *)x); else if (!__builtin_strcmp(type, "char *")) drm_printf(p, "i915.%s=%s\n", name, *(const char **)x); else diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index a4770ce46bd2..31b88f297fbc 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -61,10 +61,12 @@ struct drm_printer; param(char *, dmc_firmware_path, NULL) \ param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \ param(int, edp_vswing, 0) \ - param(int, reset, 2) \ - param(unsigned int, inject_load_failure, 0) \ + param(int, reset, 3) \ + param(unsigned int, inject_probe_failure, 0) \ param(int, fastboot, 
-1) \ + param(int, enable_dpcd_backlight, 0) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE) \ + param(unsigned long, fake_lmem_start, 0) \ /* leave bools at the end to not create holes */ \ param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \ param(bool, enable_hangcheck, true) \ @@ -76,7 +78,6 @@ struct drm_printer; param(bool, verbose_state_checks, true) \ param(bool, nuclear_pageflip, false) \ param(bool, enable_dp_mst, true) \ - param(bool, enable_dpcd_backlight, false) \ param(bool, enable_gvt, false) #define MEMBER(T, member, ...) T member; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 6c9f46fc3e12..83f01401b8b5 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -23,7 +23,6 @@ */ #include <linux/console.h> -#include <linux/vgaarb.h> #include <linux/vga_switcheroo.h> #include <drm/drm_drv.h> @@ -31,6 +30,7 @@ #include "display/intel_fbdev.h" #include "i915_drv.h" +#include "i915_perf.h" #include "i915_globals.h" #include "i915_selftest.h" @@ -118,6 +118,14 @@ [PIPE_C] = IVB_CURSOR_C_OFFSET, \ } +#define TGL_CURSOR_OFFSETS \ + .cursor_offsets = { \ + [PIPE_A] = CURSOR_A_OFFSET, \ + [PIPE_B] = IVB_CURSOR_B_OFFSET, \ + [PIPE_C] = IVB_CURSOR_C_OFFSET, \ + [PIPE_D] = TGL_CURSOR_D_OFFSET, \ + } + #define I9XX_COLORS \ .color = { .gamma_lut_size = 256 } #define I965_COLORS \ @@ -144,10 +152,13 @@ #define GEN_DEFAULT_PAGE_SIZES \ .page_sizes = I915_GTT_PAGE_SIZE_4K +#define GEN_DEFAULT_REGIONS \ + .memory_regions = REGION_SMEM | REGION_STOLEN + #define I830_FEATURES \ GEN(2), \ .is_mobile = 1, \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_overlay = 1, \ .display.cursor_needs_physical = 1, \ .display.overlay_needs_physical = 1, \ @@ -161,11 +172,12 @@ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS #define I845_FEATURES \ GEN(2), \ - .num_pipes = 1, \ + .pipe_mask = BIT(PIPE_A), \ .display.has_overlay = 1, \ .display.overlay_needs_physical = 1, \ .display.has_gmch = 1, \ @@ -178,32 +190,33 @@ I845_PIPE_OFFSETS, \ I845_CURSOR_OFFSETS, \ I9XX_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS -static const struct intel_device_info intel_i830_info = { +static const struct intel_device_info i830_info = { I830_FEATURES, PLATFORM(INTEL_I830), }; -static const struct intel_device_info intel_i845g_info = { +static const struct intel_device_info i845g_info = { I845_FEATURES, PLATFORM(INTEL_I845G), }; -static const struct intel_device_info intel_i85x_info = { +static const struct intel_device_info i85x_info = { I830_FEATURES, PLATFORM(INTEL_I85X), .display.has_fbc = 1, }; -static const struct intel_device_info intel_i865g_info = { +static const struct intel_device_info i865g_info = { I845_FEATURES, PLATFORM(INTEL_I865G), }; #define GEN3_FEATURES \ GEN(3), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .engine_mask = BIT(RCS0), \ @@ -212,9 +225,10 @@ static const struct intel_device_info intel_i865g_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS -static const struct intel_device_info intel_i915g_info = { +static const struct intel_device_info i915g_info = { GEN3_FEATURES, PLATFORM(INTEL_I915G), .has_coherent_ggtt = false, @@ -225,7 +239,7 @@ static const struct 
intel_device_info intel_i915g_info = { .unfenced_needs_alignment = 1, }; -static const struct intel_device_info intel_i915gm_info = { +static const struct intel_device_info i915gm_info = { GEN3_FEATURES, PLATFORM(INTEL_I915GM), .is_mobile = 1, @@ -238,7 +252,7 @@ static const struct intel_device_info intel_i915gm_info = { .unfenced_needs_alignment = 1, }; -static const struct intel_device_info intel_i945g_info = { +static const struct intel_device_info i945g_info = { GEN3_FEATURES, PLATFORM(INTEL_I945G), .display.has_hotplug = 1, @@ -249,7 +263,7 @@ static const struct intel_device_info intel_i945g_info = { .unfenced_needs_alignment = 1, }; -static const struct intel_device_info intel_i945gm_info = { +static const struct intel_device_info i945gm_info = { GEN3_FEATURES, PLATFORM(INTEL_I945GM), .is_mobile = 1, @@ -263,21 +277,21 @@ static const struct intel_device_info intel_i945gm_info = { .unfenced_needs_alignment = 1, }; -static const struct intel_device_info intel_g33_info = { +static const struct intel_device_info g33_info = { GEN3_FEATURES, PLATFORM(INTEL_G33), .display.has_hotplug = 1, .display.has_overlay = 1, }; -static const struct intel_device_info intel_pineview_g_info = { +static const struct intel_device_info pnv_g_info = { GEN3_FEATURES, PLATFORM(INTEL_PINEVIEW), .display.has_hotplug = 1, .display.has_overlay = 1, }; -static const struct intel_device_info intel_pineview_m_info = { +static const struct intel_device_info pnv_m_info = { GEN3_FEATURES, PLATFORM(INTEL_PINEVIEW), .is_mobile = 1, @@ -287,7 +301,7 @@ static const struct intel_device_info intel_pineview_m_info = { #define GEN4_FEATURES \ GEN(4), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_hotplug = 1, \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ @@ -297,9 +311,10 @@ static const struct intel_device_info intel_pineview_m_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I965_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS -static const struct intel_device_info intel_i965g_info = { +static const struct intel_device_info i965g_info = { GEN4_FEATURES, PLATFORM(INTEL_I965G), .display.has_overlay = 1, @@ -307,7 +322,7 @@ static const struct intel_device_info intel_i965g_info = { .has_snoop = false, }; -static const struct intel_device_info intel_i965gm_info = { +static const struct intel_device_info i965gm_info = { GEN4_FEATURES, PLATFORM(INTEL_I965GM), .is_mobile = 1, @@ -318,14 +333,14 @@ static const struct intel_device_info intel_i965gm_info = { .has_snoop = false, }; -static const struct intel_device_info intel_g45_info = { +static const struct intel_device_info g45_info = { GEN4_FEATURES, PLATFORM(INTEL_G45), .engine_mask = BIT(RCS0) | BIT(VCS0), .gpu_reset_clobbers_display = false, }; -static const struct intel_device_info intel_gm45_info = { +static const struct intel_device_info gm45_info = { GEN4_FEATURES, PLATFORM(INTEL_GM45), .is_mobile = 1, @@ -337,7 +352,7 @@ static const struct intel_device_info intel_gm45_info = { #define GEN5_FEATURES \ GEN(5), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_hotplug = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0), \ .has_snoop = true, \ @@ -347,14 +362,15 @@ static const struct intel_device_info intel_gm45_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ ILK_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS -static const struct intel_device_info intel_ironlake_d_info = { +static const struct intel_device_info 
ilk_d_info = { GEN5_FEATURES, PLATFORM(INTEL_IRONLAKE), }; -static const struct intel_device_info intel_ironlake_m_info = { +static const struct intel_device_info ilk_m_info = { GEN5_FEATURES, PLATFORM(INTEL_IRONLAKE), .is_mobile = 1, @@ -363,7 +379,7 @@ static const struct intel_device_info intel_ironlake_m_info = { #define GEN6_FEATURES \ GEN(6), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ @@ -377,18 +393,19 @@ static const struct intel_device_info intel_ironlake_m_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ ILK_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS #define SNB_D_PLATFORM \ GEN6_FEATURES, \ PLATFORM(INTEL_SANDYBRIDGE) -static const struct intel_device_info intel_sandybridge_d_gt1_info = { +static const struct intel_device_info snb_d_gt1_info = { SNB_D_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_sandybridge_d_gt2_info = { +static const struct intel_device_info snb_d_gt2_info = { SNB_D_PLATFORM, .gt = 2, }; @@ -399,19 +416,19 @@ static const struct intel_device_info intel_sandybridge_d_gt2_info = { .is_mobile = 1 -static const struct intel_device_info intel_sandybridge_m_gt1_info = { +static const struct intel_device_info snb_m_gt1_info = { SNB_M_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_sandybridge_m_gt2_info = { +static const struct intel_device_info snb_m_gt2_info = { SNB_M_PLATFORM, .gt = 2, }; #define GEN7_FEATURES \ GEN(7), \ - .num_pipes = 3, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ @@ -425,19 +442,20 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { IVB_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ IVB_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS #define IVB_D_PLATFORM \ GEN7_FEATURES, \ PLATFORM(INTEL_IVYBRIDGE), \ .has_l3_dpf = 1 -static const struct intel_device_info intel_ivybridge_d_gt1_info = { +static const struct intel_device_info ivb_d_gt1_info = { IVB_D_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_ivybridge_d_gt2_info = { +static const struct intel_device_info ivb_d_gt2_info = { IVB_D_PLATFORM, .gt = 2, }; @@ -448,29 +466,29 @@ static const struct intel_device_info intel_ivybridge_d_gt2_info = { .is_mobile = 1, \ .has_l3_dpf = 1 -static const struct intel_device_info intel_ivybridge_m_gt1_info = { +static const struct intel_device_info ivb_m_gt1_info = { IVB_M_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_ivybridge_m_gt2_info = { +static const struct intel_device_info ivb_m_gt2_info = { IVB_M_PLATFORM, .gt = 2, }; -static const struct intel_device_info intel_ivybridge_q_info = { +static const struct intel_device_info ivb_q_info = { GEN7_FEATURES, PLATFORM(INTEL_IVYBRIDGE), .gt = 2, - .num_pipes = 0, /* legal, last one wins */ + .pipe_mask = 0, /* legal, last one wins */ .has_l3_dpf = 1, }; -static const struct intel_device_info intel_valleyview_info = { +static const struct intel_device_info vlv_info = { PLATFORM(INTEL_VALLEYVIEW), GEN(7), .is_lp = 1, - .num_pipes = 2, + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), .has_runtime_pm = 1, .has_rc6 = 1, .has_rps = true, @@ -486,6 +504,7 @@ static const struct intel_device_info intel_valleyview_info = { I9XX_CURSOR_OFFSETS, I965_COLORS, GEN_DEFAULT_PAGE_SIZES, + GEN_DEFAULT_REGIONS, }; 
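The platform tables in this file replace the scalar .num_pipes with a .pipe_mask bitmask of BIT(PIPE_x) values, as in the vlv_info hunk just above. Where a pipe count is still wanted it falls out of the mask; a minimal sketch with a hypothetical helper name, not part of the patch:

#include <linux/bitops.h>

/* Hypothetical helper: recover the old num_pipes value from pipe_mask. */
static inline unsigned int num_pipes_from_mask(u8 pipe_mask)
{
	return hweight8(pipe_mask);	/* BIT(PIPE_A) | BIT(PIPE_B) -> 2 */
}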
#define G75_FEATURES \ @@ -504,17 +523,17 @@ static const struct intel_device_info intel_valleyview_info = { PLATFORM(INTEL_HASWELL), \ .has_l3_dpf = 1 -static const struct intel_device_info intel_haswell_gt1_info = { +static const struct intel_device_info hsw_gt1_info = { HSW_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_haswell_gt2_info = { +static const struct intel_device_info hsw_gt2_info = { HSW_PLATFORM, .gt = 2, }; -static const struct intel_device_info intel_haswell_gt3_info = { +static const struct intel_device_info hsw_gt3_info = { HSW_PLATFORM, .gt = 3, }; @@ -522,8 +541,6 @@ static const struct intel_device_info intel_haswell_gt3_info = { #define GEN8_FEATURES \ G75_FEATURES, \ GEN(8), \ - .page_sizes = I915_GTT_PAGE_SIZE_4K | \ - I915_GTT_PAGE_SIZE_2M, \ .has_logical_ring_contexts = 1, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 48, \ @@ -534,17 +551,17 @@ static const struct intel_device_info intel_haswell_gt3_info = { GEN8_FEATURES, \ PLATFORM(INTEL_BROADWELL) -static const struct intel_device_info intel_broadwell_gt1_info = { +static const struct intel_device_info bdw_gt1_info = { BDW_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_broadwell_gt2_info = { +static const struct intel_device_info bdw_gt2_info = { BDW_PLATFORM, .gt = 2, }; -static const struct intel_device_info intel_broadwell_rsvd_info = { +static const struct intel_device_info bdw_rsvd_info = { BDW_PLATFORM, .gt = 3, /* According to the device ID those devices are GT3, they were @@ -552,17 +569,17 @@ static const struct intel_device_info intel_broadwell_rsvd_info = { */ }; -static const struct intel_device_info intel_broadwell_gt3_info = { +static const struct intel_device_info bdw_gt3_info = { BDW_PLATFORM, .gt = 3, .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1), }; -static const struct intel_device_info intel_cherryview_info = { +static const struct intel_device_info chv_info = { PLATFORM(INTEL_CHERRYVIEW), GEN(8), - .num_pipes = 3, + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), .display.has_hotplug = 1, .is_lp = 1, .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), @@ -572,7 +589,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_rps = true, .has_logical_ring_contexts = 1, .display.has_gmch = 1, - .ppgtt_type = INTEL_PPGTT_FULL, + .ppgtt_type = INTEL_PPGTT_ALIASING, .ppgtt_size = 32, .has_reset_engine = 1, .has_snoop = true, @@ -582,12 +599,12 @@ static const struct intel_device_info intel_cherryview_info = { CHV_CURSOR_OFFSETS, CHV_COLORS, GEN_DEFAULT_PAGE_SIZES, + GEN_DEFAULT_REGIONS, }; #define GEN9_DEFAULT_PAGE_SIZES \ .page_sizes = I915_GTT_PAGE_SIZE_4K | \ - I915_GTT_PAGE_SIZE_64K | \ - I915_GTT_PAGE_SIZE_2M + I915_GTT_PAGE_SIZE_64K #define GEN9_FEATURES \ GEN8_FEATURES, \ @@ -595,7 +612,8 @@ static const struct intel_device_info intel_cherryview_info = { GEN9_DEFAULT_PAGE_SIZES, \ .has_logical_ring_preemption = 1, \ .display.has_csr = 1, \ - .has_guc = 1, \ + .has_gt_uc = 1, \ + .display.has_hdcp = 1, \ .display.has_ipc = 1, \ .ddb_size = 896 @@ -603,12 +621,12 @@ static const struct intel_device_info intel_cherryview_info = { GEN9_FEATURES, \ PLATFORM(INTEL_SKYLAKE) -static const struct intel_device_info intel_skylake_gt1_info = { +static const struct intel_device_info skl_gt1_info = { SKL_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_skylake_gt2_info = { +static const struct intel_device_info skl_gt2_info = { SKL_PLATFORM, .gt = 2, }; @@ -619,12 +637,12 @@ 
static const struct intel_device_info intel_skylake_gt2_info = { BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1) -static const struct intel_device_info intel_skylake_gt3_info = { +static const struct intel_device_info skl_gt3_info = { SKL_GT3_PLUS_PLATFORM, .gt = 3, }; -static const struct intel_device_info intel_skylake_gt4_info = { +static const struct intel_device_info skl_gt4_info = { SKL_GT3_PLUS_PLATFORM, .gt = 4, }; @@ -634,11 +652,12 @@ static const struct intel_device_info intel_skylake_gt4_info = { .is_lp = 1, \ .display.has_hotplug = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ - .num_pipes = 3, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ .has_64bit_reloc = 1, \ .display.has_ddi = 1, \ .has_fpga_dbg = 1, \ .display.has_fbc = 1, \ + .display.has_hdcp = 1, \ .display.has_psr = 1, \ .has_runtime_pm = 1, \ .display.has_csr = 1, \ @@ -647,7 +666,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ .has_logical_ring_preemption = 1, \ - .has_guc = 1, \ + .has_gt_uc = 1, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 48, \ .has_reset_engine = 1, \ @@ -657,15 +676,16 @@ static const struct intel_device_info intel_skylake_gt4_info = { HSW_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ IVB_COLORS, \ - GEN9_DEFAULT_PAGE_SIZES + GEN9_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS -static const struct intel_device_info intel_broxton_info = { +static const struct intel_device_info bxt_info = { GEN9_LP_FEATURES, PLATFORM(INTEL_BROXTON), .ddb_size = 512, }; -static const struct intel_device_info intel_geminilake_info = { +static const struct intel_device_info glk_info = { GEN9_LP_FEATURES, PLATFORM(INTEL_GEMINILAKE), .ddb_size = 1024, @@ -676,17 +696,17 @@ static const struct intel_device_info intel_geminilake_info = { GEN9_FEATURES, \ PLATFORM(INTEL_KABYLAKE) -static const struct intel_device_info intel_kabylake_gt1_info = { +static const struct intel_device_info kbl_gt1_info = { KBL_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_kabylake_gt2_info = { +static const struct intel_device_info kbl_gt2_info = { KBL_PLATFORM, .gt = 2, }; -static const struct intel_device_info intel_kabylake_gt3_info = { +static const struct intel_device_info kbl_gt3_info = { KBL_PLATFORM, .gt = 3, .engine_mask = @@ -697,17 +717,17 @@ static const struct intel_device_info intel_kabylake_gt3_info = { GEN9_FEATURES, \ PLATFORM(INTEL_COFFEELAKE) -static const struct intel_device_info intel_coffeelake_gt1_info = { +static const struct intel_device_info cfl_gt1_info = { CFL_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_coffeelake_gt2_info = { +static const struct intel_device_info cfl_gt2_info = { CFL_PLATFORM, .gt = 2, }; -static const struct intel_device_info intel_coffeelake_gt3_info = { +static const struct intel_device_info cfl_gt3_info = { CFL_PLATFORM, .gt = 3, .engine_mask = @@ -718,17 +738,24 @@ static const struct intel_device_info intel_coffeelake_gt3_info = { GEN9_FEATURES, \ GEN(10), \ .ddb_size = 1024, \ + .display.has_dsc = 1, \ .has_coherent_ggtt = false, \ GLK_COLORS -static const struct intel_device_info intel_cannonlake_info = { +static const struct intel_device_info cnl_info = { GEN10_FEATURES, PLATFORM(INTEL_CANNONLAKE), .gt = 2, }; +#define GEN11_DEFAULT_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_64K | \ + I915_GTT_PAGE_SIZE_2M + #define GEN11_FEATURES \ GEN10_FEATURES, \ + GEN11_DEFAULT_PAGE_SIZES, \ 
.pipe_offsets = { \ [TRANSCODER_A] = PIPE_A_OFFSET, \ [TRANSCODER_B] = PIPE_B_OFFSET, \ @@ -750,21 +777,59 @@ static const struct intel_device_info intel_cannonlake_info = { .has_logical_ring_elsq = 1, \ .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 } -static const struct intel_device_info intel_icelake_11_info = { +static const struct intel_device_info icl_info = { GEN11_FEATURES, PLATFORM(INTEL_ICELAKE), .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), }; -static const struct intel_device_info intel_elkhartlake_info = { +static const struct intel_device_info ehl_info = { GEN11_FEATURES, PLATFORM(INTEL_ELKHARTLAKE), .require_force_probe = 1, - .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0), + .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0) | BIT(VECS0), .ppgtt_size = 36, }; +#define GEN12_FEATURES \ + GEN11_FEATURES, \ + GEN(12), \ + .pipe_offsets = { \ + [TRANSCODER_A] = PIPE_A_OFFSET, \ + [TRANSCODER_B] = PIPE_B_OFFSET, \ + [TRANSCODER_C] = PIPE_C_OFFSET, \ + [TRANSCODER_D] = PIPE_D_OFFSET, \ + [TRANSCODER_DSI_0] = PIPE_DSI0_OFFSET, \ + [TRANSCODER_DSI_1] = PIPE_DSI1_OFFSET, \ + }, \ + .trans_offsets = { \ + [TRANSCODER_A] = TRANSCODER_A_OFFSET, \ + [TRANSCODER_B] = TRANSCODER_B_OFFSET, \ + [TRANSCODER_C] = TRANSCODER_C_OFFSET, \ + [TRANSCODER_D] = TRANSCODER_D_OFFSET, \ + [TRANSCODER_DSI_0] = TRANSCODER_DSI0_OFFSET, \ + [TRANSCODER_DSI_1] = TRANSCODER_DSI1_OFFSET, \ + }, \ + TGL_CURSOR_OFFSETS, \ + .has_global_mocs = 1, \ + .display.has_dsb = 1 + +static const struct intel_device_info tgl_info = { + GEN12_FEATURES, + PLATFORM(INTEL_TIGERLAKE), + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), + .require_force_probe = 1, + .display.has_modular_fia = 1, + .engine_mask = + BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), + .has_rps = false, /* XXX disabled for debugging */ +}; + +#define GEN12_DGFX_FEATURES \ + GEN12_FEATURES, \ + .is_dgfx = 1 + #undef GEN #undef PLATFORM @@ -775,83 +840,86 @@ static const struct intel_device_info intel_elkhartlake_info = { * PCI ID matches, otherwise we'll use the wrong info struct above. 
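The GEN12_FEATURES/tgl_info definitions above rely on the same C property as the earlier "legal, last one wins" note on ivb_q_info: a FEATURES macro expands to a list of designated initializers, and a later initializer for the same member simply overrides it. A self-contained illustration of that behaviour (names invented for the example):

struct example {
	int gen;
	int has_rps;
};

#define BASE_FEATURES .gen = 12, .has_rps = 1

static const struct example tgl_like = {
	BASE_FEATURES,
	.has_rps = 0,	/* overrides .has_rps = 1 from BASE_FEATURES */
};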
*/ static const struct pci_device_id pciidlist[] = { - INTEL_I830_IDS(&intel_i830_info), - INTEL_I845G_IDS(&intel_i845g_info), - INTEL_I85X_IDS(&intel_i85x_info), - INTEL_I865G_IDS(&intel_i865g_info), - INTEL_I915G_IDS(&intel_i915g_info), - INTEL_I915GM_IDS(&intel_i915gm_info), - INTEL_I945G_IDS(&intel_i945g_info), - INTEL_I945GM_IDS(&intel_i945gm_info), - INTEL_I965G_IDS(&intel_i965g_info), - INTEL_G33_IDS(&intel_g33_info), - INTEL_I965GM_IDS(&intel_i965gm_info), - INTEL_GM45_IDS(&intel_gm45_info), - INTEL_G45_IDS(&intel_g45_info), - INTEL_PINEVIEW_G_IDS(&intel_pineview_g_info), - INTEL_PINEVIEW_M_IDS(&intel_pineview_m_info), - INTEL_IRONLAKE_D_IDS(&intel_ironlake_d_info), - INTEL_IRONLAKE_M_IDS(&intel_ironlake_m_info), - INTEL_SNB_D_GT1_IDS(&intel_sandybridge_d_gt1_info), - INTEL_SNB_D_GT2_IDS(&intel_sandybridge_d_gt2_info), - INTEL_SNB_M_GT1_IDS(&intel_sandybridge_m_gt1_info), - INTEL_SNB_M_GT2_IDS(&intel_sandybridge_m_gt2_info), - INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */ - INTEL_IVB_M_GT1_IDS(&intel_ivybridge_m_gt1_info), - INTEL_IVB_M_GT2_IDS(&intel_ivybridge_m_gt2_info), - INTEL_IVB_D_GT1_IDS(&intel_ivybridge_d_gt1_info), - INTEL_IVB_D_GT2_IDS(&intel_ivybridge_d_gt2_info), - INTEL_HSW_GT1_IDS(&intel_haswell_gt1_info), - INTEL_HSW_GT2_IDS(&intel_haswell_gt2_info), - INTEL_HSW_GT3_IDS(&intel_haswell_gt3_info), - INTEL_VLV_IDS(&intel_valleyview_info), - INTEL_BDW_GT1_IDS(&intel_broadwell_gt1_info), - INTEL_BDW_GT2_IDS(&intel_broadwell_gt2_info), - INTEL_BDW_GT3_IDS(&intel_broadwell_gt3_info), - INTEL_BDW_RSVD_IDS(&intel_broadwell_rsvd_info), - INTEL_CHV_IDS(&intel_cherryview_info), - INTEL_SKL_GT1_IDS(&intel_skylake_gt1_info), - INTEL_SKL_GT2_IDS(&intel_skylake_gt2_info), - INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info), - INTEL_SKL_GT4_IDS(&intel_skylake_gt4_info), - INTEL_BXT_IDS(&intel_broxton_info), - INTEL_GLK_IDS(&intel_geminilake_info), - INTEL_KBL_GT1_IDS(&intel_kabylake_gt1_info), - INTEL_KBL_GT2_IDS(&intel_kabylake_gt2_info), - INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info), - INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info), - INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info), - INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info), - INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info), - INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), - INTEL_WHL_U_GT1_IDS(&intel_coffeelake_gt1_info), - INTEL_WHL_U_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_AML_CFL_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_WHL_U_GT3_IDS(&intel_coffeelake_gt3_info), - INTEL_CML_GT1_IDS(&intel_coffeelake_gt1_info), - INTEL_CML_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_CNL_IDS(&intel_cannonlake_info), - INTEL_ICL_11_IDS(&intel_icelake_11_info), - INTEL_EHL_IDS(&intel_elkhartlake_info), + INTEL_I830_IDS(&i830_info), + INTEL_I845G_IDS(&i845g_info), + INTEL_I85X_IDS(&i85x_info), + INTEL_I865G_IDS(&i865g_info), + INTEL_I915G_IDS(&i915g_info), + INTEL_I915GM_IDS(&i915gm_info), + INTEL_I945G_IDS(&i945g_info), + INTEL_I945GM_IDS(&i945gm_info), + INTEL_I965G_IDS(&i965g_info), + INTEL_G33_IDS(&g33_info), + INTEL_I965GM_IDS(&i965gm_info), + INTEL_GM45_IDS(&gm45_info), + INTEL_G45_IDS(&g45_info), + INTEL_PINEVIEW_G_IDS(&pnv_g_info), + INTEL_PINEVIEW_M_IDS(&pnv_m_info), + INTEL_IRONLAKE_D_IDS(&ilk_d_info), + INTEL_IRONLAKE_M_IDS(&ilk_m_info), + INTEL_SNB_D_GT1_IDS(&snb_d_gt1_info), + INTEL_SNB_D_GT2_IDS(&snb_d_gt2_info), + INTEL_SNB_M_GT1_IDS(&snb_m_gt1_info), + 
INTEL_SNB_M_GT2_IDS(&snb_m_gt2_info), + INTEL_IVB_Q_IDS(&ivb_q_info), /* must be first IVB */ + INTEL_IVB_M_GT1_IDS(&ivb_m_gt1_info), + INTEL_IVB_M_GT2_IDS(&ivb_m_gt2_info), + INTEL_IVB_D_GT1_IDS(&ivb_d_gt1_info), + INTEL_IVB_D_GT2_IDS(&ivb_d_gt2_info), + INTEL_HSW_GT1_IDS(&hsw_gt1_info), + INTEL_HSW_GT2_IDS(&hsw_gt2_info), + INTEL_HSW_GT3_IDS(&hsw_gt3_info), + INTEL_VLV_IDS(&vlv_info), + INTEL_BDW_GT1_IDS(&bdw_gt1_info), + INTEL_BDW_GT2_IDS(&bdw_gt2_info), + INTEL_BDW_GT3_IDS(&bdw_gt3_info), + INTEL_BDW_RSVD_IDS(&bdw_rsvd_info), + INTEL_CHV_IDS(&chv_info), + INTEL_SKL_GT1_IDS(&skl_gt1_info), + INTEL_SKL_GT2_IDS(&skl_gt2_info), + INTEL_SKL_GT3_IDS(&skl_gt3_info), + INTEL_SKL_GT4_IDS(&skl_gt4_info), + INTEL_BXT_IDS(&bxt_info), + INTEL_GLK_IDS(&glk_info), + INTEL_KBL_GT1_IDS(&kbl_gt1_info), + INTEL_KBL_GT2_IDS(&kbl_gt2_info), + INTEL_KBL_GT3_IDS(&kbl_gt3_info), + INTEL_KBL_GT4_IDS(&kbl_gt3_info), + INTEL_AML_KBL_GT2_IDS(&kbl_gt2_info), + INTEL_CFL_S_GT1_IDS(&cfl_gt1_info), + INTEL_CFL_S_GT2_IDS(&cfl_gt2_info), + INTEL_CFL_H_GT1_IDS(&cfl_gt1_info), + INTEL_CFL_H_GT2_IDS(&cfl_gt2_info), + INTEL_CFL_U_GT2_IDS(&cfl_gt2_info), + INTEL_CFL_U_GT3_IDS(&cfl_gt3_info), + INTEL_WHL_U_GT1_IDS(&cfl_gt1_info), + INTEL_WHL_U_GT2_IDS(&cfl_gt2_info), + INTEL_AML_CFL_GT2_IDS(&cfl_gt2_info), + INTEL_WHL_U_GT3_IDS(&cfl_gt3_info), + INTEL_CML_GT1_IDS(&cfl_gt1_info), + INTEL_CML_GT2_IDS(&cfl_gt2_info), + INTEL_CML_U_GT1_IDS(&cfl_gt1_info), + INTEL_CML_U_GT2_IDS(&cfl_gt2_info), + INTEL_CNL_IDS(&cnl_info), + INTEL_ICL_11_IDS(&icl_info), + INTEL_EHL_IDS(&ehl_info), + INTEL_TGL_12_IDS(&tgl_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); static void i915_pci_remove(struct pci_dev *pdev) { - struct drm_device *dev; + struct drm_i915_private *i915; - dev = pci_get_drvdata(pdev); - if (!dev) /* driver load aborted, nothing to cleanup */ + i915 = pci_get_drvdata(pdev); + if (!i915) /* driver load aborted, nothing to cleanup */ return; - i915_driver_unload(dev); - drm_dev_put(dev); - + i915_driver_remove(i915); pci_set_drvdata(pdev, NULL); + + drm_dev_put(&i915->drm); } /* is device_id present in comma separated list of ids */ @@ -923,11 +991,11 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; - err = i915_driver_load(pdev, ent); + err = i915_driver_probe(pdev, ent); if (err) return err; - if (i915_inject_load_failure()) { + if (i915_inject_probe_failure(pci_get_drvdata(pdev))) { i915_pci_remove(pdev); return -ENODEV; } @@ -938,6 +1006,12 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err > 0 ? -ENOTTY : err; } + err = i915_perf_selftests(pdev); + if (err) { + i915_pci_remove(pdev); + return err > 0 ? 
-ENOTTY : err; + } + return 0; } @@ -980,7 +1054,12 @@ static int __init i915_init(void) return 0; } - return pci_register_driver(&i915_pci_driver); + err = pci_register_driver(&i915_pci_driver); + if (err) + return err; + + i915_perf_sysctl_register(); + return 0; } static void __exit i915_exit(void) @@ -988,6 +1067,7 @@ static void __exit i915_exit(void) if (!i915_pci_driver.driver.owner) return; + i915_perf_sysctl_unregister(); pci_unregister_driver(&i915_pci_driver); i915_globals_exit(); } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 5140017f9a39..0f556d80ba36 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -196,24 +196,29 @@ #include <linux/uuid.h> #include "gem/i915_gem_context.h" -#include "gem/i915_gem_pm.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" #include "gt/intel_lrc_reg.h" +#include "gt/intel_ring.h" #include "i915_drv.h" -#include "i915_oa_hsw.h" -#include "i915_oa_bdw.h" -#include "i915_oa_chv.h" -#include "i915_oa_sklgt2.h" -#include "i915_oa_sklgt3.h" -#include "i915_oa_sklgt4.h" -#include "i915_oa_bxt.h" -#include "i915_oa_kblgt2.h" -#include "i915_oa_kblgt3.h" -#include "i915_oa_glk.h" -#include "i915_oa_cflgt2.h" -#include "i915_oa_cflgt3.h" -#include "i915_oa_cnl.h" -#include "i915_oa_icl.h" +#include "i915_perf.h" +#include "oa/i915_oa_hsw.h" +#include "oa/i915_oa_bdw.h" +#include "oa/i915_oa_chv.h" +#include "oa/i915_oa_sklgt2.h" +#include "oa/i915_oa_sklgt3.h" +#include "oa/i915_oa_sklgt4.h" +#include "oa/i915_oa_bxt.h" +#include "oa/i915_oa_kblgt2.h" +#include "oa/i915_oa_kblgt3.h" +#include "oa/i915_oa_glk.h" +#include "oa/i915_oa_cflgt2.h" +#include "oa/i915_oa_cflgt3.h" +#include "oa/i915_oa_cnl.h" +#include "oa/i915_oa_icl.h" +#include "oa/i915_oa_tgl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -290,6 +295,7 @@ static u32 i915_perf_stream_paranoid = true; /* On Gen8+ automatically triggered OA reports include a 'reason' field... 
*/ #define OAREPORT_REASON_MASK 0x3f +#define OAREPORT_REASON_MASK_EXTENDED 0x7f #define OAREPORT_REASON_SHIFT 19 #define OAREPORT_REASON_TIMER (1<<0) #define OAREPORT_REASON_CTX_SWITCH (1<<3) @@ -335,17 +341,24 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_C4_B8] = { 7, 64 }, }; +static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = { + [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, +}; + #define SAMPLE_OA_REPORT (1<<0) /** * struct perf_open_properties - for validated properties given to open a stream * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags * @single_context: Whether a single or all gpu contexts should be monitored + * @hold_preemption: Whether the preemption is disabled for the filtered + * context * @ctx_handle: A gem ctx handle for use with @single_context * @metrics_set: An ID for an OA unit metric set advertised via sysfs * @oa_format: An OA unit HW report format * @oa_periodic: Whether to enable periodic OA unit sampling * @oa_period_exponent: The OA unit sampling period is derived from this + * @engine: The engine (typically rcs0) being monitored by the OA unit * * As read_properties_unlocked() enumerates and validates the properties given * to open a stream of metrics the configuration is built up in the structure @@ -355,6 +368,7 @@ struct perf_open_properties { u32 sample_flags; u64 single_context:1; + u64 hold_preemption:1; u64 ctx_handle; /* OA sampling state */ @@ -362,71 +376,83 @@ struct perf_open_properties { int oa_format; bool oa_periodic; int oa_period_exponent; + + struct intel_engine_cs *engine; }; -static void free_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) -{ - if (!PTR_ERR(oa_config->flex_regs)) - kfree(oa_config->flex_regs); - if (!PTR_ERR(oa_config->b_counter_regs)) - kfree(oa_config->b_counter_regs); - if (!PTR_ERR(oa_config->mux_regs)) - kfree(oa_config->mux_regs); - kfree(oa_config); -} +struct i915_oa_config_bo { + struct llist_node node; -static void put_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) + struct i915_oa_config *oa_config; + struct i915_vma *vma; +}; + +static struct ctl_table_header *sysctl_header; + +static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); + +void i915_oa_config_release(struct kref *ref) { - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; + struct i915_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); + + kfree(oa_config->flex_regs); + kfree(oa_config->b_counter_regs); + kfree(oa_config->mux_regs); - free_oa_config(dev_priv, oa_config); + kfree_rcu(oa_config, rcu); } -static int get_oa_config(struct drm_i915_private *dev_priv, - int metrics_set, - struct i915_oa_config **out_config) +struct i915_oa_config * +i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set) { - int ret; + struct i915_oa_config *oa_config; - if (metrics_set == 1) { - *out_config = &dev_priv->perf.oa.test_config; - atomic_inc(&dev_priv->perf.oa.test_config.ref_count); - return 0; - } + rcu_read_lock(); + if (metrics_set == 1) + oa_config = &perf->test_config; + else + oa_config = idr_find(&perf->metrics_idr, metrics_set); + if (oa_config) + oa_config = i915_oa_config_get(oa_config); + rcu_read_unlock(); - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); - if (ret) - return ret; + return oa_config; +} - *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); - if (!*out_config) - ret = 
-EINVAL; - else - atomic_inc(&(*out_config)->ref_count); +static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) +{ + i915_oa_config_put(oa_bo->oa_config); + i915_vma_put(oa_bo->vma); + kfree(oa_bo); +} - mutex_unlock(&dev_priv->perf.metrics_lock); +static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; - return ret; + return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) & + GEN12_OAG_OATAILPTR_MASK; } -static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv) +static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { - return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; + struct intel_uncore *uncore = stream->uncore; + + return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; } -static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv) +static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) { - u32 oastatus1 = I915_READ(GEN7_OASTATUS1); + struct intel_uncore *uncore = stream->uncore; + u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; } /** * oa_buffer_check_unlocked - check for data and update tail ptr state - * @dev_priv: i915 device instance + * @stream: i915 stream instance * * This is either called via fops (for blocking reads in user ctx) or the poll * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check @@ -448,9 +474,9 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv) * * Returns: %true if the OA buffer contains data, else %false */ -static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) +static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) { - int report_size = dev_priv->perf.oa.oa_buffer.format_size; + int report_size = stream->oa_buffer.format_size; unsigned long flags; unsigned int aged_idx; u32 head, hw_tail, aged_tail, aging_tail; @@ -460,19 +486,19 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) * could result in an OA buffer reset which might reset the head, * tails[] and aged_tail state. */ - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* NB: The head we observe here might effectively be a little out of * date (between head and tails[aged_idx].offset if there is currently * a read() in progress. */ - head = dev_priv->perf.oa.oa_buffer.head; + head = stream->oa_buffer.head; - aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; - aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset; - aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset; + aged_idx = stream->oa_buffer.aged_tail_idx; + aged_tail = stream->oa_buffer.tails[aged_idx].offset; + aging_tail = stream->oa_buffer.tails[!aged_idx].offset; - hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv); + hw_tail = stream->perf->ops.oa_hw_tail_read(stream); /* The tail pointer increases in 64 byte increments, * not in report_size steps... @@ -492,16 +518,16 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) * available) without needing to wait for a later hrtimer callback. 
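The comments here describe the aged/aging two-slot tail scheme; the arithmetic underneath it is plain circular-buffer distance over a power-of-two buffer. The driver's OA_TAKEN() macro, used just below, is of this shape (reproduced as a sketch, assuming the 16M buffer the surrounding comments mention):

#include <linux/sizes.h>

#define OA_BUFFER_SIZE		SZ_16M
#define OA_TAKEN(tail, head)	(((tail) - (head)) & (OA_BUFFER_SIZE - 1))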
*/ if (aging_tail != INVALID_TAIL_PTR && - ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) > + ((now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC)) { aged_idx ^= 1; - dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx; + stream->oa_buffer.aged_tail_idx = aged_idx; aged_tail = aging_tail; /* Mark that we need a new pointer to start aging... */ - dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; aging_tail = INVALID_TAIL_PTR; } @@ -516,7 +542,7 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) if (aging_tail == INVALID_TAIL_PTR && (aged_tail == INVALID_TAIL_PTR || OA_TAKEN(hw_tail, aged_tail) >= report_size)) { - struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma; + struct i915_vma *vma = stream->oa_buffer.vma; u32 gtt_offset = i915_ggtt_offset(vma); /* Be paranoid and do a bounds check on the pointer read back @@ -525,16 +551,16 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) */ if (hw_tail >= gtt_offset && hw_tail < (gtt_offset + OA_BUFFER_SIZE)) { - dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = + stream->oa_buffer.tails[!aged_idx].offset = aging_tail = hw_tail; - dev_priv->perf.oa.oa_buffer.aging_timestamp = now; + stream->oa_buffer.aging_timestamp = now; } else { - DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", + DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %x\n", hw_tail); } } - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); return aged_tail == INVALID_TAIL_PTR ? false : OA_TAKEN(aged_tail, head) >= report_size; @@ -597,8 +623,7 @@ static int append_oa_sample(struct i915_perf_stream *stream, size_t *offset, const u8 *report) { - struct drm_i915_private *dev_priv = stream->dev_priv; - int report_size = dev_priv->perf.oa.oa_buffer.format_size; + int report_size = stream->oa_buffer.format_size; struct drm_i915_perf_record_header header; u32 sample_flags = stream->sample_flags; @@ -649,10 +674,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; - int report_size = dev_priv->perf.oa.oa_buffer.format_size; - u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; - u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + struct intel_uncore *uncore = stream->uncore; + int report_size = stream->oa_buffer.format_size; + u8 *oa_buf_base = stream->oa_buffer.vaddr; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; @@ -664,13 +689,13 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, if (WARN_ON(!stream->enabled)) return -EIO; - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - head = dev_priv->perf.oa.oa_buffer.head; - aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; - tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset; + head = stream->oa_buffer.head; + aged_tail_idx = stream->oa_buffer.aged_tail_idx; + tail = stream->oa_buffer.tails[aged_tail_idx].offset; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* * An invalid tail pointer here means we're still waiting for the poll @@ -732,14 
+757,16 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * it to userspace... */ reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & - OAREPORT_REASON_MASK); + (IS_GEN(stream->perf->i915, 12) ? + OAREPORT_REASON_MASK_EXTENDED : + OAREPORT_REASON_MASK)); if (reason == 0) { - if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs)) + if (__ratelimit(&stream->perf->spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } - ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask; + ctx_id = report32[2] & stream->specific_ctx_id_mask; /* * Squash whatever is in the CTX_ID field if it's marked as @@ -749,7 +776,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit)) + if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) && + INTEL_GEN(stream->perf->i915) <= 11) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -783,18 +811,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * switches since it's not-uncommon for periodic samples to * identify a switch before any 'context switch' report. */ - if (!dev_priv->perf.oa.exclusive_stream->ctx || - dev_priv->perf.oa.specific_ctx_id == ctx_id || - (dev_priv->perf.oa.oa_buffer.last_ctx_id == - dev_priv->perf.oa.specific_ctx_id) || + if (!stream->perf->exclusive_stream->ctx || + stream->specific_ctx_id == ctx_id || + stream->oa_buffer.last_ctx_id == stream->specific_ctx_id || reason & OAREPORT_REASON_CTX_SWITCH) { /* * While filtering for a single context we avoid * leaking the IDs of other contexts. */ - if (dev_priv->perf.oa.exclusive_stream->ctx && - dev_priv->perf.oa.specific_ctx_id != ctx_id) { + if (stream->perf->exclusive_stream->ctx && + stream->specific_ctx_id != ctx_id) { report32[2] = INVALID_CTX_ID; } @@ -803,7 +830,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, if (ret) break; - dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id; + stream->oa_buffer.last_ctx_id = ctx_id; } /* @@ -817,18 +844,23 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + i915_reg_t oaheadptr; + + oaheadptr = IS_GEN(stream->perf->i915, 12) ? + GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* * We removed the gtt_offset for the copy loop above, indexing * relative to oa_buf_base so put back here... */ head += gtt_offset; + intel_uncore_write(uncore, oaheadptr, + head & GEN12_OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = head; - I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK); - dev_priv->perf.oa.oa_buffer.head = head; - - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); } return ret; @@ -859,14 +891,18 @@ static int gen8_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 oastatus; + i915_reg_t oastatus_reg; int ret; - if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) + if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus = I915_READ(GEN8_OASTATUS); + oastatus_reg = IS_GEN(stream->perf->i915, 12) ? 
+ GEN12_OAG_OASTATUS : GEN8_OASTATUS; + + oastatus = intel_uncore_read(uncore, oastatus_reg); /* * We treat OABUFFER_OVERFLOW as a significant error: @@ -889,16 +925,16 @@ static int gen8_oa_read(struct i915_perf_stream *stream, return ret; DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", - dev_priv->perf.oa.period_exponent); + stream->period_exponent); - dev_priv->perf.oa.ops.oa_disable(stream); - dev_priv->perf.oa.ops.oa_enable(stream); + stream->perf->ops.oa_disable(stream); + stream->perf->ops.oa_enable(stream); /* * Note: .oa_enable() is expected to re-init the oabuffer and * reset GEN8_OASTATUS for us */ - oastatus = I915_READ(GEN8_OASTATUS); + oastatus = intel_uncore_read(uncore, oastatus_reg); } if (oastatus & GEN8_OASTATUS_REPORT_LOST) { @@ -906,8 +942,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - I915_WRITE(GEN8_OASTATUS, - oastatus & ~GEN8_OASTATUS_REPORT_LOST); + intel_uncore_write(uncore, oastatus_reg, + oastatus & ~GEN8_OASTATUS_REPORT_LOST); } return gen8_append_oa_reports(stream, buf, count, offset); @@ -938,10 +974,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; - int report_size = dev_priv->perf.oa.oa_buffer.format_size; - u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; - u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + struct intel_uncore *uncore = stream->uncore; + int report_size = stream->oa_buffer.format_size; + u8 *oa_buf_base = stream->oa_buffer.vaddr; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; @@ -953,13 +989,13 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, if (WARN_ON(!stream->enabled)) return -EIO; - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - head = dev_priv->perf.oa.oa_buffer.head; - aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; - tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset; + head = stream->oa_buffer.head; + aged_tail_idx = stream->oa_buffer.aged_tail_idx; + tail = stream->oa_buffer.tails[aged_tail_idx].offset; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* An invalid tail pointer here means we're still waiting for the poll * hrtimer callback to give us a pointer @@ -1012,7 +1048,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * copying it to userspace... */ if (report32[0] == 0) { - if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs)) + if (__ratelimit(&stream->perf->spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } @@ -1031,19 +1067,19 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* We removed the gtt_offset for the copy loop above, indexing * relative to oa_buf_base so put back here... 
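Throughout these hunks the implicit-dev_priv accessors (I915_READ/I915_WRITE) become explicit intel_uncore_read/intel_uncore_write calls on stream->uncore. A sketch of the converted head-pointer update from the gen7 path just below, wrapped in an illustrative helper and assuming the i915 register headers:

/* Previously: I915_WRITE(GEN7_OASTATUS2, ...) against an implicit dev_priv. */
static void gen7_write_head(struct intel_uncore *uncore, u32 head)
{
	intel_uncore_write(uncore, GEN7_OASTATUS2,
			   (head & GEN7_OASTATUS2_HEAD_MASK) |
			   GEN7_OASTATUS2_MEM_SELECT_GGTT);
}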
*/ head += gtt_offset; - I915_WRITE(GEN7_OASTATUS2, - ((head & GEN7_OASTATUS2_HEAD_MASK) | - GEN7_OASTATUS2_MEM_SELECT_GGTT)); - dev_priv->perf.oa.oa_buffer.head = head; + intel_uncore_write(uncore, GEN7_OASTATUS2, + (head & GEN7_OASTATUS2_HEAD_MASK) | + GEN7_OASTATUS2_MEM_SELECT_GGTT); + stream->oa_buffer.head = head; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); } return ret; @@ -1070,21 +1106,21 @@ static int gen7_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 oastatus1; int ret; - if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) + if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus1 = I915_READ(GEN7_OASTATUS1); + oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); /* XXX: On Haswell we don't have a safe way to clear oastatus1 * bits while the OA unit is enabled (while the tail pointer * may be updated asynchronously) so we ignore status bits * that have already been reported to userspace. */ - oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1; + oastatus1 &= ~stream->perf->gen7_latched_oastatus1; /* We treat OABUFFER_OVERFLOW as a significant error: * @@ -1113,12 +1149,12 @@ static int gen7_oa_read(struct i915_perf_stream *stream, return ret; DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", - dev_priv->perf.oa.period_exponent); + stream->period_exponent); - dev_priv->perf.oa.ops.oa_disable(stream); - dev_priv->perf.oa.ops.oa_enable(stream); + stream->perf->ops.oa_disable(stream); + stream->perf->ops.oa_enable(stream); - oastatus1 = I915_READ(GEN7_OASTATUS1); + oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); } if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) { @@ -1126,7 +1162,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - dev_priv->perf.oa.gen7_latched_oastatus1 |= + stream->perf->gen7_latched_oastatus1 |= GEN7_OASTATUS1_REPORT_LOST; } @@ -1149,14 +1185,12 @@ static int gen7_oa_read(struct i915_perf_stream *stream, */ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - /* We would wait indefinitely if periodic sampling is not enabled */ - if (!dev_priv->perf.oa.periodic) + if (!stream->periodic) return -EIO; - return wait_event_interruptible(dev_priv->perf.oa.poll_wq, - oa_buffer_check_unlocked(dev_priv)); + return wait_event_interruptible(stream->poll_wq, + oa_buffer_check_unlocked(stream)); } /** @@ -1173,9 +1207,7 @@ static void i915_oa_poll_wait(struct i915_perf_stream *stream, struct file *file, poll_table *wait) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - poll_wait(file, &dev_priv->perf.oa.poll_wq, wait); + poll_wait(file, &stream->poll_wq, wait); } /** @@ -1195,24 +1227,18 @@ static int i915_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - return dev_priv->perf.oa.ops.read(stream, buf, count, offset); + return stream->perf->ops.read(stream, buf, count, offset); } -static struct intel_context *oa_pin_context(struct drm_i915_private *i915, - struct i915_gem_context *ctx) +static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) { struct i915_gem_engines_iter it; + struct i915_gem_context *ctx = stream->ctx; struct intel_context *ce; int err; - err = 
i915_mutex_lock_interruptible(&i915->drm); - if (err) - return ERR_PTR(err); - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - if (ce->engine->class != RENDER_CLASS) + if (ce->engine != stream->engine) /* first match! */ continue; /* @@ -1221,17 +1247,13 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915, */ err = intel_context_pin(ce); if (err == 0) { - i915->perf.oa.pinned_ctx = ce; + stream->pinned_ctx = ce; break; } } i915_gem_context_unlock_engines(ctx); - mutex_unlock(&i915->drm.struct_mutex); - if (err) - return ERR_PTR(err); - - return i915->perf.oa.pinned_ctx; + return stream->pinned_ctx; } /** @@ -1246,28 +1268,31 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915, */ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *i915 = stream->dev_priv; struct intel_context *ce; - ce = oa_pin_context(i915, stream->ctx); + ce = oa_pin_context(stream); if (IS_ERR(ce)) return PTR_ERR(ce); - switch (INTEL_GEN(i915)) { + switch (INTEL_GEN(ce->engine->i915)) { case 7: { /* * On Haswell we don't do any post processing of the reports * and don't need to use the mask. */ - i915->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state); - i915->perf.oa.specific_ctx_id_mask = 0; + stream->specific_ctx_id = i915_ggtt_offset(ce->state); + stream->specific_ctx_id_mask = 0; break; } case 8: case 9: case 10: - if (USES_GUC_SUBMISSION(i915)) { + if (intel_engine_in_execlists_submission_mode(ce->engine)) { + stream->specific_ctx_id_mask = + (1U << GEN8_CTX_ID_WIDTH) - 1; + stream->specific_ctx_id = stream->specific_ctx_id_mask; + } else { /* * When using GuC, the context descriptor we write in * i915 is read by GuC and rewritten before it's @@ -1278,43 +1303,35 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * dropped by GuC. They won't be part of the context * ID in the OA reports, so squash those lower bits. */ - i915->perf.oa.specific_ctx_id = + stream->specific_ctx_id = lower_32_bits(ce->lrc_desc) >> 12; /* * GuC uses the top bit to signal proxy submission, so * ignore that bit. 
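oa_get_render_ctx_id() derives a per-generation (specific_ctx_id, specific_ctx_id_mask) pair here; gen8_append_oa_reports() earlier in this file then masks the context-ID dword out of each report and compares it against the stream's ID. A minimal sketch of that check, with an illustrative function name:

/* report_ctx_id is dword 2 of an OA report, as in gen8_append_oa_reports(). */
static bool report_is_for_stream_ctx(u32 report_ctx_id,
				     u32 specific_ctx_id,
				     u32 specific_ctx_id_mask)
{
	return (report_ctx_id & specific_ctx_id_mask) == specific_ctx_id;
}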
*/ - i915->perf.oa.specific_ctx_id_mask = + stream->specific_ctx_id_mask = (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1; - } else { - i915->perf.oa.specific_ctx_id_mask = - (1U << GEN8_CTX_ID_WIDTH) - 1; - i915->perf.oa.specific_ctx_id = - upper_32_bits(ce->lrc_desc); - i915->perf.oa.specific_ctx_id &= - i915->perf.oa.specific_ctx_id_mask; } break; - case 11: { - i915->perf.oa.specific_ctx_id_mask = - ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) | - ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) | - ((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32); - i915->perf.oa.specific_ctx_id = upper_32_bits(ce->lrc_desc); - i915->perf.oa.specific_ctx_id &= - i915->perf.oa.specific_ctx_id_mask; + case 11: + case 12: { + stream->specific_ctx_id_mask = + ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + stream->specific_ctx_id = stream->specific_ctx_id_mask; break; } default: - MISSING_CASE(INTEL_GEN(i915)); + MISSING_CASE(INTEL_GEN(ce->engine->i915)); } + ce->tag = stream->specific_ctx_id_mask; + DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n", - i915->perf.oa.specific_ctx_id, - i915->perf.oa.specific_ctx_id_mask); + stream->specific_ctx_id, + stream->specific_ctx_id_mask); return 0; } @@ -1328,93 +1345,104 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) */ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; struct intel_context *ce; - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - dev_priv->perf.oa.specific_ctx_id_mask = 0; - - ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx); + ce = fetch_and_zero(&stream->pinned_ctx); if (ce) { - mutex_lock(&dev_priv->drm.struct_mutex); + ce->tag = 0; /* recomputed on next submission after parking */ intel_context_unpin(ce); - mutex_unlock(&dev_priv->drm.struct_mutex); } + + stream->specific_ctx_id = INVALID_CTX_ID; + stream->specific_ctx_id_mask = 0; } static void -free_oa_buffer(struct drm_i915_private *i915) +free_oa_buffer(struct i915_perf_stream *stream) { - mutex_lock(&i915->drm.struct_mutex); - - i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma, + i915_vma_unpin_and_release(&stream->oa_buffer.vma, I915_VMA_RELEASE_MAP); - mutex_unlock(&i915->drm.struct_mutex); + stream->oa_buffer.vaddr = NULL; +} - i915->perf.oa.oa_buffer.vaddr = NULL; +static void +free_oa_configs(struct i915_perf_stream *stream) +{ + struct i915_oa_config_bo *oa_bo, *tmp; + + i915_oa_config_put(stream->oa_config); + llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) + free_oa_config_bo(oa_bo); +} + +static void +free_noa_wait(struct i915_perf_stream *stream) +{ + i915_vma_unpin_and_release(&stream->noa_wait, 0); } static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; - BUG_ON(stream != dev_priv->perf.oa.exclusive_stream); + BUG_ON(stream != perf->exclusive_stream); /* * Unset exclusive_stream first, it will be checked while disabling * the metric set on gen8+. 
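free_oa_configs() above walks stream->oa_config_bos with llist_for_each_entry_safe(); an llist lets configuration BOs be queued without extra locking. A small sketch of the producer side, assuming the i915_oa_config_bo layout (node, oa_config, vma) shown earlier in this diff:

#include <linux/llist.h>

/* Roughly how a freshly built config BO would be queued on the stream's
 * lock-free list; the patch does this when an OA config buffer is created. */
static void queue_oa_config_bo(struct llist_head *oa_config_bos,
			       struct i915_oa_config_bo *oa_bo)
{
	llist_add(&oa_bo->node, oa_config_bos);
}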
*/ - mutex_lock(&dev_priv->drm.struct_mutex); - dev_priv->perf.oa.exclusive_stream = NULL; - dev_priv->perf.oa.ops.disable_metric_set(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); + perf->exclusive_stream = NULL; + perf->ops.disable_metric_set(stream); - free_oa_buffer(dev_priv); + free_oa_buffer(stream); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); + intel_engine_pm_put(stream->engine); if (stream->ctx) oa_put_render_ctx_id(stream); - put_oa_config(dev_priv, stream->oa_config); + free_oa_configs(stream); + free_noa_wait(stream); - if (dev_priv->perf.oa.spurious_report_rs.missed) { + if (perf->spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", - dev_priv->perf.oa.spurious_report_rs.missed); + perf->spurious_report_rs.missed); } } -static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) +static void gen7_init_oa_buffer(struct i915_perf_stream *stream) { - u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + struct intel_uncore *uncore = stream->uncore; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* Pre-DevBDW: OABUFFER must be set with counters off, * before OASTATUS1, but after OASTATUS2 */ - I915_WRITE(GEN7_OASTATUS2, - gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */ - dev_priv->perf.oa.oa_buffer.head = gtt_offset; + intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */ + gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); + stream->oa_buffer.head = gtt_offset; - I915_WRITE(GEN7_OABUFFER, gtt_offset); + intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset); - I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ + intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */ + gtt_offset | OABUFFER_SIZE_16M); /* Mark that we need updated tail pointers to read from... */ - dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; - dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* On Haswell we have to track which OASTATUS1 flags we've * already seen since they can't be cleared while periodic * sampling is enabled. */ - dev_priv->perf.oa.gen7_latched_oastatus1 = 0; + stream->perf->gen7_latched_oastatus1 = 0; /* NB: although the OA buffer will initially be allocated * zeroed via shmfs (and so this memset is redundant when @@ -1427,26 +1455,24 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) * the assumption that new reports are being written to zeroed * memory... */ - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); + memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); - /* Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. 
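The OASTATUS2/OABUFFER/OASTATUS1 writes above program head, base and tail as GGTT offsets into a single 16M OA buffer, so the amount of report data available to read is plain modular arithmetic on those two pointers. A standalone sketch of that calculation (the 16M size mirrors OABUFFER_SIZE_16M programmed above):

#include <stdint.h>

#define OA_BUF_SIZE (16u * 1024 * 1024) /* matches OABUFFER_SIZE_16M */

/*
 * Bytes of OA reports available between head and tail, with wrap-around.
 * Only the difference matters, so GGTT addresses or buffer-relative
 * offsets both work as long as head and tail use the same base.
 */
static uint32_t oa_bytes_available(uint32_t head, uint32_t tail)
{
        return (tail - head) & (OA_BUF_SIZE - 1);
}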
- */ - dev_priv->perf.oa.pollin = false; + stream->pollin = false; } -static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) +static void gen8_init_oa_buffer(struct i915_perf_stream *stream) { - u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + struct intel_uncore *uncore = stream->uncore; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - I915_WRITE(GEN8_OASTATUS, 0); - I915_WRITE(GEN8_OAHEADPTR, gtt_offset); - dev_priv->perf.oa.oa_buffer.head = gtt_offset; + intel_uncore_write(uncore, GEN8_OASTATUS, 0); + intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset); + stream->oa_buffer.head = gtt_offset; - I915_WRITE(GEN8_OABUFFER_UDW, 0); + intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0); /* * PRM says: @@ -1456,22 +1482,22 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) * to enable proper functionality of the overflow * bit." */ - I915_WRITE(GEN8_OABUFFER, gtt_offset | + intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset | OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); - I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); + intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ - dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; - dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; /* * Reset state used to recognise context switches, affecting which * reports we will forward to userspace while filtering for a single * context. */ - dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID; + stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* * NB: although the OA buffer will initially be allocated @@ -1485,36 +1511,84 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) * the assumption that new reports are being written to zeroed * memory... */ - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); + memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); + + stream->pollin = false; +} + +static void gen12_init_oa_buffer(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); + unsigned long flags; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0); + intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR, + gtt_offset & GEN12_OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = gtt_offset; + + /* + * PRM says: + * + * "This MMIO must be set before the OATAILPTR + * register and after the OAHEADPTR register. This is + * to enable proper functionality of the overflow + * bit." + */ + intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset | + OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); + intel_uncore_write(uncore, GEN12_OAG_OATAILPTR, + gtt_offset & GEN12_OAG_OATAILPTR_MASK); + + /* Mark that we need updated tail pointers to read from... */ + stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; /* - * Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. 
+ * Reset state used to recognise context switches, affecting which + * reports we will forward to userspace while filtering for a single + * context. + */ + stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + /* + * NB: although the OA buffer will initially be allocated + * zeroed via shmfs (and so this memset is redundant when + * first allocating), we may re-init the OA buffer, either + * when re-enabling a stream or in error/reset paths. + * + * The reason we clear the buffer for each re-init is for the + * sanity check in gen8_append_oa_reports() that looks at the + * reason field to make sure it's non-zero which relies on + * the assumption that new reports are being written to zeroed + * memory... */ - dev_priv->perf.oa.pollin = false; + memset(stream->oa_buffer.vaddr, 0, + stream->oa_buffer.vma->size); + + stream->pollin = false; } -static int alloc_oa_buffer(struct drm_i915_private *dev_priv) +static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_gem_object *bo; struct i915_vma *vma; int ret; - if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma)) + if (WARN_ON(stream->oa_buffer.vma)) return -ENODEV; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - return ret; - BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); - bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE); + bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE); if (IS_ERR(bo)) { DRM_ERROR("Failed to allocate OA buffer\n"); - ret = PTR_ERR(bo); - goto unlock; + return PTR_ERR(bo); } i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); @@ -1525,20 +1599,16 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) ret = PTR_ERR(vma); goto err_unref; } - dev_priv->perf.oa.oa_buffer.vma = vma; + stream->oa_buffer.vma = vma; - dev_priv->perf.oa.oa_buffer.vaddr = + stream->oa_buffer.vaddr = i915_gem_object_pin_map(bo, I915_MAP_WB); - if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) { - ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr); + if (IS_ERR(stream->oa_buffer.vaddr)) { + ret = PTR_ERR(stream->oa_buffer.vaddr); goto err_unpin; } - DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", - i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma), - dev_priv->perf.oa.oa_buffer.vaddr); - - goto unlock; + return 0; err_unpin: __i915_vma_unpin(vma); @@ -1546,58 +1616,396 @@ err_unpin: err_unref: i915_gem_object_put(bo); - dev_priv->perf.oa.oa_buffer.vaddr = NULL; - dev_priv->perf.oa.oa_buffer.vma = NULL; + stream->oa_buffer.vaddr = NULL; + stream->oa_buffer.vma = NULL; + + return ret; +} + +static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, + bool save, i915_reg_t reg, u32 offset, + u32 dword_count) +{ + u32 cmd; + u32 d; + + cmd = save ? 
MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM; + if (INTEL_GEN(stream->perf->i915) >= 8) + cmd++; + + for (d = 0; d < dword_count; d++) { + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(reg) + 4 * d; + *cs++ = intel_gt_scratch_offset(stream->engine->gt, + offset) + 4 * d; + *cs++ = 0; + } + + return cs; +} + +static int alloc_noa_wait(struct i915_perf_stream *stream) +{ + struct drm_i915_private *i915 = stream->perf->i915; + struct drm_i915_gem_object *bo; + struct i915_vma *vma; + const u64 delay_ticks = 0xffffffffffffffff - + DIV64_U64_ROUND_UP( + atomic64_read(&stream->perf->noa_programming_delay) * + RUNTIME_INFO(i915)->cs_timestamp_frequency_khz, + 1000000ull); + const u32 base = stream->engine->mmio_base; +#define CS_GPR(x) GEN8_RING_CS_GPR(base, x) + u32 *batch, *ts0, *cs, *jump; + int ret, i; + enum { + START_TS, + NOW_TS, + DELTA_TS, + JUMP_PREDICATE, + DELTA_TARGET, + N_CS_GPR + }; + + bo = i915_gem_object_create_internal(i915, 4096); + if (IS_ERR(bo)) { + DRM_ERROR("Failed to allocate NOA wait batchbuffer\n"); + return PTR_ERR(bo); + } + + /* + * We pin in GGTT because we jump into this buffer now because + * multiple OA config BOs will have a jump to this address and it + * needs to be fixed during the lifetime of the i915/perf stream. + */ + vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); + if (IS_ERR(batch)) { + ret = PTR_ERR(batch); + goto err_unpin; + } + + /* Save registers. */ + for (i = 0; i < N_CS_GPR; i++) + cs = save_restore_register( + stream, cs, true /* save */, CS_GPR(i), + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + cs = save_restore_register( + stream, cs, true /* save */, MI_PREDICATE_RESULT_1, + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + + /* First timestamp snapshot location. */ + ts0 = cs; + + /* + * Initial snapshot of the timestamp register to implement the wait. + * We work with 32b values, so clear out the top 32b bits of the + * register because the ALU works 64bits. + */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4; + *cs++ = 0; + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); + *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)); + + /* + * This is the location we're going to jump back into until the + * required amount of time has passed. + */ + jump = cs; + + /* + * Take another snapshot of the timestamp register. Take care to clear + * up the top 32bits of CS_GPR(1) as we're using it for other + * operations below. + */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4; + *cs++ = 0; + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); + *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)); + + /* + * Do a diff between the 2 timestamps and store the result back into + * CS_GPR(1). + */ + *cs++ = MI_MATH(5); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS)); + *cs++ = MI_MATH_SUB; + *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU); + *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); + + /* + * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the + * timestamp have rolled over the 32bits) into the predicate register + * to be used for the predicated jump. 
+ */ + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); + *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); + + /* Restart from the beginning if we had timestamps roll over. */ + *cs++ = (INTEL_GEN(i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8) | + MI_BATCH_PREDICATE; + *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4; + *cs++ = 0; + + /* + * Now add the diff between to previous timestamps and add it to : + * (((1 * << 64) - 1) - delay_ns) + * + * When the Carry Flag contains 1 this means the elapsed time is + * longer than the expected delay, and we can exit the wait loop. + */ + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)); + *cs++ = lower_32_bits(delay_ticks); + *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4; + *cs++ = upper_32_bits(delay_ticks); + + *cs++ = MI_MATH(4); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET)); + *cs++ = MI_MATH_ADD; + *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); + + *cs++ = MI_ARB_CHECK; -unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); + /* + * Transfer the result into the predicate register to be used for the + * predicated jump. + */ + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); + *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); + + /* Predicate the jump. */ + *cs++ = (INTEL_GEN(i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8) | + MI_BATCH_PREDICATE; + *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4; + *cs++ = 0; + + /* Restore registers. */ + for (i = 0; i < N_CS_GPR; i++) + cs = save_restore_register( + stream, cs, false /* restore */, CS_GPR(i), + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + cs = save_restore_register( + stream, cs, false /* restore */, MI_PREDICATE_RESULT_1, + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + + /* And return to the ring. 
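The arithmetic in this batch is easier to follow on the CPU side: delay_ticks is programmed as (2^64 - 1) - wanted_ticks, so adding the measured 32-bit timestamp delta to it overflows, i.e. sets the ALU carry flag, exactly once the wanted number of ticks has elapsed; storing the inverted carry (MI_MATH_STOREINV) into the predicate is what keeps the backward jump live until then. A host-side model of the exit test (illustrative only, ignoring the restart-on-rollover path handled earlier in the batch):

#include <stdbool.h>
#include <stdint.h>

/*
 * delay_ticks is what the batch loads into DELTA_TARGET:
 * UINT64_MAX minus the number of CS timestamp ticks to wait.
 */
static bool noa_wait_elapsed(uint32_t start_ts, uint32_t now_ts,
                             uint64_t delay_ticks)
{
        uint64_t delta = (uint32_t)(now_ts - start_ts); /* 32-bit CS timestamps */
        uint64_t sum;

        /* Carry out of the 64-bit add == the delay has expired. */
        return __builtin_add_overflow(delta, delay_ticks, &sum);
}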
*/ + *cs++ = MI_BATCH_BUFFER_END; + + GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch)); + + i915_gem_object_flush_map(bo); + i915_gem_object_unpin_map(bo); + + stream->noa_wait = vma; + return 0; + +err_unpin: + i915_vma_unpin_and_release(&vma, 0); +err_unref: + i915_gem_object_put(bo); return ret; } -static void config_oa_regs(struct drm_i915_private *dev_priv, - const struct i915_oa_reg *regs, - u32 n_regs) +static u32 *write_cs_mi_lri(u32 *cs, + const struct i915_oa_reg *reg_data, + u32 n_regs) { u32 i; for (i = 0; i < n_regs; i++) { - const struct i915_oa_reg *reg = regs + i; + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { + u32 n_lri = min_t(u32, + n_regs - i, + MI_LOAD_REGISTER_IMM_MAX_REGS); + + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); + } + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); + *cs++ = reg_data[i].value; + } + + return cs; +} + +static int num_lri_dwords(int num_regs) +{ + int count = 0; + + if (num_regs > 0) { + count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); + count += num_regs * 2; + } + + return count; +} + +static struct i915_oa_config_bo * +alloc_oa_config_buffer(struct i915_perf_stream *stream, + struct i915_oa_config *oa_config) +{ + struct drm_i915_gem_object *obj; + struct i915_oa_config_bo *oa_bo; + size_t config_length = 0; + u32 *cs; + int err; + + oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); + if (!oa_bo) + return ERR_PTR(-ENOMEM); - I915_WRITE(reg->addr, reg->value); + config_length += num_lri_dwords(oa_config->mux_regs_len); + config_length += num_lri_dwords(oa_config->b_counter_regs_len); + config_length += num_lri_dwords(oa_config->flex_regs_len); + config_length += 3; /* MI_BATCH_BUFFER_START */ + config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); + + obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_free; + } + + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_oa_bo; } + + cs = write_cs_mi_lri(cs, + oa_config->mux_regs, + oa_config->mux_regs_len); + cs = write_cs_mi_lri(cs, + oa_config->b_counter_regs, + oa_config->b_counter_regs_len); + cs = write_cs_mi_lri(cs, + oa_config->flex_regs, + oa_config->flex_regs_len); + + /* Jump into the active wait. */ + *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8); + *cs++ = i915_ggtt_offset(stream->noa_wait); + *cs++ = 0; + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + oa_bo->vma = i915_vma_instance(obj, + &stream->engine->gt->ggtt->vm, + NULL); + if (IS_ERR(oa_bo->vma)) { + err = PTR_ERR(oa_bo->vma); + goto err_oa_bo; + } + + oa_bo->oa_config = i915_oa_config_get(oa_config); + llist_add(&oa_bo->node, &stream->oa_config_bos); + + return oa_bo; + +err_oa_bo: + i915_gem_object_put(obj); +err_free: + kfree(oa_bo); + return ERR_PTR(err); } -static void delay_after_mux(void) +static struct i915_vma * +get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) { + struct i915_oa_config_bo *oa_bo; + /* - * It apparently takes a fairly long time for a new MUX - * configuration to be be applied after these register writes. - * This delay duration was derived empirically based on the - * render_basic config but hopefully it covers the maximum - * configuration latency. 
- * - * As a fallback, the checks in _append_oa_reports() to skip - * invalid OA reports do also seem to work to discard reports - * generated before this config has completed - albeit not - * silently. - * - * Unfortunately this is essentially a magic number, since we - * don't currently know of a reliable mechanism for predicting - * how long the MUX config will take to apply and besides - * seeing invalid reports we don't know of a reliable way to - * explicitly check that the MUX config has landed. - * - * It's even possible we've miss characterized the underlying - * problem - it just seems like the simplest explanation why - * a delay at this location would mitigate any invalid reports. + * Look for the buffer in the already allocated BOs attached + * to the stream. */ - usleep_range(15000, 20000); + llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { + if (oa_bo->oa_config == oa_config && + memcmp(oa_bo->oa_config->uuid, + oa_config->uuid, + sizeof(oa_config->uuid)) == 0) + goto out; + } + + oa_bo = alloc_oa_config_buffer(stream, oa_config); + if (IS_ERR(oa_bo)) + return ERR_CAST(oa_bo); + +out: + return i915_vma_get(oa_bo->vma); +} + +static int emit_oa_config(struct i915_perf_stream *stream, + struct i915_oa_config *oa_config, + struct intel_context *ce) +{ + struct i915_request *rq; + struct i915_vma *vma; + int err; + + vma = get_oa_vma(stream, oa_config); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err_vma_put; + + intel_engine_pm_get(ce->engine); + rq = i915_request_create(ce); + intel_engine_pm_put(ce->engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma_unpin; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, 0); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + if (err) + goto err_add_request; + + err = rq->engine->emit_bb_start(rq, + vma->node.start, 0, + I915_DISPATCH_SECURE); +err_add_request: + i915_request_add(rq); +err_vma_unpin: + i915_vma_unpin(vma); +err_vma_put: + i915_vma_put(vma); + return err; +} + +static struct intel_context *oa_context(struct i915_perf_stream *stream) +{ + return stream->pinned_ctx ?: stream->engine->kernel_context; } static int hsw_enable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - const struct i915_oa_config *oa_config = stream->oa_config; + struct intel_uncore *uncore = stream->uncore; /* * PRM: @@ -1609,31 +2017,47 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) * count the events from non-render domain. Unit level clock * gating for RCS should also be disabled. 
*/ - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN7_DOP_CLOCK_GATE_ENABLE)); - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | - GEN6_CSUNIT_CLOCK_GATE_DISABLE)); + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + GEN7_DOP_CLOCK_GATE_ENABLE, 0); + intel_uncore_rmw(uncore, GEN6_UCGCTL1, + 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - delay_after_mux(); + return emit_oa_config(stream, stream->oa_config, oa_context(stream)); +} - config_oa_regs(dev_priv, oa_config->b_counter_regs, - oa_config->b_counter_regs_len); +static void hsw_disable_metric_set(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; - return 0; + intel_uncore_rmw(uncore, GEN6_UCGCTL1, + GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0); + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + 0, GEN7_DOP_CLOCK_GATE_ENABLE); + + intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } -static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) +static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, + i915_reg_t reg) { - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) & - ~GEN6_CSUNIT_CLOCK_GATE_DISABLE)); - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) | - GEN7_DOP_CLOCK_GATE_ENABLE)); + u32 mmio = i915_mmio_reg_offset(reg); + int i; - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & - ~GT_NOA_ENABLE)); -} + /* + * This arbitrary default will select the 'EU FPU0 Pipeline + * Active' event. In the future it's anticipated that there + * will be an explicit 'No Event' we can select, but not yet... + */ + if (!oa_config) + return 0; + + for (i = 0; i < oa_config->flex_regs_len; i++) { + if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio) + return oa_config->flex_regs[i].value; + } + return 0; +} /* * NB: It must always remain pointer safe to run this even if the OA unit * has been disabled. @@ -1642,13 +2066,11 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) * in the case that the OA unit has been disabled. */ static void -gen8_update_reg_state_unlocked(struct intel_context *ce, - u32 *reg_state, - const struct i915_oa_config *oa_config) +gen8_update_reg_state_unlocked(const struct intel_context *ce, + const struct i915_perf_stream *stream) { - struct drm_i915_private *i915 = ce->gem_context->i915; - u32 ctx_oactxctrl = i915->perf.oa.ctx_oactxctrl_offset; - u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset; + u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; + u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; /* The MMIO offsets for Flex EU registers aren't contiguous */ i915_reg_t flex_regs[] = { EU_PERF_CNTL0, @@ -1659,41 +2081,187 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, EU_PERF_CNTL5, EU_PERF_CNTL6, }; + u32 *reg_state = ce->lrc_reg_state; int i; - CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, - (i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME); + reg_state[ctx_oactxctrl + 1] = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? 
GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; - for (i = 0; i < ARRAY_SIZE(flex_regs); i++) { - u32 state_offset = ctx_flexeu0 + i * 2; - u32 mmio = i915_mmio_reg_offset(flex_regs[i]); + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) + reg_state[ctx_flexeu0 + i * 2 + 1] = + oa_config_flex_reg(stream->oa_config, flex_regs[i]); - /* - * This arbitrary default will select the 'EU FPU0 Pipeline - * Active' event. In the future it's anticipated that there - * will be an explicit 'No Event' we can select, but not yet... - */ - u32 value = 0; + reg_state[CTX_R_PWR_CLK_STATE] = + intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu); +} - if (oa_config) { - u32 j; +struct flex { + i915_reg_t reg; + u32 offset; + u32 value; +}; - for (j = 0; j < oa_config->flex_regs_len; j++) { - if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { - value = oa_config->flex_regs[j].value; - break; - } - } - } +static int +gen8_store_flex(struct i915_request *rq, + struct intel_context *ce, + const struct flex *flex, unsigned int count) +{ + u32 offset; + u32 *cs; + + cs = intel_ring_begin(rq, 4 * count); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; + do { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset + flex->offset * sizeof(u32); + *cs++ = 0; + *cs++ = flex->value; + } while (flex++, --count); + + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen8_load_flex(struct i915_request *rq, + struct intel_context *ce, + const struct flex *flex, unsigned int count) +{ + u32 *cs; + + GEM_BUG_ON(!count || count > 63); + + cs = intel_ring_begin(rq, 2 * count + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(count); + do { + *cs++ = i915_mmio_reg_offset(flex->reg); + *cs++ = flex->value; + } while (flex++, --count); + *cs++ = MI_NOOP; - CTX_REG(reg_state, state_offset, flex_regs[i], value); + intel_ring_advance(rq, cs); + + return 0; +} + +static int gen8_modify_context(struct intel_context *ce, + const struct flex *flex, unsigned int count) +{ + struct i915_request *rq; + int err; + + rq = intel_engine_create_kernel_request(ce->engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + /* Serialise with the remote context */ + err = intel_context_prepare_remote_request(ce, rq); + if (err == 0) + err = gen8_store_flex(rq, ce, flex, count); + + i915_request_add(rq); + return err; +} + +static int gen8_modify_self(struct intel_context *ce, + const struct flex *flex, unsigned int count) +{ + struct i915_request *rq; + int err; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + err = gen8_load_flex(rq, ce, flex, count); + + i915_request_add(rq); + return err; +} + +static int gen8_configure_context(struct i915_gem_context *ctx, + struct flex *flex, unsigned int count) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + int err = 0; + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + GEM_BUG_ON(ce == ce->engine->kernel_context); + + if (ce->engine->class != RENDER_CLASS) + continue; + + /* Otherwise OA settings will be set upon first use */ + if (!intel_context_pin_if_active(ce)) + continue; + + flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu); + err = gen8_modify_context(ce, flex, count); + + intel_context_unpin(ce); + if (err) + break; } + i915_gem_context_unlock_engines(ctx); + + return err; +} + +static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable) +{ + int err; + struct intel_context *ce = 
stream->pinned_ctx; + u32 format = stream->oa_buffer.format; + struct flex regs_context[] = { + { + GEN8_OACTXCONTROL, + stream->perf->ctx_oactxctrl_offset + 1, + enable ? GEN8_OA_COUNTER_RESUME : 0, + }, + }; + /* Offsets in regs_lri are not used since this configuration is only + * applied using LRI. Initialize the correct offsets for posterity. + */ +#define GEN12_OAR_OACONTROL_OFFSET 0x5B0 + struct flex regs_lri[] = { + { + GEN12_OAR_OACONTROL, + GEN12_OAR_OACONTROL_OFFSET + 1, + (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | + (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0) + }, + { + RING_CONTEXT_CONTROL(ce->engine->mmio_base), + CTX_CONTEXT_CONTROL, + _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, + enable ? + GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : + 0) + }, + }; - CTX_REG(reg_state, - CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - intel_sseu_make_rpcs(i915, &ce->sseu)); + /* Modify the context image of pinned context with regs_context*/ + err = intel_context_lock_pinned(ce); + if (err) + return err; + + err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)); + intel_context_unlock_pinned(ce); + if (err) + return err; + + /* Apply regs_lri using LRI with pinned context */ + return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri)); } /* @@ -1719,16 +2287,18 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, * per-context OA state. * * Note: it's only the RCS/Render context that has any OA state. + * Note: the first flex register passed must always be R_PWR_CLK_STATE */ -static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, - const struct i915_oa_config *oa_config) +static int oa_configure_all_contexts(struct i915_perf_stream *stream, + struct flex *regs, + size_t num_regs) { - unsigned int map_type = i915_coherent_map_type(dev_priv); - struct i915_gem_context *ctx; - struct i915_request *rq; - int ret; + struct drm_i915_private *i915 = stream->perf->i915; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx, *cn; + int err; - lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&stream->perf->lock); /* * The OA register config is setup through the context image. This image @@ -1740,66 +2310,106 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, * this might leave small interval of time where the OA unit is * configured at an invalid sampling period. * - * So far the best way to work around this issue seems to be draining - * the GPU from any submitted work. + * Note that since we emit all requests from a single ring, there + * is still an implicit global barrier here that may cause a high + * priority context to wait for an otherwise independent low priority + * context. Contexts idle at the time of reconfiguration are not + * trapped behind the barrier. */ - ret = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - - /* Update all contexts now that we've stalled the submission. 
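The regs_context table above, like gen8_update_reg_state_unlocked() and the flex tables used on the gen8/gen11 path, relies on the logical ring context image being laid out as MI_LOAD_REGISTER_IMM (address, value) pairs: an offset such as ctx_oactxctrl_offset + 1 names the value dword, with the register address sitting one slot below. A tiny illustration of that layout (plain C, hypothetical helper name):

#include <stdint.h>

/*
 * Context-image register state: reg_state[off] holds the mmio offset an
 * LRI will load, reg_state[off + 1] holds the value loaded into it.
 */
static void lrc_set_reg_value(uint32_t *reg_state, uint32_t off, uint32_t value)
{
        reg_state[off + 1] = value; /* reg_state[off] is already the address */
}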
*/ - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct i915_gem_engines_iter it; - struct intel_context *ce; + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { + if (!kref_get_unless_zero(&ctx->ref)) + continue; - for_each_gem_engine(ce, - i915_gem_context_lock_engines(ctx), - it) { - u32 *regs; + spin_unlock(&i915->gem.contexts.lock); - if (ce->engine->class != RENDER_CLASS) - continue; + err = gen8_configure_context(ctx, regs, num_regs); + if (err) { + i915_gem_context_put(ctx); + return err; + } - /* OA settings will be set upon first use */ - if (!ce->state) - continue; + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); + } + spin_unlock(&i915->gem.contexts.lock); - regs = i915_gem_object_pin_map(ce->state->obj, - map_type); - if (IS_ERR(regs)) { - i915_gem_context_unlock_engines(ctx); - return PTR_ERR(regs); - } + /* + * After updating all other contexts, we need to modify ourselves. + * If we don't modify the kernel_context, we do not get events while + * idle. + */ + for_each_uabi_engine(engine, i915) { + struct intel_context *ce = engine->kernel_context; - ce->state->obj->mm.dirty = true; - regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); + if (engine->class != RENDER_CLASS) + continue; - gen8_update_reg_state_unlocked(ce, regs, oa_config); + regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); - i915_gem_object_unpin_map(ce->state->obj); - } - i915_gem_context_unlock_engines(ctx); + err = gen8_modify_self(ce, regs, num_regs); + if (err) + return err; } - /* - * Apply the configuration by doing one context restore of the edited - * context image. - */ - rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); + return 0; +} - i915_request_add(rq); +static int gen12_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config) +{ + struct flex regs[] = { + { + GEN8_R_PWR_CLK_STATE, + CTX_R_PWR_CLK_STATE, + }, + }; - return 0; + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); +} + +static int lrc_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config) +{ + /* The MMIO offsets for Flex EU registers aren't contiguous */ + const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) + struct flex regs[] = { + { + GEN8_R_PWR_CLK_STATE, + CTX_R_PWR_CLK_STATE, + }, + { + GEN8_OACTXCONTROL, + stream->perf->ctx_oactxctrl_offset + 1, + }, + { EU_PERF_CNTL0, ctx_flexeuN(0) }, + { EU_PERF_CNTL1, ctx_flexeuN(1) }, + { EU_PERF_CNTL2, ctx_flexeuN(2) }, + { EU_PERF_CNTL3, ctx_flexeuN(3) }, + { EU_PERF_CNTL4, ctx_flexeuN(4) }, + { EU_PERF_CNTL5, ctx_flexeuN(5) }, + { EU_PERF_CNTL6, ctx_flexeuN(6) }, + }; +#undef ctx_flexeuN + int i; + + regs[1].value = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? 
GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; + + for (i = 2; i < ARRAY_SIZE(regs); i++) + regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); + + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); } static int gen8_enable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - const struct i915_oa_config *oa_config = stream->oa_config; + struct intel_uncore *uncore = stream->uncore; + struct i915_oa_config *oa_config = stream->oa_config; int ret; /* @@ -1825,10 +2435,10 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) * be read back from automatically triggered reports, as part of the * RPT_ID field. */ - if (IS_GEN_RANGE(dev_priv, 9, 11)) { - I915_WRITE(GEN8_OA_DEBUG, - _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | - GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); + if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) { + intel_uncore_write(uncore, GEN8_OA_DEBUG, + _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); } /* @@ -1836,46 +2446,111 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = gen8_configure_all_contexts(dev_priv, oa_config); + ret = lrc_configure_all_contexts(stream, oa_config); if (ret) return ret; - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - delay_after_mux(); + return emit_oa_config(stream, oa_config, oa_context(stream)); +} + +static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) +{ + return _MASKED_FIELD(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS, + (stream->sample_flags & SAMPLE_OA_REPORT) ? + 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); +} - config_oa_regs(dev_priv, oa_config->b_counter_regs, - oa_config->b_counter_regs_len); +static int gen12_enable_metric_set(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + struct i915_oa_config *oa_config = stream->oa_config; + bool periodic = stream->periodic; + u32 period_exponent = stream->period_exponent; + int ret; - return 0; + intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, + /* Disable clk ratio reports, like previous Gens. */ + _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) | + /* + * If the user didn't require OA reports, instruct + * the hardware not to emit ctx switch reports. + */ + oag_report_ctx_switches(stream)); + + intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ? + (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME | + GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE | + (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) + : 0); + + /* + * Update all contexts prior writing the mux configurations as we need + * to make sure all slices/subslices are ON before writing to NOA + * registers. + */ + ret = gen12_configure_all_contexts(stream, oa_config); + if (ret) + return ret; + + /* + * For Gen12, performance counters are context + * saved/restored. Only enable it for the context that + * requested this. 
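Several of the writes in this gen12 path, GEN12_OAG_OA_DEBUG above and the OAR/context-control values built in gen12_configure_oar_context(), target masked registers: the upper 16 bits of the written value are a per-bit write enable for the lower 16 bits, which is what _MASKED_FIELD()/_MASKED_BIT_ENABLE() encode. A minimal model of that encoding, separate from the driver's macros:

#include <stdint.h>

/*
 * Masked GPU registers: bits 31:16 of a write select which of bits 15:0
 * actually change, so unrelated bits keep their current value.
 */
static uint32_t masked_field(uint16_t mask, uint16_t value)
{
        return ((uint32_t)mask << 16) | value;
}

static uint32_t masked_bit_enable(uint16_t bit)
{
        return masked_field(bit, bit);
}

static uint32_t masked_bit_disable(uint16_t bit)
{
        return masked_field(bit, 0);
}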
+ */ + if (stream->ctx) { + ret = gen12_configure_oar_context(stream, true); + if (ret) + return ret; + } + + return emit_oa_config(stream, oa_config, oa_context(stream)); } -static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) +static void gen8_disable_metric_set(struct i915_perf_stream *stream) { + struct intel_uncore *uncore = stream->uncore; + /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(dev_priv, NULL); + lrc_configure_all_contexts(stream, NULL); - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & - ~GT_NOA_ENABLE)); + intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } -static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) +static void gen10_disable_metric_set(struct i915_perf_stream *stream) { + struct intel_uncore *uncore = stream->uncore; + /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(dev_priv, NULL); + lrc_configure_all_contexts(stream, NULL); /* Make sure we disable noa to save power. */ - I915_WRITE(RPM_CONFIG1, - I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE); + intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); +} + +static void gen12_disable_metric_set(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + /* Reset all contexts' slices/subslices configurations. */ + gen12_configure_all_contexts(stream, NULL); + + /* disable the context save/restore or OAR counters */ + if (stream->ctx) + gen12_configure_oar_context(stream, false); + + /* Make sure we disable noa to save power. */ + intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); } static void gen7_oa_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; struct i915_gem_context *ctx = stream->ctx; - u32 ctx_id = dev_priv->perf.oa.specific_ctx_id; - bool periodic = dev_priv->perf.oa.periodic; - u32 period_exponent = dev_priv->perf.oa.period_exponent; - u32 report_format = dev_priv->perf.oa.oa_buffer.format; + u32 ctx_id = stream->specific_ctx_id; + bool periodic = stream->periodic; + u32 period_exponent = stream->period_exponent; + u32 report_format = stream->oa_buffer.format; /* * Reset buf pointers so we don't forward reports from before now. @@ -1886,22 +2561,22 @@ static void gen7_oa_enable(struct i915_perf_stream *stream) * on the assumption that certain fields are written to zeroed * memory which this helps maintains. */ - gen7_init_oa_buffer(dev_priv); - - I915_WRITE(GEN7_OACONTROL, - (ctx_id & GEN7_OACONTROL_CTX_MASK) | - (period_exponent << - GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | - (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | - (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | - (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | - GEN7_OACONTROL_ENABLE); + gen7_init_oa_buffer(stream); + + intel_uncore_write(uncore, GEN7_OACONTROL, + (ctx_id & GEN7_OACONTROL_CTX_MASK) | + (period_exponent << + GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | + (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | + (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | + (ctx ? 
GEN7_OACONTROL_PER_CTX_ENABLE : 0) | + GEN7_OACONTROL_ENABLE); } static void gen8_oa_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - u32 report_format = dev_priv->perf.oa.oa_buffer.format; + struct intel_uncore *uncore = stream->uncore; + u32 report_format = stream->oa_buffer.format; /* * Reset buf pointers so we don't forward reports from before now. @@ -1912,16 +2587,35 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) * on the assumption that certain fields are written to zeroed * memory which this helps maintains. */ - gen8_init_oa_buffer(dev_priv); + gen8_init_oa_buffer(stream); /* * Note: we don't rely on the hardware to perform single context * filtering and instead filter on the cpu based on the context-id * field of reports */ - I915_WRITE(GEN8_OACONTROL, (report_format << - GEN8_OA_REPORT_FORMAT_SHIFT) | - GEN8_OA_COUNTER_ENABLE); + intel_uncore_write(uncore, GEN8_OACONTROL, + (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) | + GEN8_OA_COUNTER_ENABLE); +} + +static void gen12_oa_enable(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + u32 report_format = stream->oa_buffer.format; + + /* + * If we don't want OA reports from the OA buffer, then we don't even + * need to program the OAG unit. + */ + if (!(stream->sample_flags & SAMPLE_OA_REPORT)) + return; + + gen12_init_oa_buffer(stream); + + intel_uncore_write(uncore, GEN12_OAG_OACONTROL, + (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) | + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE); } /** @@ -1935,19 +2629,17 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) */ static void i915_oa_stream_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + stream->perf->ops.oa_enable(stream); - dev_priv->perf.oa.ops.oa_enable(stream); - - if (dev_priv->perf.oa.periodic) - hrtimer_start(&dev_priv->perf.oa.poll_check_timer, + if (stream->periodic) + hrtimer_start(&stream->poll_check_timer, ns_to_ktime(POLL_PERIOD), HRTIMER_MODE_REL_PINNED); } static void gen7_oa_disable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = &stream->dev_priv->uncore; + struct intel_uncore *uncore = stream->uncore; intel_uncore_write(uncore, GEN7_OACONTROL, 0); if (intel_wait_for_register(uncore, @@ -1958,7 +2650,7 @@ static void gen7_oa_disable(struct i915_perf_stream *stream) static void gen8_oa_disable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = &stream->dev_priv->uncore; + struct intel_uncore *uncore = stream->uncore; intel_uncore_write(uncore, GEN8_OACONTROL, 0); if (intel_wait_for_register(uncore, @@ -1967,6 +2659,18 @@ static void gen8_oa_disable(struct i915_perf_stream *stream) DRM_ERROR("wait for OA to be disabled timed out\n"); } +static void gen12_oa_disable(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0); + if (intel_wait_for_register(uncore, + GEN12_OAG_OACONTROL, + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); +} + /** * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -1977,12 +2681,10 @@ static void gen8_oa_disable(struct i915_perf_stream *stream) */ static void i915_oa_stream_disable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + stream->perf->ops.oa_disable(stream); - 
dev_priv->perf.oa.ops.oa_disable(stream); - - if (dev_priv->perf.oa.periodic) - hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer); + if (stream->periodic) + hrtimer_cancel(&stream->poll_check_timer); } static const struct i915_perf_stream_ops i915_oa_stream_ops = { @@ -2016,34 +2718,42 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, struct drm_i915_perf_open_param *param, struct perf_open_properties *props) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; int format_size; int ret; - /* If the sysfs metrics/ directory wasn't registered for some + if (!props->engine) { + DRM_DEBUG("OA engine not specified\n"); + return -EINVAL; + } + + /* + * If the sysfs metrics/ directory wasn't registered for some * reason then don't let userspace try their luck with config * IDs */ - if (!dev_priv->perf.metrics_kobj) { + if (!perf->metrics_kobj) { DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); return -EINVAL; } - if (!(props->sample_flags & SAMPLE_OA_REPORT)) { + if (!(props->sample_flags & SAMPLE_OA_REPORT) && + (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) { DRM_DEBUG("Only OA report sampling supported\n"); return -EINVAL; } - if (!dev_priv->perf.oa.ops.enable_metric_set) { + if (!perf->ops.enable_metric_set) { DRM_DEBUG("OA unit not supported\n"); return -ENODEV; } - /* To avoid the complexity of having to accurately filter + /* + * To avoid the complexity of having to accurately filter * counter reports and marshal to the appropriate client * we currently only allow exclusive access */ - if (dev_priv->perf.oa.exclusive_stream) { + if (perf->exclusive_stream) { DRM_DEBUG("OA unit already in use\n"); return -EBUSY; } @@ -2053,43 +2763,28 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } - /* We set up some ratelimit state to potentially throttle any _NOTES - * about spurious, invalid OA reports which we don't forward to - * userspace. - * - * The initialization is associated with opening the stream (not driver - * init) considering we print a _NOTE about any throttling when closing - * the stream instead of waiting until driver _fini which no one would - * ever see. - * - * Using the same limiting factors as printk_ratelimit() - */ - ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs, - 5 * HZ, 10); - /* Since we use a DRM_NOTE for spurious reports it would be - * inconsistent to let __ratelimit() automatically print a warning for - * throttling. 
- */ - ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs, - RATELIMIT_MSG_ON_RELEASE); + stream->engine = props->engine; + stream->uncore = stream->engine->gt->uncore; stream->sample_size = sizeof(struct drm_i915_perf_record_header); - format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size; + format_size = perf->oa_formats[props->oa_format].size; - stream->sample_flags |= SAMPLE_OA_REPORT; + stream->sample_flags = props->sample_flags; stream->sample_size += format_size; - dev_priv->perf.oa.oa_buffer.format_size = format_size; - if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0)) + stream->oa_buffer.format_size = format_size; + if (WARN_ON(stream->oa_buffer.format_size == 0)) return -EINVAL; - dev_priv->perf.oa.oa_buffer.format = - dev_priv->perf.oa.oa_formats[props->oa_format].format; + stream->hold_preemption = props->hold_preemption; + + stream->oa_buffer.format = + perf->oa_formats[props->oa_format].format; - dev_priv->perf.oa.periodic = props->oa_periodic; - if (dev_priv->perf.oa.periodic) - dev_priv->perf.oa.period_exponent = props->oa_period_exponent; + stream->periodic = props->oa_periodic; + if (stream->periodic) + stream->period_exponent = props->oa_period_exponent; if (stream->ctx) { ret = oa_get_render_ctx_id(stream); @@ -2099,9 +2794,16 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, } } - ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); + ret = alloc_noa_wait(stream); if (ret) { + DRM_DEBUG("Unable to allocate NOA wait batch buffer\n"); + goto err_noa_wait_alloc; + } + + stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set); + if (!stream->oa_config) { DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); + ret = -EINVAL; goto err_config; } @@ -2117,63 +2819,72 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * In our case we are expecting that taking pm + FORCEWAKE * references will effectively disable RC6. 
*/ - stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); + intel_engine_pm_get(stream->engine); + intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); - ret = alloc_oa_buffer(dev_priv); + ret = alloc_oa_buffer(stream); if (ret) goto err_oa_buf_alloc; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - goto err_lock; - stream->ops = &i915_oa_stream_ops; - dev_priv->perf.oa.exclusive_stream = stream; + perf->exclusive_stream = stream; - ret = dev_priv->perf.oa.ops.enable_metric_set(stream); + ret = perf->ops.enable_metric_set(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; } - mutex_unlock(&dev_priv->drm.struct_mutex); + DRM_DEBUG("opening stream oa config uuid=%s\n", + stream->oa_config->uuid); + + hrtimer_init(&stream->poll_check_timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stream->poll_check_timer.function = oa_poll_check_timer_cb; + init_waitqueue_head(&stream->poll_wq); + spin_lock_init(&stream->oa_buffer.ptr_lock); return 0; err_enable: - dev_priv->perf.oa.exclusive_stream = NULL; - dev_priv->perf.oa.ops.disable_metric_set(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); + perf->exclusive_stream = NULL; + perf->ops.disable_metric_set(stream); -err_lock: - free_oa_buffer(dev_priv); + free_oa_buffer(stream); err_oa_buf_alloc: - put_oa_config(dev_priv, stream->oa_config); + free_oa_configs(stream); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); + intel_engine_pm_put(stream->engine); err_config: + free_noa_wait(stream); + +err_noa_wait_alloc: if (stream->ctx) oa_put_render_ctx_id(stream); return ret; } -void i915_oa_init_reg_state(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 *regs) +void i915_oa_init_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) { struct i915_perf_stream *stream; + /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */ + if (engine->class != RENDER_CLASS) return; - stream = engine->i915->perf.oa.exclusive_stream; - if (stream) - gen8_update_reg_state_unlocked(ce, regs, stream->oa_config); + stream = engine->i915->perf.exclusive_stream; + /* + * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller + * is already doing that, so nothing to be done for gen12 here. 
+ */ + if (stream && INTEL_GEN(stream->perf->i915) < 12) + gen8_update_reg_state_unlocked(ce, stream); } /** @@ -2243,7 +2954,7 @@ static ssize_t i915_perf_read(struct file *file, loff_t *ppos) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; ssize_t ret; /* To ensure it's handled consistently we simply treat all reads of a @@ -2266,15 +2977,15 @@ static ssize_t i915_perf_read(struct file *file, if (ret) return ret; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_read_locked(stream, file, buf, count, ppos); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } while (ret == -EAGAIN); } else { - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_read_locked(stream, file, buf, count, ppos); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } /* We allow the poll checking to sometimes report false positive EPOLLIN @@ -2289,7 +3000,7 @@ static ssize_t i915_perf_read(struct file *file, /* Maybe make ->pollin per-stream state if we support multiple * concurrent streams in the future. */ - dev_priv->perf.oa.pollin = false; + stream->pollin = false; } return ret; @@ -2297,13 +3008,12 @@ static ssize_t i915_perf_read(struct file *file, static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) { - struct drm_i915_private *dev_priv = - container_of(hrtimer, typeof(*dev_priv), - perf.oa.poll_check_timer); + struct i915_perf_stream *stream = + container_of(hrtimer, typeof(*stream), poll_check_timer); - if (oa_buffer_check_unlocked(dev_priv)) { - dev_priv->perf.oa.pollin = true; - wake_up(&dev_priv->perf.oa.poll_wq); + if (oa_buffer_check_unlocked(stream)) { + stream->pollin = true; + wake_up(&stream->poll_wq); } hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD)); @@ -2313,7 +3023,6 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) /** * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream - * @dev_priv: i915 device instance * @stream: An i915 perf stream * @file: An i915 perf stream file * @wait: poll() state table @@ -2322,15 +3031,14 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that * will be woken for new stream data. * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. * * Returns: any poll events that are ready without sleeping */ -static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, - struct i915_perf_stream *stream, - struct file *file, - poll_table *wait) +static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait) { __poll_t events = 0; @@ -2342,7 +3050,7 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, * the hrtimer/oa_poll_check_timer_cb to notify us when there are * samples to read. 
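On the userspace side the pollin/poll_wq plumbing above surfaces as an ordinary poll() plus read() loop on the stream fd, with the data framed as drm_i915_perf_record_header records. A hedged sketch (error handling trimmed, OA report decoding left out; assumes libdrm's i915_drm.h is on the include path, e.g. via pkg-config --cflags libdrm):

#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <i915_drm.h>

static void drain_stream(int stream_fd)
{
        uint8_t buf[64 * 1024];
        struct pollfd pfd = { .fd = stream_fd, .events = POLLIN };

        while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
                ssize_t len = read(stream_fd, buf, sizeof(buf));
                ssize_t off = 0;

                while (off < len) {
                        const struct drm_i915_perf_record_header *hdr =
                                (const void *)(buf + off);

                        if (hdr->size < sizeof(*hdr))
                                break; /* defensive, should not happen */

                        if (hdr->type == DRM_I915_PERF_RECORD_SAMPLE)
                                ; /* raw OA report follows the header */
                        else if (hdr->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
                                fprintf(stderr, "OA reports lost\n");

                        off += hdr->size;
                }
        }
}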
*/ - if (dev_priv->perf.oa.pollin) + if (stream->pollin) events |= EPOLLIN; return events; @@ -2364,12 +3072,12 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, static __poll_t i915_perf_poll(struct file *file, poll_table *wait) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; __poll_t ret; - mutex_lock(&dev_priv->perf.lock); - ret = i915_perf_poll_locked(dev_priv, stream, file, wait); - mutex_unlock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); + ret = i915_perf_poll_locked(stream, file, wait); + mutex_unlock(&perf->lock); return ret; } @@ -2394,6 +3102,9 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream) if (stream->ops->enable) stream->ops->enable(stream); + + if (stream->hold_preemption) + intel_context_set_nopreempt(stream->pinned_ctx); } /** @@ -2418,17 +3129,54 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream) /* Allow stream->ops->disable() to refer to this */ stream->enabled = false; + if (stream->hold_preemption) + intel_context_clear_nopreempt(stream->pinned_ctx); + if (stream->ops->disable) stream->ops->disable(stream); } +static long i915_perf_config_locked(struct i915_perf_stream *stream, + unsigned long metrics_set) +{ + struct i915_oa_config *config; + long ret = stream->oa_config->id; + + config = i915_perf_get_oa_config(stream->perf, metrics_set); + if (!config) + return -EINVAL; + + if (config != stream->oa_config) { + int err; + + /* + * If OA is bound to a specific context, emit the + * reconfiguration inline from that context. The update + * will then be ordered with respect to submission on that + * context. + * + * When set globally, we use a low priority kernel context, + * so it will effectively take effect when idle. + */ + err = emit_oa_config(stream, config, oa_context(stream)); + if (err == 0) + config = xchg(&stream->oa_config, config); + else + ret = err; + } + + i915_oa_config_put(config); + + return ret; +} + /** * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs * @stream: An i915 perf stream * @cmd: the ioctl request * @arg: the ioctl data * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. * * Returns: zero on success or a negative error code. Returns -EINVAL for @@ -2445,6 +3193,8 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream, case I915_PERF_IOCTL_DISABLE: i915_perf_disable_locked(stream); return 0; + case I915_PERF_IOCTL_CONFIG: + return i915_perf_config_locked(stream, arg); } return -EINVAL; @@ -2466,12 +3216,12 @@ static long i915_perf_ioctl(struct file *file, unsigned long arg) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; long ret; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_ioctl_locked(stream, cmd, arg); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); return ret; } @@ -2483,7 +3233,7 @@ static long i915_perf_ioctl(struct file *file, * Frees all resources associated with the given i915 perf @stream, disabling * any associated data capture in the process. * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. 
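The new I915_PERF_IOCTL_CONFIG handled above (added alongside this series) lets an already-open stream switch metric sets without being reopened: the argument is the metrics-set id and the return value is the id of the previously installed config. A sketch of the call from userspace (stream_fd and config_id assumed to come from the open and add-config steps; i915_drm.h from libdrm assumed on the include path):

#include <stdio.h>
#include <sys/ioctl.h>
#include <i915_drm.h>

/* Returns the previously active config id, or a negative value on error. */
static long switch_oa_config(int stream_fd, unsigned long config_id)
{
        long prev = ioctl(stream_fd, I915_PERF_IOCTL_CONFIG, config_id);

        if (prev < 0)
                perror("I915_PERF_IOCTL_CONFIG");

        return prev;
}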
*/ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) @@ -2494,8 +3244,6 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) if (stream->ops->destroy) stream->ops->destroy(stream); - list_del(&stream->link); - if (stream->ctx) i915_gem_context_put(stream->ctx); @@ -2516,14 +3264,14 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) static int i915_perf_release(struct inode *inode, struct file *file) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); i915_perf_destroy_locked(stream); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); /* Release the reference the perf stream kept on the driver. */ - drm_dev_put(&dev_priv->drm); + drm_dev_put(&perf->i915->drm); return 0; } @@ -2545,7 +3293,7 @@ static const struct file_operations fops = { /** * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD - * @dev_priv: i915 device instance + * @perf: i915 perf instance * @param: The open parameters passed to 'DRM_I915_PERF_OPEN` * @props: individually validated u64 property value pairs * @file: drm file @@ -2553,7 +3301,7 @@ static const struct file_operations fops = { * See i915_perf_ioctl_open() for interface details. * * Implements further stream config validation and stream initialization on - * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex + * behalf of i915_perf_open_ioctl() with the &perf->lock mutex * taken to serialize with any non-file-operation driver hooks. * * Note: at this point the @props have only been validated in isolation and @@ -2568,7 +3316,7 @@ static const struct file_operations fops = { * Returns: zero on success or a negative error code. */ static int -i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, +i915_perf_open_ioctl_locked(struct i915_perf *perf, struct drm_i915_perf_open_param *param, struct perf_open_properties *props, struct drm_file *file) @@ -2599,16 +3347,33 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, * rest of the system, which we consider acceptable for a * non-privileged client. * - * For Gen8+ the OA unit no longer supports clock gating off for a + * For Gen8->11 the OA unit no longer supports clock gating off for a * specific context and the kernel can't securely stop the counters * from updating as system-wide / global values. Even though we can * filter reports based on the included context ID we can't block * clients from seeing the raw / global counter values via * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. + * + * For Gen12+ we gain a new OAR unit that only monitors the RCS on a + * per context basis. So we can relax requirements there if the user + * doesn't request global stream access (i.e. query based sampling + * using MI_RECORD_PERF_COUNT. 
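
Restating the privilege rules above as a standalone predicate (names are illustrative, not driver API): opening the stream only demands CAP_SYS_ADMIN when dev.i915.perf_stream_paranoid is set and none of the per-context relaxations apply:

#include <stdbool.h>

/* Illustrative only: mirrors the decision made in i915_perf_open_ioctl_locked(). */
static bool open_needs_admin(bool paranoid_sysctl, bool is_haswell, bool is_gen12,
			     bool single_context, bool samples_oa_buffer,
			     bool hold_preemption)
{
	bool privileged_op = true;

	if (is_haswell && single_context)
		privileged_op = false;  /* HSW can gate the OA clocks per context */
	else if (is_gen12 && single_context && !samples_oa_buffer)
		privileged_op = false;  /* Gen12 OAR: per-context, no global OA buffer access */

	if (hold_preemption)
		privileged_op = true;   /* disabling preemption is always privileged */

	/* If this is true and the caller lacks CAP_SYS_ADMIN, the open fails with EACCES. */
	return privileged_op && paranoid_sysctl;
}
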
*/ - if (IS_HASWELL(dev_priv) && specific_ctx) + if (IS_HASWELL(perf->i915) && specific_ctx) privileged_op = false; + else if (IS_GEN(perf->i915, 12) && specific_ctx && + (props->sample_flags & SAMPLE_OA_REPORT) == 0) + privileged_op = false; + + if (props->hold_preemption) { + if (!props->single_context) { + DRM_DEBUG("preemption disable with no context\n"); + ret = -EINVAL; + goto err; + } + privileged_op = true; + } /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option @@ -2617,7 +3382,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, */ if (privileged_op && i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { - DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); + DRM_DEBUG("Insufficient privileges to open i915 perf stream\n"); ret = -EACCES; goto err_ctx; } @@ -2628,7 +3393,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, goto err_ctx; } - stream->dev_priv = dev_priv; + stream->perf = perf; stream->ctx = specific_ctx; ret = i915_oa_stream_init(stream, param, props); @@ -2644,8 +3409,6 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, goto err_flags; } - list_add(&stream->link, &dev_priv->perf.streams); - if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) f_flags |= O_CLOEXEC; if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) @@ -2654,7 +3417,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); if (stream_fd < 0) { ret = stream_fd; - goto err_open; + goto err_flags; } if (!(param->flags & I915_PERF_FLAG_DISABLED)) @@ -2663,12 +3426,10 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, /* Take a reference on the driver that will be kept with stream_fd * until its release. */ - drm_dev_get(&dev_priv->drm); + drm_dev_get(&perf->i915->drm); return stream_fd; -err_open: - list_del(&stream->link); err_flags: if (stream->ops->destroy) stream->ops->destroy(stream); @@ -2681,15 +3442,15 @@ err: return ret; } -static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) +static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { return div64_u64(1000000000ULL * (2ULL << exponent), - 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); + 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz); } /** * read_properties_unlocked - validate + copy userspace stream open properties - * @dev_priv: i915 device instance + * @perf: i915 perf instance * @uprops: The array of u64 key value pairs given by userspace * @n_props: The number of key value pairs expected in @uprops * @props: The stream configuration built up while validating properties @@ -2702,7 +3463,7 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) * we shouldn't validate or assume anything about ordering here. This doesn't * rule out defining new properties with ordering requirements in the future. */ -static int read_properties_unlocked(struct drm_i915_private *dev_priv, +static int read_properties_unlocked(struct i915_perf *perf, u64 __user *uprops, u32 n_props, struct perf_open_properties *props) @@ -2717,6 +3478,15 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, return -EINVAL; } + /* At the moment we only support using i915-perf on the RCS. 
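
For reference, the property pairs validated below are supplied at open time roughly like this; the metrics-set ID and GEM context handle are placeholders, and the exponent comment restates oa_exponent_to_ns() above:

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int open_oa_stream(int drm_fd, uint32_t ctx_handle, uint64_t metrics_set)
{
	uint64_t properties[] = {
		DRM_I915_PERF_PROP_CTX_HANDLE,     ctx_handle,
		DRM_I915_PERF_PROP_SAMPLE_OA,      1,
		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set,
		DRM_I915_PERF_PROP_OA_FORMAT,      I915_OA_FORMAT_A32u40_A4u32_B8_C8,
		/* sampling period ~= 2^(exponent + 1) / cs_timestamp_frequency */
		DRM_I915_PERF_PROP_OA_EXPONENT,    16,
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK,
		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
		.properties_ptr = (uintptr_t)properties,
	};

	/* Returns the new stream FD, or -1 with errno set (e.g. EACCES, see above). */
	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
}
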
*/ + props->engine = intel_engine_lookup_user(perf->i915, + I915_ENGINE_CLASS_RENDER, + 0); + if (!props->engine) { + DRM_DEBUG("No RENDER-capable engines\n"); + return -EINVAL; + } + /* Considering that ID = 0 is reserved and assuming that we don't * (currently) expect any configurations to ever specify duplicate * values for a particular property ID then the last _PROP_MAX value is @@ -2768,7 +3538,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, value); return -EINVAL; } - if (!dev_priv->perf.oa.oa_formats[value].size) { + if (!perf->oa_formats[value].size) { DRM_DEBUG("Unsupported OA report format %llu\n", value); return -EINVAL; @@ -2789,7 +3559,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, */ BUILD_BUG_ON(sizeof(oa_period) != 8); - oa_period = oa_exponent_to_ns(dev_priv, value); + oa_period = oa_exponent_to_ns(perf, value); /* This check is primarily to ensure that oa_period <= * UINT32_MAX (before passing to do_div which only @@ -2814,6 +3584,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; + case DRM_I915_PERF_PROP_HOLD_PREEMPTION: + props->hold_preemption = !!value; + break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; @@ -2843,7 +3616,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, * mutex to avoid an awkward lockdep with mmap_sem. * * Most of the implementation details are handled by - * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock + * i915_perf_open_ioctl_locked() after taking the &perf->lock * mutex for serializing with any non-file-operation driver hooks. * * Return: A newly opened i915 Perf stream file descriptor or negative @@ -2852,13 +3625,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; struct drm_i915_perf_open_param *param = data; struct perf_open_properties props; u32 known_open_flags; int ret; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } @@ -2871,124 +3644,130 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = read_properties_unlocked(dev_priv, + ret = read_properties_unlocked(perf, u64_to_user_ptr(param->properties_ptr), param->num_properties, &props); if (ret) return ret; - mutex_lock(&dev_priv->perf.lock); - ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file); - mutex_unlock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); + ret = i915_perf_open_ioctl_locked(perf, param, &props, file); + mutex_unlock(&perf->lock); return ret; } /** * i915_perf_register - exposes i915-perf to userspace - * @dev_priv: i915 device instance + * @i915: i915 device instance * * In particular OA metric sets are advertised under a sysfs metrics/ * directory allowing userspace to enumerate valid IDs that can be * used to open an i915-perf stream. 
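
Userspace resolves a metric set's UUID to the integer ID expected by DRM_I915_PERF_PROP_OA_METRICS_SET by walking this sysfs directory. A minimal sketch, assuming the usual /sys/class/drm/card0/metrics/<uuid>/id layout:

#include <inttypes.h>
#include <stdio.h>

/* Returns 0 on success and stores the metrics-set id for the given UUID. */
static int metrics_set_id(const char *uuid, uint64_t *id)
{
	char path[256];
	FILE *f;
	int ok;

	snprintf(path, sizeof(path),
		 "/sys/class/drm/card0/metrics/%s/id", uuid);
	f = fopen(path, "r");
	if (!f)
		return -1;

	ok = fscanf(f, "%" SCNu64, id) == 1;
	fclose(f);
	return ok ? 0 : -1;
}
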
*/ -void i915_perf_register(struct drm_i915_private *dev_priv) +void i915_perf_register(struct drm_i915_private *i915) { + struct i915_perf *perf = &i915->perf; int ret; - if (!dev_priv->perf.initialized) + if (!perf->i915) return; /* To be sure we're synchronized with an attempted * i915_perf_open_ioctl(); considering that we register after * being exposed to userspace. */ - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); - dev_priv->perf.metrics_kobj = + perf->metrics_kobj = kobject_create_and_add("metrics", - &dev_priv->drm.primary->kdev->kobj); - if (!dev_priv->perf.metrics_kobj) + &i915->drm.primary->kdev->kobj); + if (!perf->metrics_kobj) goto exit; - sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr); - - if (INTEL_GEN(dev_priv) >= 11) { - i915_perf_load_test_config_icl(dev_priv); - } else if (IS_CANNONLAKE(dev_priv)) { - i915_perf_load_test_config_cnl(dev_priv); - } else if (IS_COFFEELAKE(dev_priv)) { - if (IS_CFL_GT2(dev_priv)) - i915_perf_load_test_config_cflgt2(dev_priv); - if (IS_CFL_GT3(dev_priv)) - i915_perf_load_test_config_cflgt3(dev_priv); - } else if (IS_GEMINILAKE(dev_priv)) { - i915_perf_load_test_config_glk(dev_priv); - } else if (IS_KABYLAKE(dev_priv)) { - if (IS_KBL_GT2(dev_priv)) - i915_perf_load_test_config_kblgt2(dev_priv); - else if (IS_KBL_GT3(dev_priv)) - i915_perf_load_test_config_kblgt3(dev_priv); - } else if (IS_BROXTON(dev_priv)) { - i915_perf_load_test_config_bxt(dev_priv); - } else if (IS_SKYLAKE(dev_priv)) { - if (IS_SKL_GT2(dev_priv)) - i915_perf_load_test_config_sklgt2(dev_priv); - else if (IS_SKL_GT3(dev_priv)) - i915_perf_load_test_config_sklgt3(dev_priv); - else if (IS_SKL_GT4(dev_priv)) - i915_perf_load_test_config_sklgt4(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) { - i915_perf_load_test_config_chv(dev_priv); - } else if (IS_BROADWELL(dev_priv)) { - i915_perf_load_test_config_bdw(dev_priv); - } else if (IS_HASWELL(dev_priv)) { - i915_perf_load_test_config_hsw(dev_priv); -} - - if (dev_priv->perf.oa.test_config.id == 0) + sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr); + + if (IS_TIGERLAKE(i915)) { + i915_perf_load_test_config_tgl(i915); + } else if (INTEL_GEN(i915) >= 11) { + i915_perf_load_test_config_icl(i915); + } else if (IS_CANNONLAKE(i915)) { + i915_perf_load_test_config_cnl(i915); + } else if (IS_COFFEELAKE(i915)) { + if (IS_CFL_GT2(i915)) + i915_perf_load_test_config_cflgt2(i915); + if (IS_CFL_GT3(i915)) + i915_perf_load_test_config_cflgt3(i915); + } else if (IS_GEMINILAKE(i915)) { + i915_perf_load_test_config_glk(i915); + } else if (IS_KABYLAKE(i915)) { + if (IS_KBL_GT2(i915)) + i915_perf_load_test_config_kblgt2(i915); + else if (IS_KBL_GT3(i915)) + i915_perf_load_test_config_kblgt3(i915); + } else if (IS_BROXTON(i915)) { + i915_perf_load_test_config_bxt(i915); + } else if (IS_SKYLAKE(i915)) { + if (IS_SKL_GT2(i915)) + i915_perf_load_test_config_sklgt2(i915); + else if (IS_SKL_GT3(i915)) + i915_perf_load_test_config_sklgt3(i915); + else if (IS_SKL_GT4(i915)) + i915_perf_load_test_config_sklgt4(i915); + } else if (IS_CHERRYVIEW(i915)) { + i915_perf_load_test_config_chv(i915); + } else if (IS_BROADWELL(i915)) { + i915_perf_load_test_config_bdw(i915); + } else if (IS_HASWELL(i915)) { + i915_perf_load_test_config_hsw(i915); + } + + if (perf->test_config.id == 0) goto sysfs_error; - ret = sysfs_create_group(dev_priv->perf.metrics_kobj, - &dev_priv->perf.oa.test_config.sysfs_metric); + ret = sysfs_create_group(perf->metrics_kobj, + &perf->test_config.sysfs_metric); if (ret) goto sysfs_error; - 
atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1); + perf->test_config.perf = perf; + kref_init(&perf->test_config.ref); goto exit; sysfs_error: - kobject_put(dev_priv->perf.metrics_kobj); - dev_priv->perf.metrics_kobj = NULL; + kobject_put(perf->metrics_kobj); + perf->metrics_kobj = NULL; exit: - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } /** * i915_perf_unregister - hide i915-perf from userspace - * @dev_priv: i915 device instance + * @i915: i915 device instance * * i915-perf state cleanup is split up into an 'unregister' and * 'deinit' phase where the interface is first hidden from * userspace by i915_perf_unregister() before cleaning up * remaining state in i915_perf_fini(). */ -void i915_perf_unregister(struct drm_i915_private *dev_priv) +void i915_perf_unregister(struct drm_i915_private *i915) { - if (!dev_priv->perf.metrics_kobj) + struct i915_perf *perf = &i915->perf; + + if (!perf->metrics_kobj) return; - sysfs_remove_group(dev_priv->perf.metrics_kobj, - &dev_priv->perf.oa.test_config.sysfs_metric); + sysfs_remove_group(perf->metrics_kobj, + &perf->test_config.sysfs_metric); - kobject_put(dev_priv->perf.metrics_kobj); - dev_priv->perf.metrics_kobj = NULL; + kobject_put(perf->metrics_kobj); + perf->metrics_kobj = NULL; } -static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) { static const i915_reg_t flex_eu_regs[] = { EU_PERF_CNTL0, @@ -3008,56 +3787,80 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) return false; } -static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) +#define ADDR_IN_RANGE(addr, start, end) \ + ((addr) >= (start) && \ + (addr) <= (end)) + +#define REG_IN_RANGE(addr, start, end) \ + ((addr) >= i915_mmio_reg_offset(start) && \ + (addr) <= i915_mmio_reg_offset(end)) + +#define REG_EQUAL(addr, mmio) \ + ((addr) == i915_mmio_reg_offset(mmio)) + +static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) +{ + return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) || + REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) || + REG_IN_RANGE(addr, OACEC0_0, OACEC7_1); +} + +static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && - addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || - (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && - addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) || - (addr >= i915_mmio_reg_offset(OACEC0_0) && - addr <= i915_mmio_reg_offset(OACEC7_1)); + return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) || + REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) || + REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) || + REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI); } -static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || - (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && - addr <= i915_mmio_reg_offset(NOA_WRITE)) || - (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && - addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || - (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && - addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); + return gen7_is_valid_mux_addr(perf, addr) || + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || + REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8)); } -static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static 
bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || - addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || - (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && - addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); + return gen8_is_valid_mux_addr(perf, addr) || + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || + REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI); } -static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen8_is_valid_mux_addr(dev_priv, addr) || - addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) || - (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && - addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); + return gen7_is_valid_mux_addr(perf, addr) || + ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) || + REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) || + REG_EQUAL(addr, HSW_MBVID2_MISR0); } -static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || - (addr >= 0x25100 && addr <= 0x2FF90) || - (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && - addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || - addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); + return gen7_is_valid_mux_addr(perf, addr) || + ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); } -static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || - (addr >= 0x182300 && addr <= 0x1823A4); + return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) || + REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) || + REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) || + REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) || + REG_EQUAL(addr, GEN12_OAA_DBG_REG) || + REG_EQUAL(addr, GEN12_OAG_OA_PESS) || + REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF); +} + +static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) +{ + return REG_EQUAL(addr, NOA_WRITE) || + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || + REG_EQUAL(addr, GDT_CHICKEN_BITS) || + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || + REG_EQUAL(addr, RPM_CONFIG0) || + REG_EQUAL(addr, RPM_CONFIG1) || + REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8)); } static u32 mask_reg_value(u32 reg, u32 val) @@ -3066,21 +3869,21 @@ static u32 mask_reg_value(u32 reg, u32 val) * WaDisableSTUnitPowerOptimization workaround. Make sure the value * programmed by userspace doesn't change this. */ - if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) + if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2)) val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function * indicated by its name and a bunch of selection fields used by OA * configs. 
*/ - if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) + if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT)) val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); return val; } -static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, - bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr), +static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf, + bool (*is_valid)(struct i915_perf *perf, u32 addr), u32 __user *regs, u32 n_regs) { @@ -3110,7 +3913,7 @@ static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, if (err) goto addr_err; - if (!is_valid(dev_priv, addr)) { + if (!is_valid(perf, addr)) { DRM_DEBUG("Invalid oa_reg address: %X\n", addr); err = -EINVAL; goto addr_err; @@ -3143,7 +3946,7 @@ static ssize_t show_dynamic_id(struct device *dev, return sprintf(buf, "%d\n", oa_config->id); } -static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, +static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf, struct i915_oa_config *oa_config) { sysfs_attr_init(&oa_config->sysfs_metric_id.attr); @@ -3158,7 +3961,7 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, oa_config->sysfs_metric.name = oa_config->uuid; oa_config->sysfs_metric.attrs = oa_config->attrs; - return sysfs_create_group(dev_priv->perf.metrics_kobj, + return sysfs_create_group(perf->metrics_kobj, &oa_config->sysfs_metric); } @@ -3178,17 +3981,18 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; struct drm_i915_perf_oa_config *args = data; struct i915_oa_config *oa_config, *tmp; + struct i915_oa_reg *regs; int err, id; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } - if (!dev_priv->perf.metrics_kobj) { + if (!perf->metrics_kobj) { DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); return -EINVAL; } @@ -3211,7 +4015,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, return -ENOMEM; } - atomic_set(&oa_config->ref_count, 1); + oa_config->perf = perf; + kref_init(&oa_config->ref); if (!uuid_is_valid(args->uuid)) { DRM_DEBUG("Invalid uuid format for OA config\n"); @@ -3225,59 +4030,59 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); oa_config->mux_regs_len = args->n_mux_regs; - oa_config->mux_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.oa.ops.is_valid_mux_reg, - u64_to_user_ptr(args->mux_regs_ptr), - args->n_mux_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_mux_reg, + u64_to_user_ptr(args->mux_regs_ptr), + args->n_mux_regs); - if (IS_ERR(oa_config->mux_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for mux_regs\n"); - err = PTR_ERR(oa_config->mux_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->mux_regs = regs; oa_config->b_counter_regs_len = args->n_boolean_regs; - oa_config->b_counter_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.oa.ops.is_valid_b_counter_reg, - u64_to_user_ptr(args->boolean_regs_ptr), - args->n_boolean_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_b_counter_reg, + u64_to_user_ptr(args->boolean_regs_ptr), + args->n_boolean_regs); - if (IS_ERR(oa_config->b_counter_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA 
config for b_counter_regs\n"); - err = PTR_ERR(oa_config->b_counter_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->b_counter_regs = regs; - if (INTEL_GEN(dev_priv) < 8) { + if (INTEL_GEN(perf->i915) < 8) { if (args->n_flex_regs != 0) { err = -EINVAL; goto reg_err; } } else { oa_config->flex_regs_len = args->n_flex_regs; - oa_config->flex_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.oa.ops.is_valid_flex_reg, - u64_to_user_ptr(args->flex_regs_ptr), - args->n_flex_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_flex_reg, + u64_to_user_ptr(args->flex_regs_ptr), + args->n_flex_regs); - if (IS_ERR(oa_config->flex_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for flex_regs\n"); - err = PTR_ERR(oa_config->flex_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->flex_regs = regs; } - err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + err = mutex_lock_interruptible(&perf->metrics_lock); if (err) goto reg_err; /* We shouldn't have too many configs, so this iteration shouldn't be * too costly. */ - idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) { + idr_for_each_entry(&perf->metrics_idr, tmp, id) { if (!strcmp(tmp->uuid, oa_config->uuid)) { DRM_DEBUG("OA config already exists with this uuid\n"); err = -EADDRINUSE; @@ -3285,14 +4090,14 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, } } - err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config); + err = create_dynamic_oa_sysfs_entry(perf, oa_config); if (err) { DRM_DEBUG("Failed to create sysfs entry for OA config\n"); goto sysfs_err; } /* Config id 0 is invalid, id 1 for kernel stored test config. */ - oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr, + oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL); if (oa_config->id < 0) { @@ -3301,16 +4106,16 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, goto sysfs_err; } - mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&perf->metrics_lock); DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id); return oa_config->id; sysfs_err: - mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&perf->metrics_lock); reg_err: - put_oa_config(dev_priv, oa_config); + i915_oa_config_put(oa_config); DRM_DEBUG("Failed to add new OA config\n"); return err; } @@ -3329,12 +4134,12 @@ reg_err: int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; u64 *arg = data; struct i915_oa_config *oa_config; int ret; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } @@ -3344,31 +4149,33 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, return -EACCES; } - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + ret = mutex_lock_interruptible(&perf->metrics_lock); if (ret) - goto lock_err; + return ret; - oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg); + oa_config = idr_find(&perf->metrics_idr, *arg); if (!oa_config) { DRM_DEBUG("Failed to remove unknown OA config\n"); ret = -ENOENT; - goto config_err; + goto err_unlock; } GEM_BUG_ON(*arg != oa_config->id); - sysfs_remove_group(dev_priv->perf.metrics_kobj, - &oa_config->sysfs_metric); + sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric); + + idr_remove(&perf->metrics_idr, *arg); - idr_remove(&dev_priv->perf.metrics_idr, *arg); 
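
From userspace, the add/remove pair above is exercised with struct drm_i915_perf_oa_config; the register lists are elided here since their contents are platform metric data, and each list is passed as (mmio address, value) u32 pairs, mirroring alloc_oa_regs():

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Returns the new config id (>= 2) or -1 with errno set. Each *_regs array
 * holds n pairs of u32s: { address, value, address, value, ... }. */
static int add_oa_config(int drm_fd, const char *uuid36,
			 const uint32_t *mux, uint32_t n_mux,
			 const uint32_t *boolean, uint32_t n_bool,
			 const uint32_t *flex, uint32_t n_flex)
{
	struct drm_i915_perf_oa_config config = {
		.n_mux_regs = n_mux,
		.mux_regs_ptr = (uintptr_t)mux,
		.n_boolean_regs = n_bool,
		.boolean_regs_ptr = (uintptr_t)boolean,
		.n_flex_regs = n_flex,          /* must be 0 before Gen8 */
		.flex_regs_ptr = (uintptr_t)flex,
	};

	memcpy(config.uuid, uuid36, sizeof(config.uuid)); /* 36-char UUID, no NUL */
	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
}

static int remove_oa_config(int drm_fd, uint64_t config_id)
{
	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id);
}
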
+ mutex_unlock(&perf->metrics_lock); DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - put_oa_config(dev_priv, oa_config); + i915_oa_config_put(oa_config); -config_err: - mutex_unlock(&dev_priv->perf.metrics_lock); -lock_err: + return 0; + +err_unlock: + mutex_unlock(&perf->metrics_lock); return ret; } @@ -3415,140 +4222,209 @@ static struct ctl_table dev_root[] = { }; /** - * i915_perf_init - initialize i915-perf state on module load - * @dev_priv: i915 device instance + * i915_perf_init - initialize i915-perf state on module bind + * @i915: i915 device instance * * Initializes i915-perf state without exposing anything to userspace. * * Note: i915-perf initialization is split into an 'init' and 'register' * phase with the i915_perf_register() exposing state to userspace. */ -void i915_perf_init(struct drm_i915_private *dev_priv) -{ - if (IS_HASWELL(dev_priv)) { - dev_priv->perf.oa.ops.is_valid_b_counter_reg = - gen7_is_valid_b_counter_addr; - dev_priv->perf.oa.ops.is_valid_mux_reg = - hsw_is_valid_mux_addr; - dev_priv->perf.oa.ops.is_valid_flex_reg = NULL; - dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; - dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; - dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; - dev_priv->perf.oa.ops.read = gen7_oa_read; - dev_priv->perf.oa.ops.oa_hw_tail_read = - gen7_oa_hw_tail_read; - - dev_priv->perf.oa.oa_formats = hsw_oa_formats; - } else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { +void i915_perf_init(struct drm_i915_private *i915) +{ + struct i915_perf *perf = &i915->perf; + + /* XXX const struct i915_perf_ops! */ + + if (IS_HASWELL(i915)) { + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; + perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr; + perf->ops.is_valid_flex_reg = NULL; + perf->ops.enable_metric_set = hsw_enable_metric_set; + perf->ops.disable_metric_set = hsw_disable_metric_set; + perf->ops.oa_enable = gen7_oa_enable; + perf->ops.oa_disable = gen7_oa_disable; + perf->ops.read = gen7_oa_read; + perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read; + + perf->oa_formats = hsw_oa_formats; + } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) { /* Note: that although we could theoretically also support the * legacy ringbuffer mode on BDW (and earlier iterations of * this driver, before upstreaming did this) it didn't seem * worth the complexity to maintain now that BDW+ enable * execlist mode by default. 
*/ - dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; + perf->ops.read = gen8_oa_read; - dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; - dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; - dev_priv->perf.oa.ops.read = gen8_oa_read; - dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; + if (IS_GEN_RANGE(i915, 8, 9)) { + perf->oa_formats = gen8_plus_oa_formats; - if (IS_GEN_RANGE(dev_priv, 8, 9)) { - dev_priv->perf.oa.ops.is_valid_b_counter_reg = + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.oa.ops.is_valid_mux_reg = + perf->ops.is_valid_mux_reg = gen8_is_valid_mux_addr; - dev_priv->perf.oa.ops.is_valid_flex_reg = + perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; - if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->perf.oa.ops.is_valid_mux_reg = + if (IS_CHERRYVIEW(i915)) { + perf->ops.is_valid_mux_reg = chv_is_valid_mux_addr; } - dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set; + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; + perf->ops.enable_metric_set = gen8_enable_metric_set; + perf->ops.disable_metric_set = gen8_disable_metric_set; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - if (IS_GEN(dev_priv, 8)) { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; + if (IS_GEN(i915, 8)) { + perf->ctx_oactxctrl_offset = 0x120; + perf->ctx_flexeu0_offset = 0x2ce; - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); + perf->gen8_valid_ctx_bit = BIT(25); } else { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + perf->ctx_oactxctrl_offset = 0x128; + perf->ctx_flexeu0_offset = 0x3de; - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); + perf->gen8_valid_ctx_bit = BIT(16); } - } else if (IS_GEN_RANGE(dev_priv, 10, 11)) { - dev_priv->perf.oa.ops.is_valid_b_counter_reg = + } else if (IS_GEN_RANGE(i915, 10, 11)) { + perf->oa_formats = gen8_plus_oa_formats; + + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.oa.ops.is_valid_mux_reg = + perf->ops.is_valid_mux_reg = gen10_is_valid_mux_addr; - dev_priv->perf.oa.ops.is_valid_flex_reg = + perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; - dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; + perf->ops.enable_metric_set = gen8_enable_metric_set; + perf->ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - if (IS_GEN(dev_priv, 10)) { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + if (IS_GEN(i915, 10)) { + perf->ctx_oactxctrl_offset = 0x128; + perf->ctx_flexeu0_offset = 0x3de; } else { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x124; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x78e; + perf->ctx_oactxctrl_offset = 0x124; + perf->ctx_flexeu0_offset = 0x78e; } - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); + perf->gen8_valid_ctx_bit = BIT(16); + } else if (IS_GEN(i915, 12)) { + perf->oa_formats = gen12_oa_formats; + + perf->ops.is_valid_b_counter_reg = + gen12_is_valid_b_counter_addr; + perf->ops.is_valid_mux_reg = + gen12_is_valid_mux_addr; + perf->ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; + + perf->ops.oa_enable = gen12_oa_enable; + perf->ops.oa_disable = gen12_oa_disable; + 
perf->ops.enable_metric_set = gen12_enable_metric_set; + perf->ops.disable_metric_set = gen12_disable_metric_set; + perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read; + + perf->ctx_flexeu0_offset = 0; + perf->ctx_oactxctrl_offset = 0x144; } } - if (dev_priv->perf.oa.ops.enable_metric_set) { - hrtimer_init(&dev_priv->perf.oa.poll_check_timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; - init_waitqueue_head(&dev_priv->perf.oa.poll_wq); - - INIT_LIST_HEAD(&dev_priv->perf.streams); - mutex_init(&dev_priv->perf.lock); - spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); + if (perf->ops.enable_metric_set) { + mutex_init(&perf->lock); oa_sample_rate_hard_limit = 1000 * - (RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2); - dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); + (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); - mutex_init(&dev_priv->perf.metrics_lock); - idr_init(&dev_priv->perf.metrics_idr); + mutex_init(&perf->metrics_lock); + idr_init(&perf->metrics_idr); - dev_priv->perf.initialized = true; + /* We set up some ratelimit state to potentially throttle any + * _NOTES about spurious, invalid OA reports which we don't + * forward to userspace. + * + * We print a _NOTE about any throttling when closing the + * stream instead of waiting until driver _fini which no one + * would ever see. + * + * Using the same limiting factors as printk_ratelimit() + */ + ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10); + /* Since we use a DRM_NOTE for spurious reports it would be + * inconsistent to let __ratelimit() automatically print a + * warning for throttling. + */ + ratelimit_set_flags(&perf->spurious_report_rs, + RATELIMIT_MSG_ON_RELEASE); + + atomic64_set(&perf->noa_programming_delay, + 500 * 1000 /* 500us */); + + perf->i915 = i915; } } static int destroy_config(int id, void *p, void *data) { - struct drm_i915_private *dev_priv = data; - struct i915_oa_config *oa_config = p; + i915_oa_config_put(p); + return 0; +} - put_oa_config(dev_priv, oa_config); +void i915_perf_sysctl_register(void) +{ + sysctl_header = register_sysctl_table(dev_root); +} - return 0; +void i915_perf_sysctl_unregister(void) +{ + unregister_sysctl_table(sysctl_header); } /** * i915_perf_fini - Counter part to i915_perf_init() - * @dev_priv: i915 device instance + * @i915: i915 device instance */ -void i915_perf_fini(struct drm_i915_private *dev_priv) +void i915_perf_fini(struct drm_i915_private *i915) { - if (!dev_priv->perf.initialized) - return; + struct i915_perf *perf = &i915->perf; - idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv); - idr_destroy(&dev_priv->perf.metrics_idr); + if (!perf->i915) + return; - unregister_sysctl_table(dev_priv->perf.sysctl_header); + idr_for_each(&perf->metrics_idr, destroy_config, perf); + idr_destroy(&perf->metrics_idr); - memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops)); + memset(&perf->ops, 0, sizeof(perf->ops)); + perf->i915 = NULL; +} - dev_priv->perf.initialized = false; +/** + * i915_perf_ioctl_version - Version of the i915-perf subsystem + * + * This version number is used by userspace to detect available features. + */ +int i915_perf_ioctl_version(void) +{ + /* + * 1: Initial version + * I915_PERF_IOCTL_ENABLE + * I915_PERF_IOCTL_DISABLE + * + * 2: Added runtime modification of OA config. 
+ * I915_PERF_IOCTL_CONFIG + * + * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold + * preemption on a particular context so that performance data is + * accessible from a delta of MI_RPC reports without looking at the + * OA buffer. + */ + return 3; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_perf.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h new file mode 100644 index 000000000000..882fdd0a7680 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_PERF_H__ +#define __I915_PERF_H__ + +#include <linux/kref.h> +#include <linux/types.h> + +#include "i915_perf_types.h" + +struct drm_device; +struct drm_file; +struct drm_i915_private; +struct i915_oa_config; +struct intel_context; +struct intel_engine_cs; + +void i915_perf_init(struct drm_i915_private *i915); +void i915_perf_fini(struct drm_i915_private *i915); +void i915_perf_register(struct drm_i915_private *i915); +void i915_perf_unregister(struct drm_i915_private *i915); +int i915_perf_ioctl_version(void); +void i915_perf_sysctl_register(void); +void i915_perf_sysctl_unregister(void); + +int i915_perf_open_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +void i915_oa_init_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine); + +struct i915_oa_config * +i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set); + +static inline struct i915_oa_config * +i915_oa_config_get(struct i915_oa_config *oa_config) +{ + if (kref_get_unless_zero(&oa_config->ref)) + return oa_config; + else + return NULL; +} + +void i915_oa_config_release(struct kref *ref); +static inline void i915_oa_config_put(struct i915_oa_config *oa_config) +{ + if (!oa_config) + return; + + kref_put(&oa_config->ref, i915_oa_config_release); +} + +#endif /* __I915_PERF_H__ */ diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h new file mode 100644 index 000000000000..45e581455f5d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -0,0 +1,434 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_PERF_TYPES_H_ +#define _I915_PERF_TYPES_H_ + +#include <linux/atomic.h> +#include <linux/device.h> +#include <linux/hrtimer.h> +#include <linux/llist.h> +#include <linux/poll.h> +#include <linux/sysfs.h> +#include <linux/types.h> +#include <linux/uuid.h> +#include <linux/wait.h> + +#include "i915_reg.h" +#include "intel_wakeref.h" + +struct drm_i915_private; +struct file; +struct i915_gem_context; +struct i915_perf; +struct i915_vma; +struct intel_context; +struct intel_engine_cs; + +struct i915_oa_format { + u32 format; + int size; +}; + +struct i915_oa_reg { + i915_reg_t addr; + u32 value; +}; + +struct i915_oa_config { + struct i915_perf *perf; + + char uuid[UUID_STRING_LEN + 1]; + int id; + + const struct i915_oa_reg *mux_regs; + u32 mux_regs_len; + const struct i915_oa_reg *b_counter_regs; + u32 b_counter_regs_len; + const struct i915_oa_reg *flex_regs; + u32 flex_regs_len; + + struct attribute_group sysfs_metric; + struct attribute *attrs[2]; + struct device_attribute sysfs_metric_id; + + struct kref ref; + struct rcu_head rcu; +}; + +struct 
i915_perf_stream; + +/** + * struct i915_perf_stream_ops - the OPs to support a specific stream type + */ +struct i915_perf_stream_ops { + /** + * @enable: Enables the collection of HW samples, either in response to + * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened + * without `I915_PERF_FLAG_DISABLED`. + */ + void (*enable)(struct i915_perf_stream *stream); + + /** + * @disable: Disables the collection of HW samples, either in response + * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying + * the stream. + */ + void (*disable)(struct i915_perf_stream *stream); + + /** + * @poll_wait: Call poll_wait, passing a wait queue that will be woken + * once there is something ready to read() for the stream + */ + void (*poll_wait)(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait); + + /** + * @wait_unlocked: For handling a blocking read, wait until there is + * something to ready to read() for the stream. E.g. wait on the same + * wait queue that would be passed to poll_wait(). + */ + int (*wait_unlocked)(struct i915_perf_stream *stream); + + /** + * @read: Copy buffered metrics as records to userspace + * **buf**: the userspace, destination buffer + * **count**: the number of bytes to copy, requested by userspace + * **offset**: zero at the start of the read, updated as the read + * proceeds, it represents how many bytes have been copied so far and + * the buffer offset for copying the next record. + * + * Copy as many buffered i915 perf samples and records for this stream + * to userspace as will fit in the given buffer. + * + * Only write complete records; returning -%ENOSPC if there isn't room + * for a complete record. + * + * Return any error condition that results in a short read such as + * -%ENOSPC or -%EFAULT, even though these may be squashed before + * returning to userspace. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /** + * @destroy: Cleanup any stream specific resources. + * + * The stream will always be disabled before this is called. + */ + void (*destroy)(struct i915_perf_stream *stream); +}; + +/** + * struct i915_perf_stream - state for a single open stream FD + */ +struct i915_perf_stream { + /** + * @perf: i915_perf backpointer + */ + struct i915_perf *perf; + + /** + * @uncore: mmio access path + */ + struct intel_uncore *uncore; + + /** + * @engine: Engine associated with this performance stream. + */ + struct intel_engine_cs *engine; + + /** + * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` + * properties given when opening a stream, representing the contents + * of a single sample as read() by userspace. + */ + u32 sample_flags; + + /** + * @sample_size: Considering the configured contents of a sample + * combined with the required header size, this is the total size + * of a single sample record. + */ + int sample_size; + + /** + * @ctx: %NULL if measuring system-wide across all contexts or a + * specific context that is being monitored. + */ + struct i915_gem_context *ctx; + + /** + * @enabled: Whether the stream is currently enabled, considering + * whether the stream was opened in a disabled state and based + * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. + */ + bool enabled; + + /** + * @hold_preemption: Whether preemption is put on hold for command + * submissions done on the @ctx. 
This is useful for some drivers that + * cannot easily post process the OA buffer context to subtract delta + * of performance counters not associated with @ctx. + */ + bool hold_preemption; + + /** + * @ops: The callbacks providing the implementation of this specific + * type of configured stream. + */ + const struct i915_perf_stream_ops *ops; + + /** + * @oa_config: The OA configuration used by the stream. + */ + struct i915_oa_config *oa_config; + + /** + * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily + * each time @oa_config changes. + */ + struct llist_head oa_config_bos; + + /** + * @pinned_ctx: The OA context specific information. + */ + struct intel_context *pinned_ctx; + + /** + * @specific_ctx_id: The id of the specific context. + */ + u32 specific_ctx_id; + + /** + * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. + */ + u32 specific_ctx_id_mask; + + /** + * @poll_check_timer: High resolution timer that will periodically + * check for data in the circular OA buffer for notifying userspace + * (e.g. during a read() or poll()). + */ + struct hrtimer poll_check_timer; + + /** + * @poll_wq: The wait queue that hrtimer callback wakes when it + * sees data ready to read in the circular OA buffer. + */ + wait_queue_head_t poll_wq; + + /** + * @pollin: Whether there is data available to read. + */ + bool pollin; + + /** + * @periodic: Whether periodic sampling is currently enabled. + */ + bool periodic; + + /** + * @period_exponent: The OA unit sampling frequency is derived from this. + */ + int period_exponent; + + /** + * @oa_buffer: State of the OA buffer. + */ + struct { + struct i915_vma *vma; + u8 *vaddr; + u32 last_ctx_id; + int format; + int format_size; + int size_exponent; + + /** + * @ptr_lock: Locks reads and writes to all head/tail state + * + * Consider: the head and tail pointer state needs to be read + * consistently from a hrtimer callback (atomic context) and + * read() fop (user context) with tail pointer updates happening + * in atomic context and head updates in user context and the + * (unlikely) possibility of read() errors needing to reset all + * head/tail state. + * + * Note: Contention/performance aren't currently a significant + * concern here considering the relatively low frequency of + * hrtimer callbacks (5ms period) and that reads typically only + * happen in response to a hrtimer event and likely complete + * before the next callback. + * + * Note: This lock is not held *while* reading and copying data + * to userspace so the value of head observed in htrimer + * callbacks won't represent any partial consumption of data. + */ + spinlock_t ptr_lock; + + /** + * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to + * used for reading. + * + * Initial values of 0xffffffff are invalid and imply that an + * update is required (and should be ignored by an attempted + * read) + */ + struct { + u32 offset; + } tails[2]; + + /** + * @aged_tail_idx: Index for the aged tail ready to read() data up to. + */ + unsigned int aged_tail_idx; + + /** + * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer + * was read; used to determine when it is old enough to trust. 
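
The aged/aging pair documented here is easier to follow in isolation. A simplified, self-contained model of the idea, not the driver's exact code (which also checks that at least one full report lies between the tails and handles buffer wrap-around):

#include <stdint.h>

#define INVALID_TAIL 0xffffffffu

struct tail_state {
	uint32_t tails[2];        /* one slot "aged", the other "aging"; both start INVALID_TAIL */
	unsigned int aged_idx;    /* which slot a reader is allowed to trust */
	uint64_t aging_timestamp; /* when the aging slot was last refreshed */
};

/*
 * Called periodically (the driver uses a 5ms hrtimer). Returns the offset a
 * reader may consume up to, or INVALID_TAIL if nothing is trustworthy yet: a
 * freshly sampled hardware tail is only trusted once margin_ns have passed
 * since it was stored, giving the OA unit time to land the reports behind it.
 */
static uint32_t update_tails(struct tail_state *ts, uint32_t hw_tail,
			     uint64_t now_ns, uint64_t margin_ns)
{
	unsigned int aging_idx = !ts->aged_idx;

	if (ts->tails[aging_idx] != INVALID_TAIL &&
	    now_ns - ts->aging_timestamp > margin_ns) {
		/* The aging tail has aged: promote it and retire the old slot. */
		ts->aged_idx = aging_idx;
		aging_idx = !aging_idx;
		ts->tails[aging_idx] = INVALID_TAIL;
	}

	if (hw_tail != ts->tails[ts->aged_idx] &&
	    ts->tails[aging_idx] == INVALID_TAIL) {
		/* Start aging the most recent hardware tail we have seen. */
		ts->tails[aging_idx] = hw_tail;
		ts->aging_timestamp = now_ns;
	}

	return ts->tails[ts->aged_idx];
}
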
+ */ + u64 aging_timestamp; + + /** + * @head: Although we can always read back the head pointer register, + * we prefer to avoid trusting the HW state, just to avoid any + * risk that some hardware condition could * somehow bump the + * head pointer unpredictably and cause us to forward the wrong + * OA buffer data to userspace. + */ + u32 head; + } oa_buffer; + + /** + * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be + * reprogrammed. + */ + struct i915_vma *noa_wait; +}; + +/** + * struct i915_oa_ops - Gen specific implementation of an OA unit stream + */ +struct i915_oa_ops { + /** + * @is_valid_b_counter_reg: Validates register's address for + * programming boolean counters for a particular platform. + */ + bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); + + /** + * @is_valid_mux_reg: Validates register's address for programming mux + * for a particular platform. + */ + bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); + + /** + * @is_valid_flex_reg: Validates register's address for programming + * flex EU filtering for a particular platform. + */ + bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); + + /** + * @enable_metric_set: Selects and applies any MUX configuration to set + * up the Boolean and Custom (B/C) counters that are part of the + * counter reports being sampled. May apply system constraints such as + * disabling EU clock gating as required. + */ + int (*enable_metric_set)(struct i915_perf_stream *stream); + + /** + * @disable_metric_set: Remove system constraints associated with using + * the OA unit. + */ + void (*disable_metric_set)(struct i915_perf_stream *stream); + + /** + * @oa_enable: Enable periodic sampling + */ + void (*oa_enable)(struct i915_perf_stream *stream); + + /** + * @oa_disable: Disable periodic sampling + */ + void (*oa_disable)(struct i915_perf_stream *stream); + + /** + * @read: Copy data from the circular OA buffer into a given userspace + * buffer. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /** + * @oa_hw_tail_read: read the OA tail pointer register + * + * In particular this enables us to share all the fiddly code for + * handling the OA unit tail pointer race that affects multiple + * generations. + */ + u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); +}; + +struct i915_perf { + struct drm_i915_private *i915; + + struct kobject *metrics_kobj; + + /* + * Lock associated with adding/modifying/removing OA configs + * in perf->metrics_idr. + */ + struct mutex metrics_lock; + + /* + * List of dynamic configurations (struct i915_oa_config), you + * need to hold perf->metrics_lock to access it. + */ + struct idr metrics_idr; + + /* + * Lock associated with anything below within this structure + * except exclusive_stream. + */ + struct mutex lock; + + /* + * The stream currently using the OA unit. If accessed + * outside a syscall associated to its file + * descriptor. 
+ */ + struct i915_perf_stream *exclusive_stream; + + /** + * For rate limiting any notifications of spurious + * invalid OA reports + */ + struct ratelimit_state spurious_report_rs; + + struct i915_oa_config test_config; + + u32 gen7_latched_oastatus1; + u32 ctx_oactxctrl_offset; + u32 ctx_flexeu0_offset; + + /** + * The RPT_ID/reason field for Gen8+ includes a bit + * to determine if the CTX ID in the report is valid + * but the specific bit differs between Gen 8 and 9 + */ + u32 gen8_valid_ctx_bit; + + struct i915_oa_ops ops; + const struct i915_oa_format *oa_formats; + + atomic64_t noa_programming_delay; +}; + +#endif /* _I915_PERF_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 8fe46ee920a0..ec0299490dd4 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -8,6 +8,11 @@ #include <linux/pm_runtime.h> #include "gt/intel_engine.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt_pm.h" +#include "gt/intel_rc6.h" +#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_pmu.h" @@ -74,8 +79,9 @@ static unsigned int event_enabled_bit(struct perf_event *event) return config_enabled_bit(event->attr.config); } -static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) +static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) { + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); u64 enable; /* @@ -83,7 +89,7 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) * * We start with a bitmask of all currently enabled events. */ - enable = i915->pmu.enable; + enable = pmu->enable; /* * Mask out all the ones which do not need the timer, or in @@ -102,10 +108,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) /* * Also there is software busyness tracking available we do not * need the timer for I915_SAMPLE_BUSY counter. - * - * Use RCS as proxy for all engines. */ - else if (intel_engine_supports_stats(i915->engine[RCS0])) + else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS) enable &= ~BIT(I915_SAMPLE_BUSY); /* @@ -114,42 +118,139 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) return enable; } +static u64 __get_rc6(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + u64 val; + + val = intel_rc6_residency_ns(>->rc6, + IS_VALLEYVIEW(i915) ? + VLV_GT_RENDER_RC6 : + GEN6_GT_GFX_RC6); + + if (HAS_RC6p(i915)) + val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6p); + + if (HAS_RC6pp(i915)) + val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6pp); + + return val; +} + +#if IS_ENABLED(CONFIG_PM) + +static inline s64 ktime_since(const ktime_t kt) +{ + return ktime_to_ns(ktime_sub(ktime_get(), kt)); +} + +static u64 get_rc6(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct i915_pmu *pmu = &i915->pmu; + unsigned long flags; + bool awake = false; + u64 val; + + if (intel_gt_pm_get_if_awake(gt)) { + val = __get_rc6(gt); + intel_gt_pm_put_async(gt); + awake = true; + } + + spin_lock_irqsave(&pmu->lock, flags); + + if (awake) { + pmu->sample[__I915_SAMPLE_RC6].cur = val; + } else { + /* + * We think we are runtime suspended. + * + * Report the delta from when the device was suspended to now, + * on top of the last known real value, as the approximated RC6 + * counter value. 
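
/*
 * An illustrative worked example of the estimate described above (numbers
 * are made up):
 *
 *   at park time:   sample[__I915_SAMPLE_RC6].cur = 2 000 000 000 ns
 *   0.5 s later, still runtime suspended, a PMU read reports
 *     ktime_since(sleep_last) + sample[__I915_SAMPLE_RC6].cur
 *       = 500 000 000 + 2 000 000 000 = 2 500 000 000 ns
 *
 * The __I915_SAMPLE_RC6_LAST_REPORTED clamp below then keeps the counter
 * monotonic once the GT wakes up and a real measurement is taken again.
 */
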
+ */ + val = ktime_since(pmu->sleep_last); + val += pmu->sample[__I915_SAMPLE_RC6].cur; + } + + if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur) + val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur; + else + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; + + spin_unlock_irqrestore(&pmu->lock, flags); + + return val; +} + +static void park_rc6(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + + pmu->sleep_last = ktime_get(); +} + +#else + +static u64 get_rc6(struct intel_gt *gt) +{ + return __get_rc6(gt); +} + +static void park_rc6(struct drm_i915_private *i915) {} + +#endif + +static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) +{ + if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) { + pmu->timer_enabled = true; + pmu->timer_last = ktime_get(); + hrtimer_start_range_ns(&pmu->timer, + ns_to_ktime(PERIOD), 0, + HRTIMER_MODE_REL_PINNED); + } +} + void i915_pmu_gt_parked(struct drm_i915_private *i915) { - if (!i915->pmu.base.event_init) + struct i915_pmu *pmu = &i915->pmu; + + if (!pmu->base.event_init) return; - spin_lock_irq(&i915->pmu.lock); + spin_lock_irq(&pmu->lock); + + park_rc6(i915); + /* * Signal sampling timer to stop if only engine events are enabled and * GPU went idle. */ - i915->pmu.timer_enabled = pmu_needs_timer(i915, false); - spin_unlock_irq(&i915->pmu.lock); -} + pmu->timer_enabled = pmu_needs_timer(pmu, false); -static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915) -{ - if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) { - i915->pmu.timer_enabled = true; - i915->pmu.timer_last = ktime_get(); - hrtimer_start_range_ns(&i915->pmu.timer, - ns_to_ktime(PERIOD), 0, - HRTIMER_MODE_REL_PINNED); - } + spin_unlock_irq(&pmu->lock); } void i915_pmu_gt_unparked(struct drm_i915_private *i915) { - if (!i915->pmu.base.event_init) + struct i915_pmu *pmu = &i915->pmu; + + if (!pmu->base.event_init) return; - spin_lock_irq(&i915->pmu.lock); + spin_lock_irq(&pmu->lock); + /* * Re-enable sampling timer when GPU goes active. */ - __i915_pmu_maybe_start_timer(i915); - spin_unlock_irq(&i915->pmu.lock); + __i915_pmu_maybe_start_timer(pmu); + + spin_unlock_irq(&pmu->lock); } static void @@ -158,38 +259,59 @@ add_sample(struct i915_pmu_sample *sample, u32 val) sample->cur += val; } +static bool exclusive_mmio_access(const struct drm_i915_private *i915) +{ + /* + * We have to avoid concurrent mmio cache line access on gen7 or + * risk a machine hang. For a fun history lesson dig out the old + * userspace intel_gpu_top and run it on Ivybridge or Haswell! 
+ */ + return IS_GEN(i915, 7); +} + static void -engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) +engines_sample(struct intel_gt *gt, unsigned int period_ns) { + struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; - unsigned long flags; - if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0) + if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) return; - wakeref = 0; - if (READ_ONCE(dev_priv->gt.awake)) - wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm); - if (!wakeref) + if (!intel_gt_pm_is_awake(gt)) return; - spin_lock_irqsave(&dev_priv->uncore.lock, flags); - for_each_engine(engine, dev_priv, id) { + for_each_engine(engine, gt, id) { struct intel_engine_pmu *pmu = &engine->pmu; + spinlock_t *mmio_lock; + unsigned long flags; bool busy; u32 val; - val = I915_READ_FW(RING_CTL(engine->mmio_base)); - if (val == 0) /* powerwell off => engine idle */ + if (!intel_engine_pm_get_if_awake(engine)) continue; + mmio_lock = NULL; + if (exclusive_mmio_access(i915)) + mmio_lock = &engine->uncore->lock; + + if (unlikely(mmio_lock)) + spin_lock_irqsave(mmio_lock, flags); + + val = ENGINE_READ_FW(engine, RING_CTL); + if (val == 0) /* powerwell off => engine idle */ + goto skip; + if (val & RING_WAIT) add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); if (val & RING_WAIT_SEMAPHORE) add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); + /* No need to sample when busy stats are supported. */ + if (intel_engine_supports_stats(engine)) + goto skip; + /* * While waiting on a semaphore or event, MI_MODE reports the * ring as idle. However, previously using the seqno, and with @@ -199,15 +321,17 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) */ busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); if (!busy) { - val = I915_READ_FW(RING_MI_MODE(engine->mmio_base)); + val = ENGINE_READ_FW(engine, RING_MI_MODE); busy = !(val & MODE_IDLE); } if (busy) add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); - } - spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); +skip: + if (unlikely(mmio_lock)) + spin_unlock_irqrestore(mmio_lock, flags); + intel_engine_pm_put_async(engine); + } } static void @@ -216,52 +340,74 @@ add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul) sample->cur += mul_u32_u32(val, mul); } +static bool frequency_sampling_enabled(struct i915_pmu *pmu) +{ + return pmu->enable & + (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | + config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)); +} + static void -frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) +frequency_sample(struct intel_gt *gt, unsigned int period_ns) { - if (dev_priv->pmu.enable & - config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { - u32 val; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct i915_pmu *pmu = &i915->pmu; + struct intel_rps *rps = >->rps; - val = dev_priv->gt_pm.rps.cur_freq; - if (dev_priv->gt.awake) { - intel_wakeref_t wakeref; + if (!frequency_sampling_enabled(pmu)) + return; - with_intel_runtime_pm_if_in_use(&dev_priv->runtime_pm, - wakeref) { - val = intel_uncore_read_notrace(&dev_priv->uncore, - GEN6_RPSTAT1); - val = intel_get_cagf(dev_priv, val); - } - } + /* Report 0/0 (actual/requested) frequency while parked. 
*/ + if (!intel_gt_pm_get_if_awake(gt)) + return; - add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], - intel_gpu_freq(dev_priv, val), - period_ns / 1000); + if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { + u32 val; + + /* + * We take a quick peek here without using forcewake + * so that we don't perturb the system under observation + * (forcewake => !rc6 => increased power use). We expect + * that if the read fails because it is outside of the + * mmio power well, then it will return 0 -- in which + * case we assume the system is running at the intended + * frequency. Fortunately, the read should rarely fail! + */ + val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1); + if (val) + val = intel_rps_get_cagf(rps, val); + else + val = rps->cur_freq; + + add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], + intel_gpu_freq(rps, val), period_ns / 1000); } - if (dev_priv->pmu.enable & - config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { - add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.cur_freq), + if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { + add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], + intel_gpu_freq(rps, rps->cur_freq), period_ns / 1000); } + + intel_gt_pm_put_async(gt); } static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) { struct drm_i915_private *i915 = container_of(hrtimer, struct drm_i915_private, pmu.timer); + struct i915_pmu *pmu = &i915->pmu; + struct intel_gt *gt = &i915->gt; unsigned int period_ns; ktime_t now; - if (!READ_ONCE(i915->pmu.timer_enabled)) + if (!READ_ONCE(pmu->timer_enabled)) return HRTIMER_NORESTART; now = ktime_get(); - period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last)); - i915->pmu.timer_last = now; + period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last)); + pmu->timer_last = now; /* * Strictly speaking the passed in period may not be 100% accurate for @@ -269,8 +415,8 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) * grabbing the forcewake. However the potential error from timer call- * back delay greatly dominates this so we keep it simple. */ - engines_sample(i915, period_ns); - frequency_sample(i915, period_ns); + engines_sample(gt, period_ns); + frequency_sample(gt, period_ns); hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD)); @@ -423,105 +569,11 @@ static int i915_pmu_event_init(struct perf_event *event) return 0; } -static u64 __get_rc6(struct drm_i915_private *i915) -{ - u64 val; - - val = intel_rc6_residency_ns(i915, - IS_VALLEYVIEW(i915) ? - VLV_GT_RENDER_RC6 : - GEN6_GT_GFX_RC6); - - if (HAS_RC6p(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); - - if (HAS_RC6pp(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); - - return val; -} - -static u64 get_rc6(struct drm_i915_private *i915) -{ -#if IS_ENABLED(CONFIG_PM) - struct intel_runtime_pm *rpm = &i915->runtime_pm; - intel_wakeref_t wakeref; - unsigned long flags; - u64 val; - - wakeref = intel_runtime_pm_get_if_in_use(rpm); - if (wakeref) { - val = __get_rc6(i915); - intel_runtime_pm_put(rpm, wakeref); - - /* - * If we are coming back from being runtime suspended we must - * be careful not to report a larger value than returned - * previously. 
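
The accumulation in add_sample_mult() is a time-weighted sum: each timer tick contributes the current frequency in MHz multiplied by the tick length in microseconds, and the read side (further below in __i915_pmu_event_read()) divides by USEC_PER_SEC so the exposed counter advances in MHz-seconds. A compact sketch of that arithmetic, with hypothetical names:

	#include <stdint.h>

	static void freq_tick(uint64_t *acc, uint32_t freq_mhz, uint32_t period_ns)
	{
		*acc += (uint64_t)freq_mhz * (period_ns / 1000);	/* MHz * us */
	}

	static uint64_t freq_counter(uint64_t acc)
	{
		return acc / 1000000;					/* MHz * s */
	}

A tool such as perf can then divide the counter delta by the elapsed time to recover the average frequency over the observed interval.
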
- */ - - spin_lock_irqsave(&i915->pmu.lock, flags); - - if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; - i915->pmu.sample[__I915_SAMPLE_RC6].cur = val; - } else { - val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } - - spin_unlock_irqrestore(&i915->pmu.lock, flags); - } else { - struct device *kdev = rpm->kdev; - - /* - * We are runtime suspended. - * - * Report the delta from when the device was suspended to now, - * on top of the last known real value, as the approximated RC6 - * counter value. - */ - spin_lock_irqsave(&i915->pmu.lock, flags); - - /* - * After the above branch intel_runtime_pm_get_if_in_use failed - * to get the runtime PM reference we cannot assume we are in - * runtime suspend since we can either: a) race with coming out - * of it before we took the power.lock, or b) there are other - * states than suspended which can bring us here. - * - * We need to double-check that we are indeed currently runtime - * suspended and if not we cannot do better than report the last - * known RC6 value. - */ - if (pm_runtime_status_suspended(kdev)) { - val = pm_runtime_suspended_time(kdev); - - if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) - i915->pmu.suspended_time_last = val; - - val -= i915->pmu.suspended_time_last; - val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; - - i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; - } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } else { - val = i915->pmu.sample[__I915_SAMPLE_RC6].cur; - } - - spin_unlock_irqrestore(&i915->pmu.lock, flags); - } - - return val; -#else - return __get_rc6(i915); -#endif -} - static u64 __i915_pmu_event_read(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = &i915->pmu; u64 val = 0; if (is_engine_event(event)) { @@ -544,19 +596,19 @@ static u64 __i915_pmu_event_read(struct perf_event *event) switch (event->attr.config) { case I915_PMU_ACTUAL_FREQUENCY: val = - div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur, + div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur, USEC_PER_SEC /* to MHz */); break; case I915_PMU_REQUESTED_FREQUENCY: val = - div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur, + div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur, USEC_PER_SEC /* to MHz */); break; case I915_PMU_INTERRUPTS: val = count_interrupts(i915); break; case I915_PMU_RC6_RESIDENCY: - val = get_rc6(i915); + val = get_rc6(&i915->gt); break; } } @@ -584,24 +636,35 @@ static void i915_pmu_enable(struct perf_event *event) struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); unsigned int bit = event_enabled_bit(event); + struct i915_pmu *pmu = &i915->pmu; + intel_wakeref_t wakeref; unsigned long flags; - spin_lock_irqsave(&i915->pmu.lock, flags); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + spin_lock_irqsave(&pmu->lock, flags); /* * Update the bitmask of enabled events and increment * the event reference counter. 
*/ - BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS); - GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count)); - GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); - i915->pmu.enable |= BIT_ULL(bit); - i915->pmu.enable_count[bit]++; + BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS); + GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); + GEM_BUG_ON(pmu->enable_count[bit] == ~0); + + if (pmu->enable_count[bit] == 0 && + config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) { + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0; + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + pmu->sleep_last = ktime_get(); + } + + pmu->enable |= BIT_ULL(bit); + pmu->enable_count[bit]++; /* * Start the sampling timer if needed and not already enabled. */ - __i915_pmu_maybe_start_timer(i915); + __i915_pmu_maybe_start_timer(pmu); /* * For per-engine events the bitmask and reference counting @@ -627,7 +690,7 @@ static void i915_pmu_enable(struct perf_event *event) engine->pmu.enable_count[sample]++; } - spin_unlock_irqrestore(&i915->pmu.lock, flags); + spin_unlock_irqrestore(&pmu->lock, flags); /* * Store the current counter value so we can report the correct delta @@ -635,6 +698,8 @@ static void i915_pmu_enable(struct perf_event *event) * an existing non-zero value. */ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); } static void i915_pmu_disable(struct perf_event *event) @@ -642,9 +707,10 @@ static void i915_pmu_disable(struct perf_event *event) struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); unsigned int bit = event_enabled_bit(event); + struct i915_pmu *pmu = &i915->pmu; unsigned long flags; - spin_lock_irqsave(&i915->pmu.lock, flags); + spin_lock_irqsave(&pmu->lock, flags); if (is_engine_event(event)) { u8 sample = engine_event_sample(event); @@ -666,18 +732,18 @@ static void i915_pmu_disable(struct perf_event *event) engine->pmu.enable &= ~BIT(sample); } - GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count)); - GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); + GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); + GEM_BUG_ON(pmu->enable_count[bit] == 0); /* * Decrement the reference count and clear the enabled * bitmask when the last listener on an event goes away. 
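
The enable bookkeeping in i915_pmu_enable()/i915_pmu_disable() above is a per-bit reference count guarding a shared bitmask: the first listener for an event sets its bit, the last one clears it, and pmu_needs_timer() consults the mask to decide whether the sampling timer must keep running. A stripped-down sketch of the pattern (hypothetical structure, locking omitted):

	#include <stdint.h>

	#define N_EVENT_BITS 32

	struct pmu_enable {
		uint64_t mask;				/* set while any listener exists */
		unsigned int count[N_EVENT_BITS];	/* listeners per event bit */
	};

	static void event_enable(struct pmu_enable *p, unsigned int bit)
	{
		if (p->count[bit]++ == 0)		/* first listener turns the bit on */
			p->mask |= UINT64_C(1) << bit;
	}

	static void event_disable(struct pmu_enable *p, unsigned int bit)
	{
		if (--p->count[bit] == 0)		/* last listener turns it off */
			p->mask &= ~(UINT64_C(1) << bit);
	}
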
*/ - if (--i915->pmu.enable_count[bit] == 0) { - i915->pmu.enable &= ~BIT_ULL(bit); - i915->pmu.timer_enabled &= pmu_needs_timer(i915, true); + if (--pmu->enable_count[bit] == 0) { + pmu->enable &= ~BIT_ULL(bit); + pmu->timer_enabled &= pmu_needs_timer(pmu, true); } - spin_unlock_irqrestore(&i915->pmu.lock, flags); + spin_unlock_irqrestore(&pmu->lock, flags); } static void i915_pmu_event_start(struct perf_event *event, int flags) @@ -826,15 +892,16 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name, } static struct attribute ** -create_event_attributes(struct drm_i915_private *i915) +create_event_attributes(struct i915_pmu *pmu) { + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); static const struct { u64 config; const char *name; const char *unit; } events[] = { - __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"), - __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"), + __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"), + __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"), __event(I915_PMU_INTERRUPTS, "interrupts", NULL), __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), }; @@ -851,7 +918,6 @@ create_event_attributes(struct drm_i915_private *i915) struct i915_ext_attribute *i915_attr = NULL, *i915_iter; struct attribute **attr = NULL, **attr_iter; struct intel_engine_cs *engine; - enum intel_engine_id id; unsigned int i; /* Count how many counters we will be exposing. */ @@ -860,7 +926,7 @@ create_event_attributes(struct drm_i915_private *i915) count++; } - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { for (i = 0; i < ARRAY_SIZE(engine_events); i++) { if (!engine_event_status(engine, engine_events[i].sample)) @@ -911,7 +977,7 @@ create_event_attributes(struct drm_i915_private *i915) } /* Initialize supported engine counters. 
*/ - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { for (i = 0; i < ARRAY_SIZE(engine_events); i++) { char *str; @@ -928,7 +994,7 @@ create_event_attributes(struct drm_i915_private *i915) i915_iter = add_i915_attr(i915_iter, str, __I915_PMU_ENGINE(engine->uabi_class, - engine->instance, + engine->uabi_instance, engine_events[i].sample)); str = kasprintf(GFP_KERNEL, "%s-%s.unit", @@ -941,8 +1007,8 @@ create_event_attributes(struct drm_i915_private *i915) } } - i915->pmu.i915_attr = i915_attr; - i915->pmu.pmu_attr = pmu_attr; + pmu->i915_attr = i915_attr; + pmu->pmu_attr = pmu_attr; return attr; @@ -958,7 +1024,7 @@ err_alloc: return NULL; } -static void free_event_attributes(struct drm_i915_private *i915) +static void free_event_attributes(struct i915_pmu *pmu) { struct attribute **attr_iter = i915_pmu_events_attr_group.attrs; @@ -966,12 +1032,12 @@ static void free_event_attributes(struct drm_i915_private *i915) kfree((*attr_iter)->name); kfree(i915_pmu_events_attr_group.attrs); - kfree(i915->pmu.i915_attr); - kfree(i915->pmu.pmu_attr); + kfree(pmu->i915_attr); + kfree(pmu->pmu_attr); i915_pmu_events_attr_group.attrs = NULL; - i915->pmu.i915_attr = NULL; - i915->pmu.pmu_attr = NULL; + pmu->i915_attr = NULL; + pmu->pmu_attr = NULL; } static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) @@ -1008,7 +1074,7 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; -static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915) +static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) { enum cpuhp_state slot; int ret; @@ -1021,7 +1087,7 @@ static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915) return ret; slot = ret; - ret = cpuhp_state_add_instance(slot, &i915->pmu.node); + ret = cpuhp_state_add_instance(slot, &pmu->node); if (ret) { cpuhp_remove_multi_state(slot); return ret; @@ -1031,72 +1097,104 @@ static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915) return 0; } -static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915) +static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) { WARN_ON(cpuhp_slot == CPUHP_INVALID); - WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node)); + WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node)); cpuhp_remove_multi_state(cpuhp_slot); } +static bool is_igp(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + /* IGP is 0000:00:02.0 */ + return pci_domain_nr(pdev->bus) == 0 && + pdev->bus->number == 0 && + PCI_SLOT(pdev->devfn) == 2 && + PCI_FUNC(pdev->devfn) == 0; +} + void i915_pmu_register(struct drm_i915_private *i915) { - int ret; + struct i915_pmu *pmu = &i915->pmu; + int ret = -ENOMEM; if (INTEL_GEN(i915) <= 2) { - DRM_INFO("PMU not supported for this GPU."); + dev_info(i915->drm.dev, "PMU not supported for this GPU."); return; } - i915_pmu_events_attr_group.attrs = create_event_attributes(i915); - if (!i915_pmu_events_attr_group.attrs) { - ret = -ENOMEM; - goto err; + spin_lock_init(&pmu->lock); + hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + pmu->timer.function = i915_sample; + + if (!is_igp(i915)) { + pmu->name = kasprintf(GFP_KERNEL, + "i915_%s", + dev_name(i915->drm.dev)); + if (pmu->name) { + /* tools/perf reserves colons as special. 
*/ + strreplace((char *)pmu->name, ':', '_'); + } + } else { + pmu->name = "i915"; } + if (!pmu->name) + goto err; - i915->pmu.base.attr_groups = i915_pmu_attr_groups; - i915->pmu.base.task_ctx_nr = perf_invalid_context; - i915->pmu.base.event_init = i915_pmu_event_init; - i915->pmu.base.add = i915_pmu_event_add; - i915->pmu.base.del = i915_pmu_event_del; - i915->pmu.base.start = i915_pmu_event_start; - i915->pmu.base.stop = i915_pmu_event_stop; - i915->pmu.base.read = i915_pmu_event_read; - i915->pmu.base.event_idx = i915_pmu_event_event_idx; - - spin_lock_init(&i915->pmu.lock); - hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - i915->pmu.timer.function = i915_sample; - - ret = perf_pmu_register(&i915->pmu.base, "i915", -1); + i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); + if (!i915_pmu_events_attr_group.attrs) + goto err_name; + + pmu->base.attr_groups = i915_pmu_attr_groups; + pmu->base.task_ctx_nr = perf_invalid_context; + pmu->base.event_init = i915_pmu_event_init; + pmu->base.add = i915_pmu_event_add; + pmu->base.del = i915_pmu_event_del; + pmu->base.start = i915_pmu_event_start; + pmu->base.stop = i915_pmu_event_stop; + pmu->base.read = i915_pmu_event_read; + pmu->base.event_idx = i915_pmu_event_event_idx; + + ret = perf_pmu_register(&pmu->base, pmu->name, -1); if (ret) - goto err; + goto err_attr; - ret = i915_pmu_register_cpuhp_state(i915); + ret = i915_pmu_register_cpuhp_state(pmu); if (ret) goto err_unreg; return; err_unreg: - perf_pmu_unregister(&i915->pmu.base); + perf_pmu_unregister(&pmu->base); +err_attr: + pmu->base.event_init = NULL; + free_event_attributes(pmu); +err_name: + if (!is_igp(i915)) + kfree(pmu->name); err: - i915->pmu.base.event_init = NULL; - free_event_attributes(i915); - DRM_NOTE("Failed to register PMU! (err=%d)\n", ret); + dev_notice(i915->drm.dev, "Failed to register PMU!\n"); } void i915_pmu_unregister(struct drm_i915_private *i915) { - if (!i915->pmu.base.event_init) + struct i915_pmu *pmu = &i915->pmu; + + if (!pmu->base.event_init) return; - WARN_ON(i915->pmu.enable); + WARN_ON(pmu->enable); - hrtimer_cancel(&i915->pmu.timer); + hrtimer_cancel(&pmu->timer); - i915_pmu_unregister_cpuhp_state(i915); + i915_pmu_unregister_cpuhp_state(pmu); - perf_pmu_unregister(&i915->pmu.base); - i915->pmu.base.event_init = NULL; - free_event_attributes(i915); + perf_pmu_unregister(&pmu->base); + pmu->base.event_init = NULL; + if (!is_igp(i915)) + kfree(pmu->name); + free_event_attributes(pmu); } diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 4fc4f2478301..6c1647c5daf2 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -18,7 +18,7 @@ enum { __I915_SAMPLE_FREQ_ACT = 0, __I915_SAMPLE_FREQ_REQ, __I915_SAMPLE_RC6, - __I915_SAMPLE_RC6_ESTIMATED, + __I915_SAMPLE_RC6_LAST_REPORTED, __I915_NUM_PMU_SAMPLERS }; @@ -47,6 +47,10 @@ struct i915_pmu { */ struct pmu base; /** + * @name: Name as registered with perf core. + */ + const char *name; + /** * @lock: Lock protecting enable mask and ref count handling. */ spinlock_t lock; @@ -97,9 +101,9 @@ struct i915_pmu { */ struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; /** - * @suspended_time_last: Cached suspend time from PM core. + * @sleep_last: Last time GT parked for RC6 estimation. */ - u64 suspended_time_last; + ktime_t sleep_last; /** * @i915_attr: Memory block holding device attributes. 
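
With multiple GPUs the PMU can no longer always register as plain "i915", so i915_pmu_register() above derives a name from the PCI device and replaces the colons that perf treats as special. Roughly, as a userspace-style sketch (the helper name is made up):

	#include <stdio.h>
	#include <string.h>

	static void make_pmu_name(char *dst, size_t len, const char *pci_name)
	{
		snprintf(dst, len, "i915_%s", pci_name);
		for (char *p = dst; (p = strchr(p, ':')) != NULL; )
			*p = '_';		/* tools/perf reserves ':' */
	}

So "0000:03:00.0" becomes "i915_0000_03_00.0", while the integrated GPU at 0000:00:02.0 keeps the historical "i915" name via is_igp().
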
*/ diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 49709de69875..732aad148881 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -17,7 +17,11 @@ enum { I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, - I915_PRIORITY_INVALID = INT_MIN + /* A preemptive pulse used to monitor the health of each engine */ + I915_PRIORITY_HEARTBEAT, + + /* Interactive workload, scheduled for immediate pageflipping */ + I915_PRIORITY_DISPLAY, }; #define I915_USER_PRIORITY_SHIFT 2 @@ -29,6 +33,20 @@ enum { #define I915_PRIORITY_WAIT ((u8)BIT(0)) #define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(1)) +/* Smallest priority value that cannot be bumped. */ +#define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK) + +/* + * Requests containing performance queries must not be preempted by + * another context. They get scheduled with their default priority and + * once they reach the execlist ports we ensure that they stick on the + * HW until finished by pretending that they have maximum priority, + * i.e. nothing can have higher priority and force us to usurp the + * active request. + */ +#define I915_PRIORITY_UNPREEMPTABLE INT_MAX +#define I915_PRIORITY_BARRIER INT_MAX + #define __NO_PREEMPTION (I915_PRIORITY_WAIT) struct i915_priolist { diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h index 969e514916ab..683e97ac2430 100644 --- a/drivers/gpu/drm/i915/i915_pvinfo.h +++ b/drivers/gpu/drm/i915/i915_pvinfo.h @@ -24,6 +24,8 @@ #ifndef _I915_PVINFO_H_ #define _I915_PVINFO_H_ +#include <linux/types.h> + /* The MMIO offset of the shared info between guest and host emulator */ #define VGT_PVINFO_PAGE 0x78000 #define VGT_PVINFO_SIZE 0x1000 @@ -110,8 +112,9 @@ struct vgt_if { u32 rsv7[0x200 - 24]; /* pad to one page */ } __packed; -#define vgtif_reg(x) \ - _MMIO((VGT_PVINFO_PAGE + offsetof(struct vgt_if, x))) +#define vgtif_offset(x) (offsetof(struct vgt_if, x)) + +#define vgtif_reg(x) _MMIO(VGT_PVINFO_PAGE + vgtif_offset(x)) /* vGPU display status to be used by the host side */ #define VGT_DRV_DISPLAY_NOT_READY 0 diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 7b7016171057..ef25ce6e395e 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -7,6 +7,7 @@ #include <linux/nospec.h> #include "i915_drv.h" +#include "i915_perf.h" #include "i915_query.h" #include <uapi/drm/i915_drm.h> @@ -37,8 +38,6 @@ static int query_topology_info(struct drm_i915_private *dev_priv, const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; struct drm_i915_query_topology_info topo; u32 slice_length, subslice_length, eu_length, total_length; - u8 subslice_stride = GEN_SSEU_STRIDE(sseu->max_subslices); - u8 eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); int ret; if (query_item->flags != 0) @@ -50,8 +49,8 @@ static int query_topology_info(struct drm_i915_private *dev_priv, BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask)); slice_length = sizeof(sseu->slice_mask); - subslice_length = sseu->max_slices * subslice_stride; - eu_length = sseu->max_slices * sseu->max_subslices * eu_stride; + subslice_length = sseu->max_slices * sseu->ss_stride; + eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride; total_length = sizeof(topo) + slice_length + subslice_length + eu_length; @@ -69,9 +68,9 @@ static int query_topology_info(struct 
drm_i915_private *dev_priv, topo.max_eus_per_subslice = sseu->max_eus_per_subslice; topo.subslice_offset = slice_length; - topo.subslice_stride = subslice_stride; + topo.subslice_stride = sseu->ss_stride; topo.eu_offset = slice_length + subslice_length; - topo.eu_stride = eu_stride; + topo.eu_stride = sseu->eu_stride; if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr), &topo, sizeof(topo))) @@ -104,16 +103,18 @@ query_engine_info(struct drm_i915_private *i915, struct drm_i915_engine_info __user *info_ptr; struct drm_i915_query_engine_info query; struct drm_i915_engine_info info = { }; + unsigned int num_uabi_engines = 0; struct intel_engine_cs *engine; - enum intel_engine_id id; int len, ret; if (query_item->flags) return -EINVAL; + for_each_uabi_engine(engine, i915) + num_uabi_engines++; + len = sizeof(struct drm_i915_query_engine_info) + - RUNTIME_INFO(i915)->num_engines * - sizeof(struct drm_i915_engine_info); + num_uabi_engines * sizeof(struct drm_i915_engine_info); ret = copy_query_item(&query, sizeof(query), len, query_item); if (ret != 0) @@ -125,9 +126,9 @@ query_engine_info(struct drm_i915_private *i915, info_ptr = &query_ptr->engines[0]; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { info.engine.engine_class = engine->uabi_class; - info.engine.engine_instance = engine->instance; + info.engine.engine_instance = engine->uabi_instance; info.capabilities = engine->uabi_capabilities; if (__copy_to_user(info_ptr, &info, sizeof(info))) @@ -143,10 +144,305 @@ query_engine_info(struct drm_i915_private *i915, return len; } +static int can_copy_perf_config_registers_or_number(u32 user_n_regs, + u64 user_regs_ptr, + u32 kernel_n_regs) +{ + /* + * We'll just put the number of registers, and won't copy the + * register. 
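
The perf-config queries added below follow the same two-pass convention as the other DRM_IOCTL_I915_QUERY items: a zero-length item makes the kernel report the required buffer size, and a second call with data_ptr set fetches the data. A hedged userspace sketch (error handling trimmed, assumes the i915 uAPI headers):

	#include <stdint.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static void *query_item(int fd, uint64_t id, int32_t *len)
	{
		struct drm_i915_query_item item = { .query_id = id };
		struct drm_i915_query q = {
			.num_items = 1,
			.items_ptr = (uintptr_t)&item,
		};
		void *buf;

		ioctl(fd, DRM_IOCTL_I915_QUERY, &q);	/* pass 1: item.length = size */
		buf = malloc(item.length);
		item.data_ptr = (uintptr_t)buf;
		*len = item.length;
		ioctl(fd, DRM_IOCTL_I915_QUERY, &q);	/* pass 2: fill the buffer */

		return buf;
	}
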
+ */ + if (user_n_regs == 0) + return 0; + + if (user_n_regs < kernel_n_regs) + return -EINVAL; + + if (!access_ok(u64_to_user_ptr(user_regs_ptr), + 2 * sizeof(u32) * kernel_n_regs)) + return -EFAULT; + + return 0; +} + +static int copy_perf_config_registers_or_number(const struct i915_oa_reg *kernel_regs, + u32 kernel_n_regs, + u64 user_regs_ptr, + u32 *user_n_regs) +{ + u32 r; + + if (*user_n_regs == 0) { + *user_n_regs = kernel_n_regs; + return 0; + } + + *user_n_regs = kernel_n_regs; + + for (r = 0; r < kernel_n_regs; r++) { + u32 __user *user_reg_ptr = + u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2); + u32 __user *user_val_ptr = + u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 + + sizeof(u32)); + int ret; + + ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr), + user_reg_ptr); + if (ret) + return -EFAULT; + + ret = __put_user(kernel_regs[r].value, user_val_ptr); + if (ret) + return -EFAULT; + } + + return 0; +} + +static int query_perf_config_data(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item, + bool use_uuid) +{ + struct drm_i915_query_perf_config __user *user_query_config_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct drm_i915_perf_oa_config __user *user_config_ptr = + u64_to_user_ptr(query_item->data_ptr + + sizeof(struct drm_i915_query_perf_config)); + struct drm_i915_perf_oa_config user_config; + struct i915_perf *perf = &i915->perf; + struct i915_oa_config *oa_config; + char uuid[UUID_STRING_LEN + 1]; + u64 config_id; + u32 flags, total_size; + int ret; + + if (!perf->i915) + return -ENODEV; + + total_size = + sizeof(struct drm_i915_query_perf_config) + + sizeof(struct drm_i915_perf_oa_config); + + if (query_item->length == 0) + return total_size; + + if (query_item->length < total_size) { + DRM_DEBUG("Invalid query config data item size=%u expected=%u\n", + query_item->length, total_size); + return -EINVAL; + } + + if (!access_ok(user_query_config_ptr, total_size)) + return -EFAULT; + + if (__get_user(flags, &user_query_config_ptr->flags)) + return -EFAULT; + + if (flags != 0) + return -EINVAL; + + if (use_uuid) { + struct i915_oa_config *tmp; + int id; + + BUILD_BUG_ON(sizeof(user_query_config_ptr->uuid) >= sizeof(uuid)); + + memset(&uuid, 0, sizeof(uuid)); + if (__copy_from_user(uuid, user_query_config_ptr->uuid, + sizeof(user_query_config_ptr->uuid))) + return -EFAULT; + + oa_config = NULL; + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, uuid)) { + oa_config = i915_oa_config_get(tmp); + break; + } + } + rcu_read_unlock(); + } else { + if (__get_user(config_id, &user_query_config_ptr->config)) + return -EFAULT; + + oa_config = i915_perf_get_oa_config(perf, config_id); + } + if (!oa_config) + return -ENOENT; + + if (__copy_from_user(&user_config, user_config_ptr, + sizeof(user_config))) { + ret = -EFAULT; + goto out; + } + + ret = can_copy_perf_config_registers_or_number(user_config.n_boolean_regs, + user_config.boolean_regs_ptr, + oa_config->b_counter_regs_len); + if (ret) + goto out; + + ret = can_copy_perf_config_registers_or_number(user_config.n_flex_regs, + user_config.flex_regs_ptr, + oa_config->flex_regs_len); + if (ret) + goto out; + + ret = can_copy_perf_config_registers_or_number(user_config.n_mux_regs, + user_config.mux_regs_ptr, + oa_config->mux_regs_len); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->b_counter_regs, + oa_config->b_counter_regs_len, + user_config.boolean_regs_ptr, + &user_config.n_boolean_regs); + if (ret) + goto 
out; + + ret = copy_perf_config_registers_or_number(oa_config->flex_regs, + oa_config->flex_regs_len, + user_config.flex_regs_ptr, + &user_config.n_flex_regs); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->mux_regs, + oa_config->mux_regs_len, + user_config.mux_regs_ptr, + &user_config.n_mux_regs); + if (ret) + goto out; + + memcpy(user_config.uuid, oa_config->uuid, sizeof(user_config.uuid)); + + if (__copy_to_user(user_config_ptr, &user_config, + sizeof(user_config))) { + ret = -EFAULT; + goto out; + } + + ret = total_size; + +out: + i915_oa_config_put(oa_config); + return ret; +} + +static size_t sizeof_perf_config_list(size_t count) +{ + return sizeof(struct drm_i915_query_perf_config) + sizeof(u64) * count; +} + +static size_t sizeof_perf_metrics(struct i915_perf *perf) +{ + struct i915_oa_config *tmp; + size_t i; + int id; + + i = 1; + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) + i++; + rcu_read_unlock(); + + return sizeof_perf_config_list(i); +} + +static int query_perf_config_list(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + struct drm_i915_query_perf_config __user *user_query_config_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct i915_perf *perf = &i915->perf; + u64 *oa_config_ids = NULL; + int alloc, n_configs; + u32 flags; + int ret; + + if (!perf->i915) + return -ENODEV; + + if (query_item->length == 0) + return sizeof_perf_metrics(perf); + + if (get_user(flags, &user_query_config_ptr->flags)) + return -EFAULT; + + if (flags != 0) + return -EINVAL; + + n_configs = 1; + do { + struct i915_oa_config *tmp; + u64 *ids; + int id; + + ids = krealloc(oa_config_ids, + n_configs * sizeof(*oa_config_ids), + GFP_KERNEL); + if (!ids) + return -ENOMEM; + + alloc = fetch_and_zero(&n_configs); + + ids[n_configs++] = 1ull; /* reserved for test_config */ + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) { + if (n_configs < alloc) + ids[n_configs] = id; + n_configs++; + } + rcu_read_unlock(); + + oa_config_ids = ids; + } while (n_configs > alloc); + + if (query_item->length < sizeof_perf_config_list(n_configs)) { + DRM_DEBUG("Invalid query config list item size=%u expected=%zu\n", + query_item->length, + sizeof_perf_config_list(n_configs)); + kfree(oa_config_ids); + return -EINVAL; + } + + if (put_user(n_configs, &user_query_config_ptr->config)) { + kfree(oa_config_ids); + return -EFAULT; + } + + ret = copy_to_user(user_query_config_ptr + 1, + oa_config_ids, + n_configs * sizeof(*oa_config_ids)); + kfree(oa_config_ids); + if (ret) + return -EFAULT; + + return sizeof_perf_config_list(n_configs); +} + +static int query_perf_config(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + switch (query_item->flags) { + case DRM_I915_QUERY_PERF_CONFIG_LIST: + return query_perf_config_list(i915, query_item); + case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID: + return query_perf_config_data(i915, query_item, true); + case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID: + return query_perf_config_data(i915, query_item, false); + default: + return -EINVAL; + } +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { query_topology_info, query_engine_info, + query_perf_config, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d6483b5dc8e5..6cc55c103f67 100644 --- a/drivers/gpu/drm/i915/i915_reg.h 
+++ b/drivers/gpu/drm/i915/i915_reg.h @@ -242,6 +242,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) +#define _MMIO_PLL3(pll, a, b, c) _MMIO(_PICK(pll, a, b, c)) /* * Device info offset array based helpers for groups of registers with unevenly @@ -250,9 +251,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_PIPE2(pipe, reg) _MMIO(INTEL_INFO(dev_priv)->pipe_offsets[pipe] - \ INTEL_INFO(dev_priv)->pipe_offsets[PIPE_A] + (reg) + \ DISPLAY_MMIO_BASE(dev_priv)) -#define _MMIO_TRANS2(pipe, reg) _MMIO(INTEL_INFO(dev_priv)->trans_offsets[(pipe)] - \ - INTEL_INFO(dev_priv)->trans_offsets[TRANSCODER_A] + (reg) + \ - DISPLAY_MMIO_BASE(dev_priv)) +#define _TRANS2(tran, reg) (INTEL_INFO(dev_priv)->trans_offsets[(tran)] - \ + INTEL_INFO(dev_priv)->trans_offsets[TRANSCODER_A] + (reg) + \ + DISPLAY_MMIO_BASE(dev_priv)) +#define _MMIO_TRANS2(tran, reg) _MMIO(_TRANS2(tran, reg)) #define _CURSOR2(pipe, reg) _MMIO(INTEL_INFO(dev_priv)->cursor_offsets[(pipe)] - \ INTEL_INFO(dev_priv)->cursor_offsets[PIPE_A] + (reg) + \ DISPLAY_MMIO_BASE(dev_priv)) @@ -270,30 +272,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MASKED_BIT_ENABLE(a) ({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); }) #define _MASKED_BIT_DISABLE(a) (_MASKED_FIELD((a), 0)) -/* Engine ID */ - -#define RCS0_HW 0 -#define VCS0_HW 1 -#define BCS0_HW 2 -#define VECS0_HW 3 -#define VCS1_HW 4 -#define VCS2_HW 6 -#define VCS3_HW 7 -#define VECS1_HW 12 - -/* Engine class */ - -#define RENDER_CLASS 0 -#define VIDEO_DECODE_CLASS 1 -#define VIDEO_ENHANCEMENT_CLASS 2 -#define COPY_ENGINE_CLASS 3 -#define OTHER_CLASS 4 -#define MAX_ENGINE_CLASS 4 - -#define OTHER_GUC_INSTANCE 0 -#define OTHER_GTPM_INSTANCE 1 -#define MAX_ENGINE_INSTANCE 3 - /* PCI config space */ #define MCHBAR_I915 0x44 @@ -435,6 +413,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_VECS_SFC_USAGE(engine) _MMIO((engine)->mmio_base + 0x2014) #define GEN11_VECS_SFC_USAGE_BIT (1 << 0) +#define GEN12_SFC_DONE(n) _MMIO(0x1cc00 + (n) * 0x100) +#define GEN12_SFC_DONE_MAX 4 + #define RING_PP_DIR_BASE(base) _MMIO((base) + 0x228) #define RING_PP_DIR_BASE_READ(base) _MMIO((base) + 0x518) #define RING_PP_DIR_DCLV(base) _MMIO((base) + 0x220) @@ -493,6 +474,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ECOCHK_PPGTT_WT_HSW (0x2 << 3) #define ECOCHK_PPGTT_WB_HSW (0x3 << 3) +#define GEN8_RC6_CTX_INFO _MMIO(0x8504) + #define GAC_ECO_BITS _MMIO(0x14090) #define ECOBITS_SNB_BIT (1 << 13) #define ECOBITS_PPGTT_CACHE64B (3 << 8) @@ -567,7 +550,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MI_PREDICATE_SRC0_UDW _MMIO(0x2400 + 4) #define MI_PREDICATE_SRC1 _MMIO(0x2408) #define MI_PREDICATE_SRC1_UDW _MMIO(0x2408 + 4) - +#define MI_PREDICATE_DATA _MMIO(0x2410) +#define MI_PREDICATE_RESULT _MMIO(0x2418) +#define MI_PREDICATE_RESULT_1 _MMIO(0x241c) #define MI_PREDICATE_RESULT_2 _MMIO(0x2214) #define LOWER_SLICE_ENABLED (1 << 0) #define LOWER_SLICE_DISABLED (0 << 0) @@ -577,6 +562,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) */ #define BCS_SWCTRL _MMIO(0x22200) +/* There are 16 GPR registers */ +#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8) +#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4) + #define GPGPU_THREADS_DISPATCHED _MMIO(0x2290) #define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4) #define 
HS_INVOCATION_COUNT _MMIO(0x2300) @@ -704,6 +693,45 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OABUFFER_SIZE_8M (6 << 3) #define OABUFFER_SIZE_16M (7 << 3) +/* Gen12 OAR unit */ +#define GEN12_OAR_OACONTROL _MMIO(0x2960) +#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1 +#define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0) + +#define GEN12_OACTXCONTROL _MMIO(0x2360) +#define GEN12_OAR_OASTATUS _MMIO(0x2968) + +/* Gen12 OAG unit */ +#define GEN12_OAG_OAHEADPTR _MMIO(0xdb00) +#define GEN12_OAG_OAHEADPTR_MASK 0xffffffc0 +#define GEN12_OAG_OATAILPTR _MMIO(0xdb04) +#define GEN12_OAG_OATAILPTR_MASK 0xffffffc0 + +#define GEN12_OAG_OABUFFER _MMIO(0xdb08) +#define GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK (0x7) +#define GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3) +#define GEN12_OAG_OABUFFER_MEMORY_SELECT (1 << 0) /* 0: PPGTT, 1: GGTT */ + +#define GEN12_OAG_OAGLBCTXCTRL _MMIO(0x2b28) +#define GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2 +#define GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE (1 << 1) +#define GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME (1 << 0) + +#define GEN12_OAG_OACONTROL _MMIO(0xdaf4) +#define GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2 +#define GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE (1 << 0) + +#define GEN12_OAG_OA_DEBUG _MMIO(0xdaf8) +#define GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO (1 << 6) +#define GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS (1 << 5) +#define GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS (1 << 2) +#define GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1 << 1) + +#define GEN12_OAG_OASTATUS _MMIO(0xdafc) +#define GEN12_OAG_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define GEN12_OAG_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define GEN12_OAG_OASTATUS_REPORT_LOST (1 << 0) + /* * Flexible, Aggregate EU Counter Registers. * Note: these aren't contiguous @@ -940,6 +968,26 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OAREPORTTRIG8_NOA_SELECT_6_SHIFT 24 #define OAREPORTTRIG8_NOA_SELECT_7_SHIFT 28 +/* Same layout as OASTARTTRIGX */ +#define GEN12_OAG_OASTARTTRIG1 _MMIO(0xd900) +#define GEN12_OAG_OASTARTTRIG2 _MMIO(0xd904) +#define GEN12_OAG_OASTARTTRIG3 _MMIO(0xd908) +#define GEN12_OAG_OASTARTTRIG4 _MMIO(0xd90c) +#define GEN12_OAG_OASTARTTRIG5 _MMIO(0xd910) +#define GEN12_OAG_OASTARTTRIG6 _MMIO(0xd914) +#define GEN12_OAG_OASTARTTRIG7 _MMIO(0xd918) +#define GEN12_OAG_OASTARTTRIG8 _MMIO(0xd91c) + +/* Same layout as OAREPORTTRIGX */ +#define GEN12_OAG_OAREPORTTRIG1 _MMIO(0xd920) +#define GEN12_OAG_OAREPORTTRIG2 _MMIO(0xd924) +#define GEN12_OAG_OAREPORTTRIG3 _MMIO(0xd928) +#define GEN12_OAG_OAREPORTTRIG4 _MMIO(0xd92c) +#define GEN12_OAG_OAREPORTTRIG5 _MMIO(0xd930) +#define GEN12_OAG_OAREPORTTRIG6 _MMIO(0xd934) +#define GEN12_OAG_OAREPORTTRIG7 _MMIO(0xd938) +#define GEN12_OAG_OAREPORTTRIG8 _MMIO(0xd93c) + /* CECX_0 */ #define OACEC_COMPARE_LESS_OR_EQUAL 6 #define OACEC_COMPARE_NOT_EQUAL 5 @@ -956,6 +1004,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC_SELECT_PREV (1 << 19) #define OACEC_SELECT_BOOLEAN (2 << 19) +/* 11-bit array 0: pass-through, 1: negated */ +#define GEN12_OASCEC_NEGATE_MASK 0x7ff +#define GEN12_OASCEC_NEGATE_SHIFT 21 + /* CECX_1 */ #define OACEC_MASK_MASK 0xffff #define OACEC_CONSIDERATIONS_MASK 0xffff @@ -978,6 +1030,42 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC7_0 _MMIO(0x27a8) #define OACEC7_1 _MMIO(0x27ac) +/* Same layout as CECX_Y */ +#define GEN12_OAG_CEC0_0 _MMIO(0xd940) +#define GEN12_OAG_CEC0_1 _MMIO(0xd944) +#define GEN12_OAG_CEC1_0 _MMIO(0xd948) +#define GEN12_OAG_CEC1_1 _MMIO(0xd94c) 
+#define GEN12_OAG_CEC2_0 _MMIO(0xd950) +#define GEN12_OAG_CEC2_1 _MMIO(0xd954) +#define GEN12_OAG_CEC3_0 _MMIO(0xd958) +#define GEN12_OAG_CEC3_1 _MMIO(0xd95c) +#define GEN12_OAG_CEC4_0 _MMIO(0xd960) +#define GEN12_OAG_CEC4_1 _MMIO(0xd964) +#define GEN12_OAG_CEC5_0 _MMIO(0xd968) +#define GEN12_OAG_CEC5_1 _MMIO(0xd96c) +#define GEN12_OAG_CEC6_0 _MMIO(0xd970) +#define GEN12_OAG_CEC6_1 _MMIO(0xd974) +#define GEN12_OAG_CEC7_0 _MMIO(0xd978) +#define GEN12_OAG_CEC7_1 _MMIO(0xd97c) + +/* Same layout as CECX_Y + negate 11-bit array */ +#define GEN12_OAG_SCEC0_0 _MMIO(0xdc00) +#define GEN12_OAG_SCEC0_1 _MMIO(0xdc04) +#define GEN12_OAG_SCEC1_0 _MMIO(0xdc08) +#define GEN12_OAG_SCEC1_1 _MMIO(0xdc0c) +#define GEN12_OAG_SCEC2_0 _MMIO(0xdc10) +#define GEN12_OAG_SCEC2_1 _MMIO(0xdc14) +#define GEN12_OAG_SCEC3_0 _MMIO(0xdc18) +#define GEN12_OAG_SCEC3_1 _MMIO(0xdc1c) +#define GEN12_OAG_SCEC4_0 _MMIO(0xdc20) +#define GEN12_OAG_SCEC4_1 _MMIO(0xdc24) +#define GEN12_OAG_SCEC5_0 _MMIO(0xdc28) +#define GEN12_OAG_SCEC5_1 _MMIO(0xdc2c) +#define GEN12_OAG_SCEC6_0 _MMIO(0xdc30) +#define GEN12_OAG_SCEC6_1 _MMIO(0xdc34) +#define GEN12_OAG_SCEC7_0 _MMIO(0xdc38) +#define GEN12_OAG_SCEC7_1 _MMIO(0xdc3c) + /* OA perf counters */ #define OA_PERFCNT1_LO _MMIO(0x91B8) #define OA_PERFCNT1_HI _MMIO(0x91BC) @@ -1058,6 +1146,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MICRO_BP3_COUNT_STATUS23 _MMIO(0x9838) #define MICRO_BP_FIRED_ARMED _MMIO(0x983C) +#define GEN12_OAA_DBG_REG _MMIO(0xdc44) +#define GEN12_OAG_OA_PESS _MMIO(0x2b2c) +#define GEN12_OAG_SPCTR_CNF _MMIO(0xdc40) + #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 @@ -1161,27 +1253,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define PUNIT_REG_ISPSSPM0 0x39 #define PUNIT_REG_ISPSSPM1 0x3a -/* - * i915_power_well_id: - * - * IDs used to look up power wells. Power wells accessed directly bypassing - * the power domains framework must be assigned a unique ID. The rest of power - * wells must be assigned DISP_PW_ID_NONE. 
- */ -enum i915_power_well_id { - DISP_PW_ID_NONE, - - VLV_DISP_PW_DISP2D, - BXT_DISP_PW_DPIO_CMN_A, - VLV_DISP_PW_DPIO_CMN_BC, - GLK_DISP_PW_DPIO_CMN_C, - CHV_DISP_PW_DPIO_CMN_D, - HSW_DISP_PW_GLOBAL, - SKL_DISP_PW_MISC_IO, - SKL_DISP_PW_1, - SKL_DISP_PW_2, -}; - #define PUNIT_REG_PWRGT_CTRL 0x60 #define PUNIT_REG_PWRGT_STATUS 0x61 #define PUNIT_PWRGT_MASK(pw_idx) (3 << ((pw_idx) * 2)) @@ -1793,19 +1864,21 @@ enum i915_power_well_id { */ #define _ICL_COMBOPHY_A 0x162000 #define _ICL_COMBOPHY_B 0x6C000 -#define _ICL_COMBOPHY(port) _PICK(port, _ICL_COMBOPHY_A, \ - _ICL_COMBOPHY_B) +#define _EHL_COMBOPHY_C 0x160000 +#define _ICL_COMBOPHY(phy) _PICK(phy, _ICL_COMBOPHY_A, \ + _ICL_COMBOPHY_B, \ + _EHL_COMBOPHY_C) /* CNL/ICL Port CL_DW registers */ -#define _ICL_PORT_CL_DW(dw, port) (_ICL_COMBOPHY(port) + \ +#define _ICL_PORT_CL_DW(dw, phy) (_ICL_COMBOPHY(phy) + \ 4 * (dw)) #define CNL_PORT_CL1CM_DW5 _MMIO(0x162014) -#define ICL_PORT_CL_DW5(port) _MMIO(_ICL_PORT_CL_DW(5, port)) +#define ICL_PORT_CL_DW5(phy) _MMIO(_ICL_PORT_CL_DW(5, phy)) #define CL_POWER_DOWN_ENABLE (1 << 4) #define SUS_CLOCK_CONFIG (3 << 0) -#define ICL_PORT_CL_DW10(port) _MMIO(_ICL_PORT_CL_DW(10, port)) +#define ICL_PORT_CL_DW10(phy) _MMIO(_ICL_PORT_CL_DW(10, phy)) #define PG_SEQ_DELAY_OVERRIDE_MASK (3 << 25) #define PG_SEQ_DELAY_OVERRIDE_SHIFT 25 #define PG_SEQ_DELAY_OVERRIDE_ENABLE (1 << 24) @@ -1820,23 +1893,23 @@ enum i915_power_well_id { #define PWR_DOWN_LN_MASK (0xf << 4) #define PWR_DOWN_LN_SHIFT 4 -#define ICL_PORT_CL_DW12(port) _MMIO(_ICL_PORT_CL_DW(12, port)) +#define ICL_PORT_CL_DW12(phy) _MMIO(_ICL_PORT_CL_DW(12, phy)) #define ICL_LANE_ENABLE_AUX (1 << 0) /* CNL/ICL Port COMP_DW registers */ #define _ICL_PORT_COMP 0x100 -#define _ICL_PORT_COMP_DW(dw, port) (_ICL_COMBOPHY(port) + \ +#define _ICL_PORT_COMP_DW(dw, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_COMP + 4 * (dw)) #define CNL_PORT_COMP_DW0 _MMIO(0x162100) -#define ICL_PORT_COMP_DW0(port) _MMIO(_ICL_PORT_COMP_DW(0, port)) +#define ICL_PORT_COMP_DW0(phy) _MMIO(_ICL_PORT_COMP_DW(0, phy)) #define COMP_INIT (1 << 31) #define CNL_PORT_COMP_DW1 _MMIO(0x162104) -#define ICL_PORT_COMP_DW1(port) _MMIO(_ICL_PORT_COMP_DW(1, port)) +#define ICL_PORT_COMP_DW1(phy) _MMIO(_ICL_PORT_COMP_DW(1, phy)) #define CNL_PORT_COMP_DW3 _MMIO(0x16210c) -#define ICL_PORT_COMP_DW3(port) _MMIO(_ICL_PORT_COMP_DW(3, port)) +#define ICL_PORT_COMP_DW3(phy) _MMIO(_ICL_PORT_COMP_DW(3, phy)) #define PROCESS_INFO_DOT_0 (0 << 26) #define PROCESS_INFO_DOT_1 (1 << 26) #define PROCESS_INFO_DOT_4 (2 << 26) @@ -1848,14 +1921,14 @@ enum i915_power_well_id { #define VOLTAGE_INFO_MASK (3 << 24) #define VOLTAGE_INFO_SHIFT 24 -#define ICL_PORT_COMP_DW8(port) _MMIO(_ICL_PORT_COMP_DW(8, port)) +#define ICL_PORT_COMP_DW8(phy) _MMIO(_ICL_PORT_COMP_DW(8, phy)) #define IREFGEN (1 << 24) #define CNL_PORT_COMP_DW9 _MMIO(0x162124) -#define ICL_PORT_COMP_DW9(port) _MMIO(_ICL_PORT_COMP_DW(9, port)) +#define ICL_PORT_COMP_DW9(phy) _MMIO(_ICL_PORT_COMP_DW(9, phy)) #define CNL_PORT_COMP_DW10 _MMIO(0x162128) -#define ICL_PORT_COMP_DW10(port) _MMIO(_ICL_PORT_COMP_DW(10, port)) +#define ICL_PORT_COMP_DW10(phy) _MMIO(_ICL_PORT_COMP_DW(10, phy)) /* CNL/ICL Port PCS registers */ #define _CNL_PORT_PCS_DW1_GRP_AE 0x162304 @@ -1868,14 +1941,14 @@ enum i915_power_well_id { #define _CNL_PORT_PCS_DW1_LN0_C 0x162C04 #define _CNL_PORT_PCS_DW1_LN0_D 0x162E04 #define _CNL_PORT_PCS_DW1_LN0_F 0x162804 -#define CNL_PORT_PCS_DW1_GRP(port) _MMIO(_PICK(port, \ +#define CNL_PORT_PCS_DW1_GRP(phy) _MMIO(_PICK(phy, \ _CNL_PORT_PCS_DW1_GRP_AE, \ 
_CNL_PORT_PCS_DW1_GRP_B, \ _CNL_PORT_PCS_DW1_GRP_C, \ _CNL_PORT_PCS_DW1_GRP_D, \ _CNL_PORT_PCS_DW1_GRP_AE, \ _CNL_PORT_PCS_DW1_GRP_F)) -#define CNL_PORT_PCS_DW1_LN0(port) _MMIO(_PICK(port, \ +#define CNL_PORT_PCS_DW1_LN0(phy) _MMIO(_PICK(phy, \ _CNL_PORT_PCS_DW1_LN0_AE, \ _CNL_PORT_PCS_DW1_LN0_B, \ _CNL_PORT_PCS_DW1_LN0_C, \ @@ -1886,16 +1959,18 @@ enum i915_power_well_id { #define _ICL_PORT_PCS_AUX 0x300 #define _ICL_PORT_PCS_GRP 0x600 #define _ICL_PORT_PCS_LN(ln) (0x800 + (ln) * 0x100) -#define _ICL_PORT_PCS_DW_AUX(dw, port) (_ICL_COMBOPHY(port) + \ +#define _ICL_PORT_PCS_DW_AUX(dw, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_PCS_AUX + 4 * (dw)) -#define _ICL_PORT_PCS_DW_GRP(dw, port) (_ICL_COMBOPHY(port) + \ +#define _ICL_PORT_PCS_DW_GRP(dw, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_PCS_GRP + 4 * (dw)) -#define _ICL_PORT_PCS_DW_LN(dw, ln, port) (_ICL_COMBOPHY(port) + \ +#define _ICL_PORT_PCS_DW_LN(dw, ln, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_PCS_LN(ln) + 4 * (dw)) -#define ICL_PORT_PCS_DW1_AUX(port) _MMIO(_ICL_PORT_PCS_DW_AUX(1, port)) -#define ICL_PORT_PCS_DW1_GRP(port) _MMIO(_ICL_PORT_PCS_DW_GRP(1, port)) -#define ICL_PORT_PCS_DW1_LN0(port) _MMIO(_ICL_PORT_PCS_DW_LN(1, 0, port)) +#define ICL_PORT_PCS_DW1_AUX(phy) _MMIO(_ICL_PORT_PCS_DW_AUX(1, phy)) +#define ICL_PORT_PCS_DW1_GRP(phy) _MMIO(_ICL_PORT_PCS_DW_GRP(1, phy)) +#define ICL_PORT_PCS_DW1_LN0(phy) _MMIO(_ICL_PORT_PCS_DW_LN(1, 0, phy)) #define COMMON_KEEPER_EN (1 << 26) +#define LATENCY_OPTIM_MASK (0x3 << 2) +#define LATENCY_OPTIM_VAL(x) ((x) << 2) /* CNL/ICL Port TX registers */ #define _CNL_PORT_TX_AE_GRP_OFFSET 0x162340 @@ -1929,18 +2004,18 @@ enum i915_power_well_id { #define _ICL_PORT_TX_GRP 0x680 #define _ICL_PORT_TX_LN(ln) (0x880 + (ln) * 0x100) -#define _ICL_PORT_TX_DW_AUX(dw, port) (_ICL_COMBOPHY(port) + \ +#define _ICL_PORT_TX_DW_AUX(dw, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_TX_AUX + 4 * (dw)) -#define _ICL_PORT_TX_DW_GRP(dw, port) (_ICL_COMBOPHY(port) + \ +#define _ICL_PORT_TX_DW_GRP(dw, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_TX_GRP + 4 * (dw)) -#define _ICL_PORT_TX_DW_LN(dw, ln, port) (_ICL_COMBOPHY(port) + \ +#define _ICL_PORT_TX_DW_LN(dw, ln, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_TX_LN(ln) + 4 * (dw)) #define CNL_PORT_TX_DW2_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(2, port)) #define CNL_PORT_TX_DW2_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(2, port)) -#define ICL_PORT_TX_DW2_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(2, port)) -#define ICL_PORT_TX_DW2_GRP(port) _MMIO(_ICL_PORT_TX_DW_GRP(2, port)) -#define ICL_PORT_TX_DW2_LN0(port) _MMIO(_ICL_PORT_TX_DW_LN(2, 0, port)) +#define ICL_PORT_TX_DW2_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(2, phy)) +#define ICL_PORT_TX_DW2_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(2, phy)) +#define ICL_PORT_TX_DW2_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(2, 0, phy)) #define SWING_SEL_UPPER(x) (((x) >> 3) << 15) #define SWING_SEL_UPPER_MASK (1 << 15) #define SWING_SEL_LOWER(x) (((x) & 0x7) << 11) @@ -1957,10 +2032,10 @@ enum i915_power_well_id { #define CNL_PORT_TX_DW4_LN(ln, port) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port)) + \ ((ln) * (_CNL_PORT_TX_DW4_LN1_AE - \ _CNL_PORT_TX_DW4_LN0_AE))) -#define ICL_PORT_TX_DW4_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(4, port)) -#define ICL_PORT_TX_DW4_GRP(port) _MMIO(_ICL_PORT_TX_DW_GRP(4, port)) -#define ICL_PORT_TX_DW4_LN0(port) _MMIO(_ICL_PORT_TX_DW_LN(4, 0, port)) -#define ICL_PORT_TX_DW4_LN(ln, port) _MMIO(_ICL_PORT_TX_DW_LN(4, ln, port)) +#define ICL_PORT_TX_DW4_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(4, phy)) +#define ICL_PORT_TX_DW4_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(4, phy)) +#define 
ICL_PORT_TX_DW4_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(4, 0, phy)) +#define ICL_PORT_TX_DW4_LN(ln, phy) _MMIO(_ICL_PORT_TX_DW_LN(4, ln, phy)) #define LOADGEN_SELECT (1 << 31) #define POST_CURSOR_1(x) ((x) << 12) #define POST_CURSOR_1_MASK (0x3F << 12) @@ -1971,9 +2046,9 @@ enum i915_power_well_id { #define CNL_PORT_TX_DW5_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(5, port)) #define CNL_PORT_TX_DW5_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(5, port)) -#define ICL_PORT_TX_DW5_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(5, port)) -#define ICL_PORT_TX_DW5_GRP(port) _MMIO(_ICL_PORT_TX_DW_GRP(5, port)) -#define ICL_PORT_TX_DW5_LN0(port) _MMIO(_ICL_PORT_TX_DW_LN(5, 0, port)) +#define ICL_PORT_TX_DW5_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(5, phy)) +#define ICL_PORT_TX_DW5_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(5, phy)) +#define ICL_PORT_TX_DW5_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(5, 0, phy)) #define TX_TRAINING_EN (1 << 31) #define TAP2_DISABLE (1 << 30) #define TAP3_DISABLE (1 << 29) @@ -1984,15 +2059,19 @@ enum i915_power_well_id { #define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(7, (port))) #define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(7, (port))) -#define ICL_PORT_TX_DW7_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(7, port)) -#define ICL_PORT_TX_DW7_GRP(port) _MMIO(_ICL_PORT_TX_DW_GRP(7, port)) -#define ICL_PORT_TX_DW7_LN0(port) _MMIO(_ICL_PORT_TX_DW_LN(7, 0, port)) -#define ICL_PORT_TX_DW7_LN(ln, port) _MMIO(_ICL_PORT_TX_DW_LN(7, ln, port)) +#define ICL_PORT_TX_DW7_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(7, phy)) +#define ICL_PORT_TX_DW7_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(7, phy)) +#define ICL_PORT_TX_DW7_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(7, 0, phy)) +#define ICL_PORT_TX_DW7_LN(ln, phy) _MMIO(_ICL_PORT_TX_DW_LN(7, ln, phy)) #define N_SCALAR(x) ((x) << 24) #define N_SCALAR_MASK (0x7F << 24) -#define MG_PHY_PORT_LN(ln, port, ln0p1, ln0p2, ln1p1) \ - _MMIO(_PORT((port) - PORT_C, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1))) +#define _ICL_DPHY_CHKN_REG 0x194 +#define ICL_DPHY_CHKN(port) _MMIO(_ICL_COMBOPHY(port) + _ICL_DPHY_CHKN_REG) +#define ICL_DPHY_CHKN_AFE_OVER_PPI_STRAP REG_BIT(7) + +#define MG_PHY_PORT_LN(ln, tc_port, ln0p1, ln0p2, ln1p1) \ + _MMIO(_PORT(tc_port, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1))) #define MG_TX_LINK_PARAMS_TX1LN0_PORT1 0x16812C #define MG_TX_LINK_PARAMS_TX1LN1_PORT1 0x16852C @@ -2002,10 +2081,10 @@ enum i915_power_well_id { #define MG_TX_LINK_PARAMS_TX1LN1_PORT3 0x16A52C #define MG_TX_LINK_PARAMS_TX1LN0_PORT4 0x16B12C #define MG_TX_LINK_PARAMS_TX1LN1_PORT4 0x16B52C -#define MG_TX1_LINK_PARAMS(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_LINK_PARAMS_TX1LN0_PORT1, \ - MG_TX_LINK_PARAMS_TX1LN0_PORT2, \ - MG_TX_LINK_PARAMS_TX1LN1_PORT1) +#define MG_TX1_LINK_PARAMS(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_LINK_PARAMS_TX1LN0_PORT1, \ + MG_TX_LINK_PARAMS_TX1LN0_PORT2, \ + MG_TX_LINK_PARAMS_TX1LN1_PORT1) #define MG_TX_LINK_PARAMS_TX2LN0_PORT1 0x1680AC #define MG_TX_LINK_PARAMS_TX2LN1_PORT1 0x1684AC @@ -2015,10 +2094,10 @@ enum i915_power_well_id { #define MG_TX_LINK_PARAMS_TX2LN1_PORT3 0x16A4AC #define MG_TX_LINK_PARAMS_TX2LN0_PORT4 0x16B0AC #define MG_TX_LINK_PARAMS_TX2LN1_PORT4 0x16B4AC -#define MG_TX2_LINK_PARAMS(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_LINK_PARAMS_TX2LN0_PORT1, \ - MG_TX_LINK_PARAMS_TX2LN0_PORT2, \ - MG_TX_LINK_PARAMS_TX2LN1_PORT1) +#define MG_TX2_LINK_PARAMS(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_LINK_PARAMS_TX2LN0_PORT1, \ + MG_TX_LINK_PARAMS_TX2LN0_PORT2, \ + MG_TX_LINK_PARAMS_TX2LN1_PORT1) #define CRI_USE_FS32 (1 << 5) #define 
MG_TX_PISO_READLOAD_TX1LN0_PORT1 0x16814C @@ -2029,10 +2108,10 @@ enum i915_power_well_id { #define MG_TX_PISO_READLOAD_TX1LN1_PORT3 0x16A54C #define MG_TX_PISO_READLOAD_TX1LN0_PORT4 0x16B14C #define MG_TX_PISO_READLOAD_TX1LN1_PORT4 0x16B54C -#define MG_TX1_PISO_READLOAD(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_PISO_READLOAD_TX1LN0_PORT1, \ - MG_TX_PISO_READLOAD_TX1LN0_PORT2, \ - MG_TX_PISO_READLOAD_TX1LN1_PORT1) +#define MG_TX1_PISO_READLOAD(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_PISO_READLOAD_TX1LN0_PORT1, \ + MG_TX_PISO_READLOAD_TX1LN0_PORT2, \ + MG_TX_PISO_READLOAD_TX1LN1_PORT1) #define MG_TX_PISO_READLOAD_TX2LN0_PORT1 0x1680CC #define MG_TX_PISO_READLOAD_TX2LN1_PORT1 0x1684CC @@ -2042,10 +2121,10 @@ enum i915_power_well_id { #define MG_TX_PISO_READLOAD_TX2LN1_PORT3 0x16A4CC #define MG_TX_PISO_READLOAD_TX2LN0_PORT4 0x16B0CC #define MG_TX_PISO_READLOAD_TX2LN1_PORT4 0x16B4CC -#define MG_TX2_PISO_READLOAD(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_PISO_READLOAD_TX2LN0_PORT1, \ - MG_TX_PISO_READLOAD_TX2LN0_PORT2, \ - MG_TX_PISO_READLOAD_TX2LN1_PORT1) +#define MG_TX2_PISO_READLOAD(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_PISO_READLOAD_TX2LN0_PORT1, \ + MG_TX_PISO_READLOAD_TX2LN0_PORT2, \ + MG_TX_PISO_READLOAD_TX2LN1_PORT1) #define CRI_CALCINIT (1 << 1) #define MG_TX_SWINGCTRL_TX1LN0_PORT1 0x168148 @@ -2056,10 +2135,10 @@ enum i915_power_well_id { #define MG_TX_SWINGCTRL_TX1LN1_PORT3 0x16A548 #define MG_TX_SWINGCTRL_TX1LN0_PORT4 0x16B148 #define MG_TX_SWINGCTRL_TX1LN1_PORT4 0x16B548 -#define MG_TX1_SWINGCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_SWINGCTRL_TX1LN0_PORT1, \ - MG_TX_SWINGCTRL_TX1LN0_PORT2, \ - MG_TX_SWINGCTRL_TX1LN1_PORT1) +#define MG_TX1_SWINGCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_SWINGCTRL_TX1LN0_PORT1, \ + MG_TX_SWINGCTRL_TX1LN0_PORT2, \ + MG_TX_SWINGCTRL_TX1LN1_PORT1) #define MG_TX_SWINGCTRL_TX2LN0_PORT1 0x1680C8 #define MG_TX_SWINGCTRL_TX2LN1_PORT1 0x1684C8 @@ -2069,10 +2148,10 @@ enum i915_power_well_id { #define MG_TX_SWINGCTRL_TX2LN1_PORT3 0x16A4C8 #define MG_TX_SWINGCTRL_TX2LN0_PORT4 0x16B0C8 #define MG_TX_SWINGCTRL_TX2LN1_PORT4 0x16B4C8 -#define MG_TX2_SWINGCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_SWINGCTRL_TX2LN0_PORT1, \ - MG_TX_SWINGCTRL_TX2LN0_PORT2, \ - MG_TX_SWINGCTRL_TX2LN1_PORT1) +#define MG_TX2_SWINGCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_SWINGCTRL_TX2LN0_PORT1, \ + MG_TX_SWINGCTRL_TX2LN0_PORT2, \ + MG_TX_SWINGCTRL_TX2LN1_PORT1) #define CRI_TXDEEMPH_OVERRIDE_17_12(x) ((x) << 0) #define CRI_TXDEEMPH_OVERRIDE_17_12_MASK (0x3F << 0) @@ -2084,10 +2163,10 @@ enum i915_power_well_id { #define MG_TX_DRVCTRL_TX1LN1_TXPORT3 0x16A544 #define MG_TX_DRVCTRL_TX1LN0_TXPORT4 0x16B144 #define MG_TX_DRVCTRL_TX1LN1_TXPORT4 0x16B544 -#define MG_TX1_DRVCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DRVCTRL_TX1LN0_TXPORT1, \ - MG_TX_DRVCTRL_TX1LN0_TXPORT2, \ - MG_TX_DRVCTRL_TX1LN1_TXPORT1) +#define MG_TX1_DRVCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DRVCTRL_TX1LN0_TXPORT1, \ + MG_TX_DRVCTRL_TX1LN0_TXPORT2, \ + MG_TX_DRVCTRL_TX1LN1_TXPORT1) #define MG_TX_DRVCTRL_TX2LN0_PORT1 0x1680C4 #define MG_TX_DRVCTRL_TX2LN1_PORT1 0x1684C4 @@ -2097,10 +2176,10 @@ enum i915_power_well_id { #define MG_TX_DRVCTRL_TX2LN1_PORT3 0x16A4C4 #define MG_TX_DRVCTRL_TX2LN0_PORT4 0x16B0C4 #define MG_TX_DRVCTRL_TX2LN1_PORT4 0x16B4C4 -#define MG_TX2_DRVCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DRVCTRL_TX2LN0_PORT1, \ - MG_TX_DRVCTRL_TX2LN0_PORT2, \ - MG_TX_DRVCTRL_TX2LN1_PORT1) +#define MG_TX2_DRVCTRL(ln, 
tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DRVCTRL_TX2LN0_PORT1, \ + MG_TX_DRVCTRL_TX2LN0_PORT2, \ + MG_TX_DRVCTRL_TX2LN1_PORT1) #define CRI_TXDEEMPH_OVERRIDE_11_6(x) ((x) << 24) #define CRI_TXDEEMPH_OVERRIDE_11_6_MASK (0x3F << 24) #define CRI_TXDEEMPH_OVERRIDE_EN (1 << 22) @@ -2117,10 +2196,10 @@ enum i915_power_well_id { #define MG_CLKHUB_LN1_PORT3 0x16A79C #define MG_CLKHUB_LN0_PORT4 0x16B39C #define MG_CLKHUB_LN1_PORT4 0x16B79C -#define MG_CLKHUB(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_CLKHUB_LN0_PORT1, \ - MG_CLKHUB_LN0_PORT2, \ - MG_CLKHUB_LN1_PORT1) +#define MG_CLKHUB(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_CLKHUB_LN0_PORT1, \ + MG_CLKHUB_LN0_PORT2, \ + MG_CLKHUB_LN1_PORT1) #define CFG_LOW_RATE_LKREN_EN (1 << 11) #define MG_TX_DCC_TX1LN0_PORT1 0x168110 @@ -2131,10 +2210,10 @@ enum i915_power_well_id { #define MG_TX_DCC_TX1LN1_PORT3 0x16A510 #define MG_TX_DCC_TX1LN0_PORT4 0x16B110 #define MG_TX_DCC_TX1LN1_PORT4 0x16B510 -#define MG_TX1_DCC(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DCC_TX1LN0_PORT1, \ - MG_TX_DCC_TX1LN0_PORT2, \ - MG_TX_DCC_TX1LN1_PORT1) +#define MG_TX1_DCC(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DCC_TX1LN0_PORT1, \ + MG_TX_DCC_TX1LN0_PORT2, \ + MG_TX_DCC_TX1LN1_PORT1) #define MG_TX_DCC_TX2LN0_PORT1 0x168090 #define MG_TX_DCC_TX2LN1_PORT1 0x168490 #define MG_TX_DCC_TX2LN0_PORT2 0x169090 @@ -2143,10 +2222,10 @@ enum i915_power_well_id { #define MG_TX_DCC_TX2LN1_PORT3 0x16A490 #define MG_TX_DCC_TX2LN0_PORT4 0x16B090 #define MG_TX_DCC_TX2LN1_PORT4 0x16B490 -#define MG_TX2_DCC(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DCC_TX2LN0_PORT1, \ - MG_TX_DCC_TX2LN0_PORT2, \ - MG_TX_DCC_TX2LN1_PORT1) +#define MG_TX2_DCC(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DCC_TX2LN0_PORT1, \ + MG_TX_DCC_TX2LN0_PORT2, \ + MG_TX_DCC_TX2LN1_PORT1) #define CFG_AMI_CK_DIV_OVERRIDE_VAL(x) ((x) << 25) #define CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK (0x3 << 25) #define CFG_AMI_CK_DIV_OVERRIDE_EN (1 << 24) @@ -2159,32 +2238,12 @@ enum i915_power_well_id { #define MG_DP_MODE_LN1_ACU_PORT3 0x16A7A0 #define MG_DP_MODE_LN0_ACU_PORT4 0x16B3A0 #define MG_DP_MODE_LN1_ACU_PORT4 0x16B7A0 -#define MG_DP_MODE(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_DP_MODE_LN0_ACU_PORT1, \ - MG_DP_MODE_LN0_ACU_PORT2, \ - MG_DP_MODE_LN1_ACU_PORT1) +#define MG_DP_MODE(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_DP_MODE_LN0_ACU_PORT1, \ + MG_DP_MODE_LN0_ACU_PORT2, \ + MG_DP_MODE_LN1_ACU_PORT1) #define MG_DP_MODE_CFG_DP_X2_MODE (1 << 7) #define MG_DP_MODE_CFG_DP_X1_MODE (1 << 6) -#define MG_DP_MODE_CFG_TR2PWR_GATING (1 << 5) -#define MG_DP_MODE_CFG_TRPWR_GATING (1 << 4) -#define MG_DP_MODE_CFG_CLNPWR_GATING (1 << 3) -#define MG_DP_MODE_CFG_DIGPWR_GATING (1 << 2) -#define MG_DP_MODE_CFG_GAONPWR_GATING (1 << 1) - -#define MG_MISC_SUS0_PORT1 0x168814 -#define MG_MISC_SUS0_PORT2 0x169814 -#define MG_MISC_SUS0_PORT3 0x16A814 -#define MG_MISC_SUS0_PORT4 0x16B814 -#define MG_MISC_SUS0(tc_port) \ - _MMIO(_PORT(tc_port, MG_MISC_SUS0_PORT1, MG_MISC_SUS0_PORT2)) -#define MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK (3 << 14) -#define MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(x) ((x) << 14) -#define MG_MISC_SUS0_CFG_TR2PWR_GATING (1 << 12) -#define MG_MISC_SUS0_CFG_CL2PWR_GATING (1 << 11) -#define MG_MISC_SUS0_CFG_GAONPWR_GATING (1 << 10) -#define MG_MISC_SUS0_CFG_TRPWR_GATING (1 << 7) -#define MG_MISC_SUS0_CFG_CL1PWR_GATING (1 << 6) -#define MG_MISC_SUS0_CFG_DGPWR_GATING (1 << 5) /* The spec defines this only for BXT PHY0, but lets assume that this * would exist for PHY1 too if it had a second channel. 
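
The MG_PHY_PORT_LN() rework above keeps the same address arithmetic but indexes by TC port rather than the legacy port enum: _PORT() picks the per-port base from the PORT1/PORT2 spacing, and the lane index then steps by the LN0 to LN1 stride. A small illustrative helper (names and the worked example are mine, derived from the offsets listed above):

	#include <stdint.h>

	static uint32_t mg_phy_offset(unsigned int lane, unsigned int tc_port,
				      uint32_t ln0_port1, uint32_t ln0_port2,
				      uint32_t ln1_port1)
	{
		/* _PORT(tc_port, a, b): bases are evenly spaced by (b - a) */
		uint32_t base = ln0_port1 + tc_port * (ln0_port2 - ln0_port1);

		/* lanes within one port are spaced by (ln1 - ln0) */
		return base + lane * (ln1_port1 - ln0_port1);
	}

	/* MG_TX1_LINK_PARAMS(1, 0): 0x16812C + 1 * (0x16852C - 0x16812C) = 0x16852C */
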
@@ -2195,15 +2254,19 @@ enum i915_power_well_id { #define DW6_OLDO_DYN_PWR_DOWN_EN (1 << 28) #define FIA1_BASE 0x163000 +#define FIA2_BASE 0x16E000 +#define FIA3_BASE 0x16F000 +#define _FIA(fia) _PICK((fia), FIA1_BASE, FIA2_BASE, FIA3_BASE) +#define _MMIO_FIA(fia, off) _MMIO(_FIA(fia) + (off)) /* ICL PHY DFLEX registers */ -#define PORT_TX_DFLEXDPMLE1 _MMIO(FIA1_BASE + 0x008C0) -#define DFLEXDPMLE1_DPMLETC_MASK(tc_port) (0xf << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML0(tc_port) (1 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML1_0(tc_port) (3 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML3(tc_port) (8 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML3_2(tc_port) (12 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML3_0(tc_port) (15 << (4 * (tc_port))) +#define PORT_TX_DFLEXDPMLE1(fia) _MMIO_FIA((fia), 0x008C0) +#define DFLEXDPMLE1_DPMLETC_MASK(idx) (0xf << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML0(idx) (1 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML1_0(idx) (3 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML3(idx) (8 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML3_2(idx) (12 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML3_0(idx) (15 << (4 * (idx))) /* BXT PHY Ref registers */ #define _PORT_REF_DW3_A 0x16218C @@ -2477,15 +2540,18 @@ enum i915_power_well_id { #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) #define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100 * (engine)->hw_id) #define GEN8_RING_FAULT_REG _MMIO(0x4094) +#define GEN12_RING_FAULT_REG _MMIO(0xcec4) #define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) #define RING_FAULT_GTTSEL_MASK (1 << 11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) #define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) #define RING_FAULT_VALID (1 << 0) #define DONE_REG _MMIO(0x40b0) +#define GEN12_GAM_DONE _MMIO(0xcf68) #define GEN8_PRIVATE_PAT_LO _MMIO(0x40e0) #define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4) #define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4) +#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) #define BSD_HWS_PGA_GEN7 _MMIO(0x04180) #define BLT_HWS_PGA_GEN7 _MMIO(0x04280) #define VEBOX_HWS_PGA_GEN7 _MMIO(0x04380) @@ -2512,14 +2578,25 @@ enum i915_power_well_id { #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ +/* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */ +#define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8) +#define GEN8_RING_CS_GPR_UDW(base, n) _MMIO((base) + 0x600 + (n) * 8 + 4) + #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) -#define RING_FORCE_TO_NONPRIV_RW (0 << 28) /* CFL+ & Gen11+ */ -#define RING_FORCE_TO_NONPRIV_RD (1 << 28) -#define RING_FORCE_TO_NONPRIV_WR (2 << 28) +#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) +#define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */ +#define RING_FORCE_TO_NONPRIV_ACCESS_RD (1 << 28) +#define RING_FORCE_TO_NONPRIV_ACCESS_WR (2 << 28) +#define RING_FORCE_TO_NONPRIV_ACCESS_INVALID (3 << 28) +#define RING_FORCE_TO_NONPRIV_ACCESS_MASK (3 << 28) #define RING_FORCE_TO_NONPRIV_RANGE_1 (0 << 0) /* CFL+ & Gen11+ */ #define RING_FORCE_TO_NONPRIV_RANGE_4 (1 << 0) #define RING_FORCE_TO_NONPRIV_RANGE_16 (2 << 0) #define RING_FORCE_TO_NONPRIV_RANGE_64 (3 << 0) +#define RING_FORCE_TO_NONPRIV_RANGE_MASK (3 << 0) +#define RING_FORCE_TO_NONPRIV_MASK_VALID \ + (RING_FORCE_TO_NONPRIV_RANGE_MASK \ + | RING_FORCE_TO_NONPRIV_ACCESS_MASK) #define RING_MAX_NONPRIV_SLOTS 12 #define GEN7_TLB_RD_ADDR _MMIO(0x4700) @@ -2614,9 +2691,13 @@ 
enum i915_power_well_id { #define GEN8_FAULT_TLB_DATA0 _MMIO(0x4b10) #define GEN8_FAULT_TLB_DATA1 _MMIO(0x4b14) +#define GEN12_FAULT_TLB_DATA0 _MMIO(0xceb8) +#define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) +#define GEN12_AUX_ERR_DBG _MMIO(0x43f4) + #define FPGA_DBG _MMIO(0x42300) #define FPGA_DBG_RM_NOCLAIM (1 << 31) @@ -2726,6 +2807,7 @@ enum i915_power_well_id { #define VLV_GU_CTL0 _MMIO(VLV_DISPLAY_BASE + 0x2030) #define VLV_GU_CTL1 _MMIO(VLV_DISPLAY_BASE + 0x2034) #define SCPD0 _MMIO(0x209c) /* 915+ only */ +#define CSTATE_RENDER_CLOCK_GATE_DISABLE (1 << 5) #define GEN2_IER _MMIO(0x20a0) #define GEN2_IIR _MMIO(0x20a4) #define GEN2_IMR _MMIO(0x20a8) @@ -2899,6 +2981,7 @@ enum i915_power_well_id { #define GEN6_RC_SLEEP_PSMI_CONTROL _MMIO(0x2050) #define GEN6_PSMI_SLEEP_MSG_DISABLE (1 << 0) +#define GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) #define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12) #define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1 << 10) @@ -2977,6 +3060,8 @@ enum i915_power_well_id { #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) @@ -3229,25 +3314,7 @@ enum i915_power_well_id { #define GMBUS_RATE_1MHZ (3 << 8) /* reserved on Pineview */ #define GMBUS_HOLD_EXT (1 << 7) /* 300ns hold time, rsvd on Pineview */ #define GMBUS_BYTE_CNT_OVERRIDE (1 << 6) -#define GMBUS_PIN_DISABLED 0 -#define GMBUS_PIN_SSC 1 -#define GMBUS_PIN_VGADDC 2 -#define GMBUS_PIN_PANEL 3 -#define GMBUS_PIN_DPD_CHV 3 /* HDMID_CHV */ -#define GMBUS_PIN_DPC 4 /* HDMIC */ -#define GMBUS_PIN_DPB 5 /* SDVO, HDMIB */ -#define GMBUS_PIN_DPD 6 /* HDMID */ -#define GMBUS_PIN_RESERVED 7 /* 7 reserved */ -#define GMBUS_PIN_1_BXT 1 /* BXT+ (atom) and CNP+ (big core) */ -#define GMBUS_PIN_2_BXT 2 -#define GMBUS_PIN_3_BXT 3 -#define GMBUS_PIN_4_CNP 4 -#define GMBUS_PIN_9_TC1_ICP 9 -#define GMBUS_PIN_10_TC2_ICP 10 -#define GMBUS_PIN_11_TC3_ICP 11 -#define GMBUS_PIN_12_TC4_ICP 12 - -#define GMBUS_NUM_PINS 13 /* including 0 */ + #define GMBUS1 _MMIO(dev_priv->gpio_mmio_base + 0x5104) /* command/status */ #define GMBUS_SW_CLR_INT (1 << 31) #define GMBUS_SW_RDY (1 << 30) @@ -3597,6 +3664,9 @@ enum i915_power_well_id { #define _PALETTE_A 0xa000 #define _PALETTE_B 0xa800 #define _CHV_PALETTE_C 0xc000 +#define PALETTE_RED_MASK REG_GENMASK(23, 16) +#define PALETTE_GREEN_MASK REG_GENMASK(15, 8) +#define PALETTE_BLUE_MASK REG_GENMASK(7, 0) #define PALETTE(pipe, i) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + \ _PICK((pipe), _PALETTE_A, \ _PALETTE_B, _CHV_PALETTE_C) + \ @@ -4077,12 +4147,23 @@ enum { #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) #define MSCUNIT_CLKGATE_DIS (1 << 10) +#define L3_CLKGATE_DIS REG_BIT(16) +#define L3_CR2X_CLKGATE_DIS REG_BIT(17) #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) #define GWUNIT_CLKGATE_DIS (1 << 16) +#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528) +#define CPSSUNIT_CLKGATE_DIS REG_BIT(9) + #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) -#define VFUNIT_CLKGATE_DIS (1 << 20) +#define VFUNIT_CLKGATE_DIS REG_BIT(20) +#define HSUNIT_CLKGATE_DIS REG_BIT(8) +#define VSUNIT_CLKGATE_DIS REG_BIT(3) + +#define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4) +#define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19) +#define PSDUNIT_CLKGATE_DIS REG_BIT(5) #define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560) #define CGPSF_CLKGATE_DIS (1 << 3) @@ -4174,6 +4255,7 @@ 
enum { #define _VTOTAL_A 0x6000c #define _VBLANK_A 0x60010 #define _VSYNC_A 0x60014 +#define _EXITLINE_A 0x60018 #define _PIPEASRC 0x6001c #define _BCLRPAT_A 0x60020 #define _VSYNCSHIFT_A 0x60028 @@ -4209,6 +4291,7 @@ enum { #define TRANSCODER_B_OFFSET 0x61000 #define TRANSCODER_C_OFFSET 0x62000 #define CHV_TRANSCODER_C_OFFSET 0x63000 +#define TRANSCODER_D_OFFSET 0x63000 #define TRANSCODER_EDP_OFFSET 0x6f000 #define TRANSCODER_DSI0_OFFSET 0x6b000 #define TRANSCODER_DSI1_OFFSET 0x6b800 @@ -4224,10 +4307,22 @@ enum { #define PIPESRC(trans) _MMIO_TRANS2(trans, _PIPEASRC) #define PIPE_MULT(trans) _MMIO_TRANS2(trans, _PIPE_MULT_A) -/* HSW+ eDP PSR registers */ -#define HSW_EDP_PSR_BASE 0x64800 -#define BDW_EDP_PSR_BASE 0x6f800 -#define EDP_PSR_CTL _MMIO(dev_priv->psr_mmio_base + 0) +#define EXITLINE(trans) _MMIO_TRANS2(trans, _EXITLINE_A) +#define EXITLINE_ENABLE REG_BIT(31) +#define EXITLINE_MASK REG_GENMASK(12, 0) +#define EXITLINE_SHIFT 0 + +/* + * HSW+ eDP PSR registers + * + * HSW PSR registers are relative to DDIA(_DDI_BUF_CTL_A + 0x800) with just one + * instance of it + */ +#define _HSW_EDP_PSR_BASE 0x64800 +#define _SRD_CTL_A 0x60800 +#define _SRD_CTL_EDP 0x6f800 +#define _PSR_ADJ(tran, reg) (_TRANS2(tran, reg) - dev_priv->hsw_psr_mmio_adjust) +#define EDP_PSR_CTL(tran) _MMIO(_PSR_ADJ(tran, _SRD_CTL_A)) #define EDP_PSR_ENABLE (1 << 31) #define BDW_PSR_SINGLE_FRAME (1 << 30) #define EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK (1 << 29) /* SW can't modify */ @@ -4253,27 +4348,40 @@ enum { #define EDP_PSR_TP1_TIME_0us (3 << 4) #define EDP_PSR_IDLE_FRAME_SHIFT 0 -/* Bspec claims those aren't shifted but stay at 0x64800 */ +/* + * Until TGL, IMR/IIR are fixed at 0x648xx. On TGL+ those registers are relative + * to transcoder and bits defined for each one as if using no shift (i.e. as if + * it was for TRANSCODER_EDP) + */ #define EDP_PSR_IMR _MMIO(0x64834) #define EDP_PSR_IIR _MMIO(0x64838) -#define EDP_PSR_ERROR(shift) (1 << ((shift) + 2)) -#define EDP_PSR_POST_EXIT(shift) (1 << ((shift) + 1)) -#define EDP_PSR_PRE_ENTRY(shift) (1 << (shift)) -#define EDP_PSR_TRANSCODER_C_SHIFT 24 -#define EDP_PSR_TRANSCODER_B_SHIFT 16 -#define EDP_PSR_TRANSCODER_A_SHIFT 8 -#define EDP_PSR_TRANSCODER_EDP_SHIFT 0 - -#define EDP_PSR_AUX_CTL _MMIO(dev_priv->psr_mmio_base + 0x10) +#define _PSR_IMR_A 0x60814 +#define _PSR_IIR_A 0x60818 +#define TRANS_PSR_IMR(tran) _MMIO_TRANS2(tran, _PSR_IMR_A) +#define TRANS_PSR_IIR(tran) _MMIO_TRANS2(tran, _PSR_IIR_A) +#define _EDP_PSR_TRANS_SHIFT(trans) ((trans) == TRANSCODER_EDP ? 
\ + 0 : ((trans) - TRANSCODER_A + 1) * 8) +#define EDP_PSR_TRANS_MASK(trans) (0x7 << _EDP_PSR_TRANS_SHIFT(trans)) +#define EDP_PSR_ERROR(trans) (0x4 << _EDP_PSR_TRANS_SHIFT(trans)) +#define EDP_PSR_POST_EXIT(trans) (0x2 << _EDP_PSR_TRANS_SHIFT(trans)) +#define EDP_PSR_PRE_ENTRY(trans) (0x1 << _EDP_PSR_TRANS_SHIFT(trans)) + +#define _SRD_AUX_CTL_A 0x60810 +#define _SRD_AUX_CTL_EDP 0x6f810 +#define EDP_PSR_AUX_CTL(tran) _MMIO(_PSR_ADJ(tran, _SRD_AUX_CTL_A)) #define EDP_PSR_AUX_CTL_TIME_OUT_MASK (3 << 26) #define EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK (0x1f << 20) #define EDP_PSR_AUX_CTL_PRECHARGE_2US_MASK (0xf << 16) #define EDP_PSR_AUX_CTL_ERROR_INTERRUPT (1 << 11) #define EDP_PSR_AUX_CTL_BIT_CLOCK_2X_MASK (0x7ff) -#define EDP_PSR_AUX_DATA(i) _MMIO(dev_priv->psr_mmio_base + 0x14 + (i) * 4) /* 5 registers */ +#define _SRD_AUX_DATA_A 0x60814 +#define _SRD_AUX_DATA_EDP 0x6f814 +#define EDP_PSR_AUX_DATA(tran, i) _MMIO(_PSR_ADJ(tran, _SRD_AUX_DATA_A) + (i) + 4) /* 5 registers */ -#define EDP_PSR_STATUS _MMIO(dev_priv->psr_mmio_base + 0x40) +#define _SRD_STATUS_A 0x60840 +#define _SRD_STATUS_EDP 0x6f840 +#define EDP_PSR_STATUS(tran) _MMIO(_PSR_ADJ(tran, _SRD_STATUS_A)) #define EDP_PSR_STATUS_STATE_MASK (7 << 29) #define EDP_PSR_STATUS_STATE_SHIFT 29 #define EDP_PSR_STATUS_STATE_IDLE (0 << 29) @@ -4298,10 +4406,15 @@ enum { #define EDP_PSR_STATUS_SENDING_TP1 (1 << 4) #define EDP_PSR_STATUS_IDLE_MASK 0xf -#define EDP_PSR_PERF_CNT _MMIO(dev_priv->psr_mmio_base + 0x44) +#define _SRD_PERF_CNT_A 0x60844 +#define _SRD_PERF_CNT_EDP 0x6f844 +#define EDP_PSR_PERF_CNT(tran) _MMIO(_PSR_ADJ(tran, _SRD_PERF_CNT_A)) #define EDP_PSR_PERF_CNT_MASK 0xffffff -#define EDP_PSR_DEBUG _MMIO(dev_priv->psr_mmio_base + 0x60) /* PSR_MASK on SKL+ */ +/* PSR_MASK on SKL+ */ +#define _SRD_DEBUG_A 0x60860 +#define _SRD_DEBUG_EDP 0x6f860 +#define EDP_PSR_DEBUG(tran) _MMIO(_PSR_ADJ(tran, _SRD_DEBUG_A)) #define EDP_PSR_DEBUG_MASK_MAX_SLEEP (1 << 28) #define EDP_PSR_DEBUG_MASK_LPSP (1 << 27) #define EDP_PSR_DEBUG_MASK_MEMUP (1 << 26) @@ -4309,7 +4422,9 @@ enum { #define EDP_PSR_DEBUG_MASK_DISP_REG_WRITE (1 << 16) /* Reserved in ICL+ */ #define EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1 << 15) /* SKL+ */ -#define EDP_PSR2_CTL _MMIO(0x6f900) +#define _PSR2_CTL_A 0x60900 +#define _PSR2_CTL_EDP 0x6f900 +#define EDP_PSR2_CTL(tran) _MMIO_TRANS2(tran, _PSR2_CTL_A) #define EDP_PSR2_ENABLE (1 << 31) #define EDP_SU_TRACK_ENABLE (1 << 30) #define EDP_Y_COORDINATE_VALID (1 << 26) /* GLK and CNL+ */ @@ -4331,8 +4446,8 @@ enum { #define _PSR_EVENT_TRANS_B 0x61848 #define _PSR_EVENT_TRANS_C 0x62848 #define _PSR_EVENT_TRANS_D 0x63848 -#define _PSR_EVENT_TRANS_EDP 0x6F848 -#define PSR_EVENT(trans) _MMIO_TRANS2(trans, _PSR_EVENT_TRANS_A) +#define _PSR_EVENT_TRANS_EDP 0x6f848 +#define PSR_EVENT(tran) _MMIO_TRANS2(tran, _PSR_EVENT_TRANS_A) #define PSR_EVENT_PSR2_WD_TIMER_EXPIRE (1 << 17) #define PSR_EVENT_PSR2_DISABLED (1 << 16) #define PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN (1 << 15) @@ -4350,15 +4465,16 @@ enum { #define PSR_EVENT_LPSP_MODE_EXIT (1 << 1) #define PSR_EVENT_PSR_DISABLE (1 << 0) -#define EDP_PSR2_STATUS _MMIO(0x6f940) +#define _PSR2_STATUS_A 0x60940 +#define _PSR2_STATUS_EDP 0x6f940 +#define EDP_PSR2_STATUS(tran) _MMIO_TRANS2(tran, _PSR2_STATUS_A) #define EDP_PSR2_STATUS_STATE_MASK (0xf << 28) #define EDP_PSR2_STATUS_STATE_SHIFT 28 -#define _PSR2_SU_STATUS_0 0x6F914 -#define _PSR2_SU_STATUS_1 0x6F918 -#define _PSR2_SU_STATUS_2 0x6F91C -#define _PSR2_SU_STATUS(index) _MMIO(_PICK_EVEN((index), _PSR2_SU_STATUS_0, _PSR2_SU_STATUS_1)) -#define 
PSR2_SU_STATUS(frame) (_PSR2_SU_STATUS((frame) / 3)) +#define _PSR2_SU_STATUS_A 0x60914 +#define _PSR2_SU_STATUS_EDP 0x6f914 +#define _PSR2_SU_STATUS(tran, index) _MMIO(_TRANS2(tran, _PSR2_SU_STATUS_A) + (index) * 4) +#define PSR2_SU_STATUS(tran, frame) (_PSR2_SU_STATUS(tran, (frame) / 3)) #define PSR2_SU_STATUS_SHIFT(frame) (((frame) % 3) * 10) #define PSR2_SU_STATUS_MASK(frame) (0x3ff << PSR2_SU_STATUS_SHIFT(frame)) #define PSR2_SU_STATUS_FRAMES 8 @@ -4684,6 +4800,7 @@ enum { * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte * of the infoframe structure specified by CEA-861. */ #define VIDEO_DIP_DATA_SIZE 32 +#define VIDEO_DIP_GMP_DATA_SIZE 36 #define VIDEO_DIP_VSC_DATA_SIZE 36 #define VIDEO_DIP_PPS_DATA_SIZE 132 #define VIDEO_DIP_CTL _MMIO(0x61170) @@ -4911,14 +5028,20 @@ enum { #define BLM_PCH_POLARITY (1 << 29) #define BLC_PWM_PCH_CTL2 _MMIO(0xc8254) -#define UTIL_PIN_CTL _MMIO(0x48400) -#define UTIL_PIN_ENABLE (1 << 31) - -#define UTIL_PIN_PIPE(x) ((x) << 29) -#define UTIL_PIN_PIPE_MASK (3 << 29) -#define UTIL_PIN_MODE_PWM (1 << 24) -#define UTIL_PIN_MODE_MASK (0xf << 24) -#define UTIL_PIN_POLARITY (1 << 22) +#define UTIL_PIN_CTL _MMIO(0x48400) +#define UTIL_PIN_ENABLE (1 << 31) +#define UTIL_PIN_PIPE_MASK (3 << 29) +#define UTIL_PIN_PIPE(x) ((x) << 29) +#define UTIL_PIN_MODE_MASK (0xf << 24) +#define UTIL_PIN_MODE_DATA (0 << 24) +#define UTIL_PIN_MODE_PWM (1 << 24) +#define UTIL_PIN_MODE_VBLANK (4 << 24) +#define UTIL_PIN_MODE_VSYNC (5 << 24) +#define UTIL_PIN_MODE_EYE_LEVEL (8 << 24) +#define UTIL_PIN_OUTPUT_DATA (1 << 23) +#define UTIL_PIN_POLARITY (1 << 22) +#define UTIL_PIN_DIRECTION_INPUT (1 << 19) +#define UTIL_PIN_INPUT_DATA (1 << 16) /* BXT backlight register definition. */ #define _BXT_BLC_PWM_CTL1 0xC8250 @@ -5520,45 +5643,9 @@ enum { */ #define _DPA_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64010) #define _DPA_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64014) -#define _DPA_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64018) -#define _DPA_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6401c) -#define _DPA_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64020) -#define _DPA_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64024) #define _DPB_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64110) #define _DPB_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64114) -#define _DPB_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64118) -#define _DPB_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6411c) -#define _DPB_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64120) -#define _DPB_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64124) - -#define _DPC_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64210) -#define _DPC_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64214) -#define _DPC_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64218) -#define _DPC_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6421c) -#define _DPC_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64220) -#define _DPC_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64224) - -#define _DPD_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64310) -#define _DPD_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64314) -#define _DPD_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64318) -#define _DPD_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6431c) -#define _DPD_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64320) -#define _DPD_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64324) - -#define _DPE_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64410) -#define _DPE_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64414) 
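The PSR rework above comes down to simple address arithmetic: each transcoder now gets its own PSR block at the transcoder offset plus 0x800, and on Haswell-class hardware, which only has the single legacy block at 0x64800, the driver subtracts a fixed hsw_psr_mmio_adjust so the EDP transcoder lands back on the old addresses. The following is a minimal standalone sketch of that arithmetic; the transcoder offsets and the Haswell fixup value are hard-coded here purely for illustration and only mirror what the driver derives at runtime, so treat it as an assumption-laden example rather than driver code.

#include <stdio.h>

/*
 * Sketch of the _TRANS2()/_PSR_ADJ() scheme above; offsets hard-coded
 * for illustration only.
 */
enum transcoder { TRANSCODER_A, TRANSCODER_B, TRANSCODER_C, TRANSCODER_EDP };

static const unsigned int trans_offset[] = {
        [TRANSCODER_A]   = 0x60000,
        [TRANSCODER_B]   = 0x61000,
        [TRANSCODER_C]   = 0x62000,
        [TRANSCODER_EDP] = 0x6f000,
};

#define SRD_CTL_A        0x60800        /* _SRD_CTL_A above */
#define HSW_EDP_PSR_BASE 0x64800        /* _HSW_EDP_PSR_BASE above */
#define SRD_CTL_EDP      0x6f800        /* _SRD_CTL_EDP above */

/* _TRANS2(): rebase a transcoder-A relative register onto transcoder t */
static unsigned int trans2(enum transcoder t, unsigned int reg)
{
        return reg - trans_offset[TRANSCODER_A] + trans_offset[t];
}

/* _PSR_ADJ(): subtract the HSW-only fixup so HSW lands back on 0x648xx */
static unsigned int edp_psr_ctl(enum transcoder t, unsigned int hsw_psr_mmio_adjust)
{
        return trans2(t, SRD_CTL_A) - hsw_psr_mmio_adjust;
}

int main(void)
{
        unsigned int hsw_adjust = SRD_CTL_EDP - HSW_EDP_PSR_BASE;       /* 0xb000 */

        /* one PSR block per transcoder (SKL and later: adjust is 0) */
        printf("EDP_PSR_CTL(TRANSCODER_A)   = 0x%05x\n", edp_psr_ctl(TRANSCODER_A, 0));
        printf("EDP_PSR_CTL(TRANSCODER_EDP) = 0x%05x\n", edp_psr_ctl(TRANSCODER_EDP, 0));

        /* HSW: single legacy instance, relocated back to 0x64800 */
        printf("HSW EDP_PSR_CTL             = 0x%05x\n", edp_psr_ctl(TRANSCODER_EDP, hsw_adjust));
        return 0;
}

With a zero adjust the EDP entry resolves to 0x6f800, and with the Haswell adjust it comes out at 0x64800, matching the single-instance comment in the hunk above.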
-#define _DPE_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64418) -#define _DPE_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6441c) -#define _DPE_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64420) -#define _DPE_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64424) - -#define _DPF_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64510) -#define _DPF_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64514) -#define _DPF_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64518) -#define _DPF_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6451c) -#define _DPF_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64520) -#define _DPF_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64524) #define DP_AUX_CH_CTL(aux_ch) _MMIO_PORT(aux_ch, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL) #define DP_AUX_CH_DATA(aux_ch, i) _MMIO(_PORT(aux_ch, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ @@ -5656,7 +5743,8 @@ enum { #define PIPECONF_DOUBLE_WIDE (1 << 30) #define I965_PIPECONF_ACTIVE (1 << 30) #define PIPECONF_DSI_PLL_LOCKED (1 << 29) /* vlv & pipe A only */ -#define PIPECONF_FRAME_START_DELAY_MASK (3 << 27) +#define PIPECONF_FRAME_START_DELAY_MASK (3 << 27) /* pre-hsw */ +#define PIPECONF_FRAME_START_DELAY(x) ((x) << 27) /* pre-hsw: 0-3 */ #define PIPECONF_SINGLE_WIDE 0 #define PIPECONF_PIPE_UNLOCKED 0 #define PIPECONF_PIPE_LOCKED (1 << 25) @@ -5690,6 +5778,11 @@ enum { #define PIPECONF_CXSR_DOWNCLOCK (1 << 16) #define PIPECONF_EDP_RR_MODE_SWITCH_VLV (1 << 14) #define PIPECONF_COLOR_RANGE_SELECT (1 << 13) +#define PIPECONF_OUTPUT_COLORSPACE_MASK (3 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_RGB (0 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_YUV601 (1 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_YUV709 (2 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_YUV_HSW (1 << 11) /* hsw only */ #define PIPECONF_BPC_MASK (0x7 << 5) #define PIPECONF_8BPC (0 << 5) #define PIPECONF_10BPC (1 << 5) @@ -5755,6 +5848,7 @@ enum { #define PIPE_A_OFFSET 0x70000 #define PIPE_B_OFFSET 0x71000 #define PIPE_C_OFFSET 0x72000 +#define PIPE_D_OFFSET 0x73000 #define CHV_PIPE_C_OFFSET 0x74000 /* * There's actually no pipe EDP. 
Some pipe registers have @@ -5776,12 +5870,13 @@ enum { #define _PIPEAGCMAX 0x70010 #define _PIPEBGCMAX 0x71010 +#define PIPEGCMAX_RGB_MASK REG_GENMASK(15, 0) #define PIPEGCMAX(pipe, i) _MMIO_PIPE2(pipe, _PIPEAGCMAX + (i) * 4) #define _PIPE_MISC_A 0x70030 #define _PIPE_MISC_B 0x71030 -#define PIPEMISC_YUV420_ENABLE (1 << 27) -#define PIPEMISC_YUV420_MODE_FULL_BLEND (1 << 26) +#define PIPEMISC_YUV420_ENABLE (1 << 27) /* glk+ */ +#define PIPEMISC_YUV420_MODE_FULL_BLEND (1 << 26) /* glk+ */ #define PIPEMISC_HDR_MODE_PRECISION (1 << 23) /* icl+ */ #define PIPEMISC_OUTPUT_COLORSPACE_YUV (1 << 11) #define PIPEMISC_DITHER_BPC_MASK (7 << 5) @@ -6238,6 +6333,7 @@ enum { #define CHV_CURSOR_C_OFFSET 0x700e0 #define IVB_CURSOR_B_OFFSET 0x71080 #define IVB_CURSOR_C_OFFSET 0x72080 +#define TGL_CURSOR_D_OFFSET 0x73080 /* Display A control */ #define _DSPACNTR 0x70180 @@ -6256,6 +6352,7 @@ enum { #define DISPPLANE_RGBX101010 (0x8 << 26) #define DISPPLANE_RGBA101010 (0x9 << 26) #define DISPPLANE_BGRX101010 (0xa << 26) +#define DISPPLANE_BGRA101010 (0xb << 26) #define DISPPLANE_RGBX161616 (0xc << 26) #define DISPPLANE_RGBX888 (0xe << 26) #define DISPPLANE_RGBA888 (0xf << 26) @@ -6284,6 +6381,7 @@ enum { #define _DSPATILEOFF 0x701A4 /* 965+ only */ #define _DSPAOFFSET 0x701A4 /* HSW */ #define _DSPASURFLIVE 0x701AC +#define _DSPAGAMC 0x701E0 #define DSPCNTR(plane) _MMIO_PIPE2(plane, _DSPACNTR) #define DSPADDR(plane) _MMIO_PIPE2(plane, _DSPAADDR) @@ -6295,6 +6393,7 @@ enum { #define DSPLINOFF(plane) DSPADDR(plane) #define DSPOFFSET(plane) _MMIO_PIPE2(plane, _DSPAOFFSET) #define DSPSURFLIVE(plane) _MMIO_PIPE2(plane, _DSPASURFLIVE) +#define DSPGAMC(plane, i) _MMIO(_PIPE2(plane, _DSPAGAMC) + (5 - (i)) * 4) /* plane C only, 6 x u0.8 */ /* CHV pipe B blender and primary plane */ #define _CHV_BLEND_A 0x60a00 @@ -6397,6 +6496,7 @@ enum { #define _DVSAKEYMAXVAL 0x721a0 #define _DVSATILEOFF 0x721a4 #define _DVSASURFLIVE 0x721ac +#define _DVSAGAMC_G4X 0x721e0 /* g4x */ #define _DVSASCALE 0x72204 #define DVS_SCALE_ENABLE (1 << 31) #define DVS_FILTER_MASK (3 << 29) @@ -6405,7 +6505,8 @@ enum { #define DVS_FILTER_SOFTENING (2 << 29) #define DVS_VERTICAL_OFFSET_HALF (1 << 28) /* must be enabled below */ #define DVS_VERTICAL_OFFSET_ENABLE (1 << 27) -#define _DVSAGAMC 0x72300 +#define _DVSAGAMC_ILK 0x72300 /* ilk/snb */ +#define _DVSAGAMCMAX_ILK 0x72340 /* ilk/snb */ #define _DVSBCNTR 0x73180 #define _DVSBLINOFF 0x73184 @@ -6418,8 +6519,10 @@ enum { #define _DVSBKEYMAXVAL 0x731a0 #define _DVSBTILEOFF 0x731a4 #define _DVSBSURFLIVE 0x731ac +#define _DVSBGAMC_G4X 0x731e0 /* g4x */ #define _DVSBSCALE 0x73204 -#define _DVSBGAMC 0x73300 +#define _DVSBGAMC_ILK 0x73300 /* ilk/snb */ +#define _DVSBGAMCMAX_ILK 0x73340 /* ilk/snb */ #define DVSCNTR(pipe) _MMIO_PIPE(pipe, _DVSACNTR, _DVSBCNTR) #define DVSLINOFF(pipe) _MMIO_PIPE(pipe, _DVSALINOFF, _DVSBLINOFF) @@ -6433,6 +6536,9 @@ enum { #define DVSKEYVAL(pipe) _MMIO_PIPE(pipe, _DVSAKEYVAL, _DVSBKEYVAL) #define DVSKEYMSK(pipe) _MMIO_PIPE(pipe, _DVSAKEYMSK, _DVSBKEYMSK) #define DVSSURFLIVE(pipe) _MMIO_PIPE(pipe, _DVSASURFLIVE, _DVSBSURFLIVE) +#define DVSGAMC_G4X(pipe, i) _MMIO(_PIPE(pipe, _DVSAGAMC_G4X, _DVSBGAMC_G4X) + (5 - (i)) * 4) /* 6 x u0.8 */ +#define DVSGAMC_ILK(pipe, i) _MMIO(_PIPE(pipe, _DVSAGAMC_ILK, _DVSBGAMC_ILK) + (i) * 4) /* 16 x u0.10 */ +#define DVSGAMCMAX_ILK(pipe, i) _MMIO(_PIPE(pipe, _DVSAGAMCMAX_ILK, _DVSBGAMCMAX_ILK) + (i) * 4) /* 3 x u1.10 */ #define _SPRA_CTL 0x70280 #define SPRITE_ENABLE (1 << 31) @@ -6457,7 +6563,7 @@ enum { #define SPRITE_YUV_ORDER_VYUY (3 << 
16) #define SPRITE_ROTATE_180 (1 << 15) #define SPRITE_TRICKLE_FEED_DISABLE (1 << 14) -#define SPRITE_INT_GAMMA_ENABLE (1 << 13) +#define SPRITE_INT_GAMMA_DISABLE (1 << 13) #define SPRITE_TILED (1 << 10) #define SPRITE_DEST_KEY (1 << 2) #define _SPRA_LINOFF 0x70284 @@ -6480,6 +6586,8 @@ enum { #define SPRITE_VERTICAL_OFFSET_HALF (1 << 28) /* must be enabled below */ #define SPRITE_VERTICAL_OFFSET_ENABLE (1 << 27) #define _SPRA_GAMC 0x70400 +#define _SPRA_GAMC16 0x70440 +#define _SPRA_GAMC17 0x7044c #define _SPRB_CTL 0x71280 #define _SPRB_LINOFF 0x71284 @@ -6495,6 +6603,8 @@ enum { #define _SPRB_SURFLIVE 0x712ac #define _SPRB_SCALE 0x71304 #define _SPRB_GAMC 0x71400 +#define _SPRB_GAMC16 0x71440 +#define _SPRB_GAMC17 0x7144c #define SPRCTL(pipe) _MMIO_PIPE(pipe, _SPRA_CTL, _SPRB_CTL) #define SPRLINOFF(pipe) _MMIO_PIPE(pipe, _SPRA_LINOFF, _SPRB_LINOFF) @@ -6508,19 +6618,24 @@ enum { #define SPRTILEOFF(pipe) _MMIO_PIPE(pipe, _SPRA_TILEOFF, _SPRB_TILEOFF) #define SPROFFSET(pipe) _MMIO_PIPE(pipe, _SPRA_OFFSET, _SPRB_OFFSET) #define SPRSCALE(pipe) _MMIO_PIPE(pipe, _SPRA_SCALE, _SPRB_SCALE) -#define SPRGAMC(pipe) _MMIO_PIPE(pipe, _SPRA_GAMC, _SPRB_GAMC) +#define SPRGAMC(pipe, i) _MMIO(_PIPE(pipe, _SPRA_GAMC, _SPRB_GAMC) + (i) * 4) /* 16 x u0.10 */ +#define SPRGAMC16(pipe, i) _MMIO(_PIPE(pipe, _SPRA_GAMC16, _SPRB_GAMC16) + (i) * 4) /* 3 x u1.10 */ +#define SPRGAMC17(pipe, i) _MMIO(_PIPE(pipe, _SPRA_GAMC17, _SPRB_GAMC17) + (i) * 4) /* 3 x u2.10 */ #define SPRSURFLIVE(pipe) _MMIO_PIPE(pipe, _SPRA_SURFLIVE, _SPRB_SURFLIVE) #define _SPACNTR (VLV_DISPLAY_BASE + 0x72180) #define SP_ENABLE (1 << 31) #define SP_GAMMA_ENABLE (1 << 30) #define SP_PIXFORMAT_MASK (0xf << 26) -#define SP_FORMAT_YUV422 (0 << 26) -#define SP_FORMAT_BGR565 (5 << 26) -#define SP_FORMAT_BGRX8888 (6 << 26) -#define SP_FORMAT_BGRA8888 (7 << 26) -#define SP_FORMAT_RGBX1010102 (8 << 26) -#define SP_FORMAT_RGBA1010102 (9 << 26) +#define SP_FORMAT_YUV422 (0x0 << 26) +#define SP_FORMAT_8BPP (0x2 << 26) +#define SP_FORMAT_BGR565 (0x5 << 26) +#define SP_FORMAT_BGRX8888 (0x6 << 26) +#define SP_FORMAT_BGRA8888 (0x7 << 26) +#define SP_FORMAT_RGBX1010102 (0x8 << 26) +#define SP_FORMAT_RGBA1010102 (0x9 << 26) +#define SP_FORMAT_BGRX1010102 (0xa << 26) /* CHV pipe B */ +#define SP_FORMAT_BGRA1010102 (0xb << 26) /* CHV pipe B */ #define SP_FORMAT_RGBX8888 (0xe << 26) #define SP_FORMAT_RGBA8888 (0xf << 26) #define SP_ALPHA_PREMULTIPLY (1 << 23) /* CHV pipe B */ @@ -6551,7 +6666,7 @@ enum { #define _SPACLRC1 (VLV_DISPLAY_BASE + 0x721d4) #define SP_SH_SIN(x) (((x) & 0x7ff) << 16) /* s4.7 */ #define SP_SH_COS(x) (x) /* u3.7 */ -#define _SPAGAMC (VLV_DISPLAY_BASE + 0x721f4) +#define _SPAGAMC (VLV_DISPLAY_BASE + 0x721e0) #define _SPBCNTR (VLV_DISPLAY_BASE + 0x72280) #define _SPBLINOFF (VLV_DISPLAY_BASE + 0x72284) @@ -6566,10 +6681,12 @@ enum { #define _SPBCONSTALPHA (VLV_DISPLAY_BASE + 0x722a8) #define _SPBCLRC0 (VLV_DISPLAY_BASE + 0x722d0) #define _SPBCLRC1 (VLV_DISPLAY_BASE + 0x722d4) -#define _SPBGAMC (VLV_DISPLAY_BASE + 0x722f4) +#define _SPBGAMC (VLV_DISPLAY_BASE + 0x722e0) +#define _VLV_SPR(pipe, plane_id, reg_a, reg_b) \ + _PIPE((pipe) * 2 + (plane_id) - PLANE_SPRITE0, (reg_a), (reg_b)) #define _MMIO_VLV_SPR(pipe, plane_id, reg_a, reg_b) \ - _MMIO_PIPE((pipe) * 2 + (plane_id) - PLANE_SPRITE0, (reg_a), (reg_b)) + _MMIO(_VLV_SPR((pipe), (plane_id), (reg_a), (reg_b))) #define SPCNTR(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPACNTR, _SPBCNTR) #define SPLINOFF(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPALINOFF, _SPBLINOFF) @@ 
-6584,7 +6701,7 @@ enum { #define SPCONSTALPHA(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPACONSTALPHA, _SPBCONSTALPHA) #define SPCLRC0(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPACLRC0, _SPBCLRC0) #define SPCLRC1(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPACLRC1, _SPBCLRC1) -#define SPGAMC(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPAGAMC, _SPBGAMC) +#define SPGAMC(pipe, plane_id, i) _MMIO(_VLV_SPR((pipe), (plane_id), _SPAGAMC, _SPBGAMC) + (5 - (i)) * 4) /* 6 x u0.10 */ /* * CHV pipe B sprite CSC @@ -6669,6 +6786,7 @@ enum { #define PLANE_CTL_YUV422_VYUY (3 << 16) #define PLANE_CTL_RENDER_DECOMPRESSION_ENABLE (1 << 15) #define PLANE_CTL_TRICKLE_FEED_DISABLE (1 << 14) +#define PLANE_CTL_CLEAR_COLOR_DISABLE (1 << 13) /* TGL+ */ #define PLANE_CTL_PLANE_GAMMA_DISABLE (1 << 13) /* Pre-GLK */ #define PLANE_CTL_TILED_MASK (0x7 << 10) #define PLANE_CTL_TILED_LINEAR (0 << 10) @@ -6676,6 +6794,7 @@ enum { #define PLANE_CTL_TILED_Y (4 << 10) #define PLANE_CTL_TILED_YF (5 << 10) #define PLANE_CTL_FLIP_HORIZONTAL (1 << 8) +#define PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE (1 << 4) /* TGL+ */ #define PLANE_CTL_ALPHA_MASK (0x3 << 4) /* Pre-GLK */ #define PLANE_CTL_ALPHA_DISABLE (0 << 4) #define PLANE_CTL_ALPHA_SW_PREMULTIPLY (2 << 4) @@ -7191,11 +7310,17 @@ enum { /* legacy palette */ #define _LGC_PALETTE_A 0x4a000 #define _LGC_PALETTE_B 0x4a800 +#define LGC_PALETTE_RED_MASK REG_GENMASK(23, 16) +#define LGC_PALETTE_GREEN_MASK REG_GENMASK(15, 8) +#define LGC_PALETTE_BLUE_MASK REG_GENMASK(7, 0) #define LGC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _LGC_PALETTE_A, _LGC_PALETTE_B) + (i) * 4) /* ilk/snb precision palette */ #define _PREC_PALETTE_A 0x4b000 #define _PREC_PALETTE_B 0x4c000 +#define PREC_PALETTE_RED_MASK REG_GENMASK(29, 20) +#define PREC_PALETTE_GREEN_MASK REG_GENMASK(19, 10) +#define PREC_PALETTE_BLUE_MASK REG_GENMASK(9, 0) #define PREC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _PREC_PALETTE_A, _PREC_PALETTE_B) + (i) * 4) #define _PREC_PIPEAGCMAX 0x4d000 @@ -7228,6 +7353,14 @@ enum { #define SKL_CSR_DC3_DC5_COUNT _MMIO(0x80030) #define SKL_CSR_DC5_DC6_COUNT _MMIO(0x8002C) #define BXT_CSR_DC3_DC5_COUNT _MMIO(0x80038) +#define TGL_DMC_DEBUG_DC5_COUNT _MMIO(0x101084) +#define TGL_DMC_DEBUG_DC6_COUNT _MMIO(0x101088) + +#define DMC_DEBUG3 _MMIO(0x101090) + +/* Display Internal Timeout Register */ +#define RM_TIMEOUT _MMIO(0x42060) +#define MMIO_TIMEOUT_US(us) ((us) << 0) /* interrupts */ #define DE_MASTER_IRQ_CONTROL (1 << 31) @@ -7317,16 +7450,6 @@ enum { #define GEN8_GT_IIR(which) _MMIO(0x44308 + (0x10 * (which))) #define GEN8_GT_IER(which) _MMIO(0x4430c + (0x10 * (which))) -#define GEN9_GUC_TO_HOST_INT_EVENT (1 << 31) -#define GEN9_GUC_EXEC_ERROR_EVENT (1 << 30) -#define GEN9_GUC_DISPLAY_EVENT (1 << 29) -#define GEN9_GUC_SEMA_SIGNAL_EVENT (1 << 28) -#define GEN9_GUC_IOMMU_MSG_EVENT (1 << 27) -#define GEN9_GUC_DB_RING_EVENT (1 << 26) -#define GEN9_GUC_DMA_DONE_EVENT (1 << 25) -#define GEN9_GUC_FATAL_ERROR_EVENT (1 << 24) -#define GEN9_GUC_NOTIFICATION_EVENT (1 << 23) - #define GEN8_RCS_IRQ_SHIFT 0 #define GEN8_BCS_IRQ_SHIFT 16 #define GEN8_VCS0_IRQ_SHIFT 0 /* NB: VCS1 in bspec! 
*/ @@ -7350,6 +7473,9 @@ enum { #define GEN8_PIPE_VSYNC (1 << 1) #define GEN8_PIPE_VBLANK (1 << 0) #define GEN9_PIPE_CURSOR_FAULT (1 << 11) +#define GEN11_PIPE_PLANE7_FAULT (1 << 22) +#define GEN11_PIPE_PLANE6_FAULT (1 << 21) +#define GEN11_PIPE_PLANE5_FAULT (1 << 20) #define GEN9_PIPE_PLANE4_FAULT (1 << 10) #define GEN9_PIPE_PLANE3_FAULT (1 << 9) #define GEN9_PIPE_PLANE2_FAULT (1 << 8) @@ -7369,16 +7495,25 @@ enum { GEN9_PIPE_PLANE3_FAULT | \ GEN9_PIPE_PLANE2_FAULT | \ GEN9_PIPE_PLANE1_FAULT) +#define GEN11_DE_PIPE_IRQ_FAULT_ERRORS \ + (GEN9_DE_PIPE_IRQ_FAULT_ERRORS | \ + GEN11_PIPE_PLANE7_FAULT | \ + GEN11_PIPE_PLANE6_FAULT | \ + GEN11_PIPE_PLANE5_FAULT) #define GEN8_DE_PORT_ISR _MMIO(0x44440) #define GEN8_DE_PORT_IMR _MMIO(0x44444) #define GEN8_DE_PORT_IIR _MMIO(0x44448) #define GEN8_DE_PORT_IER _MMIO(0x4444c) +#define DSI1_NON_TE (1 << 31) +#define DSI0_NON_TE (1 << 30) #define ICL_AUX_CHANNEL_E (1 << 29) #define CNL_AUX_CHANNEL_F (1 << 28) #define GEN9_AUX_CHANNEL_D (1 << 27) #define GEN9_AUX_CHANNEL_C (1 << 26) #define GEN9_AUX_CHANNEL_B (1 << 25) +#define DSI1_TE (1 << 24) +#define DSI0_TE (1 << 23) #define BXT_DE_PORT_HP_DDIC (1 << 5) #define BXT_DE_PORT_HP_DDIB (1 << 4) #define BXT_DE_PORT_HP_DDIA (1 << 3) @@ -7388,6 +7523,15 @@ enum { #define GEN8_PORT_DP_A_HOTPLUG (1 << 3) #define BXT_DE_PORT_GMBUS (1 << 1) #define GEN8_AUX_CHANNEL_A (1 << 0) +#define TGL_DE_PORT_AUX_USBC6 (1 << 13) +#define TGL_DE_PORT_AUX_USBC5 (1 << 12) +#define TGL_DE_PORT_AUX_USBC4 (1 << 11) +#define TGL_DE_PORT_AUX_USBC3 (1 << 10) +#define TGL_DE_PORT_AUX_USBC2 (1 << 9) +#define TGL_DE_PORT_AUX_USBC1 (1 << 8) +#define TGL_DE_PORT_AUX_DDIC (1 << 2) +#define TGL_DE_PORT_AUX_DDIB (1 << 1) +#define TGL_DE_PORT_AUX_DDIA (1 << 0) #define GEN8_DE_MISC_ISR _MMIO(0x44460) #define GEN8_DE_MISC_IMR _MMIO(0x44464) @@ -7431,21 +7575,29 @@ enum { #define GEN11_DE_HPD_IMR _MMIO(0x44474) #define GEN11_DE_HPD_IIR _MMIO(0x44478) #define GEN11_DE_HPD_IER _MMIO(0x4447c) +#define GEN12_TC6_HOTPLUG (1 << 21) +#define GEN12_TC5_HOTPLUG (1 << 20) #define GEN11_TC4_HOTPLUG (1 << 19) #define GEN11_TC3_HOTPLUG (1 << 18) #define GEN11_TC2_HOTPLUG (1 << 17) #define GEN11_TC1_HOTPLUG (1 << 16) #define GEN11_TC_HOTPLUG(tc_port) (1 << ((tc_port) + 16)) -#define GEN11_DE_TC_HOTPLUG_MASK (GEN11_TC4_HOTPLUG | \ +#define GEN11_DE_TC_HOTPLUG_MASK (GEN12_TC6_HOTPLUG | \ + GEN12_TC5_HOTPLUG | \ + GEN11_TC4_HOTPLUG | \ GEN11_TC3_HOTPLUG | \ GEN11_TC2_HOTPLUG | \ GEN11_TC1_HOTPLUG) +#define GEN12_TBT6_HOTPLUG (1 << 5) +#define GEN12_TBT5_HOTPLUG (1 << 4) #define GEN11_TBT4_HOTPLUG (1 << 3) #define GEN11_TBT3_HOTPLUG (1 << 2) #define GEN11_TBT2_HOTPLUG (1 << 1) #define GEN11_TBT1_HOTPLUG (1 << 0) #define GEN11_TBT_HOTPLUG(tc_port) (1 << (tc_port)) -#define GEN11_DE_TBT_HOTPLUG_MASK (GEN11_TBT4_HOTPLUG | \ +#define GEN11_DE_TBT_HOTPLUG_MASK (GEN12_TBT6_HOTPLUG | \ + GEN12_TBT5_HOTPLUG | \ + GEN11_TBT4_HOTPLUG | \ GEN11_TBT3_HOTPLUG | \ GEN11_TBT2_HOTPLUG | \ GEN11_TBT1_HOTPLUG) @@ -7479,6 +7631,9 @@ enum { #define GEN11_INTR_ENGINE_CLASS(x) (((x) & GENMASK(18, 16)) >> 16) #define GEN11_INTR_ENGINE_INSTANCE(x) (((x) & GENMASK(25, 20)) >> 20) #define GEN11_INTR_ENGINE_INTR(x) ((x) & 0xffff) +/* irq instances for OTHER_CLASS */ +#define OTHER_GUC_INSTANCE 0 +#define OTHER_GTPM_INSTANCE 1 #define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + ((x) * 4)) @@ -7562,10 +7717,19 @@ enum { #define BDW_DPRS_MASK_VBLANK_SRD (1 << 0) #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B) -#define CHICKEN_TRANS_A _MMIO(0x420c0) 
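The GEN11_INTR_ENGINE_CLASS/INSTANCE/INTR helpers above are plain mask-and-shift field extractors for one GEN11_INTR_IDENTITY_REG dword: engine class in bits 18:16, instance within that class in bits 25:20, and the interrupt bits in 15:0, with OTHER_GUC_INSTANCE/OTHER_GTPM_INSTANCE naming two instances that appear under the "other" class. A standalone sketch of the decode follows; the identity value is made up and the class-to-engine mapping is not defined in this hunk, so nothing here should be read as the driver's interrupt handler.

#include <stdio.h>

/*
 * Standalone copy of the GEN11_INTR_ENGINE_* field extractors above,
 * decoding one made-up identity value.  The 32-bit GENMASK() below
 * stands in for the kernel helper.
 */
#define GENMASK(h, l)   ((~0u << (l)) & (~0u >> (31 - (h))))

#define GEN11_INTR_ENGINE_CLASS(x)      (((x) & GENMASK(18, 16)) >> 16)
#define GEN11_INTR_ENGINE_INSTANCE(x)   (((x) & GENMASK(25, 20)) >> 20)
#define GEN11_INTR_ENGINE_INTR(x)       ((x) & 0xffff)

int main(void)
{
        /* hypothetical identity dword: class 1, instance 2, one intr bit */
        unsigned int ident = (1u << 16) | (2u << 20) | 0x0001;

        printf("class    %u\n", GEN11_INTR_ENGINE_CLASS(ident));        /* 1 */
        printf("instance %u\n", GEN11_INTR_ENGINE_INSTANCE(ident));     /* 2 */
        printf("intr     0x%04x\n", GEN11_INTR_ENGINE_INTR(ident));     /* 0x0001 */
        return 0;
}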
-#define CHICKEN_TRANS_B _MMIO(0x420c4) -#define CHICKEN_TRANS_C _MMIO(0x420c8) -#define CHICKEN_TRANS_EDP _MMIO(0x420cc) +#define _CHICKEN_TRANS_A 0x420c0 +#define _CHICKEN_TRANS_B 0x420c4 +#define _CHICKEN_TRANS_C 0x420c8 +#define _CHICKEN_TRANS_EDP 0x420cc +#define _CHICKEN_TRANS_D 0x420d8 +#define CHICKEN_TRANS(trans) _MMIO(_PICK((trans), \ + [TRANSCODER_EDP] = _CHICKEN_TRANS_EDP, \ + [TRANSCODER_A] = _CHICKEN_TRANS_A, \ + [TRANSCODER_B] = _CHICKEN_TRANS_B, \ + [TRANSCODER_C] = _CHICKEN_TRANS_C, \ + [TRANSCODER_D] = _CHICKEN_TRANS_D)) +#define HSW_FRAME_START_DELAY_MASK (3 << 27) +#define HSW_FRAME_START_DELAY(x) ((x) << 27) /* 0-3 */ #define VSC_DATA_SEL_SOFTWARE_CONTROL (1 << 25) /* GLK and CNL+ */ #define DDI_TRAINING_OVERRIDE_ENABLE (1 << 19) #define DDI_TRAINING_OVERRIDE_VALUE (1 << 18) @@ -7589,6 +7753,14 @@ enum { #define GEN7_MSG_CTL _MMIO(0x45010) #define WAIT_FOR_PCH_RESET_ACK (1 << 1) #define WAIT_FOR_PCH_FLR_ACK (1 << 0) + +#define BW_BUDDY1_CTL _MMIO(0x45140) +#define BW_BUDDY2_CTL _MMIO(0x45150) +#define BW_BUDDY_DISABLE REG_BIT(31) + +#define BW_BUDDY1_PAGE_MASK _MMIO(0x45144) +#define BW_BUDDY2_PAGE_MASK _MMIO(0x45154) + #define HSW_NDE_RSTWRN_OPT _MMIO(0x46408) #define RESET_PCH_HANDSHAKE_ENABLE (1 << 4) @@ -7598,14 +7770,19 @@ enum { #define CNL_DDI_CLOCK_REG_ACCESS_ON (1 << 7) #define SKL_DFSM _MMIO(0x51000) -#define SKL_DFSM_CDCLK_LIMIT_MASK (3 << 23) -#define SKL_DFSM_CDCLK_LIMIT_675 (0 << 23) -#define SKL_DFSM_CDCLK_LIMIT_540 (1 << 23) -#define SKL_DFSM_CDCLK_LIMIT_450 (2 << 23) -#define SKL_DFSM_CDCLK_LIMIT_337_5 (3 << 23) -#define SKL_DFSM_PIPE_A_DISABLE (1 << 30) -#define SKL_DFSM_PIPE_B_DISABLE (1 << 21) -#define SKL_DFSM_PIPE_C_DISABLE (1 << 28) +#define SKL_DFSM_DISPLAY_PM_DISABLE (1 << 27) +#define SKL_DFSM_DISPLAY_HDCP_DISABLE (1 << 25) +#define SKL_DFSM_CDCLK_LIMIT_MASK (3 << 23) +#define SKL_DFSM_CDCLK_LIMIT_675 (0 << 23) +#define SKL_DFSM_CDCLK_LIMIT_540 (1 << 23) +#define SKL_DFSM_CDCLK_LIMIT_450 (2 << 23) +#define SKL_DFSM_CDCLK_LIMIT_337_5 (3 << 23) +#define ICL_DFSM_DMC_DISABLE (1 << 23) +#define SKL_DFSM_PIPE_A_DISABLE (1 << 30) +#define SKL_DFSM_PIPE_B_DISABLE (1 << 21) +#define SKL_DFSM_PIPE_C_DISABLE (1 << 28) +#define TGL_DFSM_PIPE_D_DISABLE (1 << 22) +#define CNL_DFSM_DISPLAY_DSC_DISABLE (1 << 7) #define SKL_DSSM _MMIO(0x51004) #define CNL_DSSM_CDCLK_PLL_REFCLK_24MHz (1 << 31) @@ -7622,7 +7799,10 @@ enum { #define GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE (1 << 10) #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) +#define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) +#define GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11) + #define GEN8_CS_CHICKEN1 _MMIO(0x2580) #define GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0) #define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) @@ -7647,6 +7827,7 @@ enum { #define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC (1 << 11) + #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE (1 << 9) #define HIZ_CHICKEN _MMIO(0x7018) # define CHV_HZ_8X8_MODE_IN_1X (1 << 15) @@ -7690,6 +7871,9 @@ enum { #define GEN7_L3SQCREG4 _MMIO(0xb034) #define L3SQ_URB_READ_CAM_MATCH_DISABLE (1 << 27) +#define GEN11_SCRATCH2 _MMIO(0xb140) +#define GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE (1 << 19) + #define GEN8_L3SQCREG4 _MMIO(0xb118) #define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6) #define GEN8_LQSC_RO_PERF_DIS (1 << 27) @@ -7734,6 +7918,10 @@ enum { #define PIXEL_ROUNDING_TRUNC_FB_PASSTHRU (1 << 15) #define PER_PIXEL_ALPHA_BYPASS_EN (1 << 7) +#define FF_MODE2 
_MMIO(0x6604) +#define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) +#define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) + /* PCH */ #define PCH_DISPLAY_BASE 0xc0000u @@ -7827,22 +8015,25 @@ enum { SDE_FDI_RXB_CPT | \ SDE_FDI_RXA_CPT) -/* south display engine interrupt: ICP */ -#define SDE_TC4_HOTPLUG_ICP (1 << 27) -#define SDE_TC3_HOTPLUG_ICP (1 << 26) -#define SDE_TC2_HOTPLUG_ICP (1 << 25) -#define SDE_TC1_HOTPLUG_ICP (1 << 24) +/* south display engine interrupt: ICP/TGP */ #define SDE_GMBUS_ICP (1 << 23) -#define SDE_DDIB_HOTPLUG_ICP (1 << 17) -#define SDE_DDIA_HOTPLUG_ICP (1 << 16) #define SDE_TC_HOTPLUG_ICP(tc_port) (1 << ((tc_port) + 24)) #define SDE_DDI_HOTPLUG_ICP(port) (1 << ((port) + 16)) -#define SDE_DDI_MASK_ICP (SDE_DDIB_HOTPLUG_ICP | \ - SDE_DDIA_HOTPLUG_ICP) -#define SDE_TC_MASK_ICP (SDE_TC4_HOTPLUG_ICP | \ - SDE_TC3_HOTPLUG_ICP | \ - SDE_TC2_HOTPLUG_ICP | \ - SDE_TC1_HOTPLUG_ICP) +#define SDE_DDI_MASK_ICP (SDE_DDI_HOTPLUG_ICP(PORT_B) | \ + SDE_DDI_HOTPLUG_ICP(PORT_A)) +#define SDE_TC_MASK_ICP (SDE_TC_HOTPLUG_ICP(PORT_TC4) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC3) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC2) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC1)) +#define SDE_DDI_MASK_TGP (SDE_DDI_HOTPLUG_ICP(PORT_C) | \ + SDE_DDI_HOTPLUG_ICP(PORT_B) | \ + SDE_DDI_HOTPLUG_ICP(PORT_A)) +#define SDE_TC_MASK_TGP (SDE_TC_HOTPLUG_ICP(PORT_TC6) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC5) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC4) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC3) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC2) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC1)) #define SDEISR _MMIO(0xc4000) #define SDEIMR _MMIO(0xc4004) @@ -7909,23 +8100,20 @@ enum { * SHOTPLUG_CTL_DDI and SHOTPLUG_CTL_TC. */ -#define SHOTPLUG_CTL_DDI _MMIO(0xc4030) -#define ICP_DDIB_HPD_ENABLE (1 << 7) -#define ICP_DDIB_HPD_STATUS_MASK (3 << 4) -#define ICP_DDIB_HPD_NO_DETECT (0 << 4) -#define ICP_DDIB_HPD_SHORT_DETECT (1 << 4) -#define ICP_DDIB_HPD_LONG_DETECT (2 << 4) -#define ICP_DDIB_HPD_SHORT_LONG_DETECT (3 << 4) -#define ICP_DDIA_HPD_ENABLE (1 << 3) -#define ICP_DDIA_HPD_OP_DRIVE_1 (1 << 2) -#define ICP_DDIA_HPD_STATUS_MASK (3 << 0) -#define ICP_DDIA_HPD_NO_DETECT (0 << 0) -#define ICP_DDIA_HPD_SHORT_DETECT (1 << 0) -#define ICP_DDIA_HPD_LONG_DETECT (2 << 0) -#define ICP_DDIA_HPD_SHORT_LONG_DETECT (3 << 0) +#define SHOTPLUG_CTL_DDI _MMIO(0xc4030) +#define SHOTPLUG_CTL_DDI_HPD_ENABLE(port) (0x8 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_STATUS_MASK(port) (0x3 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_NO_DETECT(port) (0x0 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_SHORT_DETECT(port) (0x1 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(port) (0x2 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_SHORT_LONG_DETECT(port) (0x3 << (4 * (port))) #define SHOTPLUG_CTL_TC _MMIO(0xc4034) #define ICP_TC_HPD_ENABLE(tc_port) (8 << (tc_port) * 4) + +#define SHPD_FILTER_CNT _MMIO(0xc4038) +#define SHPD_FILTER_CNT_500_ADJ 0x001D9 + /* Icelake DSC Rate Control Range Parameter Registers */ #define DSCA_RC_RANGE_PARAMETERS_0 _MMIO(0x6B240) #define DSCA_RC_RANGE_PARAMETERS_0_UDW _MMIO(0x6B240 + 4) @@ -8033,6 +8221,19 @@ enum { #define ICP_TC_HPD_LONG_DETECT(tc_port) (2 << (tc_port) * 4) #define ICP_TC_HPD_SHORT_DETECT(tc_port) (1 << (tc_port) * 4) +#define ICP_DDI_HPD_ENABLE_MASK (SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_B) | \ + SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_A)) +#define ICP_TC_HPD_ENABLE_MASK (ICP_TC_HPD_ENABLE(PORT_TC4) | \ + ICP_TC_HPD_ENABLE(PORT_TC3) | \ + ICP_TC_HPD_ENABLE(PORT_TC2) | \ + ICP_TC_HPD_ENABLE(PORT_TC1)) +#define TGP_DDI_HPD_ENABLE_MASK 
(SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_C) | \ + SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_B) | \ + SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_A)) +#define TGP_TC_HPD_ENABLE_MASK (ICP_TC_HPD_ENABLE(PORT_TC6) | \ + ICP_TC_HPD_ENABLE(PORT_TC5) | \ + ICP_TC_HPD_ENABLE_MASK) + #define _PCH_DPLL_A 0xc6014 #define _PCH_DPLL_B 0xc6018 #define PCH_DPLL(pll) _MMIO((pll) == 0 ? _PCH_DPLL_A : _PCH_DPLL_B) @@ -8263,10 +8464,8 @@ enum { #define TRANS_STATE_MASK (1 << 30) #define TRANS_STATE_DISABLE (0 << 30) #define TRANS_STATE_ENABLE (1 << 30) -#define TRANS_FSYNC_DELAY_HB1 (0 << 27) -#define TRANS_FSYNC_DELAY_HB2 (1 << 27) -#define TRANS_FSYNC_DELAY_HB3 (2 << 27) -#define TRANS_FSYNC_DELAY_HB4 (3 << 27) +#define TRANS_FRAME_START_DELAY_MASK (3 << 27) /* ibx */ +#define TRANS_FRAME_START_DELAY(x) ((x) << 27) /* ibx: 0-3 */ #define TRANS_INTERLACE_MASK (7 << 21) #define TRANS_PROGRESSIVE (0 << 21) #define TRANS_INTERLACED (3 << 21) @@ -8287,6 +8486,7 @@ enum { #define TRANS_CHICKEN2_TIMING_OVERRIDE (1 << 31) #define TRANS_CHICKEN2_FDI_POLARITY_REVERSED (1 << 29) #define TRANS_CHICKEN2_FRAME_START_DELAY_MASK (3 << 27) +#define TRANS_CHICKEN2_FRAME_START_DELAY(x) ((x) << 27) /* 0-3 */ #define TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER (1 << 26) #define TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH (1 << 25) @@ -8578,6 +8778,10 @@ enum { #define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0) #define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1) +#define POWERGATE_ENABLE _MMIO(0xa210) +#define VDN_HCP_POWERGATE_ENABLE(n) BIT(((n) * 2) + 3) +#define VDN_MFX_POWERGATE_ENABLE(n) BIT(((n) * 2) + 4) + #define GTFIFODBG _MMIO(0x120000) #define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20) #define GT_FIFO_FREE_ENTRIES_CHV (0x7f << 13) @@ -8815,6 +9019,7 @@ enum { #define GEN9_SAGV_DISABLE 0x0 #define GEN9_SAGV_IS_DISABLED 0x1 #define GEN9_SAGV_ENABLE 0x3 +#define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US 0x23 #define GEN6_PCODE_DATA _MMIO(0x138128) #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 @@ -9078,6 +9283,10 @@ enum { #define HSW_AUD_CHICKENBIT _MMIO(0x65f10) #define SKL_AUD_CODEC_WAKE_SIGNAL (1 << 15) +#define AUD_FREQ_CNTRL _MMIO(0x65900) +#define AUD_PIN_BUF_CTL _MMIO(0x48414) +#define AUD_PIN_BUF_ENABLE REG_BIT(31) + /* * HSW - ICL power wells * @@ -9119,7 +9328,8 @@ enum { #define GLK_PW_CTL_IDX_DDI_A 1 #define SKL_PW_CTL_IDX_MISC_IO 0 -/* ICL - power wells */ +/* ICL/TGL - power wells */ +#define TGL_PW_CTL_IDX_PW_5 4 #define ICL_PW_CTL_IDX_PW_4 3 #define ICL_PW_CTL_IDX_PW_3 2 #define ICL_PW_CTL_IDX_PW_2 1 @@ -9128,13 +9338,25 @@ enum { #define ICL_PWR_WELL_CTL_AUX1 _MMIO(0x45440) #define ICL_PWR_WELL_CTL_AUX2 _MMIO(0x45444) #define ICL_PWR_WELL_CTL_AUX4 _MMIO(0x4544C) +#define TGL_PW_CTL_IDX_AUX_TBT6 14 +#define TGL_PW_CTL_IDX_AUX_TBT5 13 +#define TGL_PW_CTL_IDX_AUX_TBT4 12 #define ICL_PW_CTL_IDX_AUX_TBT4 11 +#define TGL_PW_CTL_IDX_AUX_TBT3 11 #define ICL_PW_CTL_IDX_AUX_TBT3 10 +#define TGL_PW_CTL_IDX_AUX_TBT2 10 #define ICL_PW_CTL_IDX_AUX_TBT2 9 +#define TGL_PW_CTL_IDX_AUX_TBT1 9 #define ICL_PW_CTL_IDX_AUX_TBT1 8 +#define TGL_PW_CTL_IDX_AUX_TC6 8 +#define TGL_PW_CTL_IDX_AUX_TC5 7 +#define TGL_PW_CTL_IDX_AUX_TC4 6 #define ICL_PW_CTL_IDX_AUX_F 5 +#define TGL_PW_CTL_IDX_AUX_TC3 5 #define ICL_PW_CTL_IDX_AUX_E 4 +#define TGL_PW_CTL_IDX_AUX_TC2 4 #define ICL_PW_CTL_IDX_AUX_D 3 +#define TGL_PW_CTL_IDX_AUX_TC1 3 #define ICL_PW_CTL_IDX_AUX_C 2 #define ICL_PW_CTL_IDX_AUX_B 1 #define ICL_PW_CTL_IDX_AUX_A 0 @@ -9142,9 +9364,15 @@ enum { #define ICL_PWR_WELL_CTL_DDI1 _MMIO(0x45450) #define ICL_PWR_WELL_CTL_DDI2 
_MMIO(0x45454) #define ICL_PWR_WELL_CTL_DDI4 _MMIO(0x4545C) +#define TGL_PW_CTL_IDX_DDI_TC6 8 +#define TGL_PW_CTL_IDX_DDI_TC5 7 +#define TGL_PW_CTL_IDX_DDI_TC4 6 #define ICL_PW_CTL_IDX_DDI_F 5 +#define TGL_PW_CTL_IDX_DDI_TC3 5 #define ICL_PW_CTL_IDX_DDI_E 4 +#define TGL_PW_CTL_IDX_DDI_TC2 4 #define ICL_PW_CTL_IDX_DDI_D 3 +#define TGL_PW_CTL_IDX_DDI_TC1 3 #define ICL_PW_CTL_IDX_DDI_C 2 #define ICL_PW_CTL_IDX_DDI_B 1 #define ICL_PW_CTL_IDX_DDI_A 0 @@ -9219,12 +9447,20 @@ enum skl_power_gate { /* HDCP Repeater Registers */ #define HDCP_REP_CTL _MMIO(0x66d00) +#define HDCP_TRANSA_REP_PRESENT BIT(31) +#define HDCP_TRANSB_REP_PRESENT BIT(30) +#define HDCP_TRANSC_REP_PRESENT BIT(29) +#define HDCP_TRANSD_REP_PRESENT BIT(28) #define HDCP_DDIB_REP_PRESENT BIT(30) #define HDCP_DDIA_REP_PRESENT BIT(29) #define HDCP_DDIC_REP_PRESENT BIT(28) #define HDCP_DDID_REP_PRESENT BIT(27) #define HDCP_DDIF_REP_PRESENT BIT(26) #define HDCP_DDIE_REP_PRESENT BIT(25) +#define HDCP_TRANSA_SHA1_M0 (1 << 20) +#define HDCP_TRANSB_SHA1_M0 (2 << 20) +#define HDCP_TRANSC_SHA1_M0 (3 << 20) +#define HDCP_TRANSD_SHA1_M0 (4 << 20) #define HDCP_DDIB_SHA1_M0 (1 << 20) #define HDCP_DDIA_SHA1_M0 (2 << 20) #define HDCP_DDIC_SHA1_M0 (3 << 20) @@ -9264,15 +9500,92 @@ enum skl_power_gate { _PORTE_HDCP_AUTHENC, \ _PORTF_HDCP_AUTHENC) + (x)) #define PORT_HDCP_CONF(port) _PORT_HDCP_AUTHENC(port, 0x0) +#define _TRANSA_HDCP_CONF 0x66400 +#define _TRANSB_HDCP_CONF 0x66500 +#define TRANS_HDCP_CONF(trans) _MMIO_TRANS(trans, _TRANSA_HDCP_CONF, \ + _TRANSB_HDCP_CONF) +#define HDCP_CONF(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_CONF(trans) : \ + PORT_HDCP_CONF(port)) + #define HDCP_CONF_CAPTURE_AN BIT(0) #define HDCP_CONF_AUTH_AND_ENC (BIT(1) | BIT(0)) #define PORT_HDCP_ANINIT(port) _PORT_HDCP_AUTHENC(port, 0x4) +#define _TRANSA_HDCP_ANINIT 0x66404 +#define _TRANSB_HDCP_ANINIT 0x66504 +#define TRANS_HDCP_ANINIT(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_ANINIT, \ + _TRANSB_HDCP_ANINIT) +#define HDCP_ANINIT(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_ANINIT(trans) : \ + PORT_HDCP_ANINIT(port)) + #define PORT_HDCP_ANLO(port) _PORT_HDCP_AUTHENC(port, 0x8) +#define _TRANSA_HDCP_ANLO 0x66408 +#define _TRANSB_HDCP_ANLO 0x66508 +#define TRANS_HDCP_ANLO(trans) _MMIO_TRANS(trans, _TRANSA_HDCP_ANLO, \ + _TRANSB_HDCP_ANLO) +#define HDCP_ANLO(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_ANLO(trans) : \ + PORT_HDCP_ANLO(port)) + #define PORT_HDCP_ANHI(port) _PORT_HDCP_AUTHENC(port, 0xC) +#define _TRANSA_HDCP_ANHI 0x6640C +#define _TRANSB_HDCP_ANHI 0x6650C +#define TRANS_HDCP_ANHI(trans) _MMIO_TRANS(trans, _TRANSA_HDCP_ANHI, \ + _TRANSB_HDCP_ANHI) +#define HDCP_ANHI(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_ANHI(trans) : \ + PORT_HDCP_ANHI(port)) + #define PORT_HDCP_BKSVLO(port) _PORT_HDCP_AUTHENC(port, 0x10) +#define _TRANSA_HDCP_BKSVLO 0x66410 +#define _TRANSB_HDCP_BKSVLO 0x66510 +#define TRANS_HDCP_BKSVLO(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_BKSVLO, \ + _TRANSB_HDCP_BKSVLO) +#define HDCP_BKSVLO(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_BKSVLO(trans) : \ + PORT_HDCP_BKSVLO(port)) + #define PORT_HDCP_BKSVHI(port) _PORT_HDCP_AUTHENC(port, 0x14) +#define _TRANSA_HDCP_BKSVHI 0x66414 +#define _TRANSB_HDCP_BKSVHI 0x66514 +#define TRANS_HDCP_BKSVHI(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_BKSVHI, \ + _TRANSB_HDCP_BKSVHI) +#define HDCP_BKSVHI(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? 
\ + TRANS_HDCP_BKSVHI(trans) : \ + PORT_HDCP_BKSVHI(port)) + #define PORT_HDCP_RPRIME(port) _PORT_HDCP_AUTHENC(port, 0x18) +#define _TRANSA_HDCP_RPRIME 0x66418 +#define _TRANSB_HDCP_RPRIME 0x66518 +#define TRANS_HDCP_RPRIME(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_RPRIME, \ + _TRANSB_HDCP_RPRIME) +#define HDCP_RPRIME(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_RPRIME(trans) : \ + PORT_HDCP_RPRIME(port)) + #define PORT_HDCP_STATUS(port) _PORT_HDCP_AUTHENC(port, 0x1C) +#define _TRANSA_HDCP_STATUS 0x6641C +#define _TRANSB_HDCP_STATUS 0x6651C +#define TRANS_HDCP_STATUS(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_STATUS, \ + _TRANSB_HDCP_STATUS) +#define HDCP_STATUS(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_STATUS(trans) : \ + PORT_HDCP_STATUS(port)) + #define HDCP_STATUS_STREAM_A_ENC BIT(31) #define HDCP_STATUS_STREAM_B_ENC BIT(30) #define HDCP_STATUS_STREAM_C_ENC BIT(29) @@ -9299,28 +9612,50 @@ enum skl_power_gate { _PORTD_HDCP2_BASE, \ _PORTE_HDCP2_BASE, \ _PORTF_HDCP2_BASE) + (x)) - -#define HDCP2_AUTH_DDI(port) _PORT_HDCP2_BASE(port, 0x98) +#define PORT_HDCP2_AUTH(port) _PORT_HDCP2_BASE(port, 0x98) +#define _TRANSA_HDCP2_AUTH 0x66498 +#define _TRANSB_HDCP2_AUTH 0x66598 +#define TRANS_HDCP2_AUTH(trans) _MMIO_TRANS(trans, _TRANSA_HDCP2_AUTH, \ + _TRANSB_HDCP2_AUTH) #define AUTH_LINK_AUTHENTICATED BIT(31) #define AUTH_LINK_TYPE BIT(30) #define AUTH_FORCE_CLR_INPUTCTR BIT(19) #define AUTH_CLR_KEYS BIT(18) - -#define HDCP2_CTL_DDI(port) _PORT_HDCP2_BASE(port, 0xB0) +#define HDCP2_AUTH(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP2_AUTH(trans) : \ + PORT_HDCP2_AUTH(port)) + +#define PORT_HDCP2_CTL(port) _PORT_HDCP2_BASE(port, 0xB0) +#define _TRANSA_HDCP2_CTL 0x664B0 +#define _TRANSB_HDCP2_CTL 0x665B0 +#define TRANS_HDCP2_CTL(trans) _MMIO_TRANS(trans, _TRANSA_HDCP2_CTL, \ + _TRANSB_HDCP2_CTL) #define CTL_LINK_ENCRYPTION_REQ BIT(31) - -#define HDCP2_STATUS_DDI(port) _PORT_HDCP2_BASE(port, 0xB4) -#define STREAM_ENCRYPTION_STATUS_A BIT(31) -#define STREAM_ENCRYPTION_STATUS_B BIT(30) -#define STREAM_ENCRYPTION_STATUS_C BIT(29) +#define HDCP2_CTL(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP2_CTL(trans) : \ + PORT_HDCP2_CTL(port)) + +#define PORT_HDCP2_STATUS(port) _PORT_HDCP2_BASE(port, 0xB4) +#define _TRANSA_HDCP2_STATUS 0x664B4 +#define _TRANSB_HDCP2_STATUS 0x665B4 +#define TRANS_HDCP2_STATUS(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP2_STATUS, \ + _TRANSB_HDCP2_STATUS) #define LINK_TYPE_STATUS BIT(22) #define LINK_AUTH_STATUS BIT(21) #define LINK_ENCRYPTION_STATUS BIT(20) +#define HDCP2_STATUS(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? 
\ + TRANS_HDCP2_STATUS(trans) : \ + PORT_HDCP2_STATUS(port)) /* Per-pipe DDI Function Control */ #define _TRANS_DDI_FUNC_CTL_A 0x60400 #define _TRANS_DDI_FUNC_CTL_B 0x61400 #define _TRANS_DDI_FUNC_CTL_C 0x62400 +#define _TRANS_DDI_FUNC_CTL_D 0x63400 #define _TRANS_DDI_FUNC_CTL_EDP 0x6F400 #define _TRANS_DDI_FUNC_CTL_DSI0 0x6b400 #define _TRANS_DDI_FUNC_CTL_DSI1 0x6bc00 @@ -9328,10 +9663,14 @@ enum skl_power_gate { #define TRANS_DDI_FUNC_ENABLE (1 << 31) /* Those bits are ignored by pipe EDP since it can only connect to DDI A */ -#define TRANS_DDI_PORT_MASK (7 << 28) #define TRANS_DDI_PORT_SHIFT 28 -#define TRANS_DDI_SELECT_PORT(x) ((x) << 28) -#define TRANS_DDI_PORT_NONE (0 << 28) +#define TGL_TRANS_DDI_PORT_SHIFT 27 +#define TRANS_DDI_PORT_MASK (7 << TRANS_DDI_PORT_SHIFT) +#define TGL_TRANS_DDI_PORT_MASK (0xf << TGL_TRANS_DDI_PORT_SHIFT) +#define TRANS_DDI_SELECT_PORT(x) ((x) << TRANS_DDI_PORT_SHIFT) +#define TGL_TRANS_DDI_SELECT_PORT(x) (((x) + 1) << TGL_TRANS_DDI_PORT_SHIFT) +#define TRANS_DDI_FUNC_CTL_VAL_TO_PORT(val) (((val) & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT) +#define TGL_TRANS_DDI_FUNC_CTL_VAL_TO_PORT(val) ((((val) & TGL_TRANS_DDI_PORT_MASK) >> TGL_TRANS_DDI_PORT_SHIFT) - 1) #define TRANS_DDI_MODE_SELECT_MASK (7 << 24) #define TRANS_DDI_MODE_SELECT_HDMI (0 << 24) #define TRANS_DDI_MODE_SELECT_DVI (1 << 24) @@ -9350,6 +9689,10 @@ enum skl_power_gate { #define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12) #define TRANS_DDI_EDP_INPUT_B_ONOFF (5 << 12) #define TRANS_DDI_EDP_INPUT_C_ONOFF (6 << 12) +#define TRANS_DDI_EDP_INPUT_D_ONOFF (7 << 12) +#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(11, 10) +#define TRANS_DDI_MST_TRANSPORT_SELECT(trans) \ + REG_FIELD_PREP(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, trans) #define TRANS_DDI_HDCP_SIGNALLING (1 << 9) #define TRANS_DDI_DP_VC_PAYLOAD_ALLOC (1 << 8) #define TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1 << 7) @@ -9377,7 +9720,9 @@ enum skl_power_gate { /* DisplayPort Transport Control */ #define _DP_TP_CTL_A 0x64040 #define _DP_TP_CTL_B 0x64140 +#define _TGL_DP_TP_CTL_A 0x60540 #define DP_TP_CTL(port) _MMIO_PORT(port, _DP_TP_CTL_A, _DP_TP_CTL_B) +#define TGL_DP_TP_CTL(tran) _MMIO_TRANS2((tran), _TGL_DP_TP_CTL_A) #define DP_TP_CTL_ENABLE (1 << 31) #define DP_TP_CTL_FEC_ENABLE (1 << 30) #define DP_TP_CTL_MODE_SST (0 << 27) @@ -9397,7 +9742,9 @@ enum skl_power_gate { /* DisplayPort Transport Status */ #define _DP_TP_STATUS_A 0x64044 #define _DP_TP_STATUS_B 0x64144 +#define _TGL_DP_TP_STATUS_A 0x60544 #define DP_TP_STATUS(port) _MMIO_PORT(port, _DP_TP_STATUS_A, _DP_TP_STATUS_B) +#define TGL_DP_TP_STATUS(tran) _MMIO_TRANS2((tran), _TGL_DP_TP_STATUS_A) #define DP_TP_STATUS_FEC_ENABLE_LIVE (1 << 28) #define DP_TP_STATUS_IDLE_DONE (1 << 25) #define DP_TP_STATUS_ACT_SENT (1 << 24) @@ -9541,6 +9888,9 @@ enum skl_power_gate { /* For each transcoder, we need to select the corresponding port clock */ #define TRANS_CLK_SEL_DISABLED (0x0 << 29) #define TRANS_CLK_SEL_PORT(x) (((x) + 1) << 29) +#define TGL_TRANS_CLK_SEL_DISABLED (0x0 << 28) +#define TGL_TRANS_CLK_SEL_PORT(x) (((x) + 1) << 28) + #define CDCLK_FREQ _MMIO(0x46200) @@ -9549,17 +9899,7 @@ enum skl_power_gate { #define _TRANSC_MSA_MISC 0x62410 #define _TRANS_EDP_MSA_MISC 0x6f410 #define TRANS_MSA_MISC(tran) _MMIO_TRANS2(tran, _TRANSA_MSA_MISC) - -#define TRANS_MSA_SYNC_CLK (1 << 0) -#define TRANS_MSA_SAMPLING_444 (2 << 1) -#define TRANS_MSA_CLRSP_YCBCR (2 << 3) -#define TRANS_MSA_6_BPC (0 << 5) -#define TRANS_MSA_8_BPC (1 << 5) -#define TRANS_MSA_10_BPC (2 << 5) -#define TRANS_MSA_12_BPC (3 
<< 5) -#define TRANS_MSA_16_BPC (4 << 5) -#define TRANS_MSA_CEA_RANGE (1 << 3) -#define TRANS_MSA_USE_VSC_SDP (1 << 14) +/* See DP_MSA_MISC_* for the bit definitions */ /* LCPLL Control */ #define LCPLL_CTL _MMIO(0x130040) @@ -9600,7 +9940,10 @@ enum skl_power_gate { #define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe) << 20) #define CDCLK_DIVMUX_CD_OVERRIDE (1 << 19) #define BXT_CDCLK_CD2X_PIPE_NONE BXT_CDCLK_CD2X_PIPE(3) +#define ICL_CDCLK_CD2X_PIPE(pipe) (_PICK(pipe, 0, 2, 6) << 19) #define ICL_CDCLK_CD2X_PIPE_NONE (7 << 19) +#define TGL_CDCLK_CD2X_PIPE(pipe) BXT_CDCLK_CD2X_PIPE(pipe) +#define TGL_CDCLK_CD2X_PIPE_NONE ICL_CDCLK_CD2X_PIPE_NONE #define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1 << 16) #define CDCLK_FREQ_DECIMAL_MASK (0x7ff) @@ -9672,17 +10015,22 @@ enum skl_power_gate { * CNL Clocks */ #define DPCLKA_CFGCR0 _MMIO(0x6C200) -#define DPCLKA_CFGCR0_ICL _MMIO(0x164280) #define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port) == PORT_F ? 23 : \ (port) + 10)) -#define ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port) + 10)) -#define ICL_DPCLKA_CFGCR0_TC_CLK_OFF(tc_port) (1 << ((tc_port) == PORT_TC4 ? \ - 21 : (tc_port) + 12)) #define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port) == PORT_F ? 21 : \ (port) * 2) #define DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port) (3 << DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)) #define DPCLKA_CFGCR0_DDI_CLK_SEL(pll, port) ((pll) << DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)) +#define ICL_DPCLKA_CFGCR0 _MMIO(0x164280) +#define ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy) (1 << _PICK(phy, 10, 11, 24)) +#define ICL_DPCLKA_CFGCR0_TC_CLK_OFF(tc_port) (1 << ((tc_port) < PORT_TC4 ? \ + (tc_port) + 12 : \ + (tc_port) - PORT_TC4 + 21)) +#define ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy) ((phy) * 2) +#define ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy) (3 << ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) +#define ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll, phy) ((pll) << ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) + /* CNL PLL */ #define DPLL0_ENABLE 0x46010 #define DPLL1_ENABLE 0x46014 @@ -9887,6 +10235,7 @@ enum skl_power_gate { #define DPLL_CFGCR1_PDIV_7 (8 << 2) #define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) #define DPLL_CFGCR1_CENTRAL_FREQ_8400 (3 << 0) +#define TGL_DPLL_CFGCR1_CFSELOVRD_NORMAL_XTAL (0 << 0) #define CNL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR1, _CNL_DPLL1_CFGCR1) #define _ICL_DPLL0_CFGCR0 0x164000 @@ -9899,6 +10248,182 @@ enum skl_power_gate { #define ICL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR1, \ _ICL_DPLL1_CFGCR1) +#define _TGL_DPLL0_CFGCR0 0x164284 +#define _TGL_DPLL1_CFGCR0 0x16428C +/* TODO: add DPLL4 */ +#define _TGL_TBTPLL_CFGCR0 0x16429C +#define TGL_DPLL_CFGCR0(pll) _MMIO_PLL3(pll, _TGL_DPLL0_CFGCR0, \ + _TGL_DPLL1_CFGCR0, \ + _TGL_TBTPLL_CFGCR0) + +#define _TGL_DPLL0_CFGCR1 0x164288 +#define _TGL_DPLL1_CFGCR1 0x164290 +/* TODO: add DPLL4 */ +#define _TGL_TBTPLL_CFGCR1 0x1642A0 +#define TGL_DPLL_CFGCR1(pll) _MMIO_PLL3(pll, _TGL_DPLL0_CFGCR1, \ + _TGL_DPLL1_CFGCR1, \ + _TGL_TBTPLL_CFGCR1) + +#define _DKL_PHY1_BASE 0x168000 +#define _DKL_PHY2_BASE 0x169000 +#define _DKL_PHY3_BASE 0x16A000 +#define _DKL_PHY4_BASE 0x16B000 +#define _DKL_PHY5_BASE 0x16C000 +#define _DKL_PHY6_BASE 0x16D000 + +/* DEKEL PHY MMIO Address = Phy base + (internal address & ~index_mask) */ +#define _DKL_PLL_DIV0 0x200 +#define DKL_PLL_DIV0_INTEG_COEFF(x) ((x) << 16) +#define DKL_PLL_DIV0_INTEG_COEFF_MASK (0x1F << 16) +#define DKL_PLL_DIV0_PROP_COEFF(x) ((x) << 12) +#define DKL_PLL_DIV0_PROP_COEFF_MASK (0xF << 12) +#define DKL_PLL_DIV0_FBPREDIV_SHIFT (8) +#define DKL_PLL_DIV0_FBPREDIV(x) ((x) << 
DKL_PLL_DIV0_FBPREDIV_SHIFT) +#define DKL_PLL_DIV0_FBPREDIV_MASK (0xF << DKL_PLL_DIV0_FBPREDIV_SHIFT) +#define DKL_PLL_DIV0_FBDIV_INT(x) ((x) << 0) +#define DKL_PLL_DIV0_FBDIV_INT_MASK (0xFF << 0) +#define DKL_PLL_DIV0(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_DIV0) + +#define _DKL_PLL_DIV1 0x204 +#define DKL_PLL_DIV1_IREF_TRIM(x) ((x) << 16) +#define DKL_PLL_DIV1_IREF_TRIM_MASK (0x1F << 16) +#define DKL_PLL_DIV1_TDC_TARGET_CNT(x) ((x) << 0) +#define DKL_PLL_DIV1_TDC_TARGET_CNT_MASK (0xFF << 0) +#define DKL_PLL_DIV1(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_DIV1) + +#define _DKL_PLL_SSC 0x210 +#define DKL_PLL_SSC_IREF_NDIV_RATIO(x) ((x) << 29) +#define DKL_PLL_SSC_IREF_NDIV_RATIO_MASK (0x7 << 29) +#define DKL_PLL_SSC_STEP_LEN(x) ((x) << 16) +#define DKL_PLL_SSC_STEP_LEN_MASK (0xFF << 16) +#define DKL_PLL_SSC_STEP_NUM(x) ((x) << 11) +#define DKL_PLL_SSC_STEP_NUM_MASK (0x7 << 11) +#define DKL_PLL_SSC_EN (1 << 9) +#define DKL_PLL_SSC(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_SSC) + +#define _DKL_PLL_BIAS 0x214 +#define DKL_PLL_BIAS_FRAC_EN_H (1 << 30) +#define DKL_PLL_BIAS_FBDIV_SHIFT (8) +#define DKL_PLL_BIAS_FBDIV_FRAC(x) ((x) << DKL_PLL_BIAS_FBDIV_SHIFT) +#define DKL_PLL_BIAS_FBDIV_FRAC_MASK (0x3FFFFF << DKL_PLL_BIAS_FBDIV_SHIFT) +#define DKL_PLL_BIAS(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_BIAS) + +#define _DKL_PLL_TDC_COLDST_BIAS 0x218 +#define DKL_PLL_TDC_SSC_STEP_SIZE(x) ((x) << 8) +#define DKL_PLL_TDC_SSC_STEP_SIZE_MASK (0xFF << 8) +#define DKL_PLL_TDC_FEED_FWD_GAIN(x) ((x) << 0) +#define DKL_PLL_TDC_FEED_FWD_GAIN_MASK (0xFF << 0) +#define DKL_PLL_TDC_COLDST_BIAS(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_TDC_COLDST_BIAS) + +#define _DKL_REFCLKIN_CTL 0x12C +/* Bits are the same as MG_REFCLKIN_CTL */ +#define DKL_REFCLKIN_CTL(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_REFCLKIN_CTL) + +#define _DKL_CLKTOP2_HSCLKCTL 0xD4 +/* Bits are the same as MG_CLKTOP2_HSCLKCTL */ +#define DKL_CLKTOP2_HSCLKCTL(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_CLKTOP2_HSCLKCTL) + +#define _DKL_CLKTOP2_CORECLKCTL1 0xD8 +/* Bits are the same as MG_CLKTOP2_CORECLKCTL1 */ +#define DKL_CLKTOP2_CORECLKCTL1(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_CLKTOP2_CORECLKCTL1) + +#define _DKL_TX_DPCNTL0 0x2C0 +#define DKL_TX_PRESHOOT_COEFF(x) ((x) << 13) +#define DKL_TX_PRESHOOT_COEFF_MASK (0x1f << 13) +#define DKL_TX_DE_EMPHASIS_COEFF(x) ((x) << 8) +#define DKL_TX_DE_EMPAHSIS_COEFF_MASK (0x1f << 8) +#define DKL_TX_VSWING_CONTROL(x) ((x) << 0) +#define DKL_TX_VSWING_CONTROL_MASK (0x7 << 0) +#define DKL_TX_DPCNTL0(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DPCNTL0) + +#define _DKL_TX_DPCNTL1 0x2C4 +/* Bits are the same as DKL_TX_DPCNTRL0 */ +#define DKL_TX_DPCNTL1(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DPCNTL1) + +#define _DKL_TX_DPCNTL2 0x2C8 +#define DKL_TX_DP20BITMODE (1 << 2) +#define DKL_TX_DPCNTL2(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DPCNTL2) + +#define _DKL_TX_FW_CALIB 0x2F8 +#define DKL_TX_CFG_DISABLE_WAIT_INIT (1 << 7) +#define DKL_TX_FW_CALIB(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_FW_CALIB) + +#define _DKL_TX_PMD_LANE_SUS 0xD00 
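All of the DKL_* registers above follow one addressing pattern: the per-TC-port PHY apertures sit 0x1000 apart starting at _DKL_PHY1_BASE, so the two-base _PORT(tc_port, _DKL_PHY1_BASE, _DKL_PHY2_BASE) arithmetic extends to every PHY, and the PHY-internal offset is added on top, with the upper address bits supplied through the HIP index registers described a little further down in this hunk. A small sketch of that arithmetic, for illustration only; the 4KB aperture size and the masking of the internal address are assumptions taken from the comments in this hunk rather than from driver code.

#include <stdio.h>

/*
 * Illustration of the Dekel PHY address arithmetic used by the DKL_*
 * macros above.  Not driver code.
 */
#define DKL_PHY1_BASE   0x168000
#define DKL_PHY2_BASE   0x169000
#define DKL_APERTURE    0x1000

static unsigned int dkl_phy_base(int tc_port)
{
        /* same result as _PORT(tc_port, _DKL_PHY1_BASE, _DKL_PHY2_BASE) */
        return DKL_PHY1_BASE + tc_port * (DKL_PHY2_BASE - DKL_PHY1_BASE);
}

static unsigned int dkl_reg(int tc_port, unsigned int internal_addr)
{
        /* MMIO address = PHY base + (internal address within the 4K window) */
        return dkl_phy_base(tc_port) + (internal_addr & (DKL_APERTURE - 1));
}

int main(void)
{
        /* _DKL_PLL_DIV0 (0x200) on the fourth TC port -> 0x16b200 */
        printf("DKL_PLL_DIV0(TC4) = 0x%06x\n", dkl_reg(3, 0x200));
        /* _DKL_TX_DW17 (0xdc4) on the first TC port -> 0x168dc4 */
        printf("DKL_TX_DW17(TC1)  = 0x%06x\n", dkl_reg(0, 0xdc4));
        return 0;
}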
+#define DKL_TX_PMD_LANE_SUS(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_PMD_LANE_SUS) + +#define _DKL_TX_DW17 0xDC4 +#define DKL_TX_DW17(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DW17) + +#define _DKL_TX_DW18 0xDC8 +#define DKL_TX_DW18(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DW18) + +#define _DKL_DP_MODE 0xA0 +#define DKL_DP_MODE(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_DP_MODE) + +#define _DKL_CMN_UC_DW27 0x36C +#define DKL_CMN_UC_DW27_UC_HEALTH (0x1 << 15) +#define DKL_CMN_UC_DW_27(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_CMN_UC_DW27) + +/* + * Each Dekel PHY is addressed through a 4KB aperture. Each PHY has more than + * 4KB of register space, so a separate index is programmed in HIP_INDEX_REG0 + * or HIP_INDEX_REG1, based on the port number, to set the upper 2 address + * bits that point the 4KB window into the full PHY register space. + */ +#define _HIP_INDEX_REG0 0x1010A0 +#define _HIP_INDEX_REG1 0x1010A4 +#define HIP_INDEX_REG(tc_port) _MMIO((tc_port) < 4 ? _HIP_INDEX_REG0 \ + : _HIP_INDEX_REG1) +#define _HIP_INDEX_SHIFT(tc_port) (8 * ((tc_port) % 4)) +#define HIP_INDEX_VAL(tc_port, val) ((val) << _HIP_INDEX_SHIFT(tc_port)) + /* BXT display engine PLL */ #define BXT_DE_PLL_CTL _MMIO(0x6d000) #define BXT_DE_PLL_RATIO(x) (x) /* {60,65,100} * 19.2MHz */ @@ -9913,6 +10438,8 @@ enum skl_power_gate { /* GEN9 DC */ #define DC_STATE_EN _MMIO(0x45504) #define DC_STATE_DISABLE 0 +#define DC_STATE_EN_DC3CO REG_BIT(30) +#define DC_STATE_DC3CO_STATUS REG_BIT(29) #define DC_STATE_EN_UPTO_DC5 (1 << 0) #define DC_STATE_EN_DC9 (1 << 3) #define DC_STATE_EN_UPTO_DC6 (2 << 0) @@ -10041,11 +10568,11 @@ enum skl_power_gate { #define _PIPE_A_CSC_COEFF_BV 0x49024 #define _PIPE_A_CSC_MODE 0x49028 -#define ICL_CSC_ENABLE (1 << 31) -#define ICL_OUTPUT_CSC_ENABLE (1 << 30) -#define CSC_BLACK_SCREEN_OFFSET (1 << 2) -#define CSC_POSITION_BEFORE_GAMMA (1 << 1) -#define CSC_MODE_YUV_TO_RGB (1 << 0) +#define ICL_CSC_ENABLE (1 << 31) /* icl+ */ +#define ICL_OUTPUT_CSC_ENABLE (1 << 30) /* icl+ */ +#define CSC_BLACK_SCREEN_OFFSET (1 << 2) /* ilk/snb */ +#define CSC_POSITION_BEFORE_GAMMA (1 << 1) /* pre-glk */ +#define CSC_MODE_YUV_TO_RGB (1 << 0) /* ilk/snb */ #define _PIPE_A_CSC_PREOFF_HI 0x49030 #define _PIPE_A_CSC_PREOFF_ME 0x49034 @@ -10161,6 +10688,9 @@ enum skl_power_gate { #define _PAL_PREC_GC_MAX_A 0x4A410 #define _PAL_PREC_GC_MAX_B 0x4AC10 #define _PAL_PREC_GC_MAX_C 0x4B410 +#define PREC_PAL_DATA_RED_MASK REG_GENMASK(29, 20) +#define PREC_PAL_DATA_GREEN_MASK REG_GENMASK(19, 10) +#define PREC_PAL_DATA_BLUE_MASK REG_GENMASK(9, 0) #define _PAL_PREC_EXT_GC_MAX_A 0x4A420 #define _PAL_PREC_EXT_GC_MAX_B 0x4AC20 #define _PAL_PREC_EXT_GC_MAX_C 0x4B420 @@ -10213,6 +10743,9 @@ enum skl_power_gate { #define CGM_PIPE_MODE_GAMMA (1 << 2) #define CGM_PIPE_MODE_CSC (1 << 1) #define CGM_PIPE_MODE_DEGAMMA (1 << 0) +#define CGM_PIPE_GAMMA_RED_MASK REG_GENMASK(9, 0) +#define CGM_PIPE_GAMMA_GREEN_MASK REG_GENMASK(25, 16) +#define CGM_PIPE_GAMMA_BLUE_MASK REG_GENMASK(9, 0) #define _CGM_PIPE_B_CSC_COEFF01 (VLV_DISPLAY_BASE + 0x69900) #define _CGM_PIPE_B_CSC_COEFF23 (VLV_DISPLAY_BASE + 0x69904) @@ -10262,6 +10795,57 @@ enum skl_power_gate { #define ICL_ESC_CLK_DIV_SHIFT 0 #define DSI_MAX_ESC_CLK 20000 /* in KHz */ +#define _DSI_CMD_FRMCTL_0 0x6b034 +#define _DSI_CMD_FRMCTL_1 0x6b834 +#define DSI_CMD_FRMCTL(port) _MMIO_PORT(port, 
\ + _DSI_CMD_FRMCTL_0,\ + _DSI_CMD_FRMCTL_1) +#define DSI_FRAME_UPDATE_REQUEST (1 << 31) +#define DSI_PERIODIC_FRAME_UPDATE_ENABLE (1 << 29) +#define DSI_NULL_PACKET_ENABLE (1 << 28) +#define DSI_FRAME_IN_PROGRESS (1 << 0) + +#define _DSI_INTR_MASK_REG_0 0x6b070 +#define _DSI_INTR_MASK_REG_1 0x6b870 +#define DSI_INTR_MASK_REG(port) _MMIO_PORT(port, \ + _DSI_INTR_MASK_REG_0,\ + _DSI_INTR_MASK_REG_1) + +#define _DSI_INTR_IDENT_REG_0 0x6b074 +#define _DSI_INTR_IDENT_REG_1 0x6b874 +#define DSI_INTR_IDENT_REG(port) _MMIO_PORT(port, \ + _DSI_INTR_IDENT_REG_0,\ + _DSI_INTR_IDENT_REG_1) +#define DSI_TE_EVENT (1 << 31) +#define DSI_RX_DATA_OR_BTA_TERMINATED (1 << 30) +#define DSI_TX_DATA (1 << 29) +#define DSI_ULPS_ENTRY_DONE (1 << 28) +#define DSI_NON_TE_TRIGGER_RECEIVED (1 << 27) +#define DSI_HOST_CHKSUM_ERROR (1 << 26) +#define DSI_HOST_MULTI_ECC_ERROR (1 << 25) +#define DSI_HOST_SINGL_ECC_ERROR (1 << 24) +#define DSI_HOST_CONTENTION_DETECTED (1 << 23) +#define DSI_HOST_FALSE_CONTROL_ERROR (1 << 22) +#define DSI_HOST_TIMEOUT_ERROR (1 << 21) +#define DSI_HOST_LOW_POWER_TX_SYNC_ERROR (1 << 20) +#define DSI_HOST_ESCAPE_MODE_ENTRY_ERROR (1 << 19) +#define DSI_FRAME_UPDATE_DONE (1 << 16) +#define DSI_PROTOCOL_VIOLATION_REPORTED (1 << 15) +#define DSI_INVALID_TX_LENGTH (1 << 13) +#define DSI_INVALID_VC (1 << 12) +#define DSI_INVALID_DATA_TYPE (1 << 11) +#define DSI_PERIPHERAL_CHKSUM_ERROR (1 << 10) +#define DSI_PERIPHERAL_MULTI_ECC_ERROR (1 << 9) +#define DSI_PERIPHERAL_SINGLE_ECC_ERROR (1 << 8) +#define DSI_PERIPHERAL_CONTENTION_DETECTED (1 << 7) +#define DSI_PERIPHERAL_FALSE_CTRL_ERROR (1 << 6) +#define DSI_PERIPHERAL_TIMEOUT_ERROR (1 << 5) +#define DSI_PERIPHERAL_LP_TX_SYNC_ERROR (1 << 4) +#define DSI_PERIPHERAL_ESC_MODE_ENTRY_CMD_ERR (1 << 3) +#define DSI_EOT_SYNC_ERROR (1 << 2) +#define DSI_SOT_SYNC_ERROR (1 << 1) +#define DSI_SOT_ERROR (1 << 0) + /* Gen4+ Timestamp and Pipe Frame time stamp registers */ #define GEN4_TIMESTAMP _MMIO(0x2358) #define ILK_TIMESTAMP_HI _MMIO(0x70070) @@ -10866,6 +11450,7 @@ enum skl_power_gate { #define CMD_MODE_TE_GATE (0x1 << 28) #define VIDEO_MODE_SYNC_EVENT (0x2 << 28) #define VIDEO_MODE_SYNC_PULSE (0x3 << 28) +#define TE_SOURCE_GPIO (1 << 27) #define LINK_READY (1 << 20) #define PIX_FMT_MASK (0x3 << 16) #define PIX_FMT_SHIFT 16 @@ -10896,6 +11481,7 @@ enum skl_power_gate { #define CALIBRATION_DISABLED (0x0 << 4) #define CALIBRATION_ENABLED_INITIAL_ONLY (0x2 << 4) #define CALIBRATION_ENABLED_INITIAL_PERIODIC (0x3 << 4) +#define BLANKING_PACKET_ENABLE (1 << 2) #define S3D_ORIENTATION_LANDSCAPE (1 << 1) #define EOTP_DISABLED (1 << 0) @@ -11117,19 +11703,26 @@ enum skl_power_gate { /* MOCS (Memory Object Control State) registers */ #define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ -#define GEN9_GFX_MOCS(i) _MMIO(0xc800 + (i) * 4) /* Graphics MOCS registers */ -#define GEN9_MFX0_MOCS(i) _MMIO(0xc900 + (i) * 4) /* Media 0 MOCS registers */ -#define GEN9_MFX1_MOCS(i) _MMIO(0xca00 + (i) * 4) /* Media 1 MOCS registers */ -#define GEN9_VEBOX_MOCS(i) _MMIO(0xcb00 + (i) * 4) /* Video MOCS registers */ -#define GEN9_BLT_MOCS(i) _MMIO(0xcc00 + (i) * 4) /* Blitter MOCS registers */ -/* Media decoder 2 MOCS registers */ -#define GEN11_MFX2_MOCS(i) _MMIO(0x10000 + (i) * 4) +#define __GEN9_RCS0_MOCS0 0xc800 +#define GEN9_GFX_MOCS(i) _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4) +#define __GEN9_VCS0_MOCS0 0xc900 +#define GEN9_MFX0_MOCS(i) _MMIO(__GEN9_VCS0_MOCS0 + (i) * 4) +#define __GEN9_VCS1_MOCS0 0xca00 +#define GEN9_MFX1_MOCS(i) _MMIO(__GEN9_VCS1_MOCS0 + (i) * 4) 
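The per-engine MOCS bases continue below with the VEBOX, blitter and second video engine instances. The addressing scheme these defines encode is simply base + 4 * index for each engine's copy of the table; the following is a minimal hypothetical sketch of a read through that scheme, assuming the driver's usual intel_uncore accessors (the helper name is invented for illustration and is not part of the patch):

static u32 example_read_rcs_mocs(struct intel_uncore *uncore, unsigned int index)
{
	/*
	 * Each engine instance exposes its own copy of the MOCS table at a
	 * fixed base (__GEN9_RCS0_MOCS0 for render, __GEN9_VCS0_MOCS0 for
	 * the first video engine, and so on); entry i lives at base + i * 4,
	 * which is exactly what GEN9_GFX_MOCS(i) expands to.
	 */
	return intel_uncore_read(uncore, GEN9_GFX_MOCS(index));
}

The same pattern covers GEN9_MFX0_MOCS(), GEN9_VEBOX_MOCS() and friends, which is why the bases are renamed after the engine instances they belong to.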
+#define __GEN9_VECS0_MOCS0 0xcb00 +#define GEN9_VEBOX_MOCS(i) _MMIO(__GEN9_VECS0_MOCS0 + (i) * 4) +#define __GEN9_BCS0_MOCS0 0xcc00 +#define GEN9_BLT_MOCS(i) _MMIO(__GEN9_BCS0_MOCS0 + (i) * 4) +#define __GEN11_VCS2_MOCS0 0x10000 +#define GEN11_MFX2_MOCS(i) _MMIO(__GEN11_VCS2_MOCS0 + (i) * 4) #define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0) #define PMFLUSHDONE_LNICRSDROP (1 << 20) #define PMFLUSH_GAPL3UNBLOCK (1 << 21) #define PMFLUSHDONE_LNEBLK (1 << 22) +#define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ + /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) #define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */ @@ -11145,6 +11738,7 @@ enum skl_power_gate { #define _ICL_PHY_MISC_B 0x64C04 #define ICL_PHY_MISC(port) _MMIO_PORT(port, _ICL_PHY_MISC_A, \ _ICL_PHY_MISC_B) +#define ICL_PHY_MISC_MUX_DDID (1 << 28) #define ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN (1 << 23) /* Icelake Display Stream Compression Registers */ @@ -11454,17 +12048,33 @@ enum skl_power_gate { _ICL_DSC1_RC_BUF_THRESH_1_UDW_PB, \ _ICL_DSC1_RC_BUF_THRESH_1_UDW_PC) -#define PORT_TX_DFLEXDPSP _MMIO(FIA1_BASE + 0x008A0) -#define TC_LIVE_STATE_TBT(tc_port) (1 << ((tc_port) * 8 + 6)) -#define TC_LIVE_STATE_TC(tc_port) (1 << ((tc_port) * 8 + 5)) -#define DP_LANE_ASSIGNMENT_SHIFT(tc_port) ((tc_port) * 8) -#define DP_LANE_ASSIGNMENT_MASK(tc_port) (0xf << ((tc_port) * 8)) -#define DP_LANE_ASSIGNMENT(tc_port, x) ((x) << ((tc_port) * 8)) - -#define PORT_TX_DFLEXDPPMS _MMIO(FIA1_BASE + 0x00890) -#define DP_PHY_MODE_STATUS_COMPLETED(tc_port) (1 << (tc_port)) - -#define PORT_TX_DFLEXDPCSSS _MMIO(FIA1_BASE + 0x00894) -#define DP_PHY_MODE_STATUS_NOT_SAFE(tc_port) (1 << (tc_port)) +#define PORT_TX_DFLEXDPSP(fia) _MMIO_FIA((fia), 0x008A0) +#define MODULAR_FIA_MASK (1 << 4) +#define TC_LIVE_STATE_TBT(idx) (1 << ((idx) * 8 + 6)) +#define TC_LIVE_STATE_TC(idx) (1 << ((idx) * 8 + 5)) +#define DP_LANE_ASSIGNMENT_SHIFT(idx) ((idx) * 8) +#define DP_LANE_ASSIGNMENT_MASK(idx) (0xf << ((idx) * 8)) +#define DP_LANE_ASSIGNMENT(idx, x) ((x) << ((idx) * 8)) + +#define PORT_TX_DFLEXDPPMS(fia) _MMIO_FIA((fia), 0x00890) +#define DP_PHY_MODE_STATUS_COMPLETED(idx) (1 << (idx)) + +#define PORT_TX_DFLEXDPCSSS(fia) _MMIO_FIA((fia), 0x00894) +#define DP_PHY_MODE_STATUS_NOT_SAFE(idx) (1 << (idx)) + +#define PORT_TX_DFLEXPA1(fia) _MMIO_FIA((fia), 0x00880) +#define DP_PIN_ASSIGNMENT_SHIFT(idx) ((idx) * 4) +#define DP_PIN_ASSIGNMENT_MASK(idx) (0xf << ((idx) * 4)) +#define DP_PIN_ASSIGNMENT(idx, x) ((x) << ((idx) * 4)) + +/* This register controls the Display State Buffer (DSB) engines. 
*/ +#define _DSBSL_INSTANCE_BASE 0x70B00 +#define DSBSL_INSTANCE(pipe, id) (_DSBSL_INSTANCE_BASE + \ + (pipe) * 0x1000 + (id) * 0x100) +#define DSB_HEAD(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x0) +#define DSB_TAIL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x4) +#define DSB_CTRL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x8) +#define DSB_ENABLE (1 << 31) +#define DSB_STATUS (1 << 0) #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a195a92d0105..78a5f5d3c070 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -31,10 +31,13 @@ #include "gem/i915_gem_context.h" #include "gt/intel_context.h" +#include "gt/intel_ring.h" +#include "gt/intel_rps.h" #include "i915_active.h" #include "i915_drv.h" #include "i915_globals.h" +#include "i915_trace.h" #include "intel_pm.h" struct execute_cb { @@ -54,11 +57,13 @@ static struct i915_global_request { static const char *i915_fence_get_driver_name(struct dma_fence *fence) { - return "i915"; + return dev_name(to_request(fence)->i915->drm.dev); } static const char *i915_fence_get_timeline_name(struct dma_fence *fence) { + const struct i915_gem_context *ctx; + /* * The timeline struct (as part of the ppgtt underneath a context) * may be freed when the request is no longer in use by the GPU. @@ -71,7 +76,11 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - return to_request(fence)->gem_context->name ?: "[i915]"; + ctx = i915_request_gem_context(to_request(fence)); + if (!ctx) + return "[" DRIVER_NAME "]"; + + return ctx->name; } static bool i915_fence_signaled(struct dma_fence *fence) @@ -119,62 +128,73 @@ const struct dma_fence_ops i915_fence_ops = { .release = i915_fence_release, }; -static inline void -i915_request_remove_from_client(struct i915_request *request) +static void irq_execute_cb(struct irq_work *wrk) { - struct drm_i915_file_private *file_priv; + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); - file_priv = request->file_priv; - if (!file_priv) - return; + i915_sw_fence_complete(cb->fence); + kmem_cache_free(global.slab_execute_cbs, cb); +} - spin_lock(&file_priv->mm.lock); - if (request->file_priv) { - list_del(&request->client_link); - request->file_priv = NULL; - } - spin_unlock(&file_priv->mm.lock); +static void irq_execute_cb_hook(struct irq_work *wrk) +{ + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); + + cb->hook(container_of(cb->fence, struct i915_request, submit), + &cb->signal->fence); + i915_request_put(cb->signal); + + irq_execute_cb(wrk); } -static void advance_ring(struct i915_request *request) +static void __notify_execute_cb(struct i915_request *rq) { - struct intel_ring *ring = request->ring; - unsigned int tail; + struct execute_cb *cb; + + lockdep_assert_held(&rq->lock); + + if (list_empty(&rq->execute_cb)) + return; + + list_for_each_entry(cb, &rq->execute_cb, link) + irq_work_queue(&cb->work); /* - * We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. + * XXX Rollback on __i915_request_unsubmit() * - * Note this requires that we are always called in request - * completion order. 
+ * In the future, perhaps when we have an active time-slicing scheduler, + * it will be interesting to unsubmit parallel execution and remove + * busywaits from the GPU until their master is restarted. This is + * quite hairy, we have to carefully rollback the fence and do a + * preempt-to-idle cycle on the target engine, all the while the + * master execute_cb may refire. */ - GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); - if (list_is_last(&request->ring_link, &ring->request_list)) { - /* - * We may race here with execlists resubmitting this request - * as we retire it. The resubmission will move the ring->tail - * forwards (to request->wa_tail). We either read the - * current value that was written to hw, or the value that - * is just about to be. Either works, if we miss the last two - * noops - they are safe to be replayed on a reset. - */ - tail = READ_ONCE(request->tail); - list_del(&ring->active_link); - } else { - tail = request->postfix; - } - list_del_init(&request->ring_link); + INIT_LIST_HEAD(&rq->execute_cb); +} + +static inline void +remove_from_client(struct i915_request *request) +{ + struct drm_i915_file_private *file_priv; + + if (!READ_ONCE(request->file_priv)) + return; - ring->head = tail; + rcu_read_lock(); + file_priv = xchg(&request->file_priv, NULL); + if (file_priv) { + spin_lock(&file_priv->mm.lock); + list_del(&request->client_link); + spin_unlock(&file_priv->mm.lock); + } + rcu_read_unlock(); } static void free_capture_list(struct i915_request *request) { struct i915_capture_list *capture; - capture = request->capture_list; + capture = fetch_and_zero(&request->capture_list); while (capture) { struct i915_capture_list *next = capture->next; @@ -183,79 +203,83 @@ static void free_capture_list(struct i915_request *request) } } -static bool i915_request_retire(struct i915_request *rq) +static void remove_from_engine(struct i915_request *rq) { - struct i915_active_request *active, *next; + struct intel_engine_cs *engine, *locked; + + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. + */ + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->active.lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->active.lock); + spin_lock(&engine->active.lock); + locked = engine; + } + list_del_init(&rq->sched.link); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); + spin_unlock_irq(&locked->active.lock); +} - lockdep_assert_held(&rq->i915->drm.struct_mutex); +bool i915_request_retire(struct i915_request *rq) +{ if (!i915_request_completed(rq)) return false; - GEM_TRACE("%s fence %llx:%lld, current %d\n", - rq->engine->name, - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq)); + RQ_TRACE(rq, "\n"); GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); trace_i915_request_retire(rq); - advance_ring(rq); - /* - * Walk through the active list, calling retire on each. This allows - * objects to track their GPU activity and mark themselves as idle - * when their *last* active request is completed (updating state - * tracking lists for eviction, active references for GEM, etc). 
+ * We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. * - * As the ->retire() may free the node, we decouple it first and - * pass along the auxiliary information (to avoid dereferencing - * the node after the callback). + * Note this requires that we are always called in request + * completion order. */ - list_for_each_entry_safe(active, next, &rq->active_list, link) { - /* - * In microbenchmarks or focusing upon time inside the kernel, - * we may spend an inordinate amount of time simply handling - * the retirement of requests and processing their callbacks. - * Of which, this loop itself is particularly hot due to the - * cache misses when jumping around the list of - * i915_active_request. So we try to keep this loop as - * streamlined as possible and also prefetch the next - * i915_active_request to try and hide the likely cache miss. - */ - prefetchw(next); - - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, rq); - } - - local_irq_disable(); + GEM_BUG_ON(!list_is_first(&rq->link, + &i915_request_timeline(rq)->requests)); + rq->ring->head = rq->postfix; - spin_lock(&rq->engine->active.lock); - list_del(&rq->sched.link); - spin_unlock(&rq->engine->active.lock); + /* + * We only loosely track inflight requests across preemption, + * and so we may find ourselves attempting to retire a _completed_ + * request that we have removed from the HW and put back on a run + * queue. + */ + remove_from_engine(rq); - spin_lock(&rq->lock); + spin_lock_irq(&rq->lock); i915_request_mark_complete(rq); if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) i915_request_cancel_breadcrumb(rq); - if (rq->waitboost) { - GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); - atomic_dec(&rq->i915->gt_pm.rps.num_waiters); + if (i915_request_has_waitboost(rq)) { + GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); + atomic_dec(&rq->engine->gt->rps.num_waiters); } - spin_unlock(&rq->lock); - - local_irq_enable(); - - intel_context_exit(rq->hw_context); - intel_context_unpin(rq->hw_context); + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + __notify_execute_cb(rq); + } + GEM_BUG_ON(!list_empty(&rq->execute_cb)); + spin_unlock_irq(&rq->lock); - i915_request_remove_from_client(rq); + remove_from_client(rq); list_del(&rq->link); + intel_context_exit(rq->context); + intel_context_unpin(rq->context); + free_capture_list(rq); i915_sched_node_fini(&rq->sched); i915_request_put(rq); @@ -265,76 +289,24 @@ static bool i915_request_retire(struct i915_request *rq) void i915_request_retire_upto(struct i915_request *rq) { - struct intel_ring *ring = rq->ring; + struct intel_timeline * const tl = i915_request_timeline(rq); struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, current %d\n", - rq->engine->name, - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq)); + RQ_TRACE(rq, "\n"); - lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!i915_request_completed(rq)); - if (list_empty(&rq->ring_link)) - return; - do { - tmp = list_first_entry(&ring->request_list, - typeof(*tmp), ring_link); + tmp = list_first_entry(&tl->requests, typeof(*tmp), link); } while (i915_request_retire(tmp) && tmp != rq); } -static void irq_execute_cb(struct irq_work *wrk) -{ - struct 
execute_cb *cb = container_of(wrk, typeof(*cb), work); - - i915_sw_fence_complete(cb->fence); - kmem_cache_free(global.slab_execute_cbs, cb); -} - -static void irq_execute_cb_hook(struct irq_work *wrk) -{ - struct execute_cb *cb = container_of(wrk, typeof(*cb), work); - - cb->hook(container_of(cb->fence, struct i915_request, submit), - &cb->signal->fence); - i915_request_put(cb->signal); - - irq_execute_cb(wrk); -} - -static void __notify_execute_cb(struct i915_request *rq) -{ - struct execute_cb *cb; - - lockdep_assert_held(&rq->lock); - - if (list_empty(&rq->execute_cb)) - return; - - list_for_each_entry(cb, &rq->execute_cb, link) - irq_work_queue(&cb->work); - - /* - * XXX Rollback on __i915_request_unsubmit() - * - * In the future, perhaps when we have an active time-slicing scheduler, - * it will be interesting to unsubmit parallel execution and remove - * busywaits from the GPU until their master is restarted. This is - * quite hairy, we have to carefully rollback the fence and do a - * preempt-to-idle cycle on the target engine, all the while the - * master execute_cb may refire. - */ - INIT_LIST_HEAD(&rq->execute_cb); -} - static int -__i915_request_await_execution(struct i915_request *rq, - struct i915_request *signal, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal), - gfp_t gfp) +__await_execution(struct i915_request *rq, + struct i915_request *signal, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal), + gfp_t gfp) { struct execute_cb *cb; @@ -371,22 +343,41 @@ __i915_request_await_execution(struct i915_request *rq, } spin_unlock_irq(&signal->lock); + /* Copy across semaphore status as we need the same behaviour */ + rq->sched.flags |= signal->sched.flags; return 0; } -void __i915_request_submit(struct i915_request *request) +bool __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; + bool result = false; - GEM_TRACE("%s fence %llx:%lld, current %d\n", - engine->name, - request->fence.context, request->fence.seqno, - hwsp_seqno(request)); + RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); - if (i915_gem_context_is_banned(request->gem_context)) + /* + * With the advent of preempt-to-busy, we frequently encounter + * requests that we have unsubmitted from HW, but left running + * until the next ack and so have completed in the meantime. On + * resubmission of that completed request, we can skip + * updating the payload, and execlists can even skip submitting + * the request. + * + * We must remove the request from the caller's priority queue, + * and the caller must only call us when the request is in their + * priority queue, under the active.lock. This ensures that the + * request has *not* yet been retired and we can safely move + * the request into the engine->active.list where it will be + * dropped upon retiring. (Otherwise if resubmit a *retired* + * request, this would be a horrible use-after-free.) 
+ */ + if (i915_request_completed(request)) + goto xfer; + + if (intel_context_is_banned(request->context)) i915_request_skip(request, -EIO); /* @@ -409,29 +400,31 @@ void __i915_request_submit(struct i915_request *request) i915_sw_fence_signaled(&request->semaphore)) engine->saturated |= request->sched.semaphores; - /* We may be recursing from the signal callback of another i915 fence */ - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + engine->emit_fini_breadcrumb(request, + request->ring->vaddr + request->postfix); - list_move_tail(&request->sched.link, &engine->active.requests); + trace_i915_request_execute(request); + engine->serial++; + result = true; - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); +xfer: /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { + list_move_tail(&request->sched.link, &engine->active.requests); + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + } if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && !i915_request_enable_breadcrumb(request)) - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); __notify_execute_cb(request); spin_unlock(&request->lock); - engine->emit_fini_breadcrumb(request, - request->ring->vaddr + request->postfix); - - engine->serial++; - - trace_i915_request_execute(request); + return result; } void i915_request_submit(struct i915_request *request) @@ -451,10 +444,7 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld, current %d\n", - engine->name, - request->fence.context, request->fence.seqno, - hwsp_seqno(request)); + RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); @@ -512,6 +502,10 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: trace_i915_request_submit(request); + + if (unlikely(fence->error)) + i915_request_skip(request, fence->error); + /* * We need to serialize use of the submit_request() callback * with its hotplugging performed during an emergency @@ -552,29 +546,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static void ring_retire_requests(struct intel_ring *ring) +static void retire_requests(struct intel_timeline *tl) { struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link) + list_for_each_entry_safe(rq, rn, &tl->requests, link) if (!i915_request_retire(rq)) break; } static noinline struct i915_request * -request_alloc_slow(struct intel_context *ce, gfp_t gfp) +request_alloc_slow(struct intel_timeline *tl, gfp_t gfp) { - struct intel_ring *ring = ce->ring; struct i915_request *rq; - if (list_empty(&ring->request_list)) + if (list_empty(&tl->requests)) goto out; if (!gfpflags_allow_blocking(gfp)) goto out; /* Move our oldest request to the slab-cache (if not in use!) 
*/ - rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); + rq = list_first_entry(&tl->requests, typeof(*rq), link); i915_request_retire(rq); rq = kmem_cache_alloc(global.slab_requests, @@ -583,20 +576,35 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp) return rq; /* Ratelimit ourselves to prevent oom from malicious clients */ - rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link); + rq = list_last_entry(&tl->requests, typeof(*rq), link); cond_synchronize_rcu(rq->rcustate); /* Retire our old requests in the hope that we free some */ - ring_retire_requests(ring); + retire_requests(tl); out: return kmem_cache_alloc(global.slab_requests, gfp); } +static void __i915_request_ctor(void *arg) +{ + struct i915_request *rq = arg; + + spin_lock_init(&rq->lock); + i915_sched_node_init(&rq->sched); + i915_sw_fence_init(&rq->submit, submit_notify); + i915_sw_fence_init(&rq->semaphore, semaphore_notify); + + rq->file_priv = NULL; + rq->capture_list = NULL; + + INIT_LIST_HEAD(&rq->execute_cb); +} + struct i915_request * __i915_request_create(struct intel_context *ce, gfp_t gfp) { - struct i915_timeline *tl = ce->ring->timeline; + struct intel_timeline *tl = ce->timeline; struct i915_request *rq; u32 seqno; int ret; @@ -638,46 +646,43 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq = kmem_cache_alloc(global.slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { - rq = request_alloc_slow(ce, gfp); + rq = request_alloc_slow(tl, gfp); if (!rq) { ret = -ENOMEM; goto err_unreserve; } } - ret = i915_timeline_get_seqno(tl, rq, &seqno); + ret = intel_timeline_get_seqno(tl, rq, &seqno); if (ret) goto err_free; rq->i915 = ce->engine->i915; - rq->hw_context = ce; - rq->gem_context = ce->gem_context; + rq->context = ce; rq->engine = ce->engine; rq->ring = ce->ring; - rq->timeline = tl; + rq->execution_mask = ce->engine->mask; + + RCU_INIT_POINTER(rq->timeline, tl); + RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); rq->hwsp_seqno = tl->hwsp_seqno; - rq->hwsp_cacheline = tl->hwsp_cacheline; + rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ - spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, tl->fence_context, seqno); /* We bump the ref for the fence chain */ - i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); - i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify); + i915_sw_fence_reinit(&i915_request_get(rq)->submit); + i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); - i915_sched_node_init(&rq->sched); + i915_sched_node_reinit(&rq->sched); - /* No zalloc, must clear what we need by hand */ - rq->file_priv = NULL; + /* No zalloc, everything must be cleared after use */ rq->batch = NULL; - rq->capture_list = NULL; - rq->waitboost = false; - rq->execution_mask = ALL_ENGINES; - - INIT_LIST_HEAD(&rq->active_list); - INIT_LIST_HEAD(&rq->execute_cb); + GEM_BUG_ON(rq->file_priv); + GEM_BUG_ON(rq->capture_list); + GEM_BUG_ON(!list_empty(&rq->execute_cb)); /* * Reserve space in the ring buffer for all the commands required to @@ -715,7 +720,6 @@ err_unwind: ce->ring->emit = rq->head; /* Make sure we didn't add ourselves to external state before freeing */ - GEM_BUG_ON(!list_empty(&rq->active_list)); GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); @@ -730,15 +734,15 @@ struct i915_request * i915_request_create(struct intel_context *ce) { struct i915_request *rq; - int err; + struct intel_timeline 
*tl; - err = intel_context_timeline_lock(ce); - if (err) - return ERR_PTR(err); + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) + return ERR_CAST(tl); /* Move our oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); - if (!list_is_last(&rq->ring_link, &ce->ring->request_list)) + rq = list_first_entry(&tl->requests, typeof(*rq), link); + if (!list_is_last(&rq->link, &tl->requests)) i915_request_retire(rq); intel_context_enter(ce); @@ -748,28 +752,58 @@ i915_request_create(struct intel_context *ce) goto err_unlock; /* Check that we do not interrupt ourselves with a new request */ - rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex); + rq->cookie = lockdep_pin_lock(&tl->mutex); return rq; err_unlock: - intel_context_timeline_unlock(ce); + intel_context_timeline_unlock(tl); return rq; } static int i915_request_await_start(struct i915_request *rq, struct i915_request *signal) { - if (list_is_first(&signal->ring_link, &signal->ring->request_list)) - return 0; + struct dma_fence *fence; + int err; + + GEM_BUG_ON(i915_request_timeline(rq) == + rcu_access_pointer(signal->timeline)); + + fence = NULL; + rcu_read_lock(); + spin_lock_irq(&signal->lock); + if (!i915_request_started(signal) && + !list_is_first(&signal->link, + &rcu_dereference(signal->timeline)->requests)) { + struct i915_request *prev = list_prev_entry(signal, link); - signal = list_prev_entry(signal, ring_link); - if (i915_timeline_sync_is_later(rq->timeline, &signal->fence)) + /* + * Peek at the request before us in the timeline. That + * request will only be valid before it is retired, so + * after acquiring a reference to it, confirm that it is + * still part of the signaler's timeline. + */ + if (i915_request_get_rcu(prev)) { + if (list_next_entry(prev, link) == signal) + fence = &prev->fence; + else + i915_request_put(prev); + } + } + spin_unlock_irq(&signal->lock); + rcu_read_unlock(); + if (!fence) return 0; - return i915_sw_fence_await_dma_fence(&rq->submit, - &signal->fence, 0, - I915_FENCE_GFP); + err = 0; + if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) + err = i915_sw_fence_await_dma_fence(&rq->submit, + fence, 0, + I915_FENCE_GFP); + dma_fence_put(fence); + + return err; } static intel_engine_mask_t @@ -791,38 +825,27 @@ already_busywaiting(struct i915_request *rq) } static int -emit_semaphore_wait(struct i915_request *to, - struct i915_request *from, - gfp_t gfp) +__emit_semaphore_wait(struct i915_request *to, + struct i915_request *from, + u32 seqno) { + const int has_token = INTEL_GEN(to->i915) >= 12; u32 hwsp_offset; + int len, err; u32 *cs; - int err; - GEM_BUG_ON(!from->timeline->has_initial_breadcrumb); GEM_BUG_ON(INTEL_GEN(to->i915) < 8); - /* Just emit the first semaphore we see as request space is limited. */ - if (already_busywaiting(to) & from->engine->mask) - return i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - I915_FENCE_GFP); - - err = i915_request_await_start(to, from); - if (err < 0) - return err; - - /* Only submit our spinner after the signaler is running! */ - err = __i915_request_await_execution(to, from, NULL, gfp); - if (err) - return err; - /* We need to pin the signaler's HWSP until we are finished reading. 
*/ - err = i915_timeline_read_hwsp(from, to, &hwsp_offset); + err = intel_timeline_read_hwsp(from, to, &hwsp_offset); if (err) return err; - cs = intel_ring_begin(to, 4); + len = 4; + if (has_token) + len += 2; + + cs = intel_ring_begin(to, len); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -834,18 +857,50 @@ emit_semaphore_wait(struct i915_request *to, * (post-wrap) values than they were expecting (and so wait * forever). */ - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_GTE_SDD; - *cs++ = from->fence.seqno; + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_GTE_SDD) + + has_token; + *cs++ = seqno; *cs++ = hwsp_offset; *cs++ = 0; + if (has_token) { + *cs++ = 0; + *cs++ = MI_NOOP; + } intel_ring_advance(to, cs); + return 0; +} + +static int +emit_semaphore_wait(struct i915_request *to, + struct i915_request *from, + gfp_t gfp) +{ + /* Just emit the first semaphore we see as request space is limited. */ + if (already_busywaiting(to) & from->engine->mask) + goto await_fence; + + if (i915_request_await_start(to, from) < 0) + goto await_fence; + + /* Only submit our spinner after the signaler is running! */ + if (__await_execution(to, from, NULL, gfp)) + goto await_fence; + + if (__emit_semaphore_wait(to, from, from->fence.seqno)) + goto await_fence; + to->sched.semaphores |= from->engine->mask; to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; return 0; + +await_fence: + return i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + I915_FENCE_GFP); } static int @@ -865,18 +920,16 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return ret; } - if (to->engine == from->engine) { + if (to->engine == from->engine) ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, I915_FENCE_GFP); - } else if (intel_engine_has_semaphores(to->engine) && - to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) { + else if (intel_context_use_semaphores(to->context)) ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); - } else { + else ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, I915_FENCE_GFP); - } if (ret < 0) return ret; @@ -916,8 +969,10 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + i915_sw_fence_set_error_once(&rq->submit, fence->error); continue; + } /* * Requests on the same timeline are explicitly ordered, along @@ -928,27 +983,80 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) continue; /* Squash repeated waits to the same timelines */ - if (fence->context != rq->i915->mm.unordered_timeline && - i915_timeline_sync_is_later(rq->timeline, fence)) + if (fence->context && + intel_timeline_sync_is_later(i915_request_timeline(rq), + fence)) continue; if (dma_fence_is_i915(fence)) ret = i915_request_await_request(rq, to_request(fence)); else ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, - I915_FENCE_TIMEOUT, + fence->context ? 
I915_FENCE_TIMEOUT : 0, I915_FENCE_GFP); if (ret < 0) return ret;
 /* Record the latest fence used against each timeline */ - if (fence->context != rq->i915->mm.unordered_timeline) - i915_timeline_sync_set(rq->timeline, fence); + if (fence->context) + intel_timeline_sync_set(i915_request_timeline(rq), + fence); } while (--nchild); return 0; }
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl, + struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, + fence->context, + fence->seqno - 1); +} +
+static int intel_timeline_sync_set_start(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); +} +
+static int +__i915_request_await_execution(struct i915_request *to, + struct i915_request *from, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) +{ + int err; +
+ /* Submit both requests at the same time */ + err = __await_execution(to, from, hook, I915_FENCE_GFP); + if (err) + return err; +
+ /* Squash repeated dependencies to the same timelines */ + if (intel_timeline_sync_has_start(i915_request_timeline(to), + &from->fence)) + return 0; +
+ /* Ensure both start together [after all semaphores in signal] */ + if (intel_engine_has_semaphores(to->engine)) + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); + else + err = i915_request_await_start(to, from); + if (err < 0) + return err; +
+ /* Couple the dependency tree for PI on this exposed to->fence */ + if (to->engine->schedule) { + err = i915_sched_node_add_dependency(&to->sched, &from->sched); + if (err < 0) + return err; + } +
+ return intel_timeline_sync_set_start(i915_request_timeline(to), + &from->fence); +} +
 int i915_request_await_execution(struct i915_request *rq, struct dma_fence *fence, @@ -971,8 +1079,10 @@ i915_request_await_execution(struct i915_request *rq, do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + i915_sw_fence_set_error_once(&rq->submit, fence->error); continue; + }
 /* * We don't squash repeated fence dependencies here as we @@ -982,8 +1092,7 @@ i915_request_await_execution(struct i915_request *rq, if (dma_fence_is_i915(fence)) ret = __i915_request_await_execution(rq, to_request(fence), - hook, - I915_FENCE_GFP); + hook); else ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, I915_FENCE_TIMEOUT, @@ -1027,7 +1136,7 @@ i915_request_await_object(struct i915_request *to, struct dma_fence **shared; unsigned int count, i; - ret = reservation_object_get_fences_rcu(obj->base.resv, + ret = dma_resv_get_fences_rcu(obj->base.resv, &excl, &count, &shared); if (ret) return ret; @@ -1044,7 +1153,7 @@ i915_request_await_object(struct i915_request *to, dma_fence_put(shared[i]); kfree(shared); } else { - excl = reservation_object_get_excl_rcu(obj->base.resv); + excl = dma_resv_get_excl_rcu(obj->base.resv); } if (excl) { @@ -1065,6 +1174,9 @@ void i915_request_skip(struct i915_request *rq, int error) GEM_BUG_ON(!IS_ERR_VALUE((long)error)); dma_fence_set_error(&rq->fence, error); + if (rq->infix == rq->postfix) + return; +
 /* * As this request likely depends on state from the lost * context, clear out all the user operations leaving the @@ -1076,12 +1188,13 @@ void i915_request_skip(struct i915_request *rq, int error) head = 0; } memset(vaddr + head, 0, rq->postfix - head); + rq->infix = rq->postfix; } static struct i915_request * __i915_request_add_to_timeline(struct i915_request *rq) { - struct 
i915_timeline *timeline = rq->timeline; + struct intel_timeline *timeline = i915_request_timeline(rq); struct i915_request *prev; /* @@ -1104,7 +1217,8 @@ __i915_request_add_to_timeline(struct i915_request *rq) * precludes optimising to use semaphores serialisation of a single * timeline across engines. */ - prev = rcu_dereference_protected(timeline->last_request.request, 1); + prev = to_request(__i915_active_fence_set(&timeline->last_request, + &rq->fence)); if (prev && !i915_request_completed(prev)) { if (is_power_of_2(prev->engine->mask | rq->engine->mask)) i915_sw_fence_await_sw_fence(&rq->submit, @@ -1129,7 +1243,6 @@ __i915_request_add_to_timeline(struct i915_request *rq) * us, the timeline will hold its seqno which is later than ours. */ GEM_BUG_ON(timeline->seqno != rq->fence.seqno); - __i915_active_request_set(&timeline->last_request, rq); return prev; } @@ -1143,11 +1256,9 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct intel_ring *ring = rq->ring; - struct i915_request *prev; u32 *cs; - GEM_TRACE("%s fence %llx:%lld\n", - engine->name, rq->fence.context, rq->fence.seqno); + RQ_TRACE(rq, "\n"); /* * To ensure that this call will not fail, space for its emissions @@ -1156,6 +1267,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) */ GEM_BUG_ON(rq->reserved_space > ring->space); rq->reserved_space = 0; + rq->emitted_jiffies = jiffies; /* * Record the position of the start of the breadcrumb so that @@ -1167,13 +1279,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) GEM_BUG_ON(IS_ERR(cs)); rq->postfix = intel_ring_offset(rq, cs); - prev = __i915_request_add_to_timeline(rq); - - list_add_tail(&rq->ring_link, &ring->request_list); - if (list_is_first(&rq->ring_link, &ring->request_list)) - list_add(&ring->active_link, &rq->i915->gt.active_rings); - rq->emitted_jiffies = jiffies; + return __i915_request_add_to_timeline(rq); +} +void __i915_request_queue(struct i915_request *rq, + const struct i915_sched_attr *attr) +{ /* * Let the backend know a new request has arrived that may need * to adjust the existing execution schedule due to a high priority @@ -1185,56 +1296,56 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ - local_bh_disable(); i915_sw_fence_commit(&rq->semaphore); - rcu_read_lock(); /* RCU serialisation for set-wedged protection */ - if (engine->schedule) { - struct i915_sched_attr attr = rq->gem_context->sched; - - /* - * Boost actual workloads past semaphores! - * - * With semaphores we spin on one engine waiting for another, - * simply to reduce the latency of starting our work when - * the signaler completes. However, if there is any other - * work that we could be doing on this engine instead, that - * is better utilisation and will reduce the overall duration - * of the current work. To avoid PI boosting a semaphore - * far in the distance past over useful work, we keep a history - * of any semaphore use along our dependency chain. - */ - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) - attr.priority |= I915_PRIORITY_NOSEMAPHORE; - - /* - * Boost priorities to new clients (new request flows). - * - * Allow interactive/synchronous clients to jump ahead of - * the bulk clients. 
(FQ_CODEL) - */ - if (list_empty(&rq->sched.signalers_list)) - attr.priority |= I915_PRIORITY_WAIT; - - engine->schedule(rq, &attr); - } - rcu_read_unlock(); + if (attr && rq->engine->schedule) + rq->engine->schedule(rq, attr); i915_sw_fence_commit(&rq->submit); - local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ - - return prev; } void i915_request_add(struct i915_request *rq) { + struct intel_timeline * const tl = i915_request_timeline(rq); + struct i915_sched_attr attr = {}; struct i915_request *prev; - lockdep_assert_held(&rq->timeline->mutex); - lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie); + lockdep_assert_held(&tl->mutex); + lockdep_unpin_lock(&tl->mutex, rq->cookie); trace_i915_request_add(rq); prev = __i915_request_commit(rq); + if (rcu_access_pointer(rq->context->gem_context)) + attr = i915_request_gem_context(rq)->sched; + + /* + * Boost actual workloads past semaphores! + * + * With semaphores we spin on one engine waiting for another, + * simply to reduce the latency of starting our work when + * the signaler completes. However, if there is any other + * work that we could be doing on this engine instead, that + * is better utilisation and will reduce the overall duration + * of the current work. To avoid PI boosting a semaphore + * far in the distance past over useful work, we keep a history + * of any semaphore use along our dependency chain. + */ + if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) + attr.priority |= I915_PRIORITY_NOSEMAPHORE; + + /* + * Boost priorities to new clients (new request flows). + * + * Allow interactive/synchronous clients to jump ahead of + * the bulk clients. (FQ_CODEL) + */ + if (list_empty(&rq->sched.signalers_list)) + attr.priority |= I915_PRIORITY_WAIT; + + local_bh_disable(); + __i915_request_queue(rq, &attr); + local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ + /* * In typical scenarios, we do not expect the previous request on * the timeline to be still tracked by timeline->last_request if it @@ -1252,10 +1363,12 @@ void i915_request_add(struct i915_request *rq) * work on behalf of others -- but instead we should benefit from * improved resource management. (Well, that's the theory at least.) */ - if (prev && i915_request_completed(prev)) + if (prev && + i915_request_completed(prev) && + rcu_access_pointer(prev->timeline) == tl) i915_request_retire_upto(prev); - mutex_unlock(&rq->timeline->mutex); + mutex_unlock(&tl->mutex); } static unsigned long local_clock_us(unsigned int *cpu) @@ -1390,8 +1503,7 @@ long i915_request_wait(struct i915_request *rq, * serialise wait/reset with an explicit lock, we do want * lockdep to detect potential dependency cycles. */ - mutex_acquire(&rq->i915->gpu_error.wedge_mutex.dep_map, - 0, 0, _THIS_IP_); + mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_); /* * Optimistic spin before touching IRQs. @@ -1416,7 +1528,7 @@ long i915_request_wait(struct i915_request *rq, * completion. That requires having a good predictor for the request * duration, which we currently lack. 
*/ - if (CONFIG_DRM_I915_SPIN_REQUEST && + if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) && __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) { dma_fence_signal(&rq->fence); goto out; @@ -1436,7 +1548,7 @@ long i915_request_wait(struct i915_request *rq, */ if (flags & I915_WAIT_PRIORITY) { if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) - gen6_rps_boost(rq); + intel_rps_boost(rq); i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); } @@ -1447,8 +1559,10 @@ long i915_request_wait(struct i915_request *rq, for (;;) { set_current_state(state); - if (i915_request_completed(rq)) + if (i915_request_completed(rq)) { + dma_fence_signal(&rq->fence); break; + } if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1460,6 +1574,7 @@ long i915_request_wait(struct i915_request *rq, break; } + intel_engine_flush_submission(rq->engine); timeout = io_schedule_timeout(timeout); } __set_current_state(TASK_RUNNING); @@ -1467,27 +1582,11 @@ long i915_request_wait(struct i915_request *rq, dma_fence_remove_callback(&rq->fence, &wait.cb); out: - mutex_release(&rq->i915->gpu_error.wedge_mutex.dep_map, 0, _THIS_IP_); + mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_); trace_i915_request_wait_end(rq); return timeout; } -bool i915_retire_requests(struct drm_i915_private *i915) -{ - struct intel_ring *ring, *tmp; - - lockdep_assert_held(&i915->drm.struct_mutex); - - list_for_each_entry_safe(ring, tmp, - &i915->gt.active_rings, active_link) { - intel_ring_get(ring); /* last rq holds reference! */ - ring_retire_requests(ring); - intel_ring_put(ring); - } - - return !list_empty(&i915->gt.active_rings); -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_request.c" #include "selftests/i915_request.c" @@ -1514,10 +1613,14 @@ static struct i915_global_request global = { { int __init i915_global_request_init(void) { - global.slab_requests = KMEM_CACHE(i915_request, - SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT | - SLAB_TYPESAFE_BY_RCU); + global.slab_requests = + kmem_cache_create("i915_request", + sizeof(struct i915_request), + __alignof__(struct i915_request), + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_TYPESAFE_BY_RCU, + __i915_request_ctor); if (!global.slab_requests) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index edbbdfec24ab..f57eadcf3583 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -28,7 +28,10 @@ #include <linux/dma-fence.h> #include <linux/lockdep.h> +#include "gem/i915_gem_context_types.h" +#include "gt/intel_context_types.h" #include "gt/intel_engine_types.h" +#include "gt/intel_timeline_types.h" #include "i915_gem.h" #include "i915_scheduler.h" @@ -40,14 +43,19 @@ struct drm_file; struct drm_i915_gem_object; struct i915_request; -struct i915_timeline; -struct i915_timeline_cacheline; struct i915_capture_list { struct i915_capture_list *next; struct i915_vma *vma; }; +#define RQ_TRACE(rq, fmt, ...) do { \ + const struct i915_request *rq__ = (rq); \ + ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt, \ + rq__->fence.context, rq__->fence.seqno, \ + hwsp_seqno(rq__), ##__VA_ARGS__); \ +} while (0) + enum { /* * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW. 
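A brief note on the RQ_TRACE() helper defined a few lines above: it forwards to ENGINE_TRACE() on rq->engine and prepends the request's fence context:seqno together with the current breadcrumb read via hwsp_seqno(), which is why the open-coded GEM_TRACE("%s fence %llx:%lld, current %d\n", ...) call sites elsewhere in the patch collapse to RQ_TRACE(rq, "\n"). A minimal hypothetical caller, shown only as a sketch (the function name is invented and is not part of the patch):

static void example_trace_retire(struct i915_request *rq)
{
	/*
	 * Expands to ENGINE_TRACE(rq->engine, ...), so the emitted message
	 * carries "fence <context>:<seqno>, current <breadcrumb> " ahead of
	 * the caller-supplied format string.
	 */
	RQ_TRACE(rq, "retiring\n");
}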
@@ -63,12 +71,63 @@ enum { I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, /* + * I915_FENCE_FLAG_PQUEUE - this request is ready for execution + * + * Using the scheduler, when a request is ready for execution it is put + * into the priority queue, and removed from that queue when transferred + * to the HW runlists. We want to track its membership within the + * priority queue so that we can easily check before rescheduling. + * + * See i915_request_in_priority_queue() + */ + I915_FENCE_FLAG_PQUEUE, + + /* * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list * * Internal bookkeeping used by the breadcrumb code to track when * a request is on the various signal_list. */ I915_FENCE_FLAG_SIGNAL, + + /* + * I915_FENCE_FLAG_HOLD - this request is currently on hold + * + * This request has been suspended, pending an ongoing investigation. + */ + I915_FENCE_FLAG_HOLD, + + /* + * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted + * + * The execution of some requests should not be interrupted. This is + * a sensitive operation as it makes the request super important, + * blocking other higher priority work. Abuse of this flag will + * lead to quality of service issues. + */ + I915_FENCE_FLAG_NOPREEMPT, + + /* + * I915_FENCE_FLAG_SENTINEL - this request should be last in the queue + * + * A high priority sentinel request may be submitted to clear the + * submission queue. As it will be the only request in-flight, upon + * execution all other active requests will have been preempted and + * unsubmitted. This preemptive pulse is used to re-evaluate the + * in-flight requests, particularly in cases where an active context + * is banned and those active requests need to be cancelled. + */ + I915_FENCE_FLAG_SENTINEL, + + /* + * I915_FENCE_FLAG_BOOST - upclock the gpu for this request + * + * Some requests are more important than others! In particular, a + * request that the user is waiting on is typically required for + * interactive latency, for which we want to minimise by upclocking + * the GPU. Here we track such boost requests on a per-request basis. + */ + I915_FENCE_FLAG_BOOST, }; /** @@ -108,11 +167,10 @@ struct i915_request { * i915_request_free() will then decrement the refcount on the * context. */ - struct i915_gem_context *gem_context; struct intel_engine_cs *engine; - struct intel_context *hw_context; + struct intel_context *context; struct intel_ring *ring; - struct i915_timeline *timeline; + struct intel_timeline __rcu *timeline; struct list_head signal_link; /* @@ -143,6 +201,10 @@ struct i915_request { union { wait_queue_entry_t submitq; struct i915_sw_dma_fence_cb dmaq; + struct i915_request_duration_cb { + struct dma_fence_cb cb; + ktime_t emitted; + } duration; }; struct list_head execute_cb; struct i915_sw_fence semaphore; @@ -175,7 +237,7 @@ struct i915_request { * inside the timeline's HWSP vma, but it is only valid while this * request has not completed and guarded by the timeline mutex. */ - struct i915_timeline_cacheline *hwsp_cacheline; + struct intel_timeline_cacheline __rcu *hwsp_cacheline; /** Position in the ring of the start of the request */ u32 head; @@ -210,19 +272,13 @@ struct i915_request { * on the active_list (of their final request). */ struct i915_capture_list *capture_list; - struct list_head active_list; /** Time at which this request was emitted, in jiffies. 
*/ unsigned long emitted_jiffies; - bool waitboost; - /** timeline->request entry for this request */ struct list_head link; - /** ring->request_list entry for this request */ - struct list_head ring_link; - struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ struct list_head client_link; @@ -248,7 +304,10 @@ struct i915_request * __must_check i915_request_create(struct intel_context *ce); struct i915_request *__i915_request_commit(struct i915_request *request); +void __i915_request_queue(struct i915_request *rq, + const struct i915_sched_attr *attr); +bool i915_request_retire(struct i915_request *rq); void i915_request_retire_upto(struct i915_request *rq); static inline struct i915_request * @@ -290,7 +349,7 @@ int i915_request_await_execution(struct i915_request *rq, void i915_request_add(struct i915_request *rq); -void __i915_request_submit(struct i915_request *request); +bool __i915_request_submit(struct i915_request *request); void i915_request_submit(struct i915_request *request); void i915_request_skip(struct i915_request *request, int error); @@ -307,10 +366,8 @@ long i915_request_wait(struct i915_request *rq, long timeout) __attribute__((nonnull(1))); #define I915_WAIT_INTERRUPTIBLE BIT(0) -#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */ -#define I915_WAIT_PRIORITY BIT(2) /* small priority bump for the request */ -#define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */ -#define I915_WAIT_FOR_IDLE_BOOST BIT(4) +#define I915_WAIT_PRIORITY BIT(1) /* small priority bump for the request */ +#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ static inline bool i915_request_signaled(const struct i915_request *rq) { @@ -323,6 +380,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq) return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); } +static inline bool i915_request_in_priority_queue(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + /** * Returns true if seq1 is later than seq2. */ @@ -416,6 +478,27 @@ static inline bool i915_request_is_running(const struct i915_request *rq) return __i915_request_has_started(rq); } +/** + * i915_request_is_ready - check if the request is ready for execution + * @rq: the request + * + * Upon construction, the request is instructed to wait upon various + * signals before it is ready to be executed by the HW. That is, we do + * not want to start execution and read data before it is written. In practice, + * this is controlled with a mixture of interrupts and semaphores. Once + * the submit fence is completed, the backend scheduler will place the + * request into its queue and from there submit it for execution. So we + * can detect when a request is eligible for execution (and is under control + * of the scheduler) by querying where it is in any of the scheduler's lists. + * + * Returns true if the request is ready for execution (it may be inflight), + * false otherwise. 
+ */ +static inline bool i915_request_is_ready(const struct i915_request *rq) +{ + return !list_empty(&rq->sched.link); +} + static inline bool i915_request_completed(const struct i915_request *rq) { if (i915_request_signaled(rq)) @@ -429,6 +512,62 @@ static inline void i915_request_mark_complete(struct i915_request *rq) rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */ } -bool i915_retire_requests(struct drm_i915_private *i915); +static inline bool i915_request_has_waitboost(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); +} + +static inline bool i915_request_has_nopreempt(const struct i915_request *rq) +{ + /* Preemption should only be disabled very rarely */ + return unlikely(test_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags)); +} + +static inline bool i915_request_has_sentinel(const struct i915_request *rq) +{ + return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); +} + +static inline bool i915_request_on_hold(const struct i915_request *rq) +{ + return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags)); +} + +static inline void i915_request_set_hold(struct i915_request *rq) +{ + set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); +} + +static inline void i915_request_clear_hold(struct i915_request *rq) +{ + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); +} + +static inline struct intel_timeline * +i915_request_timeline(struct i915_request *rq) +{ + /* Valid only while the request is being constructed (or retired). */ + return rcu_dereference_protected(rq->timeline, + lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex)); +} + +static inline struct i915_gem_context * +i915_request_gem_context(struct i915_request *rq) +{ + /* Valid only while the request is being constructed (or retired). */ + return rcu_dereference_protected(rq->context->gem_context, true); +} + +static inline struct intel_timeline * +i915_request_active_timeline(struct i915_request *rq) +{ + /* + * When in use during submission, we are protected by a guarantee that + * the context/timeline is pinned and must remain pinned until after + * this submission. + */ + return rcu_dereference_protected(rq->timeline, + lockdep_is_held(&rq->engine->active.lock)); +} #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 6617963df9ed..b7b59328cb76 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -67,15 +67,15 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) } /** - * __for_each_sgt_dma - iterate over the DMA addresses of the given sg_table - * @__dmap: DMA address (output) + * __for_each_sgt_daddr - iterate over the device addresses of the given sg_table + * @__dp: Device address (output) * @__iter: 'struct sgt_iter' (iterator state, internal) * @__sgt: sg_table to iterate over (input) * @__step: step size */ -#define __for_each_sgt_dma(__dmap, __iter, __sgt, __step) \ +#define __for_each_sgt_daddr(__dp, __iter, __sgt, __step) \ for ((__iter) = __sgt_iter((__sgt)->sgl, true); \ - ((__dmap) = (__iter).dma + (__iter).curr); \ + ((__dp) = (__iter).dma + (__iter).curr), (__iter).sgp; \ (((__iter).curr += (__step)) >= (__iter).max) ? 
\ (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 2e9b38bdc33c..5d96cfba40f8 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -177,22 +177,51 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority | __NO_PREEMPTION; } -static void kick_submission(struct intel_engine_cs *engine, int prio) +static inline bool need_preempt(int prio, int active) { - const struct i915_request *inflight = - port_request(engine->execlists.port); + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. + */ + return prio >= max(I915_PRIORITY_NORMAL, active); +} + +static void kick_submission(struct intel_engine_cs *engine, + const struct i915_request *rq, + int prio) +{ + const struct i915_request *inflight; /* - * If we are already the currently executing context, don't - * bother evaluating if we should preempt ourselves, or if - * we expect nothing to change as a result of running the - * tasklet, i.e. we have not change the priority queue - * sufficiently to oust the running context. + * We only need to kick the tasklet once for the high priority + * new context we add into the queue. */ - if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight))) + if (prio <= engine->execlists.queue_priority_hint) return; - tasklet_hi_schedule(&engine->execlists.tasklet); + rcu_read_lock(); + + /* Nothing currently active? We're overdue for a submission! */ + inflight = execlists_active(&engine->execlists); + if (!inflight) + goto unlock; + + /* + * If we are already the currently executing context, don't + * bother evaluating if we should preempt ourselves. + */ + if (inflight->context == rq->context) + goto unlock; + + engine->execlists.queue_priority_hint = prio; + if (need_preempt(prio, rq_prio(inflight))) + tasklet_hi_schedule(&engine->execlists.tasklet); + +unlock: + rcu_read_unlock(); } static void __i915_schedule(struct i915_sched_node *node, @@ -297,20 +326,18 @@ static void __i915_schedule(struct i915_sched_node *node, node->attr.priority = prio; - if (list_empty(&node->link)) { - /* - * If the request is not in the priolist queue because - * it is not yet runnable, then it doesn't contribute - * to our preemption decisions. On the other hand, - * if the request is on the HW, it too is not in the - * queue; but in that case we may still need to reorder - * the inflight requests. - */ + /* + * Once the request is ready, it will be placed into the + * priority lists and then onto the HW runlist. Before the + * request is ready, it does not contribute to our preemption + * decisions and we can safely ignore it, as it will, and + * any preemption required, be dealt with upon submission. 
+ * See engine->submit_request() + */ + if (list_empty(&node->link)) continue; - } - if (!intel_engine_is_virtual(engine) && - !i915_request_is_active(node_to_request(node))) { + if (i915_request_in_priority_queue(node_to_request(node))) { if (!cache.priolist) cache.priolist = i915_sched_lookup_priolist(engine, @@ -318,13 +345,8 @@ static void __i915_schedule(struct i915_sched_node *node, list_move_tail(&node->link, cache.priolist); } - if (prio <= engine->execlists.queue_priority_hint) - continue; - - engine->execlists.queue_priority_hint = prio; - /* Defer (tasklet) submission until after all of our updates. */ - kick_submission(engine, prio); + kick_submission(engine, node_to_request(node), prio); } spin_unlock(&engine->active.lock); @@ -350,8 +372,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump) unsigned long flags; GEM_BUG_ON(bump & ~I915_PRIORITY_MASK); - - if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID) + if (READ_ONCE(rq->sched.attr.priority) & bump) return; spin_lock_irqsave(&schedule_lock, flags); @@ -364,9 +385,19 @@ void i915_sched_node_init(struct i915_sched_node *node) INIT_LIST_HEAD(&node->signalers_list); INIT_LIST_HEAD(&node->waiters_list); INIT_LIST_HEAD(&node->link); + + i915_sched_node_reinit(node); +} + +void i915_sched_node_reinit(struct i915_sched_node *node) +{ node->attr.priority = I915_PRIORITY_INVALID; node->semaphores = 0; node->flags = 0; + + GEM_BUG_ON(!list_empty(&node->signalers_list)); + GEM_BUG_ON(!list_empty(&node->waiters_list)); + GEM_BUG_ON(!list_empty(&node->link)); } static struct i915_dependency * @@ -395,6 +426,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, list_add(&dep->wait_link, &signal->waiters_list); list_add(&dep->signal_link, &node->signalers_list); dep->signaler = signal; + dep->waiter = node; dep->flags = flags; /* Keep track of whether anyone on this chain has a semaphore */ @@ -450,13 +482,13 @@ void i915_sched_node_fini(struct i915_sched_node *node) * so we may be called out-of-order. 
*/ list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { - GEM_BUG_ON(!node_signaled(dep->signaler)); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->wait_link); if (dep->flags & I915_DEPENDENCY_ALLOC) i915_dependency_free(dep); } + INIT_LIST_HEAD(&node->signalers_list); /* Remove ourselves from everyone who depends upon us */ list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { @@ -467,6 +499,7 @@ void i915_sched_node_fini(struct i915_sched_node *node) if (dep->flags & I915_DEPENDENCY_ALLOC) i915_dependency_free(dep); } + INIT_LIST_HEAD(&node->waiters_list); spin_unlock_irq(&schedule_lock); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 7eefccff39bf..d1dc4efef77b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -26,6 +26,7 @@ sched.link) void i915_sched_node_init(struct i915_sched_node *node); +void i915_sched_node_reinit(struct i915_sched_node *node); bool __i915_sched_node_add_dependency(struct i915_sched_node *node, struct i915_sched_node *signal, @@ -52,22 +53,4 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } -static inline bool i915_scheduler_need_preempt(int prio, int active) -{ - /* - * Allow preemption of low -> normal -> high, but we do - * not allow low priority tasks to preempt other low priority - * tasks under the impression that latency for low priority - * tasks does not matter (as much as background throughput), - * so kiss. - * - * More naturally we would write - * prio >= max(0, last); - * except that we wish to prevent triggering preemption at the same - * priority level: the task that is running should remain running - * to preserve FIFO ordering of dependencies. - */ - return prio > max(I915_PRIORITY_NORMAL - 1, active); -} - #endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 3e309631bd0b..d18e70550054 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -49,6 +49,15 @@ struct i915_sched_attr { * DAG of each request, we are able to insert it into a sorted queue when it * is ready, and are able to reorder its portion of the graph to accommodate * dynamic priority changes. + * + * Ok, there is now one active element to the "scheduler" in the backends. + * We let a new context run for a small amount of time before re-evaluating + * the run order. As we re-evaluate, we maintain the strict ordering of + * dependencies, but attempt to rotate the active contexts (the current context + * is put to the back of its priority queue, then reshuffling its dependents). + * This provides minimal timeslicing and prevents a userspace hog (e.g. + * something waiting on a user semaphore [VkEvent]) from denying service to + * others. 
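The need_preempt() helper added to i915_scheduler.c above, together with the removal of i915_scheduler_need_preempt() from i915_scheduler.h, reduces the kick decision to "ask for preemption only when the incoming priority is at least normal and not below the priority already running". A minimal standalone C sketch of that rule — illustrative priority values rather than the driver's I915_PRIORITY_* encoding, and not part of the patch itself:

#include <assert.h>
#include <stdbool.h>

/*
 * Illustrative priority levels; the real driver derives these from
 * I915_PRIORITY_* and the request's scheduling attributes.
 */
enum { PRIO_LOW = -10, PRIO_NORMAL = 0, PRIO_HIGH = 10 };

static int max_int(int a, int b) { return a > b ? a : b; }

/*
 * Same shape as the driver's need_preempt(): only ask for preemption
 * when the incoming priority is at least "normal" and not below the
 * priority of the request already running.
 */
static bool need_preempt(int prio, int active)
{
    return prio >= max_int(PRIO_NORMAL, active);
}

int main(void)
{
    assert(!need_preempt(PRIO_LOW, PRIO_LOW));     /* low never preempts low */
    assert(need_preempt(PRIO_NORMAL, PRIO_LOW));   /* normal may preempt low */
    assert(need_preempt(PRIO_HIGH, PRIO_NORMAL));  /* high may preempt normal */
    assert(!need_preempt(PRIO_NORMAL, PRIO_HIGH)); /* never preempt upwards */
    return 0;
}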
*/ struct i915_sched_node { struct list_head signalers_list; /* those before us, we depend upon */ @@ -62,6 +71,7 @@ struct i915_sched_node { struct i915_dependency { struct i915_sched_node *signaler; + struct i915_sched_node *waiter; struct list_head signal_link; struct list_head wait_link; struct list_head dfs_link; diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index 207e21b478f2..98bcb6fa0ab4 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -24,6 +24,8 @@ #ifndef __I915_SELFTEST_H__ #define __I915_SELFTEST_H__ +#include <linux/types.h> + struct pci_dev; struct drm_i915_private; @@ -34,6 +36,7 @@ struct i915_selftest { char *filter; int mock; int live; + int perf; }; #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) @@ -43,6 +46,7 @@ extern struct i915_selftest i915_selftest; int i915_mock_selftests(void); int i915_live_selftests(struct pci_dev *pdev); +int i915_perf_selftests(struct pci_dev *pdev); /* We extract the function declarations from i915_mock_selftests.h and * i915_live_selftests.h Add your unit test declarations there! @@ -59,6 +63,7 @@ int i915_live_selftests(struct pci_dev *pdev); #undef selftest #define selftest(name, func) int func(struct drm_i915_private *i915); #include "selftests/i915_live_selftests.h" +#include "selftests/i915_perf_selftests.h" #undef selftest struct i915_subtest { @@ -66,12 +71,37 @@ struct i915_subtest { const char *name; }; +int __i915_nop_setup(void *data); +int __i915_nop_teardown(int err, void *data); + +int __i915_live_setup(void *data); +int __i915_live_teardown(int err, void *data); + +int __intel_gt_live_setup(void *data); +int __intel_gt_live_teardown(int err, void *data); + int __i915_subtests(const char *caller, + int (*setup)(void *data), + int (*teardown)(int err, void *data), const struct i915_subtest *st, unsigned int count, void *data); #define i915_subtests(T, data) \ - __i915_subtests(__func__, T, ARRAY_SIZE(T), data) + __i915_subtests(__func__, \ + __i915_nop_setup, __i915_nop_teardown, \ + T, ARRAY_SIZE(T), data) +#define i915_live_subtests(T, data) ({ \ + typecheck(struct drm_i915_private *, data); \ + __i915_subtests(__func__, \ + __i915_live_setup, __i915_live_teardown, \ + T, ARRAY_SIZE(T), data); \ +}) +#define intel_gt_live_subtests(T, data) ({ \ + typecheck(struct intel_gt *, data); \ + __i915_subtests(__func__, \ + __intel_gt_live_setup, __intel_gt_live_teardown, \ + T, ARRAY_SIZE(T), data); \ +}) #define SUBTEST(x) { x, #x } @@ -82,6 +112,7 @@ int __i915_subtests(const char *caller, static inline int i915_mock_selftests(void) { return 0; } static inline int i915_live_selftests(struct pci_dev *pdev) { return 0; } +static inline int i915_perf_selftests(struct pci_dev *pdev) { return 0; } #define I915_SELFTEST_DECLARE(x) #define I915_SELFTEST_ONLY(x) 0 diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index a08d7d16621b..8812cdd9007f 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -28,9 +28,11 @@ #include "display/intel_fbc.h" #include "display/intel_gmbus.h" +#include "display/intel_vga.h" +#include "i915_drv.h" #include "i915_reg.h" -#include "intel_drv.h" +#include "i915_suspend.h" static void i915_save_display(struct drm_i915_private *dev_priv) { @@ -56,7 +58,7 @@ static void i915_restore_display(struct drm_i915_private *dev_priv) if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv)) I915_WRITE(FBC_CONTROL, 
dev_priv->regfile.saveFBC_CONTROL); - i915_redisable_vga(dev_priv); + intel_vga_redisable(dev_priv); } int i915_save_state(struct drm_i915_private *dev_priv) @@ -64,8 +66,6 @@ int i915_save_state(struct drm_i915_private *dev_priv) struct pci_dev *pdev = dev_priv->drm.pdev; int i; - mutex_lock(&dev_priv->drm.struct_mutex); - i915_save_display(dev_priv); if (IS_GEN(dev_priv, 4)) @@ -99,8 +99,6 @@ int i915_save_state(struct drm_i915_private *dev_priv) dev_priv->regfile.saveSWF3[i] = I915_READ(SWF3(i)); } - mutex_unlock(&dev_priv->drm.struct_mutex); - return 0; } @@ -109,8 +107,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) struct pci_dev *pdev = dev_priv->drm.pdev; int i; - mutex_lock(&dev_priv->drm.struct_mutex); - if (IS_GEN(dev_priv, 4)) pci_write_config_word(pdev, GCDGMBUS, dev_priv->regfile.saveGCDGMBUS); @@ -144,8 +140,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) I915_WRITE(SWF3(i), dev_priv->regfile.saveSWF3[i]); } - mutex_unlock(&dev_priv->drm.struct_mutex); - intel_gmbus_reset(dev_priv); return 0; diff --git a/drivers/gpu/drm/i915/i915_suspend.h b/drivers/gpu/drm/i915/i915_suspend.h new file mode 100644 index 000000000000..3a36fb4ecc05 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_suspend.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_SUSPEND_H__ +#define __I915_SUSPEND_H__ + +struct drm_i915_private; + +int i915_save_state(struct drm_i915_private *i915); +int i915_restore_state(struct drm_i915_private *i915); + +#endif /* __I915_SUSPEND_H__ */ diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 5387aafd3424..51ba97daf2a0 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -7,11 +7,17 @@ #include <linux/slab.h> #include <linux/dma-fence.h> #include <linux/irq_work.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include "i915_sw_fence.h" #include "i915_selftest.h" +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) +#define I915_SW_FENCE_BUG_ON(expr) BUG_ON(expr) +#else +#define I915_SW_FENCE_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) +#endif + #define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */ static DEFINE_SPINLOCK(i915_sw_fence_lock); @@ -157,8 +163,11 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, LIST_HEAD(extra); do { - list_for_each_entry_safe(pos, next, &x->head, entry) - pos->func(pos, TASK_NORMAL, 0, &extra); + list_for_each_entry_safe(pos, next, &x->head, entry) { + pos->func(pos, + TASK_NORMAL, fence->error, + &extra); + } if (list_empty(&extra)) break; @@ -215,11 +224,21 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence, { BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK); + __init_waitqueue_head(&fence->wait, name, key); + fence->flags = (unsigned long)fn; + + i915_sw_fence_reinit(fence); +} + +void i915_sw_fence_reinit(struct i915_sw_fence *fence) +{ debug_fence_init(fence); - __init_waitqueue_head(&fence->wait, name, key); atomic_set(&fence->pending, 1); - fence->flags = (unsigned long)fn; + fence->error = 0; + + I915_SW_FENCE_BUG_ON(!fence->flags); + I915_SW_FENCE_BUG_ON(!list_empty(&fence->wait.head)); } void i915_sw_fence_commit(struct i915_sw_fence *fence) @@ -230,6 +249,8 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence) static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags, void *key) { + i915_sw_fence_set_error_once(wq->private, flags); + list_del(&wq->entry); 
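The error plumbing added above forwards a signaler's error into each waiter through i915_sw_fence_set_error_once(), which (as defined later in this patch in i915_sw_fence.h) latches only the first non-zero error via cmpxchg(&fence->error, 0, error). A standalone userspace sketch of that first-error-wins latch, using C11 atomics in place of the kernel cmpxchg() and a hypothetical toy_fence type:

#include <assert.h>
#include <errno.h>
#include <stdatomic.h>

/* Stand-in for struct i915_sw_fence; only the error latch is modelled. */
struct toy_fence {
    atomic_int error;
};

/*
 * First-error-wins: record a non-zero error only if none has been
 * recorded yet, mirroring cmpxchg(&fence->error, 0, error) in the
 * i915_sw_fence_set_error_once() helper.
 */
static void set_error_once(struct toy_fence *f, int error)
{
    int expected = 0;

    if (error)
        atomic_compare_exchange_strong(&f->error, &expected, error);
}

int main(void)
{
    struct toy_fence f = { .error = 0 };

    set_error_once(&f, 0);           /* "no error" is never latched */
    set_error_once(&f, -ETIMEDOUT);  /* first real error sticks */
    set_error_once(&f, -EIO);        /* later errors are ignored */
    assert(atomic_load(&f.error) == -ETIMEDOUT);
    return 0;
}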
__i915_sw_fence_complete(wq->private, key); @@ -302,8 +323,10 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, debug_fence_assert(fence); might_sleep_if(gfpflags_allow_blocking(gfp)); - if (i915_sw_fence_done(signaler)) + if (i915_sw_fence_done(signaler)) { + i915_sw_fence_set_error_once(fence, signaler->error); return 0; + } debug_fence_assert(signaler); @@ -319,6 +342,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, return -ENOMEM; i915_sw_fence_wait(signaler); + i915_sw_fence_set_error_once(fence, signaler->error); return 0; } @@ -337,7 +361,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, __add_wait_queue_entry_tail(&signaler->wait, wq); pending = 1; } else { - i915_sw_fence_wake(wq, 0, 0, NULL); + i915_sw_fence_wake(wq, 0, signaler->error, NULL); pending = 0; } spin_unlock_irqrestore(&signaler->wait.lock, flags); @@ -372,6 +396,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma, { struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base); + i915_sw_fence_set_error_once(cb->fence, dma->error); i915_sw_fence_complete(cb->fence); kfree(cb); } @@ -391,6 +416,7 @@ static void timer_i915_sw_fence_wake(struct timer_list *t) cb->dma->seqno, i915_sw_fence_debug_hint(fence)); + i915_sw_fence_set_error_once(fence, -ETIMEDOUT); i915_sw_fence_complete(fence); } @@ -402,8 +428,10 @@ static void dma_i915_sw_fence_wake_timer(struct dma_fence *dma, struct i915_sw_fence *fence; fence = xchg(&cb->base.fence, NULL); - if (fence) + if (fence) { + i915_sw_fence_set_error_once(fence, dma->error); i915_sw_fence_complete(fence); + } irq_work_queue(&cb->work); } @@ -431,8 +459,10 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, debug_fence_assert(fence); might_sleep_if(gfpflags_allow_blocking(gfp)); - if (dma_fence_is_signaled(dma)) + if (dma_fence_is_signaled(dma)) { + i915_sw_fence_set_error_once(fence, dma->error); return 0; + } cb = kmalloc(timeout ? 
sizeof(struct i915_sw_dma_fence_cb_timer) : @@ -442,7 +472,12 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, if (!gfpflags_allow_blocking(gfp)) return -ENOMEM; - return dma_fence_wait(dma, false); + ret = dma_fence_wait(dma, false); + if (ret) + return ret; + + i915_sw_fence_set_error_once(fence, dma->error); + return 0; } cb->fence = fence; @@ -480,6 +515,7 @@ static void __dma_i915_sw_fence_wake(struct dma_fence *dma, { struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base); + i915_sw_fence_set_error_once(cb->fence, dma->error); i915_sw_fence_complete(cb->fence); } @@ -491,8 +527,10 @@ int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, debug_fence_assert(fence); - if (dma_fence_is_signaled(dma)) + if (dma_fence_is_signaled(dma)) { + i915_sw_fence_set_error_once(fence, dma->error); return 0; + } cb->fence = fence; i915_sw_fence_await(fence); @@ -501,7 +539,7 @@ int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, if (ret == 0) { ret = 1; } else { - i915_sw_fence_complete(fence); + __dma_i915_sw_fence_wake(dma, &cb->base); if (ret == -ENOENT) /* fence already signaled */ ret = 0; } @@ -510,7 +548,7 @@ int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, } int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, - struct reservation_object *resv, + struct dma_resv *resv, const struct dma_fence_ops *exclude, bool write, unsigned long timeout, @@ -526,8 +564,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, struct dma_fence **shared; unsigned int count, i; - ret = reservation_object_get_fences_rcu(resv, - &excl, &count, &shared); + ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared); if (ret) return ret; @@ -551,7 +588,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, dma_fence_put(shared[i]); kfree(shared); } else { - excl = reservation_object_get_excl_rcu(resv); + excl = dma_resv_get_excl_rcu(resv); } if (ret >= 0 && excl && excl->ops != exclude) { diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index 9cb5c3b307a6..19e806ce43bc 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -16,12 +16,13 @@ #include <linux/wait.h> struct completion; -struct reservation_object; +struct dma_resv; struct i915_sw_fence { wait_queue_head_t wait; unsigned long flags; atomic_t pending; + int error; }; #define I915_SW_FENCE_CHECKED_BIT 0 /* used internally for DAG checking */ @@ -53,6 +54,8 @@ do { \ __i915_sw_fence_init((fence), (fn), NULL, NULL) #endif +void i915_sw_fence_reinit(struct i915_sw_fence *fence); + #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS void i915_sw_fence_fini(struct i915_sw_fence *fence); #else @@ -82,7 +85,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, gfp_t gfp); int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, - struct reservation_object *resv, + struct dma_resv *resv, const struct dma_fence_ops *exclude, bool write, unsigned long timeout, @@ -106,4 +109,11 @@ static inline void i915_sw_fence_wait(struct i915_sw_fence *fence) wait_event(fence->wait, i915_sw_fence_done(fence)); } +static inline void +i915_sw_fence_set_error_once(struct i915_sw_fence *fence, int error) +{ + if (unlikely(error)) + cmpxchg(&fence->error, 0, error); +} + #endif /* _I915_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c new file mode 100644 index 000000000000..997b2998f1f2 --- /dev/null +++ 
b/drivers/gpu/drm/i915/i915_sw_fence_work.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_sw_fence_work.h" + +static void fence_complete(struct dma_fence_work *f) +{ + if (f->ops->release) + f->ops->release(f); + dma_fence_signal(&f->dma); +} + +static void fence_work(struct work_struct *work) +{ + struct dma_fence_work *f = container_of(work, typeof(*f), work); + int err; + + err = f->ops->work(f); + if (err) + dma_fence_set_error(&f->dma, err); + + fence_complete(f); + dma_fence_put(&f->dma); +} + +static int __i915_sw_fence_call +fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + struct dma_fence_work *f = container_of(fence, typeof(*f), chain); + + switch (state) { + case FENCE_COMPLETE: + if (fence->error) + dma_fence_set_error(&f->dma, fence->error); + + if (!f->dma.error) { + dma_fence_get(&f->dma); + queue_work(system_unbound_wq, &f->work); + } else { + fence_complete(f); + } + break; + + case FENCE_FREE: + dma_fence_put(&f->dma); + break; + } + + return NOTIFY_DONE; +} + +static const char *get_driver_name(struct dma_fence *fence) +{ + return "dma-fence"; +} + +static const char *get_timeline_name(struct dma_fence *fence) +{ + struct dma_fence_work *f = container_of(fence, typeof(*f), dma); + + return f->ops->name ?: "work"; +} + +static void fence_release(struct dma_fence *fence) +{ + struct dma_fence_work *f = container_of(fence, typeof(*f), dma); + + i915_sw_fence_fini(&f->chain); + + BUILD_BUG_ON(offsetof(typeof(*f), dma)); + dma_fence_free(&f->dma); +} + +static const struct dma_fence_ops fence_ops = { + .get_driver_name = get_driver_name, + .get_timeline_name = get_timeline_name, + .release = fence_release, +}; + +void dma_fence_work_init(struct dma_fence_work *f, + const struct dma_fence_work_ops *ops) +{ + f->ops = ops; + spin_lock_init(&f->lock); + dma_fence_init(&f->dma, &fence_ops, &f->lock, 0, 0); + i915_sw_fence_init(&f->chain, fence_notify); + INIT_WORK(&f->work, fence_work); +} + +int dma_fence_work_chain(struct dma_fence_work *f, struct dma_fence *signal) +{ + if (!signal) + return 0; + + return __i915_sw_fence_await_dma_fence(&f->chain, signal, &f->cb); +} diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.h b/drivers/gpu/drm/i915/i915_sw_fence_work.h new file mode 100644 index 000000000000..3a22b287e201 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ + +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef I915_SW_FENCE_WORK_H +#define I915_SW_FENCE_WORK_H + +#include <linux/dma-fence.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +#include "i915_sw_fence.h" + +struct dma_fence_work; + +struct dma_fence_work_ops { + const char *name; + int (*work)(struct dma_fence_work *f); + void (*release)(struct dma_fence_work *f); +}; + +struct dma_fence_work { + struct dma_fence dma; + spinlock_t lock; + + struct i915_sw_fence chain; + struct i915_sw_dma_fence_cb cb; + + struct work_struct work; + const struct dma_fence_work_ops *ops; +}; + +void dma_fence_work_init(struct dma_fence_work *f, + const struct dma_fence_work_ops *ops); +int dma_fence_work_chain(struct dma_fence_work *f, struct dma_fence *signal); + +static inline void dma_fence_work_commit(struct dma_fence_work *f) +{ + i915_sw_fence_commit(&f->chain); +} + +#endif /* I915_SW_FENCE_WORK_H */ diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c new file mode 100644 index 
000000000000..39c79e1c5b52 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_switcheroo.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/vga_switcheroo.h> + +#include "i915_drv.h" +#include "i915_switcheroo.h" + +static void i915_switcheroo_set_state(struct pci_dev *pdev, + enum vga_switcheroo_state state) +{ + struct drm_i915_private *i915 = pdev_to_i915(pdev); + pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; + + if (!i915) { + dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); + return; + } + + if (state == VGA_SWITCHEROO_ON) { + pr_info("switched on\n"); + i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING; + /* i915 resume handler doesn't set to D0 */ + pci_set_power_state(pdev, PCI_D0); + i915_resume_switcheroo(i915); + i915->drm.switch_power_state = DRM_SWITCH_POWER_ON; + } else { + pr_info("switched off\n"); + i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING; + i915_suspend_switcheroo(i915, pmm); + i915->drm.switch_power_state = DRM_SWITCH_POWER_OFF; + } +} + +static bool i915_switcheroo_can_switch(struct pci_dev *pdev) +{ + struct drm_i915_private *i915 = pdev_to_i915(pdev); + + /* + * FIXME: open_count is protected by drm_global_mutex but that would lead to + * locking inversion with the driver load path. And the access here is + * completely racy anyway. So don't bother with locking for now. + */ + return i915 && i915->drm.open_count == 0; +} + +static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { + .set_gpu_state = i915_switcheroo_set_state, + .reprobe = NULL, + .can_switch = i915_switcheroo_can_switch, +}; + +int i915_switcheroo_register(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + return vga_switcheroo_register_client(pdev, &i915_switcheroo_ops, false); +} + +void i915_switcheroo_unregister(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + vga_switcheroo_unregister_client(pdev); +} diff --git a/drivers/gpu/drm/i915/i915_switcheroo.h b/drivers/gpu/drm/i915/i915_switcheroo.h new file mode 100644 index 000000000000..59b6c1e07d75 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_switcheroo.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_SWITCHEROO__ +#define __I915_SWITCHEROO__ + +struct drm_i915_private; + +int i915_switcheroo_register(struct drm_i915_private *i915); +void i915_switcheroo_unregister(struct drm_i915_private *i915); + +#endif /* __I915_SWITCHEROO__ */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index ecac1c386109..0cef3130db05 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -30,8 +30,11 @@ #include <linux/stat.h> #include <linux/sysfs.h> +#include "gt/intel_rc6.h" +#include "gt/intel_rps.h" + #include "i915_drv.h" -#include "intel_drv.h" +#include "i915_sysfs.h" #include "intel_pm.h" #include "intel_sideband.h" @@ -49,7 +52,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, u64 res = 0; with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) - res = intel_rc6_residency_us(dev_priv, reg); + res = intel_rc6_residency_us(&dev_priv->gt.rc6, reg); return DIV_ROUND_CLOSEST_ULL(res, 1000); } @@ -142,12 +145,12 @@ static const struct attribute_group media_rc6_attr_group = { }; #endif -static int l3_access_valid(struct drm_i915_private *dev_priv, loff_t offset) +static int l3_access_valid(struct drm_i915_private *i915, loff_t offset) { - if 
(!HAS_L3_DPF(dev_priv)) + if (!HAS_L3_DPF(i915)) return -EPERM; - if (offset % 4 != 0) + if (!IS_ALIGNED(offset, sizeof(u32))) return -EINVAL; if (offset >= GEN7_L3LOG_SIZE) @@ -162,31 +165,24 @@ i915_l3_read(struct file *filp, struct kobject *kobj, loff_t offset, size_t count) { struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); int slice = (int)(uintptr_t)attr->private; int ret; - count = round_down(count, 4); - - ret = l3_access_valid(dev_priv, offset); + ret = l3_access_valid(i915, offset); if (ret) return ret; + count = round_down(count, sizeof(u32)); count = min_t(size_t, GEN7_L3LOG_SIZE - offset, count); + memset(buf, 0, count); - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - if (dev_priv->l3_parity.remap_info[slice]) + spin_lock(&i915->gem.contexts.lock); + if (i915->l3_parity.remap_info[slice]) memcpy(buf, - dev_priv->l3_parity.remap_info[slice] + (offset/4), + i915->l3_parity.remap_info[slice] + offset / sizeof(u32), count); - else - memset(buf, 0, count); - - mutex_unlock(&dev->struct_mutex); + spin_unlock(&i915->gem.contexts.lock); return count; } @@ -197,46 +193,49 @@ i915_l3_write(struct file *filp, struct kobject *kobj, loff_t offset, size_t count) { struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; - struct i915_gem_context *ctx; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); int slice = (int)(uintptr_t)attr->private; - u32 **remap_info; + u32 *remap_info, *freeme = NULL; + struct i915_gem_context *ctx; int ret; - ret = l3_access_valid(dev_priv, offset); + ret = l3_access_valid(i915, offset); if (ret) return ret; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + if (count < sizeof(u32)) + return -EINVAL; - remap_info = &dev_priv->l3_parity.remap_info[slice]; - if (!*remap_info) { - *remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); - if (!*remap_info) { - ret = -ENOMEM; - goto out; - } + remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); + if (!remap_info) + return -ENOMEM; + + spin_lock(&i915->gem.contexts.lock); + + if (i915->l3_parity.remap_info[slice]) { + freeme = remap_info; + remap_info = i915->l3_parity.remap_info[slice]; + } else { + i915->l3_parity.remap_info[slice] = remap_info; } - /* TODO: Ideally we really want a GPU reset here to make sure errors - * aren't propagated. Since I cannot find a stable way to reset the GPU - * at this point it is left as a TODO. - */ - memcpy(*remap_info + (offset/4), buf, count); + count = round_down(count, sizeof(u32)); + memcpy(remap_info + offset / sizeof(u32), buf, count); /* NB: We defer the remapping until we switch to the context */ - list_for_each_entry(ctx, &dev_priv->contexts.list, link) - ctx->remap_slice |= (1<<slice); + list_for_each_entry(ctx, &i915->gem.contexts.list, link) + ctx->remap_slice |= BIT(slice); - ret = count; + spin_unlock(&i915->gem.contexts.lock); + kfree(freeme); -out: - mutex_unlock(&dev->struct_mutex); + /* + * TODO: Ideally we really want a GPU reset here to make sure errors + * aren't propagated. Since I cannot find a stable way to reset the GPU + * at this point it is left as a TODO. 
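The rewritten i915_l3_write() above drops struct_mutex in favour of the gem.contexts.lock spinlock, so the remap buffer is now allocated before the lock is taken, published only if no buffer was installed in the meantime, and otherwise freed after the lock is dropped. A rough userspace sketch of that allocate-outside/publish-under-the-lock pattern — pthreads standing in for the kernel spinlock, and the names write_remap and LOG_SIZE purely illustrative:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#define LOG_SIZE 128 /* buffer size in u32-sized entries, illustrative */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int *slot; /* analogue of l3_parity.remap_info[slice] */

/*
 * The allocation may sleep, so do it before taking the (non-sleeping)
 * lock; whoever installs a buffer first wins, and the loser's copy is
 * freed once the lock has been dropped.
 */
static int write_remap(size_t offset, const unsigned int *buf, size_t count)
{
    unsigned int *fresh, *target, *freeme = NULL;

    fresh = calloc(LOG_SIZE, sizeof(*fresh));
    if (!fresh)
        return -1;

    pthread_mutex_lock(&lock);
    if (slot) {
        freeme = fresh;   /* someone beat us to it; reuse theirs */
        target = slot;
    } else {
        slot = fresh;     /* publish our buffer */
        target = fresh;
    }
    memcpy(target + offset, buf, count * sizeof(*buf));
    pthread_mutex_unlock(&lock);

    free(freeme);
    return 0;
}

int main(void)
{
    unsigned int patch[4] = { 1, 2, 3, 4 };

    return write_remap(8, patch, 4);
}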
+ */ - return ret; + return count; } static const struct bin_attribute dpf_attrs = { @@ -260,44 +259,30 @@ static const struct bin_attribute dpf_attrs_1 = { static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - intel_wakeref_t wakeref; - u32 freq; - - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - vlv_punit_get(dev_priv); - freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - vlv_punit_put(dev_priv); - - freq = (freq >> 8) & 0xff; - } else { - freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); - } - - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &i915->gt.rps; - return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq)); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_rps_read_actual_frequency(rps)); } static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &i915->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.cur_freq)); + intel_gpu_freq(rps, rps->cur_freq)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &i915->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.boost_freq)); + intel_gpu_freq(rps, rps->boost_freq)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -305,7 +290,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; bool boost = false; ssize_t ret; u32 val; @@ -315,7 +300,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, return ret; /* Validate against (static) hardware limits */ - val = intel_freq_opcode(dev_priv, val); + val = intel_freq_opcode(rps, val); if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; @@ -335,19 +320,19 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.efficient_freq)); + intel_gpu_freq(rps, rps->efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.max_freq_softlimit)); + intel_gpu_freq(rps, rps->max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -355,19 +340,17 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; - intel_wakeref_t 
wakeref; - u32 val; + struct intel_rps *rps = &dev_priv->gt.rps; ssize_t ret; + u32 val; ret = kstrtou32(buf, 0, &val); if (ret) return ret; - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); mutex_lock(&rps->lock); - val = intel_freq_opcode(dev_priv, val); + val = intel_freq_opcode(rps, val); if (val < rps->min_freq || val > rps->max_freq || val < rps->min_freq_softlimit) { @@ -377,7 +360,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (val > rps->rp0_freq) DRM_DEBUG("User requested overclocking to %d\n", - intel_gpu_freq(dev_priv, val)); + intel_gpu_freq(rps, val)); rps->max_freq_softlimit = val; @@ -385,14 +368,15 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, rps->min_freq_softlimit, rps->max_freq_softlimit); - /* We still need *_set_rps to process the new max_delay and + /* + * We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. */ - ret = intel_set_rps(dev_priv, val); + * frequency request may be unchanged. + */ + intel_rps_set(rps, val); unlock: mutex_unlock(&rps->lock); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return ret ?: count; } @@ -400,10 +384,10 @@ unlock: static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.min_freq_softlimit)); + intel_gpu_freq(rps, rps->min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -411,19 +395,17 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; - intel_wakeref_t wakeref; - u32 val; + struct intel_rps *rps = &dev_priv->gt.rps; ssize_t ret; + u32 val; ret = kstrtou32(buf, 0, &val); if (ret) return ret; - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); mutex_lock(&rps->lock); - val = intel_freq_opcode(dev_priv, val); + val = intel_freq_opcode(rps, val); if (val < rps->min_freq || val > rps->max_freq || val > rps->max_freq_softlimit) { @@ -437,14 +419,15 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, rps->min_freq_softlimit, rps->max_freq_softlimit); - /* We still need *_set_rps to process the new min_delay and + /* + * We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. */ - ret = intel_set_rps(dev_priv, val); + * frequency request may be unchanged. 
+ */ + intel_rps_set(rps, val); unlock: mutex_unlock(&rps->lock); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return ret ?: count; } @@ -466,15 +449,15 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(dev_priv, rps->rp0_freq); + val = intel_gpu_freq(rps, rps->rp0_freq); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(dev_priv, rps->rp1_freq); + val = intel_gpu_freq(rps, rps->rp1_freq); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(dev_priv, rps->min_freq); + val = intel_gpu_freq(rps, rps->min_freq); else BUG(); @@ -515,15 +498,15 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct i915_gpu_state *gpu; + struct i915_gpu_coredump *gpu; ssize_t ret; gpu = i915_first_error_state(i915); if (IS_ERR(gpu)) { ret = PTR_ERR(gpu); } else if (gpu) { - ret = i915_gpu_state_copy_to_buffer(gpu, buf, off, count); - i915_gpu_state_put(gpu); + ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count); + i915_gpu_coredump_put(gpu); } else { const char *str = "No error state collected\n"; size_t len = strlen(str); diff --git a/drivers/gpu/drm/i915/i915_sysfs.h b/drivers/gpu/drm/i915/i915_sysfs.h new file mode 100644 index 000000000000..41afd4366416 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_sysfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_SYSFS_H__ +#define __I915_SYSFS_H__ + +struct drm_i915_private; + +void i915_setup_sysfs(struct drm_i915_private *i915); +void i915_teardown_sysfs(struct drm_i915_private *i915); + +#endif /* __I915_SYSFS_H__ */ diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h deleted file mode 100644 index 36e5e5a65155..000000000000 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#ifndef I915_TIMELINE_H -#define I915_TIMELINE_H - -#include <linux/lockdep.h> - -#include "i915_active.h" -#include "i915_syncmap.h" -#include "i915_timeline_types.h" - -int i915_timeline_init(struct drm_i915_private *i915, - struct i915_timeline *tl, - struct i915_vma *hwsp); -void i915_timeline_fini(struct i915_timeline *tl); - -struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, - struct i915_vma *global_hwsp); - -static inline struct i915_timeline * -i915_timeline_get(struct i915_timeline *timeline) -{ - kref_get(&timeline->kref); - return timeline; -} - -void __i915_timeline_free(struct kref *kref); -static inline void i915_timeline_put(struct i915_timeline *timeline) -{ - kref_put(&timeline->kref, __i915_timeline_free); -} - -static inline int __i915_timeline_sync_set(struct i915_timeline *tl, - u64 context, u32 seqno) -{ - return i915_syncmap_set(&tl->sync, context, seqno); -} - -static inline int i915_timeline_sync_set(struct i915_timeline *tl, - const struct dma_fence *fence) -{ - return __i915_timeline_sync_set(tl, fence->context, fence->seqno); -} - -static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl, - u64 context, u32 seqno) -{ - return i915_syncmap_is_later(&tl->sync, context, seqno); -} - -static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, - const struct dma_fence *fence) -{ - return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); -} - -int i915_timeline_pin(struct i915_timeline *tl); -int i915_timeline_get_seqno(struct i915_timeline *tl, - struct i915_request *rq, - u32 *seqno); -void i915_timeline_unpin(struct i915_timeline *tl); - -int i915_timeline_read_hwsp(struct i915_request *from, - struct i915_request *until, - u32 *hwsp_offset); - -void i915_timelines_init(struct drm_i915_private *i915); -void i915_timelines_park(struct drm_i915_private *i915); -void i915_timelines_fini(struct drm_i915_private *i915); - -#endif diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h deleted file mode 100644 index fce5cb4f1090..000000000000 --- a/drivers/gpu/drm/i915/i915_timeline_types.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2016 Intel Corporation - */ - -#ifndef __I915_TIMELINE_TYPES_H__ -#define __I915_TIMELINE_TYPES_H__ - -#include <linux/list.h> -#include <linux/kref.h> -#include <linux/mutex.h> -#include <linux/types.h> - -#include "i915_active_types.h" - -struct drm_i915_private; -struct i915_vma; -struct i915_timeline_cacheline; -struct i915_syncmap; - -struct i915_timeline { - u64 fence_context; - u32 seqno; - - struct mutex mutex; /* protects the flow of requests */ - - unsigned int pin_count; - const u32 *hwsp_seqno; - struct i915_vma *hwsp_ggtt; - u32 hwsp_offset; - - struct i915_timeline_cacheline *hwsp_cacheline; - - bool has_initial_breadcrumb; - - /** - * List of breadcrumbs associated with GPU requests currently - * outstanding. - */ - struct list_head requests; - - /* Contains an RCU guarded pointer to the last request. No reference is - * held to the request, users must carefully acquire a reference to - * the request using i915_active_request_get_request_rcu(), or hold the - * struct_mutex. - */ - struct i915_active_request last_request; - - /** - * We track the most recent seqno that we wait on in every context so - * that we only have to emit a new await and dependency on a more - * recent sync point. 
As the contexts may be executed out-of-order, we - * have to track each individually and can not rely on an absolute - * global_seqno. When we know that all tracked fences are completed - * (i.e. when the driver is idle), we know that the syncmap is - * redundant and we can discard it without loss of generality. - */ - struct i915_syncmap *sync; - - struct list_head link; - struct drm_i915_private *i915; - - struct kref kref; -}; - -#endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index cce426b23a24..233a97a2c276 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -8,11 +8,11 @@ #include <drm/drm_drv.h> +#include "display/intel_display_types.h" #include "gt/intel_engine.h" #include "i915_drv.h" #include "i915_irq.h" -#include "intel_drv.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM i915 @@ -293,16 +293,16 @@ TRACE_EVENT(intel_update_plane, TP_STRUCT__entry( __field(enum pipe, pipe) - __field(const char *, name) __field(u32, frame) __field(u32, scanline) __array(int, src, 4) __array(int, dst, 4) + __string(name, plane->name) ), TP_fast_assign( + __assign_str(name, plane->name); __entry->pipe = crtc->pipe; - __entry->name = plane->name; __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); @@ -310,7 +310,7 @@ TRACE_EVENT(intel_update_plane, ), TP_printk("pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT, - pipe_name(__entry->pipe), __entry->name, + pipe_name(__entry->pipe), __get_str(name), __entry->frame, __entry->scanline, DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src), DRM_RECT_ARG((const struct drm_rect *)__entry->dst)) @@ -322,26 +322,26 @@ TRACE_EVENT(intel_disable_plane, TP_STRUCT__entry( __field(enum pipe, pipe) - __field(const char *, name) __field(u32, frame) __field(u32, scanline) + __string(name, plane->name) ), TP_fast_assign( + __assign_str(name, plane->name); __entry->pipe = crtc->pipe; - __entry->name = plane->name; __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); ), TP_printk("pipe %c, plane %s, frame=%u, scanline=%u", - pipe_name(__entry->pipe), __entry->name, + pipe_name(__entry->pipe), __get_str(name), __entry->frame, __entry->scanline) ); /* pipe updates */ -TRACE_EVENT(i915_pipe_update_start, +TRACE_EVENT(intel_pipe_update_start, TP_PROTO(struct intel_crtc *crtc), TP_ARGS(crtc), @@ -366,7 +366,7 @@ TRACE_EVENT(i915_pipe_update_start, __entry->scanline, __entry->min, __entry->max) ); -TRACE_EVENT(i915_pipe_update_vblank_evaded, +TRACE_EVENT(intel_pipe_update_vblank_evaded, TP_PROTO(struct intel_crtc *crtc), TP_ARGS(crtc), @@ -391,7 +391,7 @@ TRACE_EVENT(i915_pipe_update_vblank_evaded, __entry->scanline, __entry->min, __entry->max) ); -TRACE_EVENT(i915_pipe_update_end, +TRACE_EVENT(intel_pipe_update_end, TP_PROTO(struct intel_crtc *crtc, u32 frame, int scanline_end), TP_ARGS(crtc, frame, scanline_end), @@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -675,18 +674,16 @@ TRACE_EVENT(i915_request_queue, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; - __entry->instance = rq->engine->instance; + __entry->instance = rq->engine->uabi_instance; 
__entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->flags) + __entry->ctx, __entry->seqno, __entry->flags) ); DECLARE_EVENT_CLASS(i915_request, @@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; - __entry->instance = rq->engine->instance; + __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno) + __entry->ctx, __entry->seqno) ); DEFINE_EVENT(i915_request, i915_request_add, @@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -749,18 +743,17 @@ TRACE_EVENT(i915_request_in, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; - __entry->instance = rq->engine->instance; + __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; __entry->prio = rq->sched.attr.priority; __entry->port = port; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, + __entry->ctx, __entry->seqno, __entry->prio, __entry->port) ); @@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -780,18 +772,16 @@ TRACE_EVENT(i915_request_out, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; - __entry->instance = rq->engine->instance; + __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; __entry->completed = i915_request_completed(rq); ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->completed) + __entry->ctx, __entry->seqno, __entry->completed) ); #else @@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -845,17 +834,16 @@ TRACE_EVENT(i915_request_wait_begin, */ TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; - __entry->instance = rq->engine->instance; + __entry->instance = 
rq->engine->uabi_instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, + __entry->ctx, __entry->seqno, __entry->flags) ); @@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context, TP_STRUCT__entry( __field(u32, dev) __field(struct i915_gem_context *, ctx) - __field(u32, hw_id) __field(struct i915_address_space *, vm) ), TP_fast_assign( __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; - __entry->hw_id = ctx->hw_id; - __entry->vm = ctx->vm; + __entry->vm = rcu_access_pointer(ctx->vm); ), - TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u", - __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id) + TP_printk("dev=%u, ctx=%p, ctx_vm=%p", + __entry->dev, __entry->ctx, __entry->vm) ) DEFINE_EVENT(i915_context, i915_context_create, diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c new file mode 100644 index 000000000000..c47261ae86ea --- /dev/null +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <drm/drm_drv.h> + +#include "i915_drv.h" +#include "i915_utils.h" + +#define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI" +#define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \ + "providing the dmesg log by booting with drm.debug=0xf" + +void +__i915_printk(struct drm_i915_private *dev_priv, const char *level, + const char *fmt, ...) +{ + static bool shown_bug_once; + struct device *kdev = dev_priv->drm.dev; + bool is_error = level[1] <= KERN_ERR[1]; + bool is_debug = level[1] == KERN_DEBUG[1]; + struct va_format vaf; + va_list args; + + if (is_debug && !drm_debug_enabled(DRM_UT_DRIVER)) + return; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (is_error) + dev_printk(level, kdev, "%pV", &vaf); + else + dev_printk(level, kdev, "[" DRM_NAME ":%ps] %pV", + __builtin_return_address(0), &vaf); + + va_end(args); + + if (is_error && !shown_bug_once) { + /* + * Ask the user to file a bug report for the error, except + * if they may have caused the bug by fiddling with unsafe + * module parameters. 
+ */ + if (!test_taint(TAINT_USER)) + dev_notice(kdev, "%s", FDO_BUG_MSG); + shown_bug_once = true; + } +} + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) +static unsigned int i915_probe_fail_count; + +int __i915_inject_probe_error(struct drm_i915_private *i915, int err, + const char *func, int line) +{ + if (i915_probe_fail_count >= i915_modparams.inject_probe_failure) + return 0; + + if (++i915_probe_fail_count < i915_modparams.inject_probe_failure) + return 0; + + __i915_printk(i915, KERN_INFO, + "Injecting failure %d at checkpoint %u [%s:%d]\n", + err, i915_modparams.inject_probe_failure, func, line); + i915_modparams.inject_probe_failure = 0; + return err; +} + +bool i915_error_injected(void) +{ + return i915_probe_fail_count && !i915_modparams.inject_probe_failure; +} + +#endif + +void cancel_timer(struct timer_list *t) +{ + if (!READ_ONCE(t->expires)) + return; + + del_timer(t); + WRITE_ONCE(t->expires, 0); +} + +void set_timer_ms(struct timer_list *t, unsigned long timeout) +{ + if (!timeout) { + cancel_timer(t); + return; + } + + timeout = msecs_to_jiffies_timeout(timeout); + + /* + * Paranoia to make sure the compiler computes the timeout before + * loading 'jiffies' as jiffies is volatile and may be updated in + * the background by a timer tick. All to reduce the complexity + * of the addition and reduce the risk of losing a jiffie. + */ + barrier(); + + mod_timer(t, jiffies + timeout); +} diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 2987219a6300..b0ade76bec90 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -31,6 +31,9 @@ #include <linux/types.h> #include <linux/workqueue.h> +struct drm_i915_private; +struct timer_list; + #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ #if 0 @@ -49,6 +52,34 @@ #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ __stringify(x), (long)(x)) +void __printf(3, 4) +__i915_printk(struct drm_i915_private *dev_priv, const char *level, + const char *fmt, ...); + +#define i915_report_error(dev_priv, fmt, ...) \ + __i915_printk(dev_priv, KERN_ERR, fmt, ##__VA_ARGS__) + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) + +int __i915_inject_probe_error(struct drm_i915_private *i915, int err, + const char *func, int line); +#define i915_inject_probe_error(_i915, _err) \ + __i915_inject_probe_error((_i915), (_err), __func__, __LINE__) +bool i915_error_injected(void); + +#else + +#define i915_inject_probe_error(i915, e) ({ BUILD_BUG_ON_INVALID(i915); 0; }) +#define i915_error_injected() false + +#endif + +#define i915_inject_probe_failure(i915) i915_inject_probe_error((i915), -ENODEV) + +#define i915_probe_error(i915, fmt, ...) \ + __i915_printk(i915, i915_error_injected() ? 
KERN_DEBUG : KERN_ERR, \ + fmt, ##__VA_ARGS__) + #if defined(GCC_VERSION) && GCC_VERSION >= 70000 #define add_overflows_t(T, A, B) \ __builtin_add_overflow_p((A), (B), (T)0) @@ -131,6 +162,16 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size) ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ }) +#define ptr_dec(ptr) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v - 1); \ +}) + +#define ptr_inc(ptr) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v + 1); \ +}) + #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) @@ -370,4 +411,36 @@ static inline const char *enableddisabled(bool v) return v ? "enabled" : "disabled"; } +static inline void add_taint_for_CI(unsigned int taint) +{ + /* + * The system is "ok", just about surviving for the user, but + * CI results are now unreliable as the HW is very suspect. + * CI checks the taint state after every test and will reboot + * the machine if the kernel is tainted. + */ + add_taint(taint, LOCKDEP_STILL_OK); +} + +void cancel_timer(struct timer_list *t); +void set_timer_ms(struct timer_list *t, unsigned long timeout); + +static inline bool timer_expired(const struct timer_list *t) +{ + return READ_ONCE(t->expires) && !timer_pending(t); +} + +/* + * This is a lookalike for IS_ENABLED() that takes a kconfig value, + * e.g. CONFIG_DRM_I915_SPIN_REQUEST, and evaluates whether it is non-zero + * i.e. whether the configuration is active. Wrapping up the config inside + * a boolean context prevents clang and smatch from complaining about potential + * issues in confusing logical-&& with bitwise-& for constants. + * + * Sadly IS_ENABLED() itself does not work with kconfig values. + * + * Returns 0 if @config is 0, 1 if set to any value. + */ +#define IS_ACTIVE(config) ((config) != 0) + #endif /* !__I915_UTILS_H */ diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 94d3992b599d..968be26735c5 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -21,7 +21,6 @@ * SOFTWARE. */ -#include "intel_drv.h" #include "i915_vgpu.h" /** @@ -52,34 +51,54 @@ */ /** - * i915_check_vgpu - detect virtual GPU + * i915_detect_vgpu - detect virtual GPU * @dev_priv: i915 device private * * This function is called at the initialization stage, to detect whether * running on a vGPU. */ -void i915_check_vgpu(struct drm_i915_private *dev_priv) +void i915_detect_vgpu(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &dev_priv->uncore; + struct pci_dev *pdev = dev_priv->drm.pdev; u64 magic; u16 version_major; + void __iomem *shared_area; BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); - magic = __raw_uncore_read64(uncore, vgtif_reg(magic)); - if (magic != VGT_MAGIC) + /* + * This is called before we setup the main MMIO BAR mappings used via + * the uncore structure, so we need to access the BAR directly. 
Since + * we do not support VGT on older gens, return early so we don't have + * to consider differently numbered or sized MMIO bars + */ + if (INTEL_GEN(dev_priv) < 6) return; - version_major = __raw_uncore_read16(uncore, vgtif_reg(version_major)); + shared_area = pci_iomap_range(pdev, 0, VGT_PVINFO_PAGE, VGT_PVINFO_SIZE); + if (!shared_area) { + DRM_ERROR("failed to map MMIO bar to check for VGT\n"); + return; + } + + magic = readq(shared_area + vgtif_offset(magic)); + if (magic != VGT_MAGIC) + goto out; + + version_major = readw(shared_area + vgtif_offset(version_major)); if (version_major < VGT_VERSION_MAJOR) { DRM_INFO("VGT interface version mismatch!\n"); - return; + goto out; } - dev_priv->vgpu.caps = __raw_uncore_read32(uncore, vgtif_reg(vgt_caps)); + dev_priv->vgpu.caps = readl(shared_area + vgtif_offset(vgt_caps)); dev_priv->vgpu.active = true; + mutex_init(&dev_priv->vgpu.lock); DRM_INFO("Virtual GPU for Intel GVT-g detected.\n"); + +out: + pci_iounmap(pdev, shared_area); } bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *dev_priv) @@ -101,6 +120,9 @@ static struct _balloon_info_ bl_info; static void vgt_deballoon_space(struct i915_ggtt *ggtt, struct drm_mm_node *node) { + if (!drm_mm_node_allocated(node)) + return; + DRM_DEBUG_DRIVER("deballoon space: range [0x%llx - 0x%llx] %llu KiB.\n", node->start, node->start + node->size, @@ -112,22 +134,22 @@ static void vgt_deballoon_space(struct i915_ggtt *ggtt, /** * intel_vgt_deballoon - deballoon reserved graphics address trunks - * @dev_priv: i915 device private data + * @ggtt: the global GGTT from which we reserved earlier * * This function is called to deallocate the ballooned-out graphic memory, when * driver is unloaded or when ballooning fails. */ -void intel_vgt_deballoon(struct drm_i915_private *dev_priv) +void intel_vgt_deballoon(struct i915_ggtt *ggtt) { int i; - if (!intel_vgpu_active(dev_priv)) + if (!intel_vgpu_active(ggtt->vm.i915)) return; DRM_DEBUG("VGT deballoon.\n"); for (i = 0; i < 4; i++) - vgt_deballoon_space(&dev_priv->ggtt, &bl_info.space[i]); + vgt_deballoon_space(ggtt, &bl_info.space[i]); } static int vgt_balloon_space(struct i915_ggtt *ggtt, @@ -153,7 +175,7 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, /** * intel_vgt_balloon - balloon out reserved graphics address trunks - * @dev_priv: i915 device private data + * @ggtt: the global GGTT from which to reserve * * This function is called at the initialization stage, to balloon out the * graphic address space allocated to other vGPUs, by marking these spaces as @@ -195,22 +217,26 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, * Returns: * zero on success, non-zero if configuration invalid or ballooning failed */ -int intel_vgt_balloon(struct drm_i915_private *dev_priv) +int intel_vgt_balloon(struct i915_ggtt *ggtt) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct intel_uncore *uncore = &ggtt->vm.i915->uncore; unsigned long ggtt_end = ggtt->vm.total; unsigned long mappable_base, mappable_size, mappable_end; unsigned long unmappable_base, unmappable_size, unmappable_end; int ret; - if (!intel_vgpu_active(dev_priv)) + if (!intel_vgpu_active(ggtt->vm.i915)) return 0; - mappable_base = I915_READ(vgtif_reg(avail_rs.mappable_gmadr.base)); - mappable_size = I915_READ(vgtif_reg(avail_rs.mappable_gmadr.size)); - unmappable_base = I915_READ(vgtif_reg(avail_rs.nonmappable_gmadr.base)); - unmappable_size = I915_READ(vgtif_reg(avail_rs.nonmappable_gmadr.size)); + mappable_base = + intel_uncore_read(uncore, 
vgtif_reg(avail_rs.mappable_gmadr.base)); + mappable_size = + intel_uncore_read(uncore, vgtif_reg(avail_rs.mappable_gmadr.size)); + unmappable_base = + intel_uncore_read(uncore, vgtif_reg(avail_rs.nonmappable_gmadr.base)); + unmappable_size = + intel_uncore_read(uncore, vgtif_reg(avail_rs.nonmappable_gmadr.size)); mappable_end = mappable_base + mappable_size; unmappable_end = unmappable_base + unmappable_size; diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index ebe1b7bced98..8b3663dad193 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -24,9 +24,10 @@ #ifndef _I915_VGPU_H_ #define _I915_VGPU_H_ +#include "i915_drv.h" #include "i915_pvinfo.h" -void i915_check_vgpu(struct drm_i915_private *dev_priv); +void i915_detect_vgpu(struct drm_i915_private *dev_priv); bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *dev_priv); @@ -42,7 +43,7 @@ intel_vgpu_has_huge_gtt(struct drm_i915_private *dev_priv) return dev_priv->vgpu.caps & VGT_CAPS_HUGE_GTT; } -int intel_vgt_balloon(struct drm_i915_private *dev_priv); -void intel_vgt_deballoon(struct drm_i915_private *dev_priv); +int intel_vgt_balloon(struct i915_ggtt *ggtt); +void intel_vgt_deballoon(struct i915_ggtt *ggtt); #endif /* _I915_VGPU_H_ */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index a57729be8312..4ff380770b32 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -22,14 +22,20 @@ * */ +#include <linux/sched/mm.h> #include <drm/drm_gem.h> #include "display/intel_frontbuffer.h" #include "gt/intel_engine.h" +#include "gt/intel_engine_heartbeat.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_globals.h" +#include "i915_sw_fence_work.h" +#include "i915_trace.h" #include "i915_vma.h" static struct i915_global_vma { @@ -77,43 +83,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) #endif -static void obj_bump_mru(struct drm_i915_gem_object *obj) +static inline struct i915_vma *active_to_vma(struct i915_active *ref) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); - list_move_tail(&obj->mm.link, &i915->mm.shrink_list); - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + return container_of(ref, typeof(struct i915_vma), active); +} - obj->mm.dirty = true; /* be paranoid */ +static int __i915_vma_active(struct i915_active *ref) +{ + return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT; } +__i915_active_call static void __i915_vma_retire(struct i915_active *ref) { - struct i915_vma *vma = container_of(ref, typeof(*vma), active); - struct drm_i915_gem_object *obj = vma->obj; - - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - if (--obj->active_count) - return; - - /* Prune the shared fence arrays iff completely idle (inc. external) */ - if (reservation_object_trylock(obj->base.resv)) { - if (reservation_object_test_signaled_rcu(obj->base.resv, true)) - reservation_object_add_excl_fence(obj->base.resv, NULL); - reservation_object_unlock(obj->base.resv); - } - - /* - * Bump our place on the bound list to keep it roughly in LRU order - * so that we don't steal from recently used but inactive objects - * (unless we are forced to ofc!) 
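For orientation, a simplified sketch (not taken verbatim from this patch) of how the reworked vGPU entry points fit together on the driver side: detection runs before the GGTT is constructed and probes the PVINFO page directly, ballooning then operates on the finished GGTT, and deballooning mirrors it on teardown.

/* Illustrative call order only; error handling trimmed. */
static int example_vgpu_setup(struct drm_i915_private *i915,
			      struct i915_ggtt *ggtt)
{
	i915_detect_vgpu(i915);	/* maps the PVINFO page via pci_iomap_range() */

	/* intel_vgt_balloon() is a no-op unless a vGPU was detected */
	return intel_vgt_balloon(ggtt);
}

static void example_vgpu_teardown(struct i915_ggtt *ggtt)
{
	intel_vgt_deballoon(ggtt);
}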
- */ - if (i915_gem_object_is_shrinkable(obj)) - obj_bump_mru(obj); - - i915_gem_object_put(obj); /* and drop the active reference */ + i915_vma_put(active_to_vma(ref)); } static struct i915_vma * @@ -125,21 +108,29 @@ vma_create(struct drm_i915_gem_object *obj, struct rb_node *rb, **p; /* The aliasing_ppgtt should never be used directly! */ - GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm); + GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm); vma = i915_vma_alloc(); if (vma == NULL) return ERR_PTR(-ENOMEM); - vma->vm = vm; + kref_init(&vma->ref); + mutex_init(&vma->pages_mutex); + vma->vm = i915_vm_get(vm); vma->ops = &vm->vma_ops; vma->obj = obj; vma->resv = obj->base.resv; vma->size = obj->base.size; vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - i915_active_init(vm->i915, &vma->active, __i915_vma_retire); - INIT_ACTIVE_REQUEST(&vma->last_fence); + i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire); + + /* Declare ourselves safe for use inside shrinkers */ + if (IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&vma->active.mutex); + fs_reclaim_release(GFP_KERNEL); + } INIT_LIST_HEAD(&vma->closed_link); @@ -185,7 +176,7 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); - vma->flags |= I915_VMA_GGTT; + __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma)); } spin_lock(&obj->vma.lock); @@ -232,10 +223,6 @@ vma_create(struct drm_i915_gem_object *obj, spin_unlock(&obj->vma.lock); - mutex_lock(&vm->mutex); - list_add(&vma->vm_link, &vm->unbound_list); - mutex_unlock(&vm->mutex); - return vma; err_vma: @@ -279,8 +266,6 @@ vma_lookup(struct drm_i915_gem_object *obj, * Once created, the VMA is kept until either the object is freed, or the * address space is closed. * - * Must be called with struct_mutex held. - * * Returns the vma, or an error pointer. */ struct i915_vma * @@ -291,7 +276,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, struct i915_vma *vma; GEM_BUG_ON(view && !i915_is_ggtt(vm)); - GEM_BUG_ON(vm->closed); + GEM_BUG_ON(!atomic_read(&vm->open)); spin_lock(&obj->vma.lock); vma = vma_lookup(obj, vm, view); @@ -305,18 +290,70 @@ i915_vma_instance(struct drm_i915_gem_object *obj, return vma; } +struct i915_vma_work { + struct dma_fence_work base; + struct i915_vma *vma; + struct drm_i915_gem_object *pinned; + enum i915_cache_level cache_level; + unsigned int flags; +}; + +static int __vma_bind(struct dma_fence_work *work) +{ + struct i915_vma_work *vw = container_of(work, typeof(*vw), base); + struct i915_vma *vma = vw->vma; + int err; + + err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags); + if (err) + atomic_or(I915_VMA_ERROR, &vma->flags); + + return err; +} + +static void __vma_release(struct dma_fence_work *work) +{ + struct i915_vma_work *vw = container_of(work, typeof(*vw), base); + + if (vw->pinned) + __i915_gem_object_unpin_pages(vw->pinned); +} + +static const struct dma_fence_work_ops bind_ops = { + .name = "bind", + .work = __vma_bind, + .release = __vma_release, +}; + +struct i915_vma_work *i915_vma_work(void) +{ + struct i915_vma_work *vw; + + vw = kzalloc(sizeof(*vw), GFP_KERNEL); + if (!vw) + return NULL; + + dma_fence_work_init(&vw->base, &bind_ops); + vw->base.dma.error = -EAGAIN; /* disable the worker by default */ + + return vw; +} + /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. 
* @vma: VMA to map * @cache_level: mapping cache level * @flags: flags like global or local mapping + * @work: preallocated worker for allocating and binding the PTE * * DMA addresses are taken from the scatter-gather table of this object (or of * this VMA in case of non-default GGTT views) and PTE entries set up. * Note that DMA addresses are also the only part of the SG table we care about. */ -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags) +int i915_vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags, + struct i915_vma_work *work) { u32 bind_flags; u32 vma_flags; @@ -333,13 +370,11 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (GEM_DEBUG_WARN_ON(!flags)) return -EINVAL; - bind_flags = 0; - if (flags & PIN_GLOBAL) - bind_flags |= I915_VMA_GLOBAL_BIND; - if (flags & PIN_USER) - bind_flags |= I915_VMA_LOCAL_BIND; + bind_flags = flags; + bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; - vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + vma_flags = atomic_read(&vma->flags); + vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; if (flags & PIN_UPDATE) bind_flags |= vma_flags; else @@ -350,11 +385,36 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, GEM_BUG_ON(!vma->pages); trace_i915_vma_bind(vma, bind_flags); - ret = vma->ops->bind_vma(vma, cache_level, bind_flags); - if (ret) - return ret; + if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) { + work->vma = vma; + work->cache_level = cache_level; + work->flags = bind_flags | I915_VMA_ALLOC; + + /* + * Note we only want to chain up to the migration fence on + * the pages (not the object itself). As we don't track that, + * yet, we have to use the exclusive fence instead. + * + * Also note that we do not want to track the async vma as + * part of the obj->resv->excl_fence as it only affects + * execution and not content or object's backing store lifetime. + */ + GEM_BUG_ON(i915_active_has_exclusive(&vma->active)); + i915_active_set_exclusive(&vma->active, &work->base.dma); + work->base.dma.error = 0; /* enable the queue_work() */ - vma->flags |= bind_flags; + if (vma->obj) { + __i915_gem_object_pin_pages(vma->obj); + work->pinned = vma->obj; + } + } else { + GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags); + ret = vma->ops->bind_vma(vma, cache_level, bind_flags); + if (ret) + return ret; + } + + atomic_or(bind_flags, &vma->flags); return 0; } @@ -363,19 +423,15 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) void __iomem *ptr; int err; - /* Access through the GTT requires the device to be awake. 
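The worker returned by i915_vma_work() starts out disarmed (dma.error = -EAGAIN) so that an unused worker is simply discarded on commit. A condensed sketch of the caller pattern, as used further down in this patch by i915_vma_pin(); vma and cache_level are assumed to be in scope and locking is omitted:

struct i915_vma_work *work;
int err;

work = i915_vma_work();			/* allocated disarmed */
if (!work)
	return -ENOMEM;

/*
 * i915_vma_bind() arms the worker (clears dma.error) only when the
 * backend actually needs an asynchronous bind for these flags.
 */
err = i915_vma_bind(vma, cache_level, PIN_USER, work);

/* Queues the bind if armed, otherwise just drops the fence. */
dma_fence_work_commit(&work->base);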
*/ - assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm); - - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { + if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { err = -ENODEV; goto err; } GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); + GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)); - ptr = vma->iomap; + ptr = READ_ONCE(vma->iomap); if (ptr == NULL) { ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, vma->node.start, @@ -385,7 +441,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) goto err; } - vma->iomap = ptr; + if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) { + io_mapping_unmap(ptr); + ptr = vma->iomap; + } } __i915_vma_pin(vma); @@ -395,6 +454,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) goto err_unpin; i915_vma_set_ggtt_write(vma); + + /* NB Access through the GTT requires the device to be awake. */ return ptr; err_unpin: @@ -405,18 +466,12 @@ err: void i915_vma_flush_writes(struct i915_vma *vma) { - if (!i915_vma_has_ggtt_write(vma)) - return; - - i915_gem_flush_ggtt_writes(vma->vm->i915); - - i915_vma_unset_ggtt_write(vma); + if (i915_vma_unset_ggtt_write(vma)) + intel_gt_flush_ggtt_writes(vma->vm->gt); } void i915_vma_unpin_iomap(struct i915_vma *vma) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); i915_vma_flush_writes(vma); @@ -452,6 +507,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma, if (!drm_mm_node_allocated(&vma->node)) return false; + if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma))) + return true; + if (vma->node.size < size) return true; @@ -486,17 +544,12 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end; if (mappable && fenceable) - vma->flags |= I915_VMA_CAN_FENCE; + set_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); else - vma->flags &= ~I915_VMA_CAN_FENCE; -} - -static bool color_differs(struct drm_mm_node *node, unsigned long color) -{ - return node->allocated && node->color != color; + clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); } -bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color) { struct drm_mm_node *node = &vma->node; struct drm_mm_node *other; @@ -508,7 +561,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) * these constraints apply and set the drm_mm.color_adjust * appropriately. 
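For reference, a short usage sketch of the iomap helpers touched above; vma, value and offset are placeholders assumed to be in scope:

void __iomem *ptr;

ptr = i915_vma_pin_iomap(vma);		/* maps through the GGTT aperture */
if (IS_ERR(ptr))
	return PTR_ERR(ptr);

iowrite32(value, ptr + offset);		/* WC write into the aperture */

i915_vma_unpin_iomap(vma);		/* flushes GGTT writes, drops the pin */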
*/ - if (vma->vm->mm.color_adjust == NULL) + if (!i915_vm_has_cache_coloring(vma->vm)) return true; /* Only valid to be called on an already inserted vma */ @@ -516,11 +569,13 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) GEM_BUG_ON(list_empty(&node->node_list)); other = list_prev_entry(node, node_list); - if (color_differs(other, cache_level) && !drm_mm_hole_follows(other)) + if (i915_node_color_differs(other, color) && + !drm_mm_hole_follows(other)) return false; other = list_next_entry(node, node_list); - if (color_differs(other, cache_level) && !drm_mm_hole_follows(node)) + if (i915_node_color_differs(other, color) && + !drm_mm_hole_follows(node)) return false; return true; @@ -555,13 +610,12 @@ static void assert_bind_count(const struct drm_i915_gem_object *obj) static int i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - struct drm_i915_private *dev_priv = vma->vm->i915; - unsigned int cache_level; + unsigned long color; u64 start, end; int ret; GEM_BUG_ON(i915_vma_is_closed(vma)); - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); size = max(size, vma->size); @@ -581,7 +635,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) end = vma->vm->total; if (flags & PIN_MAPPABLE) - end = min_t(u64, end, dev_priv->ggtt.mappable_end); + end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end); if (flags & PIN_ZONE_4G) end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE); GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); @@ -597,35 +651,21 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) return -ENOSPC; } - if (vma->obj) { - ret = i915_gem_object_pin_pages(vma->obj); - if (ret) - return ret; - - cache_level = vma->obj->cache_level; - } else { - cache_level = 0; - } - - GEM_BUG_ON(vma->pages); - - ret = vma->ops->set_pages(vma); - if (ret) - goto err_unpin; + color = 0; + if (vma->obj && i915_vm_has_cache_coloring(vma->vm)) + color = vma->obj->cache_level; if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; if (!IS_ALIGNED(offset, alignment) || - range_overflows(offset, size, end)) { - ret = -EINVAL; - goto err_clear; - } + range_overflows(offset, size, end)) + return -EINVAL; ret = i915_gem_gtt_reserve(vma->vm, &vma->node, - size, offset, cache_level, + size, offset, color, flags); if (ret) - goto err_clear; + return ret; } else { /* * We only support huge gtt pages through the 48b PPGTT, @@ -661,116 +701,297 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } ret = i915_gem_gtt_insert(vma->vm, &vma->node, - size, alignment, cache_level, + size, alignment, color, start, end, flags); if (ret) - goto err_clear; + return ret; GEM_BUG_ON(vma->node.start < start); GEM_BUG_ON(vma->node.start + vma->node.size > end); } GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); - - mutex_lock(&vma->vm->mutex); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); - mutex_unlock(&vma->vm->mutex); + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); if (vma->obj) { - atomic_inc(&vma->obj->bind_count); - assert_bind_count(vma->obj); + struct drm_i915_gem_object *obj = vma->obj; + + atomic_inc(&obj->bind_count); + assert_bind_count(obj); } + list_add_tail(&vma->vm_link, &vma->vm->bound_list); return 0; - -err_clear: - 
vma->ops->clear_pages(vma); -err_unpin: - if (vma->obj) - i915_gem_object_unpin_pages(vma->obj); - return ret; } static void -i915_vma_remove(struct i915_vma *vma) +i915_vma_detach(struct i915_vma *vma) { GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); - - vma->ops->clear_pages(vma); - - mutex_lock(&vma->vm->mutex); - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - mutex_unlock(&vma->vm->mutex); + GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); /* - * Since the unbound list is global, only move to that list if - * no more VMAs exist. + * And finally now the object is completely decoupled from this + * vma, we can drop its hold on the backing storage and allow + * it to be reaped by the shrinker. */ + list_del(&vma->vm_link); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; + assert_bind_count(obj); atomic_dec(&obj->bind_count); + } +} - /* - * And finally now the object is completely decoupled from this - * vma, we can drop its hold on the backing storage and allow - * it to be reaped by the shrinker. - */ - i915_gem_object_unpin_pages(obj); - assert_bind_count(obj); +static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) +{ + unsigned int bound; + bool pinned = true; + + bound = atomic_read(&vma->flags); + do { + if (unlikely(flags & ~bound)) + return false; + + if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) + return false; + + if (!(bound & I915_VMA_PIN_MASK)) + goto unpinned; + + GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0); + } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); + + return true; + +unpinned: + /* + * If pin_count==0, but we are bound, check under the lock to avoid + * racing with a concurrent i915_vma_unbind(). + */ + mutex_lock(&vma->vm->mutex); + do { + if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) { + pinned = false; + break; + } + + if (unlikely(flags & ~bound)) { + pinned = false; + break; + } + } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); + mutex_unlock(&vma->vm->mutex); + + return pinned; +} + +static int vma_get_pages(struct i915_vma *vma) +{ + int err = 0; + + if (atomic_add_unless(&vma->pages_count, 1, 0)) + return 0; + + /* Allocations ahoy! 
*/ + if (mutex_lock_interruptible(&vma->pages_mutex)) + return -EINTR; + + if (!atomic_read(&vma->pages_count)) { + if (vma->obj) { + err = i915_gem_object_pin_pages(vma->obj); + if (err) + goto unlock; + } + + err = vma->ops->set_pages(vma); + if (err) { + if (vma->obj) + i915_gem_object_unpin_pages(vma->obj); + goto unlock; + } } + atomic_inc(&vma->pages_count); + +unlock: + mutex_unlock(&vma->pages_mutex); + + return err; } -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags) +static void __vma_put_pages(struct i915_vma *vma, unsigned int count) { - const unsigned int bound = vma->flags; - int ret; + /* We allocate under vma_get_pages, so beware the shrinker */ + mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING); + GEM_BUG_ON(atomic_read(&vma->pages_count) < count); + if (atomic_sub_return(count, &vma->pages_count) == 0) { + vma->ops->clear_pages(vma); + GEM_BUG_ON(vma->pages); + if (vma->obj) + i915_gem_object_unpin_pages(vma->obj); + } + mutex_unlock(&vma->pages_mutex); +} - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); - GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); +static void vma_put_pages(struct i915_vma *vma) +{ + if (atomic_add_unless(&vma->pages_count, -1, 1)) + return; - if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { - ret = -EBUSY; - goto err_unpin; + __vma_put_pages(vma, 1); +} + +static void vma_unbind_pages(struct i915_vma *vma) +{ + unsigned int count; + + lockdep_assert_held(&vma->vm->mutex); + + /* The upper portion of pages_count is the number of bindings */ + count = atomic_read(&vma->pages_count); + count >>= I915_VMA_PAGES_BIAS; + GEM_BUG_ON(!count); + + __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS); +} + +int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + struct i915_vma_work *work = NULL; + intel_wakeref_t wakeref = 0; + unsigned int bound; + int err; + + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); + + GEM_BUG_ON(flags & PIN_UPDATE); + GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL))); + + /* First try and grab the pin without rebinding the vma */ + if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK)) + return 0; + + err = vma_get_pages(vma); + if (err) + return err; + + if (flags & vma->vm->bind_async_flags) { + work = i915_vma_work(); + if (!work) { + err = -ENOMEM; + goto err_pages; + } } - if ((bound & I915_VMA_BIND_MASK) == 0) { - ret = i915_vma_insert(vma, size, alignment, flags); - if (ret) - goto err_unpin; + if (flags & PIN_GLOBAL) + wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + + /* No more allocations allowed once we hold vm->mutex */ + err = mutex_lock_interruptible(&vma->vm->mutex); + if (err) + goto err_fence; + + bound = atomic_read(&vma->flags); + if (unlikely(bound & I915_VMA_ERROR)) { + err = -ENOMEM; + goto err_unlock; } - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - ret = i915_vma_bind(vma, vma->obj ? 
vma->obj->cache_level : 0, flags); - if (ret) - goto err_remove; + if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) { + err = -EAGAIN; /* pins are meant to be fairly temporary */ + goto err_unlock; + } + + if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) { + __i915_vma_pin(vma); + goto err_unlock; + } - GEM_BUG_ON((vma->flags & I915_VMA_BIND_MASK) == 0); + err = i915_active_acquire(&vma->active); + if (err) + goto err_unlock; + + if (!(bound & I915_VMA_BIND_MASK)) { + err = i915_vma_insert(vma, size, alignment, flags); + if (err) + goto err_active; - if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) - __i915_vma_set_map_and_fenceable(vma); + if (i915_is_ggtt(vma->vm)) + __i915_vma_set_map_and_fenceable(vma); + } + + GEM_BUG_ON(!vma->pages); + err = i915_vma_bind(vma, + vma->obj ? vma->obj->cache_level : 0, + flags, work); + if (err) + goto err_remove; + + /* There should only be at most 2 active bindings (user, global) */ + GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound); + atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); + __i915_vma_pin(vma); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + GEM_BUG_ON(!i915_vma_is_bound(vma, flags)); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; err_remove: - if ((bound & I915_VMA_BIND_MASK) == 0) { - i915_vma_remove(vma); - GEM_BUG_ON(vma->pages); - GEM_BUG_ON(vma->flags & I915_VMA_BIND_MASK); + if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) { + i915_vma_detach(vma); + drm_mm_remove_node(&vma->node); } -err_unpin: - __i915_vma_unpin(vma); - return ret; +err_active: + i915_active_release(&vma->active); +err_unlock: + mutex_unlock(&vma->vm->mutex); +err_fence: + if (work) + dma_fence_work_commit(&work->base); + if (wakeref) + intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); +err_pages: + vma_put_pages(vma); + return err; +} + +static void flush_idle_contexts(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + intel_engine_flush_barriers(engine); + + intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); +} + +int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags) +{ + struct i915_address_space *vm = vma->vm; + int err; + + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + + do { + err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL); + if (err != -ENOSPC) + return err; + + /* Unlike i915_vma_pin, we don't take no for an answer! */ + flush_idle_contexts(vm->gt); + if (mutex_lock_interruptible(&vm->mutex) == 0) { + i915_gem_evict_vm(vm); + mutex_unlock(&vm->mutex); + } + } while (1); } void i915_vma_close(struct i915_vma *vma) { - struct drm_i915_private *i915 = vma->vm->i915; + struct intel_gt *gt = vma->vm->gt; unsigned long flags; GEM_BUG_ON(i915_vma_is_closed(vma)); @@ -787,81 +1008,91 @@ void i915_vma_close(struct i915_vma *vma) * causing us to rebind the VMA once more. This ends up being a lot * of wasted work for the steady state. 
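By way of example (not from this hunk), a caller that needs an object resident in the GGTT would now go through i915_ggtt_pin(), which retries with eviction on -ENOSPC, while process-space bindings keep using i915_vma_pin(); obj and ggtt are assumed to be in scope:

struct i915_vma *vma;
int err;

vma = i915_vma_instance(obj, &ggtt->vm, NULL);
if (IS_ERR(vma))
	return PTR_ERR(vma);

/* Pin into the GGTT, 4KiB aligned; PIN_GLOBAL is added internally. */
err = i915_ggtt_pin(vma, SZ_4K, PIN_MAPPABLE);
if (err)
	return err;

/* use the binding, then release the pin */

i915_vma_unpin(vma);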
*/ - spin_lock_irqsave(&i915->gt.closed_lock, flags); - list_add(&vma->closed_link, &i915->gt.closed_vma); - spin_unlock_irqrestore(&i915->gt.closed_lock, flags); + spin_lock_irqsave(>->closed_lock, flags); + list_add(&vma->closed_link, >->closed_vma); + spin_unlock_irqrestore(>->closed_lock, flags); } static void __i915_vma_remove_closed(struct i915_vma *vma) { - struct drm_i915_private *i915 = vma->vm->i915; + struct intel_gt *gt = vma->vm->gt; - if (!i915_vma_is_closed(vma)) - return; - - spin_lock_irq(&i915->gt.closed_lock); + spin_lock_irq(>->closed_lock); list_del_init(&vma->closed_link); - spin_unlock_irq(&i915->gt.closed_lock); + spin_unlock_irq(>->closed_lock); } void i915_vma_reopen(struct i915_vma *vma) { - __i915_vma_remove_closed(vma); + if (i915_vma_is_closed(vma)) + __i915_vma_remove_closed(vma); } -static void __i915_vma_destroy(struct i915_vma *vma) +void i915_vma_release(struct kref *ref) { - GEM_BUG_ON(vma->node.allocated); - GEM_BUG_ON(vma->fence); - - GEM_BUG_ON(i915_active_request_isset(&vma->last_fence)); - - mutex_lock(&vma->vm->mutex); - list_del(&vma->vm_link); - mutex_unlock(&vma->vm->mutex); + struct i915_vma *vma = container_of(ref, typeof(*vma), ref); + + if (drm_mm_node_allocated(&vma->node)) { + mutex_lock(&vma->vm->mutex); + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); + mutex_unlock(&vma->vm->mutex); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + } + GEM_BUG_ON(i915_vma_is_active(vma)); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; spin_lock(&obj->vma.lock); list_del(&vma->obj_link); - rb_erase(&vma->obj_node, &vma->obj->vma.tree); + rb_erase(&vma->obj_node, &obj->vma.tree); spin_unlock(&obj->vma.lock); } - i915_active_fini(&vma->active); + __i915_vma_remove_closed(vma); + i915_vm_put(vma->vm); + i915_active_fini(&vma->active); i915_vma_free(vma); } -void i915_vma_destroy(struct i915_vma *vma) +void i915_vma_parked(struct intel_gt *gt) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + struct i915_vma *vma, *next; - GEM_BUG_ON(i915_vma_is_pinned(vma)); + spin_lock_irq(>->closed_lock); + list_for_each_entry_safe(vma, next, >->closed_vma, closed_link) { + struct drm_i915_gem_object *obj = vma->obj; + struct i915_address_space *vm = vma->vm; - __i915_vma_remove_closed(vma); + /* XXX All to avoid keeping a reference on i915_vma itself */ - WARN_ON(i915_vma_unbind(vma)); - GEM_BUG_ON(i915_vma_is_active(vma)); + if (!kref_get_unless_zero(&obj->base.refcount)) + continue; - __i915_vma_destroy(vma); -} + if (i915_vm_tryopen(vm)) { + list_del_init(&vma->closed_link); + } else { + i915_gem_object_put(obj); + obj = NULL; + } -void i915_vma_parked(struct drm_i915_private *i915) -{ - struct i915_vma *vma, *next; + spin_unlock_irq(>->closed_lock); - spin_lock_irq(&i915->gt.closed_lock); - list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { - list_del_init(&vma->closed_link); - spin_unlock_irq(&i915->gt.closed_lock); + if (obj) { + __i915_vma_put(vma); + i915_gem_object_put(obj); + } - i915_vma_destroy(vma); + i915_vm_close(vm); - spin_lock_irq(&i915->gt.closed_lock); + /* Restart after dropping lock */ + spin_lock_irq(>->closed_lock); + next = list_first_entry(>->closed_vma, + typeof(*next), closed_link); } - spin_unlock_irq(&i915->gt.closed_lock); + spin_unlock_irq(>->closed_lock); } static void __i915_vma_iounmap(struct i915_vma *vma) @@ -877,17 +1108,16 @@ static void __i915_vma_iounmap(struct i915_vma *vma) void i915_vma_revoke_mmap(struct i915_vma *vma) { - struct 
drm_vma_offset_node *node = &vma->obj->base.vma_node; + struct drm_vma_offset_node *node; u64 vma_offset; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (!i915_vma_has_userfault(vma)) return; GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); GEM_BUG_ON(!vma->obj->userfault_count); + node = &vma->mmo->vma_node; vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping, drm_vma_node_offset_addr(node) + vma_offset, @@ -899,21 +1129,18 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) list_del(&vma->obj->userfault_link); } -static void export_fence(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags) +int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) { - struct reservation_object *resv = vma->resv; + int err; - /* - * Ignore errors from failing to allocate the new fence, we can't - * handle an error right now. Worst case should be missed - * synchronisation leading to rendering corruption. - */ - if (flags & EXEC_OBJECT_WRITE) - reservation_object_add_excl_fence(resv, &rq->fence); - else if (reservation_object_reserve_shared(resv, 1) == 0) - reservation_object_add_shared_fence(resv, &rq->fence); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + + /* Wait for the vma to be bound before we start! */ + err = i915_request_await_active(rq, &vma->active); + if (err) + return err; + + return i915_active_add_request(&vma->active, rq); } int i915_vma_move_to_active(struct i915_vma *vma, @@ -921,97 +1148,80 @@ int i915_vma_move_to_active(struct i915_vma *vma, unsigned int flags) { struct drm_i915_gem_object *obj = vma->obj; + int err; - assert_vma_held(vma); assert_object_held(obj); - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - /* - * Add a reference if we're newly entering the active list. - * The order in which we add operations to the retirement queue is - * vital here: mark_active adds to the start of the callback list, - * such that subsequent callbacks are called first. Therefore we - * add the active reference first and queue for it to be dropped - * *last*. 
- */ - if (!vma->active.count && !obj->active_count++) - i915_gem_object_get(obj); /* once more for the active ref */ - - if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) { - if (!vma->active.count && !--obj->active_count) - i915_gem_object_put(obj); - return -ENOMEM; - } - - GEM_BUG_ON(!i915_vma_is_active(vma)); - GEM_BUG_ON(!obj->active_count); + err = __i915_vma_move_to_active(vma, rq); + if (unlikely(err)) + return err; - obj->write_domain = 0; if (flags & EXEC_OBJECT_WRITE) { - obj->write_domain = I915_GEM_DOMAIN_RENDER; + struct intel_frontbuffer *front; - if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) - __i915_active_request_set(&obj->frontbuffer_write, rq); + front = __intel_frontbuffer_get(obj); + if (unlikely(front)) { + if (intel_frontbuffer_invalidate(front, ORIGIN_CS)) + i915_active_add_request(&front->write, rq); + intel_frontbuffer_put(front); + } + dma_resv_add_excl_fence(vma->resv, &rq->fence); + obj->write_domain = I915_GEM_DOMAIN_RENDER; obj->read_domains = 0; + } else { + err = dma_resv_reserve_shared(vma->resv, 1); + if (unlikely(err)) + return err; + + dma_resv_add_shared_fence(vma->resv, &rq->fence); + obj->write_domain = 0; } obj->read_domains |= I915_GEM_GPU_DOMAINS; + obj->mm.dirty = true; - if (flags & EXEC_OBJECT_NEEDS_FENCE) - __i915_active_request_set(&vma->last_fence, rq); - - export_fence(vma, rq, flags); + GEM_BUG_ON(!i915_vma_is_active(vma)); return 0; } -int i915_vma_unbind(struct i915_vma *vma) +int __i915_vma_unbind(struct i915_vma *vma) { int ret; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + lockdep_assert_held(&vma->vm->mutex); /* * First wait upon any activity as retiring the request may * have side-effects such as unpinning or even unbinding this vma. + * + * XXX Actually waiting under the vm->mutex is a hinderance and + * should be pipelined wherever possible. In cases where that is + * unavoidable, we should lift the wait to before the mutex. */ - might_sleep(); - if (i915_vma_is_active(vma)) { - /* - * When a closed VMA is retired, it is unbound - eek. - * In order to prevent it from being recursively closed, - * take a pin on the vma so that the second unbind is - * aborted. - * - * Even more scary is that the retire callback may free - * the object (last active vma). To prevent the explosion - * we defer the actual object free to a worker that can - * only proceed once it acquires the struct_mutex (which - * we currently hold, therefore it cannot free this object - * before we are finished). - */ - __i915_vma_pin(vma); - - ret = i915_active_wait(&vma->active); - if (ret) - goto unpin; - - ret = i915_active_request_retire(&vma->last_fence, - &vma->vm->i915->drm.struct_mutex); -unpin: - __i915_vma_unpin(vma); - if (ret) - return ret; - } - GEM_BUG_ON(i915_vma_is_active(vma)); + ret = i915_vma_sync(vma); + if (ret) + return ret; if (i915_vma_is_pinned(vma)) { vma_print_allocator(vma, "is pinned"); - return -EBUSY; + return -EAGAIN; } + /* + * After confirming that no one else is pinning this vma, wait for + * any laggards who may have crept in during the wait (through + * a residual pin skipping the vm->mutex) to complete. 
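A condensed sketch of how a request-building caller drives the tracking above (the usual execbuf ordering, simplified): the vma is already pinned, its reservation lock is held around the call, and the request ends up in both the vma's active tracker and the dma_resv:

/* Illustrative only; vma and rq assumed in scope, vma already pinned. */
i915_vma_lock(vma);
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
if (err)
	return err;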
+ */ + ret = i915_vma_sync(vma); + if (ret) + return ret; + if (!drm_mm_node_allocated(&vma->node)) return 0; + GEM_BUG_ON(i915_vma_is_pinned(vma)); + GEM_BUG_ON(i915_vma_is_active(vma)); + if (i915_vma_is_map_and_fenceable(vma)) { /* * Check that we have flushed all writes through the GGTT @@ -1023,7 +1233,7 @@ unpin: GEM_BUG_ON(i915_vma_has_ggtt_write(vma)); /* release the fence reg _after_ flushing */ - ret = i915_vma_put_fence(vma); + ret = i915_vma_revoke_fence(vma); if (ret) return ret; @@ -1031,22 +1241,66 @@ unpin: i915_vma_revoke_mmap(vma); __i915_vma_iounmap(vma); - vma->flags &= ~I915_VMA_CAN_FENCE; + clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); } GEM_BUG_ON(vma->fence); GEM_BUG_ON(i915_vma_has_userfault(vma)); - if (likely(!vma->vm->closed)) { + if (likely(atomic_read(&vma->vm->open))) { trace_i915_vma_unbind(vma); vma->ops->unbind_vma(vma); } - vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags); - i915_vma_remove(vma); + i915_vma_detach(vma); + vma_unbind_pages(vma); + drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */ return 0; } +int i915_vma_unbind(struct i915_vma *vma) +{ + struct i915_address_space *vm = vma->vm; + intel_wakeref_t wakeref = 0; + int err; + + if (!drm_mm_node_allocated(&vma->node)) + return 0; + + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + /* XXX not always required: nop_clear_range */ + wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); + + err = mutex_lock_interruptible(&vm->mutex); + if (err) + return err; + + err = __i915_vma_unbind(vma); + mutex_unlock(&vm->mutex); + + if (wakeref) + intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); + + return err; +} + +struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma) +{ + i915_gem_object_make_unshrinkable(vma->obj); + return vma; +} + +void i915_vma_make_shrinkable(struct i915_vma *vma) +{ + i915_gem_object_make_shrinkable(vma->obj); +} + +void i915_vma_make_purgeable(struct i915_vma *vma) +{ + i915_gem_object_make_purgeable(vma->obj); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_vma.c" #endif diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 4b769db649bf..02b31a62951e 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -30,121 +30,14 @@ #include <drm/drm_mm.h> +#include "gem/i915_gem_object.h" + #include "i915_gem_gtt.h" #include "i915_gem_fence_reg.h" -#include "gem/i915_gem_object.h" #include "i915_active.h" #include "i915_request.h" - -enum i915_cache_level; - -/** - * DOC: Virtual Memory Address - * - * A VMA represents a GEM BO that is bound into an address space. Therefore, a - * VMA's presence cannot be guaranteed before binding, or after unbinding the - * object into/from the address space. - * - * To make things as simple as possible (ie. no refcounting), a VMA's lifetime - * will always be <= an objects lifetime. So object refcounting should cover us. 
- */ -struct i915_vma { - struct drm_mm_node node; - struct drm_i915_gem_object *obj; - struct i915_address_space *vm; - const struct i915_vma_ops *ops; - struct i915_fence_reg *fence; - struct reservation_object *resv; /** Alias of obj->resv */ - struct sg_table *pages; - void __iomem *iomap; - void *private; /* owned by creator */ - u64 size; - u64 display_alignment; - struct i915_page_sizes page_sizes; - - u32 fence_size; - u32 fence_alignment; - - /** - * Count of the number of times this vma has been opened by different - * handles (but same file) for execbuf, i.e. the number of aliases - * that exist in the ctx->handle_vmas LUT for this vma. - */ - atomic_t open_count; - unsigned long flags; - /** - * How many users have pinned this object in GTT space. - * - * This is a tightly bound, fairly small number of users, so we - * stuff inside the flags field so that we can both check for overflow - * and detect a no-op i915_vma_pin() in a single check, while also - * pinning the vma. - * - * The worst case display setup would have the same vma pinned for - * use on each plane on each crtc, while also building the next atomic - * state and holding a pin for the length of the cleanup queue. In the - * future, the flip queue may be increased from 1. - * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 - * - * For GEM, the number of concurrent users for pwrite/pread is - * unbounded. For execbuffer, it is currently one but will in future - * be extended to allow multiple clients to pin vma concurrently. - * - * We also use suballocated pages, with each suballocation claiming - * its own pin on the shared vma. At present, this is limited to - * exclusive cachelines of a single page, so a maximum of 64 possible - * users. - */ -#define I915_VMA_PIN_MASK 0xff -#define I915_VMA_PIN_OVERFLOW BIT(8) - - /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(9) -#define I915_VMA_LOCAL_BIND BIT(10) -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) - -#define I915_VMA_GGTT BIT(11) -#define I915_VMA_CAN_FENCE BIT(12) -#define I915_VMA_USERFAULT_BIT 13 -#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) -#define I915_VMA_GGTT_WRITE BIT(14) - - struct i915_active active; - struct i915_active_request last_fence; - - /** - * Support different GGTT views into the same object. - * This means there can be multiple VMA mappings per object and per VM. - * i915_ggtt_view_type is used to distinguish between those entries. - * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also - * assumed in GEM functions which take no ggtt view parameter. - */ - struct i915_ggtt_view ggtt_view; - - /** This object's place on the active/inactive lists */ - struct list_head vm_link; - - struct list_head obj_link; /* Link in the object's VMA list */ - struct rb_node obj_node; - struct hlist_node obj_hash; - - /** This vma's place in the execbuf reservation list */ - struct list_head exec_link; - struct list_head reloc_link; - - /** This vma's place in the eviction list */ - struct list_head evict_link; - - struct list_head closed_link; - - /** - * Used for performing relocations during execbuffer insertion. 
- */ - unsigned int *exec_flags; - struct hlist_node exec_node; - u32 exec_handle; -}; +#include "i915_vma_types.h" struct i915_vma * i915_vma_instance(struct drm_i915_gem_object *obj, @@ -159,52 +52,57 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma) return !i915_active_is_idle(&vma->active); } +int __must_check __i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq); int __must_check i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags); +#define __i915_vma_flags(v) ((unsigned long *)&(v)->flags.counter) + static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) { - return vma->flags & I915_VMA_GGTT; + return test_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_has_ggtt_write(const struct i915_vma *vma) { - return vma->flags & I915_VMA_GGTT_WRITE; + return test_bit(I915_VMA_GGTT_WRITE_BIT, __i915_vma_flags(vma)); } static inline void i915_vma_set_ggtt_write(struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - vma->flags |= I915_VMA_GGTT_WRITE; + set_bit(I915_VMA_GGTT_WRITE_BIT, __i915_vma_flags(vma)); } -static inline void i915_vma_unset_ggtt_write(struct i915_vma *vma) +static inline bool i915_vma_unset_ggtt_write(struct i915_vma *vma) { - vma->flags &= ~I915_VMA_GGTT_WRITE; + return test_and_clear_bit(I915_VMA_GGTT_WRITE_BIT, + __i915_vma_flags(vma)); } void i915_vma_flush_writes(struct i915_vma *vma); static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) { - return vma->flags & I915_VMA_CAN_FENCE; + return test_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_set_userfault(struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); - return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags); + return test_and_set_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma)); } static inline void i915_vma_unset_userfault(struct i915_vma *vma) { - return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags); + return clear_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_has_userfault(const struct i915_vma *vma) { - return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags); + return test_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_is_closed(const struct i915_vma *vma) @@ -215,7 +113,7 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma) static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON(!vma->node.allocated); + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(upper_32_bits(vma->node.start)); GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); return lower_32_bits(vma->node.start); @@ -232,6 +130,14 @@ static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) return vma; } +static inline struct i915_vma *i915_vma_tryget(struct i915_vma *vma) +{ + if (likely(kref_get_unless_zero(&vma->obj->base.refcount))) + return vma; + + return NULL; +} + static inline void i915_vma_put(struct i915_vma *vma) { i915_gem_object_put(vma->obj); @@ -286,55 +192,56 @@ i915_vma_compare(struct i915_vma *vma, return memcmp(&vma->ggtt_view.partial, &view->partial, view->type); } -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags); -bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); +struct i915_vma_work *i915_vma_work(void); +int i915_vma_bind(struct i915_vma *vma, + enum i915_cache_level 
cache_level, + u32 flags, + struct i915_vma_work *work); + +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color); bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); void i915_vma_revoke_mmap(struct i915_vma *vma); +int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); void i915_vma_reopen(struct i915_vma *vma); -void i915_vma_destroy(struct i915_vma *vma); - -#define assert_vma_held(vma) reservation_object_assert_held((vma)->resv) -static inline void i915_vma_lock(struct i915_vma *vma) +static inline struct i915_vma *__i915_vma_get(struct i915_vma *vma) { - reservation_object_lock(vma->resv, NULL); + if (kref_get_unless_zero(&vma->ref)) + return vma; + + return NULL; } -static inline void i915_vma_unlock(struct i915_vma *vma) +void i915_vma_release(struct kref *ref); +static inline void __i915_vma_put(struct i915_vma *vma) { - reservation_object_unlock(vma->resv); + kref_put(&vma->ref, i915_vma_release); } -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags); -static inline int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); - BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); - BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); +#define assert_vma_held(vma) dma_resv_assert_held((vma)->resv) - /* Pin early to prevent the shrinker/eviction logic from destroying - * our vma as we insert and bind. - */ - if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) { - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; - } +static inline void i915_vma_lock(struct i915_vma *vma) +{ + dma_resv_lock(vma->resv, NULL); +} - return __i915_vma_do_pin(vma, size, alignment, flags); +static inline void i915_vma_unlock(struct i915_vma *vma) +{ + dma_resv_unlock(vma->resv); } +int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags); + static inline int i915_vma_pin_count(const struct i915_vma *vma) { - return vma->flags & I915_VMA_PIN_MASK; + return atomic_read(&vma->flags) & I915_VMA_PIN_MASK; } static inline bool i915_vma_is_pinned(const struct i915_vma *vma) @@ -344,18 +251,18 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma) static inline void __i915_vma_pin(struct i915_vma *vma) { - vma->flags++; - GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); + atomic_inc(&vma->flags); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); } static inline void __i915_vma_unpin(struct i915_vma *vma) { - vma->flags--; + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + atomic_dec(&vma->flags); } static inline void i915_vma_unpin(struct i915_vma *vma) { - GEM_BUG_ON(!i915_vma_is_pinned(vma)); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); __i915_vma_unpin(vma); } @@ -363,7 +270,13 @@ static inline void i915_vma_unpin(struct i915_vma *vma) static inline bool i915_vma_is_bound(const struct i915_vma *vma, unsigned int where) { - return vma->flags & where; + return atomic_read(&vma->flags) & where; +} + +static inline bool i915_node_color_differs(const struct drm_mm_node *node, + unsigned long color) +{ + return drm_mm_node_allocated(node) && node->color != color; } /** @@ -375,8 
+288,6 @@ static inline bool i915_vma_is_bound(const struct i915_vma *vma, * the caller must call i915_vma_unpin_iomap to relinquish the pinning * after the iomapping is no longer required. * - * Callers must hold the struct_mutex. - * * Returns a valid iomapped pointer or ERR_PTR. */ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); @@ -388,8 +299,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); * * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). * - * Callers must hold the struct_mutex. This function is only valid to be - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). + * This function is only valid to be called on a VMA previously + * iomapped by the caller with i915_vma_pin_iomap(). */ void i915_vma_unpin_iomap(struct i915_vma *vma); @@ -414,13 +325,15 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma) * * True if the vma has a fence, false otherwise. */ -int i915_vma_pin_fence(struct i915_vma *vma); -int __must_check i915_vma_put_fence(struct i915_vma *vma); +int __must_check i915_vma_pin_fence(struct i915_vma *vma); +int __must_check i915_vma_revoke_fence(struct i915_vma *vma); + +int __i915_vma_pin_fence(struct i915_vma *vma); static inline void __i915_vma_unpin_fence(struct i915_vma *vma) { - GEM_BUG_ON(vma->fence->pin_count <= 0); - vma->fence->pin_count--; + GEM_BUG_ON(atomic_read(&vma->fence->pin_count) <= 0); + atomic_dec(&vma->fence->pin_count); } /** @@ -434,12 +347,11 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma) static inline void i915_vma_unpin_fence(struct i915_vma *vma) { - /* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */ if (vma->fence) __i915_vma_unpin_fence(vma); } -void i915_vma_parked(struct drm_i915_private *i915); +void i915_vma_parked(struct intel_gt *gt); #define for_each_until(cond) if (cond) break; else @@ -459,4 +371,14 @@ void i915_vma_parked(struct drm_i915_private *i915); struct i915_vma *i915_vma_alloc(void); void i915_vma_free(struct i915_vma *vma); +struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma); +void i915_vma_make_shrinkable(struct i915_vma *vma); +void i915_vma_make_purgeable(struct i915_vma *vma); + +static inline int i915_vma_sync(struct i915_vma *vma) +{ + /* Wait for the asynchronous bindings and pending GPU reads */ + return i915_active_wait(&vma->active); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h new file mode 100644 index 000000000000..e0942efd5236 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -0,0 +1,294 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_VMA_TYPES_H__ +#define __I915_VMA_TYPES_H__ + +#include <linux/rbtree.h> + +#include <drm/drm_mm.h> + +#include "gem/i915_gem_object_types.h" + +enum i915_cache_level; + +/** + * DOC: Global GTT views + * + * Background and previous state + * + * Historically objects could exists (be bound) in global GTT space only as + * singular instances with a view representing all of the object's backing pages + * in a linear fashion. This view will be called a normal view. + * + * To support multiple views of the same object, where the number of mapped + * pages is not equal to the backing store, or where the layout of the pages + * is not linear, concept of a GGTT view was added. + * + * One example of an alternative view is a stereo display driven by a single + * image. In this case we would have a framebuffer looking like this + * (2x2 pages): + * + * 12 + * 34 + * + * Above would represent a normal GGTT view as normally mapped for GPU or CPU + * rendering. In contrast, fed to the display engine would be an alternative + * view which could look something like this: + * + * 1212 + * 3434 + * + * In this example both the size and layout of pages in the alternative view is + * different from the normal view. + * + * Implementation and usage + * + * GGTT views are implemented using VMAs and are distinguished via enum + * i915_ggtt_view_type and struct i915_ggtt_view. + * + * A new flavour of core GEM functions which work with GGTT bound objects were + * added with the _ggtt_ infix, and sometimes with _view postfix to avoid + * renaming in large amounts of code. They take the struct i915_ggtt_view + * parameter encapsulating all metadata required to implement a view. + * + * As a helper for callers which are only interested in the normal view, + * globally const i915_ggtt_view_normal singleton instance exists. All old core + * GEM API functions, the ones not taking the view parameter, are operating on, + * or with the normal GGTT view. + * + * Code wanting to add or use a new GGTT view needs to: + * + * 1. Add a new enum with a suitable name. + * 2. Extend the metadata in the i915_ggtt_view structure if required. + * 3. Add support to i915_get_vma_pages(). + * + * New views are required to build a scatter-gather table from within the + * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and + * exists for the lifetime of an VMA. + * + * Core API is designed to have copy semantics which means that passed in + * struct i915_ggtt_view does not need to be persistent (left around after + * calling the core API functions). 
+ * + */ + +struct intel_remapped_plane_info { + /* in gtt pages */ + unsigned int width, height, stride, offset; +} __packed; + +struct intel_remapped_info { + struct intel_remapped_plane_info plane[2]; + unsigned int unused_mbz; +} __packed; + +struct intel_rotation_info { + struct intel_remapped_plane_info plane[2]; +} __packed; + +struct intel_partial_info { + u64 offset; + unsigned int size; +} __packed; + +enum i915_ggtt_view_type { + I915_GGTT_VIEW_NORMAL = 0, + I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info), + I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info), + I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info), +}; + +static inline void assert_i915_gem_gtt_types(void) +{ + BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int)); + BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int)); + BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int)); + + /* Check that rotation/remapped shares offsets for simplicity */ + BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) != + offsetof(struct intel_rotation_info, plane[0])); + BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) != + offsetofend(struct intel_rotation_info, plane[1])); + + /* As we encode the size of each branch inside the union into its type, + * we have to be careful that each branch has a unique size. + */ + switch ((enum i915_ggtt_view_type)0) { + case I915_GGTT_VIEW_NORMAL: + case I915_GGTT_VIEW_PARTIAL: + case I915_GGTT_VIEW_ROTATED: + case I915_GGTT_VIEW_REMAPPED: + /* gcc complains if these are identical cases */ + break; + } +} + +struct i915_ggtt_view { + enum i915_ggtt_view_type type; + union { + /* Members need to contain no holes/padding */ + struct intel_partial_info partial; + struct intel_rotation_info rotated; + struct intel_remapped_info remapped; + }; +}; + +/** + * DOC: Virtual Memory Address + * + * A VMA represents a GEM BO that is bound into an address space. Therefore, a + * VMA's presence cannot be guaranteed before binding, or after unbinding the + * object into/from the address space. + * + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime + * will always be <= an objects lifetime. So object refcounting should cover us. + */ +struct i915_vma { + struct drm_mm_node node; + + struct i915_address_space *vm; + const struct i915_vma_ops *ops; + + struct drm_i915_gem_object *obj; + struct dma_resv *resv; /** Alias of obj->resv */ + + struct sg_table *pages; + void __iomem *iomap; + void *private; /* owned by creator */ + + struct i915_fence_reg *fence; + + u64 size; + u64 display_alignment; + struct i915_page_sizes page_sizes; + + /* mmap-offset associated with fencing for this vma */ + struct i915_mmap_offset *mmo; + + u32 fence_size; + u32 fence_alignment; + + /** + * Count of the number of times this vma has been opened by different + * handles (but same file) for execbuf, i.e. the number of aliases + * that exist in the ctx->handle_vmas LUT for this vma. + */ + struct kref ref; + atomic_t open_count; + atomic_t flags; + /** + * How many users have pinned this object in GTT space. + * + * This is a tightly bound, fairly small number of users, so we + * stuff inside the flags field so that we can both check for overflow + * and detect a no-op i915_vma_pin() in a single check, while also + * pinning the vma. 
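To make the view machinery described above concrete, a hypothetical caller building a partial view (page counts invented for illustration, obj and ggtt assumed in scope) fills in an i915_ggtt_view and hands it to i915_vma_instance():

struct i915_ggtt_view view = {
	.type = I915_GGTT_VIEW_PARTIAL,
	.partial = {
		.offset = 0,	/* first page of the object */
		.size	= 16,	/* map 16 pages of backing store */
	},
};
struct i915_vma *vma;
int err;

vma = i915_vma_instance(obj, &ggtt->vm, &view);
if (IS_ERR(vma))
	return PTR_ERR(vma);

err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);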
+ * + * The worst case display setup would have the same vma pinned for + * use on each plane on each crtc, while also building the next atomic + * state and holding a pin for the length of the cleanup queue. In the + * future, the flip queue may be increased from 1. + * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 + * + * For GEM, the number of concurrent users for pwrite/pread is + * unbounded. For execbuffer, it is currently one but will in future + * be extended to allow multiple clients to pin vma concurrently. + * + * We also use suballocated pages, with each suballocation claiming + * its own pin on the shared vma. At present, this is limited to + * exclusive cachelines of a single page, so a maximum of 64 possible + * users. + */ +#define I915_VMA_PIN_MASK 0x3ff +#define I915_VMA_OVERFLOW 0x200 + + /** Flags and address space this VMA is bound to */ +#define I915_VMA_GLOBAL_BIND_BIT 10 +#define I915_VMA_LOCAL_BIND_BIT 11 + +#define I915_VMA_GLOBAL_BIND ((int)BIT(I915_VMA_GLOBAL_BIND_BIT)) +#define I915_VMA_LOCAL_BIND ((int)BIT(I915_VMA_LOCAL_BIND_BIT)) + +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND) + +#define I915_VMA_ALLOC_BIT 12 +#define I915_VMA_ALLOC ((int)BIT(I915_VMA_ALLOC_BIT)) + +#define I915_VMA_ERROR_BIT 13 +#define I915_VMA_ERROR ((int)BIT(I915_VMA_ERROR_BIT)) + +#define I915_VMA_GGTT_BIT 14 +#define I915_VMA_CAN_FENCE_BIT 15 +#define I915_VMA_USERFAULT_BIT 16 +#define I915_VMA_GGTT_WRITE_BIT 17 + +#define I915_VMA_GGTT ((int)BIT(I915_VMA_GGTT_BIT)) +#define I915_VMA_CAN_FENCE ((int)BIT(I915_VMA_CAN_FENCE_BIT)) +#define I915_VMA_USERFAULT ((int)BIT(I915_VMA_USERFAULT_BIT)) +#define I915_VMA_GGTT_WRITE ((int)BIT(I915_VMA_GGTT_WRITE_BIT)) + + struct i915_active active; + +#define I915_VMA_PAGES_BIAS 24 +#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1) + atomic_t pages_count; /* number of active binds to the pages */ + struct mutex pages_mutex; /* protect acquire/release of backing pages */ + + /** + * Support different GGTT views into the same object. + * This means there can be multiple VMA mappings per object and per VM. + * i915_ggtt_view_type is used to distinguish between those entries. + * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also + * assumed in GEM functions which take no ggtt view parameter. + */ + struct i915_ggtt_view ggtt_view; + + /** This object's place on the active/inactive lists */ + struct list_head vm_link; + + struct list_head obj_link; /* Link in the object's VMA list */ + struct rb_node obj_node; + struct hlist_node obj_hash; + + /** This vma's place in the execbuf reservation list */ + struct list_head exec_link; + struct list_head reloc_link; + + /** This vma's place in the eviction list */ + struct list_head evict_link; + + struct list_head closed_link; + + /** + * Used for performing relocations during execbuffer insertion. 
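As a quick reading aid for the packed layout above (values purely illustrative), the single atomic flags word decodes as:

/* Illustrative decode of vma->flags under the bit layout defined above. */
unsigned int v = atomic_read(&vma->flags);
unsigned int pin_count = v & I915_VMA_PIN_MASK;	/* bits 0-9 */
bool ggtt_bound = v & I915_VMA_GLOBAL_BIND;	/* bit 10 */
bool ppgtt_bound = v & I915_VMA_LOCAL_BIND;	/* bit 11 */
bool had_error = v & I915_VMA_ERROR;		/* bit 13 */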
+ */ + unsigned int *exec_flags; + struct hlist_node exec_node; + u32 exec_handle; +}; + +#endif + diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 6ef74531588a..09870a31b4f0 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -39,8 +39,13 @@ #define GEN12_CSR_MAX_FW_SIZE ICL_CSR_MAX_FW_SIZE -#define ICL_CSR_PATH "i915/icl_dmc_ver1_07.bin" -#define ICL_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) +#define TGL_CSR_PATH "i915/tgl_dmc_ver2_04.bin" +#define TGL_CSR_VERSION_REQUIRED CSR_VERSION(2, 4) +#define TGL_CSR_MAX_FW_SIZE 0x6000 +MODULE_FIRMWARE(TGL_CSR_PATH); + +#define ICL_CSR_PATH "i915/icl_dmc_ver1_09.bin" +#define ICL_CSR_VERSION_REQUIRED CSR_VERSION(1, 9) #define ICL_CSR_MAX_FW_SIZE 0x6000 MODULE_FIRMWARE(ICL_CSR_PATH); @@ -674,6 +679,8 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) intel_csr_runtime_pm_get(dev_priv); if (INTEL_GEN(dev_priv) >= 12) { + csr->fw_path = TGL_CSR_PATH; + csr->required_version = TGL_CSR_VERSION_REQUIRED; /* Allow to load fw via parameter using the last known size */ csr->max_fw_size = GEN12_CSR_MAX_FW_SIZE; } else if (IS_GEN(dev_priv, 11)) { diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 7135d8dc32a7..6670a0763be2 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -58,6 +58,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(CANNONLAKE), PLATFORM_NAME(ICELAKE), PLATFORM_NAME(ELKHARTLAKE), + PLATFORM_NAME(TIGERLAKE), }; #undef PLATFORM_NAME @@ -72,9 +73,30 @@ const char *intel_platform_name(enum intel_platform platform) return platform_names[platform]; } -void intel_device_info_dump_flags(const struct intel_device_info *info, - struct drm_printer *p) +static const char *iommu_name(void) { + const char *msg = "n/a"; + +#ifdef CONFIG_INTEL_IOMMU + msg = enableddisabled(intel_iommu_gfx_mapped); +#endif + + return msg; +} + +void intel_device_info_print_static(const struct intel_device_info *info, + struct drm_printer *p) +{ + drm_printf(p, "engines: %x\n", info->engine_mask); + drm_printf(p, "gen: %d\n", info->gen); + drm_printf(p, "gt: %d\n", info->gt); + drm_printf(p, "iommu: %s\n", iommu_name()); + drm_printf(p, "memory-regions: %x\n", info->memory_regions); + drm_printf(p, "page-sizes: %x\n", info->page_sizes); + drm_printf(p, "platform: %s\n", intel_platform_name(info->platform)); + drm_printf(p, "ppgtt-size: %d\n", info->ppgtt_size); + drm_printf(p, "ppgtt-type: %d\n", info->ppgtt_type); + #define PRINT_FLAG(name) drm_printf(p, "%s: %s\n", #name, yesno(info->name)); DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG @@ -92,9 +114,9 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) hweight8(sseu->slice_mask), sseu->slice_mask); drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu)); for (s = 0; s < sseu->max_slices; s++) { - drm_printf(p, "slice%d: %u subslices, mask=%04x\n", + drm_printf(p, "slice%d: %u subslices, mask=%08x\n", s, intel_sseu_subslices_per_slice(sseu, s), - sseu->subslice_mask[s]); + intel_sseu_get_subslices(sseu, s)); } drm_printf(p, "EU total: %u\n", sseu->eu_total); drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); @@ -105,8 +127,8 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg)); } -void intel_device_info_dump_runtime(const struct intel_runtime_info *info, - struct 
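The intel_csr.c hunk above adds a Tiger Lake DMC blob alongside the bumped Icelake one; selection remains a newest-platform-first ladder carrying a path, a minimum accepted version and a maximum blob size. A small table-driven sketch of that selection, with data taken from the defines above and an illustrative helper name:

#include <stddef.h>

struct dmc_fw {
        const char *path;
        unsigned int major, minor;      /* minimum accepted version */
        unsigned int max_size;          /* largest blob that will be loaded */
};

static const struct dmc_fw tgl_dmc = { "i915/tgl_dmc_ver2_04.bin", 2, 4, 0x6000 };
static const struct dmc_fw icl_dmc = { "i915/icl_dmc_ver1_09.bin", 1, 9, 0x6000 };

static const struct dmc_fw *pick_dmc(int gen)
{
        if (gen >= 12)
                return &tgl_dmc;
        if (gen == 11)
                return &icl_dmc;
        return NULL;    /* older platforms elided in this sketch */
}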
drm_printer *p) +void intel_device_info_print_runtime(const struct intel_runtime_info *info, + struct drm_printer *p) { sseu_dump(&info->sseu, p); @@ -117,10 +139,9 @@ void intel_device_info_dump_runtime(const struct intel_runtime_info *info, static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, int subslice) { - int subslice_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); - int slice_stride = sseu->max_subslices * subslice_stride; + int slice_stride = sseu->max_subslices * sseu->eu_stride; - return slice * slice_stride + subslice * subslice_stride; + return slice * slice_stride + subslice * sseu->eu_stride; } static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, @@ -129,7 +150,7 @@ static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, int i, offset = sseu_eu_idx(sseu, slice, subslice); u16 eu_mask = 0; - for (i = 0; i < GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); i++) { + for (i = 0; i < sseu->eu_stride; i++) { eu_mask |= ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE); } @@ -142,14 +163,14 @@ static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, { int i, offset = sseu_eu_idx(sseu, slice, subslice); - for (i = 0; i < GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); i++) { + for (i = 0; i < sseu->eu_stride; i++) { sseu->eu_mask[offset + i] = (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; } } -void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, - struct drm_printer *p) +void intel_device_info_print_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p) { int s, ss; @@ -159,9 +180,9 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, } for (s = 0; s < sseu->max_slices; s++) { - drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n", + drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n", s, intel_sseu_subslices_per_slice(sseu, s), - sseu->subslice_mask[s]); + intel_sseu_get_subslices(sseu, s)); for (ss = 0; ss < sseu->max_subslices; ss++) { u16 enabled_eus = sseu_get_eus(sseu, s, ss); @@ -182,44 +203,80 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, + u8 s_en, u32 ss_en, u16 eu_en) +{ + int s, ss; + + /* ss_en represents entire subslice mask across all slices */ + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > + sizeof(ss_en) * BITS_PER_BYTE); + + for (s = 0; s < sseu->max_slices; s++) { + if ((s_en & BIT(s)) == 0) + continue; + + sseu->slice_mask |= BIT(s); + + intel_sseu_set_subslices(sseu, s, ss_en); + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, s, ss)) + sseu_set_eus(sseu, s, ss, eu_en); + } + sseu->eu_per_subslice = hweight16(eu_en); + sseu->eu_total = compute_eu_total(sseu); +} + +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv) +{ + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + u8 s_en; + u32 dss_en; + u16 eu_en = 0; + u8 eu_en_fuse; + int eu; + + /* + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. + * Instead of splitting these, provide userspace with an array + * of DSS to more closely represent the hardware resource. 
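sseu_eu_idx(), sseu_get_eus() and sseu_set_eus() above address one flat byte array: eu_stride bytes per subslice, max_subslices * eu_stride bytes per slice. A self-contained sketch of the same addressing with simplified types (the array size here is illustrative):

#include <stdint.h>

struct sseu_model {
        uint8_t eu_mask[64];    /* flat per-subslice EU bitmask storage */
        int max_subslices;
        int eu_stride;          /* bytes of EU mask per subslice (<= 2) */
};

static int eu_idx(const struct sseu_model *s, int slice, int subslice)
{
        return slice * s->max_subslices * s->eu_stride +
               subslice * s->eu_stride;
}

static uint16_t get_eus(const struct sseu_model *s, int slice, int subslice)
{
        int i, off = eu_idx(s, slice, subslice);
        uint16_t mask = 0;

        for (i = 0; i < s->eu_stride; i++)
                mask |= (uint16_t)s->eu_mask[off + i] << (i * 8);
        return mask;
}

static void set_eus(struct sseu_model *s, int slice, int subslice, uint16_t mask)
{
        int i, off = eu_idx(s, slice, subslice);

        for (i = 0; i < s->eu_stride; i++)
                s->eu_mask[off + i] = (mask >> (i * 8)) & 0xff;
}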
+ */ + intel_sseu_set_info(sseu, 1, 6, 16); + + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; + + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); + + /* one bit per pair of EUs */ + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + + /* TGL only supports slice-level power gating */ + sseu->has_slice_pg = 1; +} + static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u8 s_en; - u32 ss_en, ss_en_mask; + u32 ss_en; u8 eu_en; - int s; - if (IS_ELKHARTLAKE(dev_priv)) { - sseu->max_slices = 1; - sseu->max_subslices = 4; - sseu->max_eus_per_subslice = 8; - } else { - sseu->max_slices = 1; - sseu->max_subslices = 8; - sseu->max_eus_per_subslice = 8; - } + if (IS_ELKHARTLAKE(dev_priv)) + intel_sseu_set_info(sseu, 1, 4, 8); + else + intel_sseu_set_info(sseu, 1, 8, 8); s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); - ss_en_mask = BIT(sseu->max_subslices) - 1; eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - for (s = 0; s < sseu->max_slices; s++) { - if (s_en & BIT(s)) { - int ss_idx = sseu->max_subslices * s; - int ss; - - sseu->slice_mask |= BIT(s); - sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask; - for (ss = 0; ss < sseu->max_subslices; ss++) { - if (sseu->subslice_mask[s] & BIT(ss)) - sseu_set_eus(sseu, s, ss, eu_en); - } - } - } - sseu->eu_per_subslice = hweight8(eu_en); - sseu->eu_total = compute_eu_total(sseu); + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); /* ICL has no power gating restrictions. */ sseu->has_slice_pg = 1; @@ -235,23 +292,10 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) const int eu_mask = 0xff; u32 subslice_mask, eu_en; + intel_sseu_set_info(sseu, 6, 4, 8); + sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> GEN10_F2_S_ENA_SHIFT; - sseu->max_slices = 6; - sseu->max_subslices = 4; - sseu->max_eus_per_subslice = 8; - - subslice_mask = (1 << 4) - 1; - subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); - - /* - * Slice0 can have up to 3 subslices, but there are only 2 in - * slice1/2. - */ - sseu->subslice_mask[0] = subslice_mask; - for (s = 1; s < sseu->max_slices; s++) - sseu->subslice_mask[s] = subslice_mask & 0x3; /* Slice0 */ eu_en = ~I915_READ(GEN8_EU_DISABLE0); @@ -276,14 +320,25 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) eu_en = ~I915_READ(GEN10_EU_DISABLE3); sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); - /* Do a second pass where we mark the subslices disabled if all their - * eus are off. - */ + subslice_mask = (1 << 4) - 1; + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> + GEN10_F2_SS_DIS_SHIFT); + for (s = 0; s < sseu->max_slices; s++) { + u32 subslice_mask_with_eus = subslice_mask; + for (ss = 0; ss < sseu->max_subslices; ss++) { if (sseu_get_eus(sseu, s, ss) == 0) - sseu->subslice_mask[s] &= ~BIT(ss); + subslice_mask_with_eus &= ~BIT(ss); } + + /* + * Slice0 can have up to 3 subslices, but there are only 2 in + * slice1/2. + */ + intel_sseu_set_subslices(sseu, s, s == 0 ? 
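gen12_sseu_info_init() above decodes a fuse that carries one bit per pair of EUs; each set bit enables two consecutive EUs in the mask handed to gen11_compute_sseu_info(). A standalone sketch of just that expansion (the function name is illustrative):

#include <stdint.h>

static uint16_t eu_pairs_to_mask(uint8_t eu_en_fuse, int max_eus_per_subslice)
{
        uint16_t eu_en = 0;
        int eu;

        for (eu = 0; eu < max_eus_per_subslice / 2; eu++)
                if (eu_en_fuse & (1u << eu))
                        eu_en |= (1u << (eu * 2)) | (1u << (eu * 2 + 1));

        return eu_en;   /* e.g. fuse 0b0101 -> EUs 0,1 and 4,5 enabled */
}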
+ subslice_mask_with_eus : + subslice_mask_with_eus & 0x3); } sseu->eu_total = compute_eu_total(sseu); @@ -309,13 +364,12 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u32 fuse; + u8 subslice_mask = 0; fuse = I915_READ(CHV_FUSE_GT); sseu->slice_mask = BIT(0); - sseu->max_slices = 1; - sseu->max_subslices = 2; - sseu->max_eus_per_subslice = 8; + intel_sseu_set_info(sseu, 1, 2, 8); if (!(fuse & CHV_FGT_DISABLE_SS0)) { u8 disabled_mask = @@ -324,7 +378,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); - sseu->subslice_mask[0] |= BIT(0); + subslice_mask |= BIT(0); sseu_set_eus(sseu, 0, 0, ~disabled_mask); } @@ -335,10 +389,12 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); - sseu->subslice_mask[0] |= BIT(1); + subslice_mask |= BIT(1); sseu_set_eus(sseu, 0, 1, ~disabled_mask); } + intel_sseu_set_subslices(sseu, 0, subslice_mask); + sseu->eu_total = compute_eu_total(sseu); /* @@ -371,9 +427,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; /* BXT has a single slice and at most 3 subslices. */ - sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3; - sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4; - sseu->max_eus_per_subslice = 8; + intel_sseu_set_info(sseu, IS_GEN9_LP(dev_priv) ? 1 : 3, + IS_GEN9_LP(dev_priv) ? 3 : 4, 8); /* * The subslice disable field is global, i.e. it applies @@ -392,14 +447,14 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) /* skip disabled slice */ continue; - sseu->subslice_mask[s] = subslice_mask; + intel_sseu_set_subslices(sseu, s, subslice_mask); eu_disable = I915_READ(GEN9_EU_DISABLE(s)); for (ss = 0; ss < sseu->max_subslices; ss++) { int eu_per_ss; u8 eu_disabled_mask; - if (!(sseu->subslice_mask[s] & BIT(ss))) + if (!intel_sseu_has_subslice(sseu, s, ss)) /* skip disabled subslice */ continue; @@ -464,7 +519,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) } } -static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) +static void bdw_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; int s, ss; @@ -472,9 +527,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; - sseu->max_slices = 3; - sseu->max_subslices = 3; - sseu->max_eus_per_subslice = 8; + intel_sseu_set_info(sseu, 3, 3, 8); /* * The subslice disable field is global, i.e. 
it applies @@ -501,13 +554,13 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) /* skip disabled slice */ continue; - sseu->subslice_mask[s] = subslice_mask; + intel_sseu_set_subslices(sseu, s, subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { u8 eu_disabled_mask; u32 n_disabled; - if (!(sseu->subslice_mask[s] & BIT(ss))) + if (!intel_sseu_has_subslice(sseu, s, ss)) /* skip disabled subslice */ continue; @@ -547,10 +600,11 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = 0; } -static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) +static void hsw_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u32 fuse1; + u8 subslice_mask = 0; int s, ss; /* @@ -563,22 +617,18 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) /* fall through */ case 1: sseu->slice_mask = BIT(0); - sseu->subslice_mask[0] = BIT(0); + subslice_mask = BIT(0); break; case 2: sseu->slice_mask = BIT(0); - sseu->subslice_mask[0] = BIT(0) | BIT(1); + subslice_mask = BIT(0) | BIT(1); break; case 3: sseu->slice_mask = BIT(0) | BIT(1); - sseu->subslice_mask[0] = BIT(0) | BIT(1); - sseu->subslice_mask[1] = BIT(0) | BIT(1); + subslice_mask = BIT(0) | BIT(1); break; } - sseu->max_slices = hweight8(sseu->slice_mask); - sseu->max_subslices = hweight8(sseu->subslice_mask[0]); - fuse1 = I915_READ(HSW_PAVP_FUSE1); switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) { default: @@ -595,9 +645,14 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->eu_per_subslice = 6; break; } - sseu->max_eus_per_subslice = sseu->eu_per_subslice; + + intel_sseu_set_info(sseu, hweight8(sseu->slice_mask), + hweight8(subslice_mask), + sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { + intel_sseu_set_subslices(sseu, s, subslice_mask); + for (ss = 0; ss < sseu->max_subslices; ss++) { sseu_set_eus(sseu, s, ss, (1UL << sseu->eu_per_subslice) - 1); @@ -715,7 +770,7 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) } return freq; - } else if (INTEL_GEN(dev_priv) <= 11) { + } else if (INTEL_GEN(dev_priv) <= 12) { u32 ctc_reg = I915_READ(CTC_MODE); u32 freq = 0; @@ -774,6 +829,8 @@ static const u16 subplatform_ult_ids[] = { INTEL_WHL_U_GT1_IDS(0), INTEL_WHL_U_GT2_IDS(0), INTEL_WHL_U_GT3_IDS(0), + INTEL_CML_U_GT1_IDS(0), + INTEL_CML_U_GT2_IDS(0), }; static const u16 subplatform_ulx_ids[] = { @@ -899,12 +956,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) runtime->num_sprites[pipe] = 1; } - if (i915_modparams.disable_display) { - DRM_INFO("Display disabled (module parameter)\n"); - info->num_pipes = 0; - } else if (HAS_DISPLAY(dev_priv) && - (IS_GEN_RANGE(dev_priv, 7, 8)) && - HAS_PCH_SPLIT(dev_priv)) { + if (HAS_DISPLAY(dev_priv) && IS_GEN_RANGE(dev_priv, 7, 8) && + HAS_PCH_SPLIT(dev_priv)) { u32 fuse_strap = I915_READ(FUSE_STRAP); u32 sfuse_strap = I915_READ(SFUSE_STRAP); @@ -922,57 +975,65 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) (HAS_PCH_CPT(dev_priv) && !(sfuse_strap & SFUSE_STRAP_FUSE_LOCK))) { DRM_INFO("Display fused off, disabling\n"); - info->num_pipes = 0; + info->pipe_mask = 0; } else if (fuse_strap & IVB_PIPE_C_DISABLE) { DRM_INFO("PipeC fused off\n"); - info->num_pipes -= 1; + info->pipe_mask &= ~BIT(PIPE_C); } } else if (HAS_DISPLAY(dev_priv) && INTEL_GEN(dev_priv) >= 9) { u32 dfsm = I915_READ(SKL_DFSM); - u8 disabled_mask = 0; - bool invalid; - 
int num_bits; + u8 enabled_mask = info->pipe_mask; if (dfsm & SKL_DFSM_PIPE_A_DISABLE) - disabled_mask |= BIT(PIPE_A); + enabled_mask &= ~BIT(PIPE_A); if (dfsm & SKL_DFSM_PIPE_B_DISABLE) - disabled_mask |= BIT(PIPE_B); + enabled_mask &= ~BIT(PIPE_B); if (dfsm & SKL_DFSM_PIPE_C_DISABLE) - disabled_mask |= BIT(PIPE_C); - - num_bits = hweight8(disabled_mask); - - switch (disabled_mask) { - case BIT(PIPE_A): - case BIT(PIPE_B): - case BIT(PIPE_A) | BIT(PIPE_B): - case BIT(PIPE_A) | BIT(PIPE_C): - invalid = true; - break; - default: - invalid = false; - } + enabled_mask &= ~BIT(PIPE_C); + if (INTEL_GEN(dev_priv) >= 12 && + (dfsm & TGL_DFSM_PIPE_D_DISABLE)) + enabled_mask &= ~BIT(PIPE_D); - if (num_bits > info->num_pipes || invalid) - DRM_ERROR("invalid pipe fuse configuration: 0x%x\n", - disabled_mask); + /* + * At least one pipe should be enabled and if there are + * disabled pipes, they should be the last ones, with no holes + * in the mask. + */ + if (enabled_mask == 0 || !is_power_of_2(enabled_mask + 1)) + DRM_ERROR("invalid pipe fuse configuration: enabled_mask=0x%x\n", + enabled_mask); else - info->num_pipes -= num_bits; + info->pipe_mask = enabled_mask; + + if (dfsm & SKL_DFSM_DISPLAY_HDCP_DISABLE) + info->display.has_hdcp = 0; + + if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE) + info->display.has_fbc = 0; + + if (INTEL_GEN(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) + info->display.has_csr = 0; + + if (INTEL_GEN(dev_priv) >= 10 && + (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE)) + info->display.has_dsc = 0; } /* Initialize slice/subslice/EU info */ if (IS_HASWELL(dev_priv)) - haswell_sseu_info_init(dev_priv); + hsw_sseu_info_init(dev_priv); else if (IS_CHERRYVIEW(dev_priv)) cherryview_sseu_info_init(dev_priv); else if (IS_BROADWELL(dev_priv)) - broadwell_sseu_info_init(dev_priv); + bdw_sseu_info_init(dev_priv); else if (IS_GEN(dev_priv, 9)) gen9_sseu_info_init(dev_priv); else if (IS_GEN(dev_priv, 10)) gen10_sseu_info_init(dev_priv); - else if (INTEL_GEN(dev_priv) >= 11) + else if (IS_GEN(dev_priv, 11)) gen11_sseu_info_init(dev_priv); + else if (INTEL_GEN(dev_priv) >= 12) + gen12_sseu_info_init(dev_priv); if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { DRM_INFO("Disabling ppGTT for VT-d support\n"); @@ -1016,8 +1077,10 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) GEN11_GT_VEBOX_DISABLE_SHIFT; for (i = 0; i < I915_MAX_VCS; i++) { - if (!HAS_ENGINE(dev_priv, _VCS(i))) + if (!HAS_ENGINE(dev_priv, _VCS(i))) { + vdbox_mask &= ~BIT(i); continue; + } if (!(BIT(i) & vdbox_mask)) { info->engine_mask &= ~BIT(_VCS(i)); @@ -1028,8 +1091,9 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) /* * In Gen11, only even numbered logical VDBOXes are * hooked up to an SFC (Scaler & Format Converter) unit. + * In TGL each VDBOX has access to an SFC. 
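The replacement fuse handling above accepts a pipe mask only if it is non-zero and its set bits run contiguously from pipe A, which is exactly the condition that enabled_mask + 1 is a power of two. A minimal illustration of that check (not the driver code):

#include <stdbool.h>
#include <stdint.h>

static bool is_pow2(uint32_t x)
{
        return x && !(x & (x - 1));
}

static bool pipe_mask_valid(uint32_t enabled_mask)
{
        /* 0b0111 (pipes A-C) -> valid; 0b0101 (hole at pipe B) -> invalid */
        return enabled_mask != 0 && is_pow2(enabled_mask + 1);
}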
*/ - if (logical_vdbox++ % 2 == 0) + if (INTEL_GEN(dev_priv) >= 12 || logical_vdbox++ % 2 == 0) RUNTIME_INFO(dev_priv)->vdbox_sfc_access |= BIT(i); } DRM_DEBUG_DRIVER("vdbox enable: %04x, instances: %04lx\n", @@ -1037,8 +1101,10 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) GEM_BUG_ON(vdbox_mask != VDBOX_MASK(dev_priv)); for (i = 0; i < I915_MAX_VECS; i++) { - if (!HAS_ENGINE(dev_priv, _VECS(i))) + if (!HAS_ENGINE(dev_priv, _VECS(i))) { + vebox_mask &= ~BIT(i); continue; + } if (!(BIT(i) & vebox_mask)) { info->engine_mask &= ~BIT(_VECS(i)); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index ddafc819bf30..2725cb7fc169 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -78,6 +78,8 @@ enum intel_platform { /* gen11 */ INTEL_ICELAKE, INTEL_ELKHARTLAKE, + /* gen12 */ + INTEL_TIGERLAKE, INTEL_MAX_PLATFORMS }; @@ -105,12 +107,14 @@ enum intel_ppgtt_type { func(is_mobile); \ func(is_lp); \ func(require_force_probe); \ + func(is_dgfx); \ /* Keep has_* in alphabetical order */ \ func(has_64bit_reloc); \ func(gpu_reset_clobbers_display); \ func(has_reset_engine); \ func(has_fpga_dbg); \ - func(has_guc); \ + func(has_global_mocs); \ + func(has_gt_uc); \ func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ @@ -132,10 +136,14 @@ enum intel_ppgtt_type { func(has_csr); \ func(has_ddi); \ func(has_dp_mst); \ + func(has_dsb); \ + func(has_dsc); \ func(has_fbc); \ func(has_gmch); \ + func(has_hdcp); \ func(has_hotplug); \ func(has_ipc); \ + func(has_modular_fia); \ func(has_overlay); \ func(has_psr); \ func(overlay_needs_physical); \ @@ -155,9 +163,11 @@ struct intel_device_info { unsigned int page_sizes; /* page sizes supported by the HW */ + u32 memory_regions; /* regions supported by the HW */ + u32 display_mmio_offset; - u8 num_pipes; + u8 pipe_mask; #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); @@ -220,12 +230,13 @@ const char *intel_platform_name(enum intel_platform platform); void intel_device_info_subplatform_init(struct drm_i915_private *dev_priv); void intel_device_info_runtime_init(struct drm_i915_private *dev_priv); -void intel_device_info_dump_flags(const struct intel_device_info *info, - struct drm_printer *p); -void intel_device_info_dump_runtime(const struct intel_runtime_info *info, + +void intel_device_info_print_static(const struct intel_device_info *info, struct drm_printer *p); -void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, +void intel_device_info_print_runtime(const struct intel_runtime_info *info, struct drm_printer *p); +void intel_device_info_print_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p); void intel_device_info_init_mmio(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_guc_ads.h b/drivers/gpu/drm/i915/intel_guc_ads.h deleted file mode 100644 index 7f40f9cd5fb9..000000000000 --- a/drivers/gpu/drm/i915/intel_guc_ads.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright © 2014-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the 
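The SFC hunk above keeps the gen11 rule that only every other fused-in ("logical") VDBOX reaches a Scaler & Format Converter, while on gen12 every VDBOX does. A simplified model of how the access mask falls out of that rule (illustrative helper, not the driver function):

#include <stdint.h>

static uint32_t vdbox_sfc_access(int gen, uint32_t vdbox_mask, int max_vcs)
{
        uint32_t sfc = 0;
        int i, logical = 0;

        for (i = 0; i < max_vcs; i++) {
                if (!(vdbox_mask & (1u << i)))
                        continue;               /* fused off: no logical slot */
                if (gen >= 12 || logical++ % 2 == 0)
                        sfc |= 1u << i;         /* this VDBOX can reach an SFC */
        }
        return sfc;
}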
following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef _INTEL_GUC_ADS_H_ -#define _INTEL_GUC_ADS_H_ - -struct intel_guc; - -int intel_guc_ads_create(struct intel_guc *guc); -void intel_guc_ads_destroy(struct intel_guc *guc); -void intel_guc_ads_reset(struct intel_guc *guc); - -#endif diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h deleted file mode 100644 index 41ba593a4df7..000000000000 --- a/drivers/gpu/drm/i915/intel_guc_ct.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright © 2016-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef _INTEL_GUC_CT_H_ -#define _INTEL_GUC_CT_H_ - -struct intel_guc; -struct i915_vma; - -#include "intel_guc_fwif.h" - -/** - * DOC: Command Transport (CT). - * - * Buffer based command transport is a replacement for MMIO based mechanism. - * It can be used to perform both host-2-guc and guc-to-host communication. - */ - -/** Represents single command transport buffer. - * - * A single command transport buffer consists of two parts, the header - * record (command transport buffer descriptor) and the actual buffer which - * holds the commands. - * - * @desc: pointer to the buffer descriptor - * @cmds: pointer to the commands buffer - */ -struct intel_guc_ct_buffer { - struct guc_ct_buffer_desc *desc; - u32 *cmds; -}; - -/** Represents pair of command transport buffers. - * - * Buffers go in pairs to allow bi-directional communication. - * To simplify the code we place both of them in the same vma. - * Buffers from the same pair must share unique owner id. 
- * - * @vma: pointer to the vma with pair of CT buffers - * @ctbs: buffers for sending(0) and receiving(1) commands - * @owner: unique identifier - * @next_fence: fence to be used with next send command - */ -struct intel_guc_ct_channel { - struct i915_vma *vma; - struct intel_guc_ct_buffer ctbs[2]; - u32 owner; - u32 next_fence; - bool enabled; -}; - -/** Holds all command transport channels. - * - * @host_channel: main channel used by the host - */ -struct intel_guc_ct { - struct intel_guc_ct_channel host_channel; - /* other channels are tbd */ - - /** @lock: protects pending requests list */ - spinlock_t lock; - - /** @pending_requests: list of requests waiting for response */ - struct list_head pending_requests; - - /** @incoming_requests: list of incoming requests */ - struct list_head incoming_requests; - - /** @worker: worker for handling incoming requests */ - struct work_struct worker; -}; - -void intel_guc_ct_init_early(struct intel_guc_ct *ct); -int intel_guc_ct_init(struct intel_guc_ct *ct); -void intel_guc_ct_fini(struct intel_guc_ct *ct); -int intel_guc_ct_enable(struct intel_guc_ct *ct); -void intel_guc_ct_disable(struct intel_guc_ct *ct); - -static inline void intel_guc_ct_stop(struct intel_guc_ct *ct) -{ - ct->host_channel.enabled = false; -} - -#endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c deleted file mode 100644 index 72cdafd9636a..000000000000 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Vinit Azad <vinit.azad@intel.com> - * Ben Widawsky <ben@bwidawsk.net> - * Dave Gordon <david.s.gordon@intel.com> - * Alex Dai <yu.dai@intel.com> - */ - -#include "intel_guc_fw.h" -#include "i915_drv.h" - -#define __MAKE_GUC_FW_PATH(KEY) \ - "i915/" \ - __stringify(KEY##_GUC_FW_PREFIX) "_guc_" \ - __stringify(KEY##_GUC_FW_MAJOR) "." \ - __stringify(KEY##_GUC_FW_MINOR) "." 
\ - __stringify(KEY##_GUC_FW_PATCH) ".bin" - -#define SKL_GUC_FW_PREFIX skl -#define SKL_GUC_FW_MAJOR 32 -#define SKL_GUC_FW_MINOR 0 -#define SKL_GUC_FW_PATCH 3 -#define SKL_GUC_FIRMWARE_PATH __MAKE_GUC_FW_PATH(SKL) -MODULE_FIRMWARE(SKL_GUC_FIRMWARE_PATH); - -#define BXT_GUC_FW_PREFIX bxt -#define BXT_GUC_FW_MAJOR 32 -#define BXT_GUC_FW_MINOR 0 -#define BXT_GUC_FW_PATCH 3 -#define BXT_GUC_FIRMWARE_PATH __MAKE_GUC_FW_PATH(BXT) -MODULE_FIRMWARE(BXT_GUC_FIRMWARE_PATH); - -#define KBL_GUC_FW_PREFIX kbl -#define KBL_GUC_FW_MAJOR 32 -#define KBL_GUC_FW_MINOR 0 -#define KBL_GUC_FW_PATCH 3 -#define KBL_GUC_FIRMWARE_PATH __MAKE_GUC_FW_PATH(KBL) -MODULE_FIRMWARE(KBL_GUC_FIRMWARE_PATH); - -#define GLK_GUC_FW_PREFIX glk -#define GLK_GUC_FW_MAJOR 32 -#define GLK_GUC_FW_MINOR 0 -#define GLK_GUC_FW_PATCH 3 -#define GLK_GUC_FIRMWARE_PATH __MAKE_GUC_FW_PATH(GLK) -MODULE_FIRMWARE(GLK_GUC_FIRMWARE_PATH); - -#define ICL_GUC_FW_PREFIX icl -#define ICL_GUC_FW_MAJOR 32 -#define ICL_GUC_FW_MINOR 0 -#define ICL_GUC_FW_PATCH 3 -#define ICL_GUC_FIRMWARE_PATH __MAKE_GUC_FW_PATH(ICL) -MODULE_FIRMWARE(ICL_GUC_FIRMWARE_PATH); - -static void guc_fw_select(struct intel_uc_fw *guc_fw) -{ - struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); - struct drm_i915_private *i915 = guc_to_i915(guc); - - GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC); - - if (!HAS_GUC(i915)) - return; - - if (i915_modparams.guc_firmware_path) { - guc_fw->path = i915_modparams.guc_firmware_path; - guc_fw->major_ver_wanted = 0; - guc_fw->minor_ver_wanted = 0; - } else if (IS_ICELAKE(i915)) { - guc_fw->path = ICL_GUC_FIRMWARE_PATH; - guc_fw->major_ver_wanted = ICL_GUC_FW_MAJOR; - guc_fw->minor_ver_wanted = ICL_GUC_FW_MINOR; - } else if (IS_GEMINILAKE(i915)) { - guc_fw->path = GLK_GUC_FIRMWARE_PATH; - guc_fw->major_ver_wanted = GLK_GUC_FW_MAJOR; - guc_fw->minor_ver_wanted = GLK_GUC_FW_MINOR; - } else if (IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) { - guc_fw->path = KBL_GUC_FIRMWARE_PATH; - guc_fw->major_ver_wanted = KBL_GUC_FW_MAJOR; - guc_fw->minor_ver_wanted = KBL_GUC_FW_MINOR; - } else if (IS_BROXTON(i915)) { - guc_fw->path = BXT_GUC_FIRMWARE_PATH; - guc_fw->major_ver_wanted = BXT_GUC_FW_MAJOR; - guc_fw->minor_ver_wanted = BXT_GUC_FW_MINOR; - } else if (IS_SKYLAKE(i915)) { - guc_fw->path = SKL_GUC_FIRMWARE_PATH; - guc_fw->major_ver_wanted = SKL_GUC_FW_MAJOR; - guc_fw->minor_ver_wanted = SKL_GUC_FW_MINOR; - } -} - -/** - * intel_guc_fw_init_early() - initializes GuC firmware struct - * @guc: intel_guc struct - * - * On platforms with GuC selects firmware for uploading - */ -void intel_guc_fw_init_early(struct intel_guc *guc) -{ - struct intel_uc_fw *guc_fw = &guc->fw; - - intel_uc_fw_init_early(guc_fw, INTEL_UC_FW_TYPE_GUC); - guc_fw_select(guc_fw); -} - -static void guc_prepare_xfer(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - /* Must program this register before loading the ucode with DMA */ - I915_WRITE(GUC_SHIM_CONTROL, GUC_DISABLE_SRAM_INIT_TO_ZEROES | - GUC_ENABLE_READ_CACHE_LOGIC | - GUC_ENABLE_MIA_CACHING | - GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | - GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | - GUC_ENABLE_MIA_CLOCK_GATING); - - if (IS_GEN9_LP(dev_priv)) - I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); - else - I915_WRITE(GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE); - - if (IS_GEN(dev_priv, 9)) { - /* DOP Clock Gating Enable for GuC clocks */ - I915_WRITE(GEN7_MISCCPCTL, (GEN8_DOP_CLOCK_GATE_GUC_ENABLE | - I915_READ(GEN7_MISCCPCTL))); - - /* allows for 5us (in 10ns units) before GT can go to 
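The removed __MAKE_GUC_FW_PATH macro builds the firmware path by double stringification; with the SKL values it expands to "i915/skl_guc_32.0.3.bin". A self-contained illustration of the same trick with simplified parameters (the names here are not the kernel's):

#include <stdio.h>

#define __stringify_1(x)        #x
#define __stringify(x)          __stringify_1(x)

#define MAKE_GUC_FW_PATH(prefix, maj, min, pat) \
        "i915/" __stringify(prefix) "_guc_" \
        __stringify(maj) "." __stringify(min) "." __stringify(pat) ".bin"

int main(void)
{
        puts(MAKE_GUC_FW_PATH(skl, 32, 0, 3)); /* prints i915/skl_guc_32.0.3.bin */
        return 0;
}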
RC6 */ - I915_WRITE(GUC_ARAT_C6DIS, 0x1FF); - } -} - -/* Copy RSA signature from the fw image to HW for verification */ -static void guc_xfer_rsa(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_uc_fw *fw = &guc->fw; - struct sg_table *pages = fw->obj->mm.pages; - u32 rsa[UOS_RSA_SCRATCH_COUNT]; - int i; - - sg_pcopy_to_buffer(pages->sgl, pages->nents, - rsa, sizeof(rsa), fw->rsa_offset); - - for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) - I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]); -} - -static bool guc_xfer_completed(struct intel_guc *guc, u32 *status) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - /* Did we complete the xfer? */ - *status = I915_READ(DMA_CTRL); - return !(*status & START_DMA); -} - -/* - * Read the GuC status register (GUC_STATUS) and store it in the - * specified location; then return a boolean indicating whether - * the value matches either of two values representing completion - * of the GuC boot process. - * - * This is used for polling the GuC status in a wait_for() - * loop below. - */ -static inline bool guc_ready(struct intel_guc *guc, u32 *status) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - u32 val = I915_READ(GUC_STATUS); - u32 uk_val = val & GS_UKERNEL_MASK; - - *status = val; - return (uk_val == GS_UKERNEL_READY) || - ((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE)); -} - -static int guc_wait_ucode(struct intel_guc *guc) -{ - u32 status; - int ret; - - /* - * Wait for the GuC to start up. - * NB: Docs recommend not using the interrupt for completion. - * Measurements indicate this should take no more than 20ms, so a - * timeout here indicates that the GuC has failed and is unusable. - * (Higher levels of the driver may decide to reset the GuC and - * attempt the ucode load again if this happens.) - */ - ret = wait_for(guc_ready(guc, &status), 100); - DRM_DEBUG_DRIVER("GuC status %#x\n", status); - - if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - DRM_ERROR("GuC firmware signature verification failed\n"); - ret = -ENOEXEC; - } - - if (ret == 0 && !guc_xfer_completed(guc, &status)) { - DRM_ERROR("GuC is ready, but the xfer %08x is incomplete\n", - status); - ret = -ENXIO; - } - - return ret; -} - -/* - * Transfer the firmware image to RAM for execution by the microcontroller. - * - * Architecturally, the DMA engine is bidirectional, and can potentially even - * transfer between GTT locations. This functionality is left out of the API - * for now as there is no need for it. - */ -static int guc_xfer_ucode(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_uc_fw *guc_fw = &guc->fw; - unsigned long offset; - - /* - * The header plus uCode will be copied to WOPCM via DMA, excluding any - * other components - */ - I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); - - /* Set the source address for the new blob */ - offset = intel_uc_fw_ggtt_offset(guc_fw) + guc_fw->header_offset; - I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); - I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); - - /* - * Set the DMA destination. Current uCode expects the code to be - * loaded at 8k; locations below this are used for the stack. 
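guc_wait_ucode() above polls guc_ready() under a timeout rather than waiting for the completion interrupt. A plain userspace sketch of that poll-with-timeout pattern, standing in for the kernel's wait_for(); all names here are assumptions:

#include <stdbool.h>
#include <time.h>
#include <unistd.h>

static long elapsed_ms(const struct timespec *a, const struct timespec *b)
{
        return (b->tv_sec - a->tv_sec) * 1000 +
               (b->tv_nsec - a->tv_nsec) / 1000000;
}

static int wait_until(bool (*ready)(void *ctx, unsigned int *status),
                      void *ctx, unsigned int *status, int timeout_ms)
{
        struct timespec start, now;

        clock_gettime(CLOCK_MONOTONIC, &start);
        for (;;) {
                if (ready(ctx, status))
                        return 0;
                clock_gettime(CLOCK_MONOTONIC, &now);
                if (elapsed_ms(&start, &now) > timeout_ms)
                        return -1;      /* the driver returns an error here */
                usleep(1000);           /* back off between polls */
        }
}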
- */ - I915_WRITE(DMA_ADDR_1_LOW, 0x2000); - I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); - - /* Finally start the DMA */ - I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA)); - - return guc_wait_ucode(guc); -} -/* - * Load the GuC firmware blob into the MinuteIA. - */ -static int guc_fw_xfer(struct intel_uc_fw *guc_fw) -{ - struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); - struct drm_i915_private *dev_priv = guc_to_i915(guc); - int ret; - - GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC); - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - guc_prepare_xfer(guc); - - /* - * Note that GuC needs the CSS header plus uKernel code to be copied - * by the DMA engine in one operation, whereas the RSA signature is - * loaded via MMIO. - */ - guc_xfer_rsa(guc); - - ret = guc_xfer_ucode(guc); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - - return ret; -} - -/** - * intel_guc_fw_upload() - load GuC uCode to device - * @guc: intel_guc structure - * - * Called from intel_uc_init_hw() during driver load, resume from sleep and - * after a GPU reset. - * - * The firmware image should have already been fetched into memory, so only - * check that fetch succeeded, and then transfer the image to the h/w. - * - * Return: non-zero code on error - */ -int intel_guc_fw_upload(struct intel_guc *guc) -{ - return intel_uc_fw_upload(&guc->fw, guc_fw_xfer); -} diff --git a/drivers/gpu/drm/i915/intel_guc_fw.h b/drivers/gpu/drm/i915/intel_guc_fw.h deleted file mode 100644 index 4ec5d3d9e2b0..000000000000 --- a/drivers/gpu/drm/i915/intel_guc_fw.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#ifndef _INTEL_GUC_FW_H_ -#define _INTEL_GUC_FW_H_ - -struct intel_guc; - -void intel_guc_fw_init_early(struct intel_guc *guc); -int intel_guc_fw_upload(struct intel_guc *guc); - -#endif diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c deleted file mode 100644 index db531ebc7704..000000000000 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ /dev/null @@ -1,1458 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include <linux/circ_buf.h> - -#include "gt/intel_engine_pm.h" -#include "gt/intel_lrc_reg.h" -#include "gt/intel_context.h" -#include "gem/i915_gem_context.h" - -#include "intel_guc_submission.h" -#include "i915_drv.h" - -#define GUC_PREEMPT_FINISHED 0x1 -#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 -#define GUC_PREEMPT_BREADCRUMB_BYTES \ - (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS) - -/** - * DOC: GuC-based command submission - * - * GuC client: - * A intel_guc_client refers to a submission path through GuC. Currently, there - * are two clients. One of them (the execbuf_client) is charged with all - * submissions to the GuC, the other one (preempt_client) is responsible for - * preempting the execbuf_client. This struct is the owner of a doorbell, a - * process descriptor and a workqueue (all of them inside a single gem object - * that contains all required pages for these elements). - * - * GuC stage descriptor: - * During initialization, the driver allocates a static pool of 1024 such - * descriptors, and shares them with the GuC. - * Currently, there exists a 1:1 mapping between a intel_guc_client and a - * guc_stage_desc (via the client's stage_id), so effectively only one - * gets used. This stage descriptor lets the GuC know about the doorbell, - * workqueue and process descriptor. Theoretically, it also lets the GuC - * know about our HW contexts (context ID, etc...), but we actually - * employ a kind of submission where the GuC uses the LRCA sent via the work - * item instead (the single guc_stage_desc associated to execbuf client - * contains information about the default kernel context only, but this is - * essentially unused). This is called a "proxy" submission. - * - * The Scratch registers: - * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes - * a value to the action register (SOFT_SCRATCH_0) along with any data. 
It then - * triggers an interrupt on the GuC via another register write (0xC4C8). - * Firmware writes a success/fail code back to the action register after - * processes the request. The kernel driver polls waiting for this update and - * then proceeds. - * See intel_guc_send() - * - * Doorbells: - * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW) - * mapped into process space. - * - * Work Items: - * There are several types of work items that the host may place into a - * workqueue, each with its own requirements and limitations. Currently only - * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which - * represents in-order queue. The kernel driver packs ring tail pointer and an - * ELSP context descriptor dword into Work Item. - * See guc_add_request() - * - */ - -static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_PREEMPT_ADDR); -} - -static inline struct i915_priolist *to_priolist(struct rb_node *rb) -{ - return rb_entry(rb, struct i915_priolist, node); -} - -static inline bool is_high_priority(struct intel_guc_client *client) -{ - return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || - client->priority == GUC_CLIENT_PRIORITY_HIGH); -} - -static int reserve_doorbell(struct intel_guc_client *client) -{ - unsigned long offset; - unsigned long end; - u16 id; - - GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID); - - /* - * The bitmap tracks which doorbell registers are currently in use. - * It is split into two halves; the first half is used for normal - * priority contexts, the second half for high-priority ones. - */ - offset = 0; - end = GUC_NUM_DOORBELLS / 2; - if (is_high_priority(client)) { - offset = end; - end += offset; - } - - id = find_next_zero_bit(client->guc->doorbell_bitmap, end, offset); - if (id == end) - return -ENOSPC; - - __set_bit(id, client->guc->doorbell_bitmap); - client->doorbell_id = id; - DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n", - client->stage_id, yesno(is_high_priority(client)), - id); - return 0; -} - -static bool has_doorbell(struct intel_guc_client *client) -{ - if (client->doorbell_id == GUC_DOORBELL_INVALID) - return false; - - return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); -} - -static void unreserve_doorbell(struct intel_guc_client *client) -{ - GEM_BUG_ON(!has_doorbell(client)); - - __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); - client->doorbell_id = GUC_DOORBELL_INVALID; -} - -/* - * Tell the GuC to allocate or deallocate a specific doorbell - */ - -static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id) -{ - u32 action[] = { - INTEL_GUC_ACTION_ALLOCATE_DOORBELL, - stage_id - }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} - -static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id) -{ - u32 action[] = { - INTEL_GUC_ACTION_DEALLOCATE_DOORBELL, - stage_id - }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} - -static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client) -{ - struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; - - return &base[client->stage_id]; -} - -/* - * Initialise, update, or clear doorbell data shared with the GuC - * - * These functions modify shared data and so need access to the mapped - * client object which contains the page being used for the doorbell - */ - -static void __update_doorbell_desc(struct 
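reserve_doorbell() above splits the doorbell bitmap in two: normal-priority clients allocate ids from the lower half, high-priority clients from the upper half. A simplified userspace model of that allocation (a byte array stands in for the bitmap, and the constant is illustrative):

#include <stdbool.h>

#define NUM_DOORBELLS 256

static int reserve_db(unsigned char *in_use, bool high_prio)
{
        int start = high_prio ? NUM_DOORBELLS / 2 : 0;
        int end = start + NUM_DOORBELLS / 2;
        int id;

        for (id = start; id < end; id++) {
                if (!in_use[id]) {
                        in_use[id] = 1;
                        return id;
                }
        }
        return -1;      /* the driver returns -ENOSPC */
}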
intel_guc_client *client, u16 new_id) -{ - struct guc_stage_desc *desc; - - /* Update the GuC's idea of the doorbell ID */ - desc = __get_stage_desc(client); - desc->db_id = new_id; -} - -static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client) -{ - return client->vaddr + client->doorbell_offset; -} - -static bool __doorbell_valid(struct intel_guc *guc, u16 db_id) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS); - return I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID; -} - -static void __init_doorbell(struct intel_guc_client *client) -{ - struct guc_doorbell_info *doorbell; - - doorbell = __get_doorbell(client); - doorbell->db_status = GUC_DOORBELL_ENABLED; - doorbell->cookie = 0; -} - -static void __fini_doorbell(struct intel_guc_client *client) -{ - struct guc_doorbell_info *doorbell; - u16 db_id = client->doorbell_id; - - doorbell = __get_doorbell(client); - doorbell->db_status = GUC_DOORBELL_DISABLED; - - /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit - * to go to zero after updating db_status before we call the GuC to - * release the doorbell - */ - if (wait_for_us(!__doorbell_valid(client->guc, db_id), 10)) - WARN_ONCE(true, "Doorbell never became invalid after disable\n"); -} - -static int create_doorbell(struct intel_guc_client *client) -{ - int ret; - - if (WARN_ON(!has_doorbell(client))) - return -ENODEV; /* internal setup error, should never happen */ - - __update_doorbell_desc(client, client->doorbell_id); - __init_doorbell(client); - - ret = __guc_allocate_doorbell(client->guc, client->stage_id); - if (ret) { - __fini_doorbell(client); - __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n", - client->stage_id, ret); - return ret; - } - - return 0; -} - -static int destroy_doorbell(struct intel_guc_client *client) -{ - int ret; - - GEM_BUG_ON(!has_doorbell(client)); - - __fini_doorbell(client); - ret = __guc_deallocate_doorbell(client->guc, client->stage_id); - if (ret) - DRM_ERROR("Couldn't destroy client %u doorbell: %d\n", - client->stage_id, ret); - - __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - - return ret; -} - -static unsigned long __select_cacheline(struct intel_guc *guc) -{ - unsigned long offset; - - /* Doorbell uses a single cache line within a page */ - offset = offset_in_page(guc->db_cacheline); - - /* Moving to next cache line to reduce contention */ - guc->db_cacheline += cache_line_size(); - - DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n", - offset, guc->db_cacheline, cache_line_size()); - return offset; -} - -static inline struct guc_process_desc * -__get_process_desc(struct intel_guc_client *client) -{ - return client->vaddr + client->proc_desc_offset; -} - -/* - * Initialise the process descriptor shared with the GuC firmware. - */ -static void guc_proc_desc_init(struct intel_guc_client *client) -{ - struct guc_process_desc *desc; - - desc = memset(__get_process_desc(client), 0, sizeof(*desc)); - - /* - * XXX: pDoorbell and WQVBaseAddress are pointers in process address - * space for ring3 clients (set them as in mmap_ioctl) or kernel - * space for kernel clients (map on demand instead? May make debug - * easier to have it mapped). 
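__select_cacheline() above hands each client its own cache line within the doorbell page, simply advancing a cursor by the cache line size and taking the offset within a page. A small sketch with fixed sizes (the driver queries cache_line_size() at run time):

#define PAGE_SZ 4096u
#define LINE_SZ   64u

static unsigned long next_db_offset(unsigned int *cursor)
{
        unsigned long offset = *cursor % PAGE_SZ;       /* offset within page */

        *cursor += LINE_SZ;     /* next client gets the following cache line */
        return offset;
}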
- */ - desc->wq_base_addr = 0; - desc->db_base_addr = 0; - - desc->stage_id = client->stage_id; - desc->wq_size_bytes = GUC_WQ_SIZE; - desc->wq_status = WQ_STATUS_ACTIVE; - desc->priority = client->priority; -} - -static void guc_proc_desc_fini(struct intel_guc_client *client) -{ - struct guc_process_desc *desc; - - desc = __get_process_desc(client); - memset(desc, 0, sizeof(*desc)); -} - -static int guc_stage_desc_pool_create(struct intel_guc *guc) -{ - struct i915_vma *vma; - void *vaddr; - - vma = intel_guc_allocate_vma(guc, - PAGE_ALIGN(sizeof(struct guc_stage_desc) * - GUC_MAX_STAGE_DESCRIPTORS)); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); - } - - guc->stage_desc_pool = vma; - guc->stage_desc_pool_vaddr = vaddr; - ida_init(&guc->stage_ids); - - return 0; -} - -static void guc_stage_desc_pool_destroy(struct intel_guc *guc) -{ - ida_destroy(&guc->stage_ids); - i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); -} - -/* - * Initialise/clear the stage descriptor shared with the GuC firmware. - * - * This descriptor tells the GuC where (in GGTT space) to find the important - * data structures relating to this client (doorbell, process descriptor, - * write queue, etc). - */ -static void guc_stage_desc_init(struct intel_guc_client *client) -{ - struct intel_guc *guc = client->guc; - struct i915_gem_context *ctx = client->owner; - struct i915_gem_engines_iter it; - struct guc_stage_desc *desc; - struct intel_context *ce; - u32 gfx_addr; - - desc = __get_stage_desc(client); - memset(desc, 0, sizeof(*desc)); - - desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | - GUC_STAGE_DESC_ATTR_KERNEL; - if (is_high_priority(client)) - desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT; - desc->stage_id = client->stage_id; - desc->priority = client->priority; - desc->db_id = client->doorbell_id; - - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - struct guc_execlist_context *lrc; - - if (!(ce->engine->mask & client->engines)) - continue; - - /* TODO: We have a design issue to be solved here. Only when we - * receive the first batch, we know which engine is used by the - * user. But here GuC expects the lrc and ring to be pinned. It - * is not an issue for default context, which is the only one - * for now who owns a GuC client. But for future owner of GuC - * client, need to make sure lrc is pinned prior to enter here. - */ - if (!ce->state) - break; /* XXX: continue? */ - - /* - * XXX: When this is a GUC_STAGE_DESC_ATTR_KERNEL client (proxy - * submission or, in other words, not using a direct submission - * model) the KMD's LRCA is not used for any work submission. - * Instead, the GuC uses the LRCA of the user mode context (see - * guc_add_request below). - */ - lrc = &desc->lrc[ce->engine->guc_id]; - lrc->context_desc = lower_32_bits(ce->lrc_desc); - - /* The state page is after PPHWSP */ - lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) + - LRC_STATE_PN * PAGE_SIZE; - - /* XXX: In direct submission, the GuC wants the HW context id - * here. 
In proxy submission, it wants the stage id - */ - lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | - (ce->engine->guc_id << GUC_ELC_ENGINE_OFFSET); - - lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma); - lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; - lrc->ring_next_free_location = lrc->ring_begin; - lrc->ring_current_tail_pointer_value = 0; - - desc->engines_used |= BIT(ce->engine->guc_id); - } - i915_gem_context_unlock_engines(ctx); - - DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", - client->engines, desc->engines_used); - WARN_ON(desc->engines_used == 0); - - /* - * The doorbell, process descriptor, and workqueue are all parts - * of the client object, which the GuC will reference via the GGTT - */ - gfx_addr = intel_guc_ggtt_offset(guc, client->vma); - desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + - client->doorbell_offset; - desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); - desc->db_trigger_uk = gfx_addr + client->doorbell_offset; - desc->process_desc = gfx_addr + client->proc_desc_offset; - desc->wq_addr = gfx_addr + GUC_DB_SIZE; - desc->wq_size = GUC_WQ_SIZE; - - desc->desc_private = ptr_to_u64(client); -} - -static void guc_stage_desc_fini(struct intel_guc_client *client) -{ - struct guc_stage_desc *desc; - - desc = __get_stage_desc(client); - memset(desc, 0, sizeof(*desc)); -} - -/* Construct a Work Item and append it to the GuC's Work Queue */ -static void guc_wq_item_append(struct intel_guc_client *client, - u32 target_engine, u32 context_desc, - u32 ring_tail, u32 fence_id) -{ - /* wqi_len is in DWords, and does not include the one-word header */ - const size_t wqi_size = sizeof(struct guc_wq_item); - const u32 wqi_len = wqi_size / sizeof(u32) - 1; - struct guc_process_desc *desc = __get_process_desc(client); - struct guc_wq_item *wqi; - u32 wq_off; - - lockdep_assert_held(&client->wq_lock); - - /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we - * should not have the case where structure wqi is across page, neither - * wrapped to the beginning. This simplifies the implementation below. - * - * XXX: if not the case, we need save data to a temp wqi and copy it to - * workqueue buffer dw by dw. - */ - BUILD_BUG_ON(wqi_size != 16); - - /* We expect the WQ to be active if we're appending items to it */ - GEM_BUG_ON(desc->wq_status != WQ_STATUS_ACTIVE); - - /* Free space is guaranteed. 
*/ - wq_off = READ_ONCE(desc->tail); - GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head), - GUC_WQ_SIZE) < wqi_size); - GEM_BUG_ON(wq_off & (wqi_size - 1)); - - /* WQ starts from the page after doorbell / process_desc */ - wqi = client->vaddr + wq_off + GUC_DB_SIZE; - - if (I915_SELFTEST_ONLY(client->use_nop_wqi)) { - wqi->header = WQ_TYPE_NOOP | (wqi_len << WQ_LEN_SHIFT); - } else { - /* Now fill in the 4-word work queue item */ - wqi->header = WQ_TYPE_INORDER | - (wqi_len << WQ_LEN_SHIFT) | - (target_engine << WQ_TARGET_SHIFT) | - WQ_NO_WCFLUSH_WAIT; - wqi->context_desc = context_desc; - wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; - GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); - wqi->fence_id = fence_id; - } - - /* Make the update visible to GuC */ - WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1)); -} - -static void guc_ring_doorbell(struct intel_guc_client *client) -{ - struct guc_doorbell_info *db; - u32 cookie; - - lockdep_assert_held(&client->wq_lock); - - /* pointer of current doorbell cacheline */ - db = __get_doorbell(client); - - /* - * We're not expecting the doorbell cookie to change behind our back, - * we also need to treat 0 as a reserved value. - */ - cookie = READ_ONCE(db->cookie); - WARN_ON_ONCE(xchg(&db->cookie, cookie + 1 ?: cookie + 2) != cookie); - - /* XXX: doorbell was lost and need to acquire it again */ - GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); -} - -static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) -{ - struct intel_guc_client *client = guc->execbuf_client; - struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); - u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); - - spin_lock(&client->wq_lock); - - guc_wq_item_append(client, engine->guc_id, ctx_desc, - ring_tail, rq->fence.seqno); - guc_ring_doorbell(client); - - client->submissions[engine->id] += 1; - - spin_unlock(&client->wq_lock); -} - -/* - * When we're doing submissions using regular execlists backend, writing to - * ELSP from CPU side is enough to make sure that writes to ringbuffer pages - * pinned in mappable aperture portion of GGTT are visible to command streamer. - * Writes done by GuC on our behalf are not guaranteeing such ordering, - * therefore, to ensure the flush, we're issuing a POSTING READ. 
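guc_wq_item_append() above treats the workqueue as a power-of-two ring: free space is checked against head/tail, the item body is written at the old tail, and only then is the new tail published for the firmware to consume. A simplified single-producer model of that flow; like the 16-byte work items, it assumes the item size divides the ring size so a write never wraps:

#include <stdint.h>
#include <string.h>

#define WQ_SIZE 4096u                   /* power of two */

struct wq_model {
        uint8_t buf[WQ_SIZE];
        uint32_t head;                  /* advanced by the consumer */
        uint32_t tail;                  /* advanced by us, the producer */
};

static int wq_append(struct wq_model *wq, const void *item, uint32_t size)
{
        uint32_t used = (wq->tail - wq->head) & (WQ_SIZE - 1);

        if (WQ_SIZE - used <= size)
                return -1;              /* keep at least one slot free */

        memcpy(&wq->buf[wq->tail], item, size);         /* fill the item */
        wq->tail = (wq->tail + size) & (WQ_SIZE - 1);   /* publish new tail */
        return 0;
}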
- */ -static void flush_ggtt_writes(struct i915_vma *vma) -{ - struct drm_i915_private *i915 = vma->vm->i915; - - if (i915_vma_is_map_and_fenceable(vma)) - intel_uncore_posting_read_fw(&i915->uncore, GUC_STATUS); -} - -static void inject_preempt_context(struct work_struct *work) -{ - struct guc_preempt_work *preempt_work = - container_of(work, typeof(*preempt_work), work); - struct intel_engine_cs *engine = preempt_work->engine; - struct intel_guc *guc = container_of(preempt_work, typeof(*guc), - preempt_work[engine->id]); - struct intel_guc_client *client = guc->preempt_client; - struct guc_stage_desc *stage_desc = __get_stage_desc(client); - struct intel_context *ce = engine->preempt_context; - u32 data[7]; - - if (!ce->ring->emit) { /* recreate upon load/resume */ - u32 addr = intel_hws_preempt_done_address(engine); - u32 *cs; - - cs = ce->ring->vaddr; - if (engine->class == RENDER_CLASS) { - cs = gen8_emit_ggtt_write_rcs(cs, - GUC_PREEMPT_FINISHED, - addr, - PIPE_CONTROL_CS_STALL); - } else { - cs = gen8_emit_ggtt_write(cs, - GUC_PREEMPT_FINISHED, - addr, - 0); - *cs++ = MI_NOOP; - *cs++ = MI_NOOP; - } - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - ce->ring->emit = GUC_PREEMPT_BREADCRUMB_BYTES; - GEM_BUG_ON((void *)cs - ce->ring->vaddr != ce->ring->emit); - - flush_ggtt_writes(ce->ring->vma); - } - - spin_lock_irq(&client->wq_lock); - guc_wq_item_append(client, engine->guc_id, lower_32_bits(ce->lrc_desc), - GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64), 0); - spin_unlock_irq(&client->wq_lock); - - /* - * If GuC firmware performs an engine reset while that engine had - * a preemption pending, it will set the terminated attribute bit - * on our preemption stage descriptor. GuC firmware retains all - * pending work items for a high-priority GuC client, unlike the - * normal-priority GuC client where work items are dropped. It - * wants to make sure the preempt-to-idle work doesn't run when - * scheduling resumes, and uses this bit to inform its scheduler - * and presumably us as well. Our job is to clear it for the next - * preemption after reset, otherwise that and future preemptions - * will never complete. We'll just clear it every time. - */ - stage_desc->attribute &= ~GUC_STAGE_DESC_ATTR_TERMINATED; - - data[0] = INTEL_GUC_ACTION_REQUEST_PREEMPTION; - data[1] = client->stage_id; - data[2] = INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q | - INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q; - data[3] = engine->guc_id; - data[4] = guc->execbuf_client->priority; - data[5] = guc->execbuf_client->stage_id; - data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); - - if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) { - execlists_clear_active(&engine->execlists, - EXECLISTS_ACTIVE_PREEMPT); - tasklet_schedule(&engine->execlists.tasklet); - } - - (void)I915_SELFTEST_ONLY(engine->execlists.preempt_hang.count++); -} - -/* - * We're using user interrupt and HWSP value to mark that preemption has - * finished and GPU is idle. Normally, we could unwind and continue similar to - * execlists submission path. Unfortunately, with GuC we also need to wait for - * it to finish its own postprocessing, before attempting to submit. Otherwise - * GuC may silently ignore our submissions, and thus we risk losing request at - * best, executing out-of-order and causing kernel panic at worst. 
- */
-#define GUC_PREEMPT_POSTPROCESS_DELAY_MS 10
-static void wait_for_guc_preempt_report(struct intel_engine_cs *engine)
-{
-	struct intel_guc *guc = &engine->i915->guc;
-	struct guc_shared_ctx_data *data = guc->shared_data_vaddr;
-	struct guc_ctx_report *report =
-		&data->preempt_ctx_report[engine->guc_id];
-
-	if (wait_for_atomic(report->report_return_status ==
-			    INTEL_GUC_REPORT_STATUS_COMPLETE,
-			    GUC_PREEMPT_POSTPROCESS_DELAY_MS))
-		DRM_ERROR("Timed out waiting for GuC preemption report\n");
-	/*
-	 * GuC is expecting that we're also going to clear the affected context
-	 * counter; let's also reset the return status so we do not depend on
-	 * GuC resetting it after receiving another preempt action
-	 */
-	report->affected_count = 0;
-	report->report_return_status = INTEL_GUC_REPORT_STATUS_UNKNOWN;
-}
-
-static void complete_preempt_context(struct intel_engine_cs *engine)
-{
-	struct intel_engine_execlists *execlists = &engine->execlists;
-
-	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
-	if (inject_preempt_hang(execlists))
-		return;
-
-	execlists_cancel_port_requests(execlists);
-	execlists_unwind_incomplete_requests(execlists);
-
-	wait_for_guc_preempt_report(engine);
-	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);
-}
-
-/**
- * guc_submit() - Submit commands through GuC
- * @engine: engine associated with the commands
- *
- * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouldn't happen.
- */
-static void guc_submit(struct intel_engine_cs *engine)
-{
-	struct intel_guc *guc = &engine->i915->guc;
-	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	unsigned int n;
-
-	for (n = 0; n < execlists_num_ports(execlists); n++) {
-		struct i915_request *rq;
-		unsigned int count;
-
-		rq = port_unpack(&port[n], &count);
-		if (rq && count == 0) {
-			port_set(&port[n], port_pack(rq, ++count));
-
-			flush_ggtt_writes(rq->ring->vma);
-
-			guc_add_request(guc, rq);
-		}
-	}
-}
-
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
-	GEM_BUG_ON(port_isset(port));
-
-	port_set(port, i915_request_get(rq));
-}
-
-static inline int rq_prio(const struct i915_request *rq)
-{
-	return rq->sched.attr.priority;
-}
-
-static inline int port_prio(const struct execlist_port *port)
-{
-	return rq_prio(port_request(port)) | __NO_PREEMPTION;
-}
-
-static bool __guc_dequeue(struct intel_engine_cs *engine)
-{
-	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	struct i915_request *last = NULL;
-	const struct execlist_port * const last_port =
-		&execlists->port[execlists->port_mask];
-	bool submit = false;
-	struct rb_node *rb;
-
-	lockdep_assert_held(&engine->active.lock);
-
-	if (port_isset(port)) {
-		if (intel_engine_has_preemption(engine)) {
-			struct guc_preempt_work *preempt_work =
-				&engine->i915->guc.preempt_work[engine->id];
-			int prio = execlists->queue_priority_hint;
-
-			if (i915_scheduler_need_preempt(prio,
-							port_prio(port))) {
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_PREEMPT);
-				queue_work(engine->i915->guc.preempt_wq,
-					   &preempt_work->work);
-				return false;
-			}
-		}
-
-		port++;
-		if (port_isset(port))
-			return false;
-	}
-	GEM_BUG_ON(port_isset(port));
-
-	while ((rb = rb_first_cached(&execlists->queue))) {
-		struct i915_priolist *p = to_priolist(rb);
-		struct i915_request *rq, *rn;
-		int i;
-
-		priolist_for_each_request_consume(rq, rn, p, i) {
-
if (last && rq->hw_context != last->hw_context) { - if (port == last_port) - goto done; - - if (submit) - port_assign(port, last); - port++; - } - - list_del_init(&rq->sched.link); - - __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); - - last = rq; - submit = true; - } - - rb_erase_cached(&p->node, &execlists->queue); - i915_priolist_free(p); - } -done: - execlists->queue_priority_hint = - rb ? to_priolist(rb)->priority : INT_MIN; - if (submit) - port_assign(port, last); - if (last) - execlists_user_begin(execlists, execlists->port); - - /* We must always keep the beast fed if we have work piled up */ - GEM_BUG_ON(port_isset(execlists->port) && - !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); - GEM_BUG_ON(rb_first_cached(&execlists->queue) && - !port_isset(execlists->port)); - - return submit; -} - -static void guc_dequeue(struct intel_engine_cs *engine) -{ - if (__guc_dequeue(engine)) - guc_submit(engine); -} - -static void guc_submission_tasklet(unsigned long data) -{ - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - struct i915_request *rq; - unsigned long flags; - - spin_lock_irqsave(&engine->active.lock, flags); - - rq = port_request(port); - while (rq && i915_request_completed(rq)) { - trace_i915_request_out(rq); - i915_request_put(rq); - - port = execlists_port_complete(execlists, port); - if (port_isset(port)) { - execlists_user_begin(execlists, port); - rq = port_request(port); - } else { - execlists_user_end(execlists); - rq = NULL; - } - } - - if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && - intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) == - GUC_PREEMPT_FINISHED) - complete_preempt_context(engine); - - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) - guc_dequeue(engine); - - spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static void guc_reset_prepare(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - - GEM_TRACE("%s\n", engine->name); - - /* - * Prevent request submission to the hardware until we have - * completed the reset in i915_gem_reset_finish(). If a request - * is completed by one engine, it may then queue a request - * to a second via its execlists->tasklet *just* as we are - * calling engine->init_hw() and also writing the ELSP. - * Turning off the execlists->tasklet until the reset is over - * prevents the race. - */ - __tasklet_disable_sync_once(&execlists->tasklet); - - /* - * We're using worker to queue preemption requests from the tasklet in - * GuC submission mode. - * Even though tasklet was disabled, we may still have a worker queued. - * Let's make sure that all workers scheduled before disabling the - * tasklet are completed before continuing with the reset. - */ - if (engine->i915->guc.preempt_wq) - flush_workqueue(engine->i915->guc.preempt_wq); -} - -static void guc_reset(struct intel_engine_cs *engine, bool stalled) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *rq; - unsigned long flags; - - spin_lock_irqsave(&engine->active.lock, flags); - - execlists_cancel_port_requests(execlists); - - /* Push back any incomplete requests for replay after the reset. 
*/ - rq = execlists_unwind_incomplete_requests(execlists); - if (!rq) - goto out_unlock; - - if (!i915_request_started(rq)) - stalled = false; - - i915_reset_request(rq, stalled); - intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled); - -out_unlock: - spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static void guc_cancel_requests(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *rq, *rn; - struct rb_node *rb; - unsigned long flags; - - GEM_TRACE("%s\n", engine->name); - - /* - * Before we call engine->cancel_requests(), we should have exclusive - * access to the submission state. This is arranged for us by the - * caller disabling the interrupt generation, the tasklet and other - * threads that may then access the same state, giving us a free hand - * to reset state. However, we still need to let lockdep be aware that - * we know this state may be accessed in hardirq context, so we - * disable the irq around this manipulation and we want to keep - * the spinlock focused on its duties and not accidentally conflate - * coverage to the submission's irq state. (Similarly, although we - * shouldn't need to disable irq around the manipulation of the - * submission's irq state, we also wish to remind ourselves that - * it is irq state.) - */ - spin_lock_irqsave(&engine->active.lock, flags); - - /* Cancel the requests on the HW and clear the ELSP tracker. */ - execlists_cancel_port_requests(execlists); - - /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); - - i915_request_mark_complete(rq); - } - - /* Flush the queued requests to the timeline list (for retiring). */ - while ((rb = rb_first_cached(&execlists->queue))) { - struct i915_priolist *p = to_priolist(rb); - int i; - - priolist_for_each_request_consume(rq, rn, p, i) { - list_del_init(&rq->sched.link); - __i915_request_submit(rq); - dma_fence_set_error(&rq->fence, -EIO); - i915_request_mark_complete(rq); - } - - rb_erase_cached(&p->node, &execlists->queue); - i915_priolist_free(p); - } - - /* Remaining _unready_ requests will be nop'ed when submitted */ - - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; - GEM_BUG_ON(port_isset(execlists->port)); - - spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static void guc_reset_finish(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - - if (__tasklet_enable(&execlists->tasklet)) - /* And kick in case we missed a new request submission. */ - tasklet_hi_schedule(&execlists->tasklet); - - GEM_TRACE("%s: depth->%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); -} - -/* - * Everything below here is concerned with setup & teardown, and is - * therefore not part of the somewhat time-critical batch-submission - * path of guc_submit() above. 
- */ - -/* Check that a doorbell register is in the expected state */ -static bool doorbell_ok(struct intel_guc *guc, u16 db_id) -{ - bool valid; - - GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS); - - valid = __doorbell_valid(guc, db_id); - - if (test_bit(db_id, guc->doorbell_bitmap) == valid) - return true; - - DRM_DEBUG_DRIVER("Doorbell %u has unexpected state: valid=%s\n", - db_id, yesno(valid)); - - return false; -} - -static bool guc_verify_doorbells(struct intel_guc *guc) -{ - bool doorbells_ok = true; - u16 db_id; - - for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) - if (!doorbell_ok(guc, db_id)) - doorbells_ok = false; - - return doorbells_ok; -} - -/** - * guc_client_alloc() - Allocate an intel_guc_client - * @dev_priv: driver private data structure - * @engines: The set of engines to enable for this client - * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW - * The kernel client to replace ExecList submission is created with - * NORMAL priority. Priority of a client for scheduler can be HIGH, - * while a preemption context can use CRITICAL. - * @ctx: the context that owns the client (we use the default render - * context) - * - * Return: An intel_guc_client object if success, else NULL. - */ -static struct intel_guc_client * -guc_client_alloc(struct drm_i915_private *dev_priv, - u32 engines, - u32 priority, - struct i915_gem_context *ctx) -{ - struct intel_guc_client *client; - struct intel_guc *guc = &dev_priv->guc; - struct i915_vma *vma; - void *vaddr; - int ret; - - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) - return ERR_PTR(-ENOMEM); - - client->guc = guc; - client->owner = ctx; - client->engines = engines; - client->priority = priority; - client->doorbell_id = GUC_DOORBELL_INVALID; - spin_lock_init(&client->wq_lock); - - ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, - GFP_KERNEL); - if (ret < 0) - goto err_client; - - client->stage_id = ret; - - /* The first page is doorbell/proc_desc. Two followed pages are wq. */ - vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_id; - } - - /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ - client->vma = vma; - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_vma; - } - client->vaddr = vaddr; - - ret = reserve_doorbell(client); - if (ret) - goto err_vaddr; - - client->doorbell_offset = __select_cacheline(guc); - - /* - * Since the doorbell only requires a single cacheline, we can save - * space by putting the application process descriptor in the same - * page. Use the half of the page that doesn't include the doorbell. 
- */ - if (client->doorbell_offset >= (GUC_DB_SIZE / 2)) - client->proc_desc_offset = 0; - else - client->proc_desc_offset = (GUC_DB_SIZE / 2); - - DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: stage_id %u\n", - priority, client, client->engines, client->stage_id); - DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n", - client->doorbell_id, client->doorbell_offset); - - return client; - -err_vaddr: - i915_gem_object_unpin_map(client->vma->obj); -err_vma: - i915_vma_unpin_and_release(&client->vma, 0); -err_id: - ida_simple_remove(&guc->stage_ids, client->stage_id); -err_client: - kfree(client); - return ERR_PTR(ret); -} - -static void guc_client_free(struct intel_guc_client *client) -{ - unreserve_doorbell(client); - i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP); - ida_simple_remove(&client->guc->stage_ids, client->stage_id); - kfree(client); -} - -static inline bool ctx_save_restore_disabled(struct intel_context *ce) -{ - u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1]; - -#define SR_DISABLED \ - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \ - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) - - return (sr & SR_DISABLED) == SR_DISABLED; - -#undef SR_DISABLED -} - -static int guc_clients_create(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_guc_client *client; - - GEM_BUG_ON(guc->execbuf_client); - GEM_BUG_ON(guc->preempt_client); - - client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->engine_mask, - GUC_CLIENT_PRIORITY_KMD_NORMAL, - dev_priv->kernel_context); - if (IS_ERR(client)) { - DRM_ERROR("Failed to create GuC client for submission!\n"); - return PTR_ERR(client); - } - guc->execbuf_client = client; - - if (dev_priv->preempt_context) { - client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->engine_mask, - GUC_CLIENT_PRIORITY_KMD_HIGH, - dev_priv->preempt_context); - if (IS_ERR(client)) { - DRM_ERROR("Failed to create GuC client for preemption!\n"); - guc_client_free(guc->execbuf_client); - guc->execbuf_client = NULL; - return PTR_ERR(client); - } - guc->preempt_client = client; - } - - return 0; -} - -static void guc_clients_destroy(struct intel_guc *guc) -{ - struct intel_guc_client *client; - - client = fetch_and_zero(&guc->preempt_client); - if (client) - guc_client_free(client); - - client = fetch_and_zero(&guc->execbuf_client); - if (client) - guc_client_free(client); -} - -static int __guc_client_enable(struct intel_guc_client *client) -{ - int ret; - - guc_proc_desc_init(client); - guc_stage_desc_init(client); - - ret = create_doorbell(client); - if (ret) - goto fail; - - return 0; - -fail: - guc_stage_desc_fini(client); - guc_proc_desc_fini(client); - return ret; -} - -static void __guc_client_disable(struct intel_guc_client *client) -{ - /* - * By the time we're here, GuC may have already been reset. if that is - * the case, instead of trying (in vain) to communicate with it, let's - * just cleanup the doorbell HW and our internal state. 
- */ - if (intel_guc_is_loaded(client->guc)) - destroy_doorbell(client); - else - __fini_doorbell(client); - - guc_stage_desc_fini(client); - guc_proc_desc_fini(client); -} - -static int guc_clients_enable(struct intel_guc *guc) -{ - int ret; - - ret = __guc_client_enable(guc->execbuf_client); - if (ret) - return ret; - - if (guc->preempt_client) { - ret = __guc_client_enable(guc->preempt_client); - if (ret) { - __guc_client_disable(guc->execbuf_client); - return ret; - } - } - - return 0; -} - -static void guc_clients_disable(struct intel_guc *guc) -{ - if (guc->preempt_client) - __guc_client_disable(guc->preempt_client); - - if (guc->execbuf_client) - __guc_client_disable(guc->execbuf_client); -} - -/* - * Set up the memory resources to be shared with the GuC (via the GGTT) - * at firmware loading time. - */ -int intel_guc_submission_init(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret; - - if (guc->stage_desc_pool) - return 0; - - ret = guc_stage_desc_pool_create(guc); - if (ret) - return ret; - /* - * Keep static analysers happy, let them know that we allocated the - * vma after testing that it didn't exist earlier. - */ - GEM_BUG_ON(!guc->stage_desc_pool); - - WARN_ON(!guc_verify_doorbells(guc)); - ret = guc_clients_create(guc); - if (ret) - goto err_pool; - - for_each_engine(engine, dev_priv, id) { - guc->preempt_work[id].engine = engine; - INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context); - } - - return 0; - -err_pool: - guc_stage_desc_pool_destroy(guc); - return ret; -} - -void intel_guc_submission_fini(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) - cancel_work_sync(&guc->preempt_work[id].work); - - guc_clients_destroy(guc); - WARN_ON(!guc_verify_doorbells(guc)); - - if (guc->stage_desc_pool) - guc_stage_desc_pool_destroy(guc); -} - -static void guc_interrupts_capture(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int irqs; - - /* tell all command streamers to forward interrupts (but not vblank) - * to GuC - */ - irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, dev_priv, id) - ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); - - /* route USER_INTERRUPT to Host, all others are sent to GuC. */ - irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - /* These three registers have the same bit definitions */ - I915_WRITE(GUC_BCS_RCS_IER, ~irqs); - I915_WRITE(GUC_VCS2_VCS1_IER, ~irqs); - I915_WRITE(GUC_WD_VECS_IER, ~irqs); - - /* - * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all - * (unmasked) PM interrupts to the GuC. All other bits of this - * register *disable* generation of a specific interrupt. - * - * 'pm_intrmsk_mbz' indicates bits that are NOT to be set when - * writing to the PM interrupt mask register, i.e. interrupts - * that must not be disabled. - * - * If the GuC is handling these interrupts, then we must not let - * the PM code disable ANY interrupt that the GuC is expecting. - * So for each ENABLED (0) bit in this register, we must SET the - * bit in pm_intrmsk_mbz so that it's left enabled for the GuC. - * GuC needs ARAT expired interrupt unmasked hence it is set in - * pm_intrmsk_mbz. 
- * - * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will - * result in the register bit being left SET! - */ - rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; - rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; -} - -static void guc_interrupts_release(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int irqs; - - /* - * tell all command streamers NOT to forward interrupts or vblank - * to GuC. - */ - irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); - irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, dev_priv, id) - ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); - - /* route all GT interrupts to the host */ - I915_WRITE(GUC_BCS_RCS_IER, 0); - I915_WRITE(GUC_VCS2_VCS1_IER, 0); - I915_WRITE(GUC_WD_VECS_IER, 0); - - rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; -} - -static void guc_submission_park(struct intel_engine_cs *engine) -{ - intel_engine_park(engine); - intel_engine_unpin_breadcrumbs_irq(engine); - engine->flags &= ~I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; -} - -static void guc_submission_unpark(struct intel_engine_cs *engine) -{ - engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; - intel_engine_pin_breadcrumbs_irq(engine); -} - -static void guc_set_default_submission(struct intel_engine_cs *engine) -{ - /* - * We inherit a bunch of functions from execlists that we'd like - * to keep using: - * - * engine->submit_request = execlists_submit_request; - * engine->cancel_requests = execlists_cancel_requests; - * engine->schedule = execlists_schedule; - * - * But we need to override the actual submission backend in order - * to talk to the GuC. - */ - intel_execlists_set_default_submission(engine); - - engine->execlists.tasklet.func = guc_submission_tasklet; - - engine->park = guc_submission_park; - engine->unpark = guc_submission_unpark; - - engine->reset.prepare = guc_reset_prepare; - engine->reset.reset = guc_reset; - engine->reset.finish = guc_reset_finish; - - engine->cancel_requests = guc_cancel_requests; - - engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; -} - -int intel_guc_submission_enable(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - /* - * We're using GuC work items for submitting work through GuC. Since - * we're coalescing multiple requests from a single context into a - * single work item prior to assigning it to execlist_port, we can - * never have more work items than the total number of ports (for all - * engines). The GuC firmware is controlling the HEAD of work queue, - * and it is guaranteed that it will remove the work item from the - * queue before our request is completed. 
- */ - BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.port) * - sizeof(struct guc_wq_item) * - I915_NUM_ENGINES > GUC_WQ_SIZE); - - GEM_BUG_ON(!guc->execbuf_client); - - err = guc_clients_enable(guc); - if (err) - return err; - - /* Take over from manual control of ELSP (execlists) */ - guc_interrupts_capture(dev_priv); - - for_each_engine(engine, dev_priv, id) { - engine->set_default_submission = guc_set_default_submission; - engine->set_default_submission(engine); - } - - return 0; -} - -void intel_guc_submission_disable(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */ - - guc_interrupts_release(dev_priv); - guc_clients_disable(guc); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_guc.c" -#endif diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h deleted file mode 100644 index 7d823a513b9c..000000000000 --- a/drivers/gpu/drm/i915/intel_guc_submission.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright © 2014-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef _INTEL_GUC_SUBMISSION_H_ -#define _INTEL_GUC_SUBMISSION_H_ - -#include <linux/spinlock.h> - -#include "gt/intel_engine_types.h" - -#include "i915_gem.h" -#include "i915_selftest.h" - -struct drm_i915_private; - -/* - * This structure primarily describes the GEM object shared with the GuC. - * The specs sometimes refer to this object as a "GuC context", but we use - * the term "client" to avoid confusion with hardware contexts. This - * GEM object is held for the entire lifetime of our interaction with - * the GuC, being allocated before the GuC is loaded with its firmware. - * Because there's no way to update the address used by the GuC after - * initialisation, the shared object must stay pinned into the GGTT as - * long as the GuC is in use. We also keep the first page (only) mapped - * into kernel address space, as it includes shared data that must be - * updated on every request submission. - * - * The single GEM object described here is actually made up of several - * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process descriptor" which holds sequence data for - * the doorbell, and one cacheline which actually *is* the doorbell; a - * write to this will "ring the doorbell" (i.e. 
send an interrupt to the - * GuC). The subsequent pages of the client object constitute the work - * queue (a circular array of work items), again described in the process - * descriptor. Work queue pages are mapped momentarily as required. - */ -struct intel_guc_client { - struct i915_vma *vma; - void *vaddr; - struct i915_gem_context *owner; - struct intel_guc *guc; - - /* bitmap of (host) engine ids */ - u32 engines; - u32 priority; - u32 stage_id; - u32 proc_desc_offset; - - u16 doorbell_id; - unsigned long doorbell_offset; - - /* Protects GuC client's WQ access */ - spinlock_t wq_lock; - /* Per-engine counts of GuC submissions */ - u64 submissions[I915_NUM_ENGINES]; - - /* For testing purposes, use nop WQ items instead of real ones */ - I915_SELFTEST_DECLARE(bool use_nop_wqi); -}; - -int intel_guc_submission_init(struct intel_guc *guc); -int intel_guc_submission_enable(struct intel_guc *guc); -void intel_guc_submission_disable(struct intel_guc *guc); -void intel_guc_submission_fini(struct intel_guc *guc); -int intel_guc_preempt_work_create(struct intel_guc *guc); -void intel_guc_preempt_work_destroy(struct intel_guc *guc); - -#endif diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index 1d7d26e4cf14..2b6c016387c2 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -95,7 +95,7 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) { int ret; - if (i915_inject_load_failure()) + if (i915_inject_probe_failure(dev_priv)) return -ENODEV; if (!i915_modparams.enable_gvt) { @@ -122,13 +122,14 @@ bail: } /** - * intel_gvt_cleanup - cleanup GVT components when i915 driver is unloading + * intel_gvt_driver_remove - cleanup GVT components when i915 driver is + * unbinding * @dev_priv: drm i915 private * * * This function is called at the i915 driver unloading stage, to shutdown * GVT components and release the related resources. 
*/ -void intel_gvt_cleanup(struct drm_i915_private *dev_priv) +void intel_gvt_driver_remove(struct drm_i915_private *dev_priv) { if (!intel_gvt_active(dev_priv)) return; diff --git a/drivers/gpu/drm/i915/intel_gvt.h b/drivers/gpu/drm/i915/intel_gvt.h index 61b246470282..502fad8a8652 100644 --- a/drivers/gpu/drm/i915/intel_gvt.h +++ b/drivers/gpu/drm/i915/intel_gvt.h @@ -24,11 +24,11 @@ #ifndef _INTEL_GVT_H_ #define _INTEL_GVT_H_ -struct intel_gvt; +struct drm_i915_private; #ifdef CONFIG_DRM_I915_GVT int intel_gvt_init(struct drm_i915_private *dev_priv); -void intel_gvt_cleanup(struct drm_i915_private *dev_priv); +void intel_gvt_driver_remove(struct drm_i915_private *dev_priv); int intel_gvt_init_device(struct drm_i915_private *dev_priv); void intel_gvt_clean_device(struct drm_i915_private *dev_priv); int intel_gvt_init_host(void); @@ -38,7 +38,8 @@ static inline int intel_gvt_init(struct drm_i915_private *dev_priv) { return 0; } -static inline void intel_gvt_cleanup(struct drm_i915_private *dev_priv) + +static inline void intel_gvt_driver_remove(struct drm_i915_private *dev_priv) { } diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c deleted file mode 100644 index fb6f693d3cac..000000000000 --- a/drivers/gpu/drm/i915/intel_huc.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright © 2016-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include <linux/types.h> - -#include "intel_huc.h" -#include "i915_drv.h" - -void intel_huc_init_early(struct intel_huc *huc) -{ - struct drm_i915_private *i915 = huc_to_i915(huc); - - intel_huc_fw_init_early(huc); - - if (INTEL_GEN(i915) >= 11) { - huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO; - huc->status.mask = HUC_LOAD_SUCCESSFUL; - huc->status.value = HUC_LOAD_SUCCESSFUL; - } else { - huc->status.reg = HUC_STATUS2; - huc->status.mask = HUC_FW_VERIFIED; - huc->status.value = HUC_FW_VERIFIED; - } -} - -int intel_huc_init_misc(struct intel_huc *huc) -{ - struct drm_i915_private *i915 = huc_to_i915(huc); - - intel_uc_fw_fetch(i915, &huc->fw); - return 0; -} - -static int intel_huc_rsa_data_create(struct intel_huc *huc) -{ - struct drm_i915_private *i915 = huc_to_i915(huc); - struct intel_guc *guc = &i915->guc; - struct i915_vma *vma; - void *vaddr; - - /* - * HuC firmware will sit above GUC_GGTT_TOP and will not map - * through GTT. Unfortunately, this means GuC cannot perform - * the HuC auth. 
as the rsa offset now falls within the GuC - * inaccessible range. We resort to perma-pinning an additional - * vma within the accessible range that only contains the rsa - * signature. The GuC can use this extra pinning to perform - * the authentication since its GGTT offset will be GuC - * accessible. - */ - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); - } - - huc->rsa_data = vma; - huc->rsa_data_vaddr = vaddr; - - return 0; -} - -static void intel_huc_rsa_data_destroy(struct intel_huc *huc) -{ - i915_vma_unpin_and_release(&huc->rsa_data, I915_VMA_RELEASE_MAP); -} - -int intel_huc_init(struct intel_huc *huc) -{ - int err; - - err = intel_huc_rsa_data_create(huc); - if (err) - return err; - - return intel_uc_fw_init(&huc->fw); -} - -void intel_huc_fini(struct intel_huc *huc) -{ - intel_uc_fw_fini(&huc->fw); - intel_huc_rsa_data_destroy(huc); -} - -/** - * intel_huc_auth() - Authenticate HuC uCode - * @huc: intel_huc structure - * - * Called after HuC and GuC firmware loading during intel_uc_init_hw(). - * - * This function pins HuC firmware image object into GGTT. - * Then it invokes GuC action to authenticate passing the offset to RSA - * signature through intel_guc_auth_huc(). It then waits for 50ms for - * firmware verification ACK and unpins the object. - */ -int intel_huc_auth(struct intel_huc *huc) -{ - struct drm_i915_private *i915 = huc_to_i915(huc); - struct intel_guc *guc = &i915->guc; - int ret; - - if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return -ENOEXEC; - - ret = intel_guc_auth_huc(guc, - intel_guc_ggtt_offset(guc, huc->rsa_data)); - if (ret) { - DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); - goto fail; - } - - /* Check authentication status, it should be done by now */ - ret = __intel_wait_for_register(&i915->uncore, - huc->status.reg, - huc->status.mask, - huc->status.value, - 2, 50, NULL); - if (ret) { - DRM_ERROR("HuC: Firmware not verified %d\n", ret); - goto fail; - } - - return 0; - -fail: - huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL; - - DRM_ERROR("HuC: Authentication failed %d\n", ret); - return ret; -} - -/** - * intel_huc_check_status() - check HuC status - * @huc: intel_huc structure - * - * This function reads status register to verify if HuC - * firmware was successfully loaded. - * - * Returns: 1 if HuC firmware is loaded and verified, - * 0 if HuC firmware is not loaded and -ENODEV if HuC - * is not present on this platform. 
- */ -int intel_huc_check_status(struct intel_huc *huc) -{ - struct drm_i915_private *dev_priv = huc_to_i915(huc); - intel_wakeref_t wakeref; - bool status = false; - - if (!HAS_HUC(dev_priv)) - return -ENODEV; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) - status = (I915_READ(huc->status.reg) & huc->status.mask) == - huc->status.value; - - return status; -} diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h deleted file mode 100644 index 2a6c94e79f17..000000000000 --- a/drivers/gpu/drm/i915/intel_huc.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright © 2014-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef _INTEL_HUC_H_ -#define _INTEL_HUC_H_ - -#include "i915_reg.h" -#include "intel_uc_fw.h" -#include "intel_huc_fw.h" - -struct intel_huc { - /* Generic uC firmware management */ - struct intel_uc_fw fw; - - /* HuC-specific additions */ - struct i915_vma *rsa_data; - void *rsa_data_vaddr; - - struct { - i915_reg_t reg; - u32 mask; - u32 value; - } status; -}; - -void intel_huc_init_early(struct intel_huc *huc); -int intel_huc_init_misc(struct intel_huc *huc); -int intel_huc_init(struct intel_huc *huc); -void intel_huc_fini(struct intel_huc *huc); -int intel_huc_auth(struct intel_huc *huc); -int intel_huc_check_status(struct intel_huc *huc); - -static inline void intel_huc_fini_misc(struct intel_huc *huc) -{ - intel_uc_fw_cleanup_fetch(&huc->fw); -} - -static inline int intel_huc_sanitize(struct intel_huc *huc) -{ - intel_uc_fw_sanitize(&huc->fw); - return 0; -} - -#endif diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c deleted file mode 100644 index 05cbf8338f53..000000000000 --- a/drivers/gpu/drm/i915/intel_huc_fw.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation - */ - -#include "intel_huc_fw.h" -#include "i915_drv.h" - -/** - * DOC: HuC Firmware - * - * Motivation: - * GEN9 introduces a new dedicated firmware for usage in media HEVC (High - * Efficiency Video Coding) operations. Userspace can use the firmware - * capabilities by adding HuC specific commands to batch buffers. - * - * Implementation: - * The same firmware loader is used as the GuC. However, the actual - * loading to HW is deferred until GEM initialization is done. - * - * Note that HuC firmware loading must be done before GuC loading. 
- */ - -#define BXT_HUC_FW_MAJOR 01 -#define BXT_HUC_FW_MINOR 8 -#define BXT_BLD_NUM 2893 - -#define SKL_HUC_FW_MAJOR 01 -#define SKL_HUC_FW_MINOR 07 -#define SKL_BLD_NUM 1398 - -#define KBL_HUC_FW_MAJOR 02 -#define KBL_HUC_FW_MINOR 00 -#define KBL_BLD_NUM 1810 - -#define GLK_HUC_FW_MAJOR 03 -#define GLK_HUC_FW_MINOR 01 -#define GLK_BLD_NUM 2893 - -#define ICL_HUC_FW_MAJOR 8 -#define ICL_HUC_FW_MINOR 4 -#define ICL_BLD_NUM 3238 - -#define HUC_FW_PATH(platform, major, minor, bld_num) \ - "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ - __stringify(minor) "_" __stringify(bld_num) ".bin" - -#define I915_SKL_HUC_UCODE HUC_FW_PATH(skl, SKL_HUC_FW_MAJOR, \ - SKL_HUC_FW_MINOR, SKL_BLD_NUM) -MODULE_FIRMWARE(I915_SKL_HUC_UCODE); - -#define I915_BXT_HUC_UCODE HUC_FW_PATH(bxt, BXT_HUC_FW_MAJOR, \ - BXT_HUC_FW_MINOR, BXT_BLD_NUM) -MODULE_FIRMWARE(I915_BXT_HUC_UCODE); - -#define I915_KBL_HUC_UCODE HUC_FW_PATH(kbl, KBL_HUC_FW_MAJOR, \ - KBL_HUC_FW_MINOR, KBL_BLD_NUM) -MODULE_FIRMWARE(I915_KBL_HUC_UCODE); - -#define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \ - GLK_HUC_FW_MINOR, GLK_BLD_NUM) -MODULE_FIRMWARE(I915_GLK_HUC_UCODE); - -#define I915_ICL_HUC_UCODE HUC_FW_PATH(icl, ICL_HUC_FW_MAJOR, \ - ICL_HUC_FW_MINOR, ICL_BLD_NUM) -MODULE_FIRMWARE(I915_ICL_HUC_UCODE); - -static void huc_fw_select(struct intel_uc_fw *huc_fw) -{ - struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); - struct drm_i915_private *dev_priv = huc_to_i915(huc); - - GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); - - if (!HAS_HUC(dev_priv)) - return; - - if (i915_modparams.huc_firmware_path) { - huc_fw->path = i915_modparams.huc_firmware_path; - huc_fw->major_ver_wanted = 0; - huc_fw->minor_ver_wanted = 0; - } else if (IS_SKYLAKE(dev_priv)) { - huc_fw->path = I915_SKL_HUC_UCODE; - huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; - } else if (IS_BROXTON(dev_priv)) { - huc_fw->path = I915_BXT_HUC_UCODE; - huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - huc_fw->path = I915_KBL_HUC_UCODE; - huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; - } else if (IS_GEMINILAKE(dev_priv)) { - huc_fw->path = I915_GLK_HUC_UCODE; - huc_fw->major_ver_wanted = GLK_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = GLK_HUC_FW_MINOR; - } else if (IS_ICELAKE(dev_priv)) { - huc_fw->path = I915_ICL_HUC_UCODE; - huc_fw->major_ver_wanted = ICL_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = ICL_HUC_FW_MINOR; - } -} - -/** - * intel_huc_fw_init_early() - initializes HuC firmware struct - * @huc: intel_huc struct - * - * On platforms with HuC selects firmware for uploading - */ -void intel_huc_fw_init_early(struct intel_huc *huc) -{ - struct intel_uc_fw *huc_fw = &huc->fw; - - intel_uc_fw_init_early(huc_fw, INTEL_UC_FW_TYPE_HUC); - huc_fw_select(huc_fw); -} - -static void huc_xfer_rsa(struct intel_huc *huc) -{ - struct intel_uc_fw *fw = &huc->fw; - struct sg_table *pages = fw->obj->mm.pages; - - /* - * HuC firmware image is outside GuC accessible range. 
- * Copy the RSA signature out of the image into - * the perma-pinned region set aside for it - */ - sg_pcopy_to_buffer(pages->sgl, pages->nents, - huc->rsa_data_vaddr, fw->rsa_size, - fw->rsa_offset); -} - -static int huc_xfer_ucode(struct intel_huc *huc) -{ - struct intel_uc_fw *huc_fw = &huc->fw; - struct drm_i915_private *dev_priv = huc_to_i915(huc); - struct intel_uncore *uncore = &dev_priv->uncore; - unsigned long offset = 0; - u32 size; - int ret; - - GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); - - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - /* Set the source address for the uCode */ - offset = intel_uc_fw_ggtt_offset(huc_fw) + - huc_fw->header_offset; - intel_uncore_write(uncore, DMA_ADDR_0_LOW, - lower_32_bits(offset)); - intel_uncore_write(uncore, DMA_ADDR_0_HIGH, - upper_32_bits(offset) & 0xFFFF); - - /* - * Hardware doesn't look at destination address for HuC. Set it to 0, - * but still program the correct address space. - */ - intel_uncore_write(uncore, DMA_ADDR_1_LOW, 0); - intel_uncore_write(uncore, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); - - size = huc_fw->header_size + huc_fw->ucode_size; - intel_uncore_write(uncore, DMA_COPY_SIZE, size); - - /* Start the DMA */ - intel_uncore_write(uncore, DMA_CTRL, - _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA)); - - /* Wait for DMA to finish */ - ret = intel_wait_for_register_fw(uncore, DMA_CTRL, START_DMA, 0, 100); - - DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret); - - /* Disable the bits once DMA is over */ - intel_uncore_write(uncore, DMA_CTRL, _MASKED_BIT_DISABLE(HUC_UKERNEL)); - - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - - return ret; -} - -/** - * huc_fw_xfer() - DMA's the firmware - * @huc_fw: the firmware descriptor - * - * Transfer the firmware image to RAM for execution by the microcontroller. - * - * Return: 0 on success, non-zero on failure - */ -static int huc_fw_xfer(struct intel_uc_fw *huc_fw) -{ - struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); - - huc_xfer_rsa(huc); - - return huc_xfer_ucode(huc); -} - -/** - * intel_huc_fw_upload() - load HuC uCode to device - * @huc: intel_huc structure - * - * Called from intel_uc_init_hw() during driver load, resume from sleep and - * after a GPU reset. Note that HuC must be loaded before GuC. - * - * The firmware image should have already been fetched into memory, so only - * check that fetch succeeded, and then transfer the image to the h/w. - * - * Return: non-zero code on error - */ -int intel_huc_fw_upload(struct intel_huc *huc) -{ - return intel_uc_fw_upload(&huc->fw, huc_fw_xfer); -} diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c new file mode 100644 index 000000000000..d0d038b3cd79 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -0,0 +1,302 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "i915_drv.h" + +/* XXX: Hysterical raisins. BIT(inst) needs to just be (inst) at some point. 
*/ +#define REGION_MAP(type, inst) \ + BIT((type) + INTEL_MEMORY_TYPE_SHIFT) | BIT(inst) + +const u32 intel_region_map[] = { + [INTEL_REGION_SMEM] = REGION_MAP(INTEL_MEMORY_SYSTEM, 0), + [INTEL_REGION_LMEM] = REGION_MAP(INTEL_MEMORY_LOCAL, 0), + [INTEL_REGION_STOLEN] = REGION_MAP(INTEL_MEMORY_STOLEN, 0), +}; + +struct intel_memory_region * +intel_memory_region_by_type(struct drm_i915_private *i915, + enum intel_memory_type mem_type) +{ + struct intel_memory_region *mr; + int id; + + for_each_memory_region(mr, i915, id) + if (mr->type == mem_type) + return mr; + + return NULL; +} + +static u64 +intel_memory_region_free_pages(struct intel_memory_region *mem, + struct list_head *blocks) +{ + struct i915_buddy_block *block, *on; + u64 size = 0; + + list_for_each_entry_safe(block, on, blocks, link) { + size += i915_buddy_block_size(&mem->mm, block); + i915_buddy_free(&mem->mm, block); + } + INIT_LIST_HEAD(blocks); + + return size; +} + +void +__intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, + struct list_head *blocks) +{ + mutex_lock(&mem->mm_lock); + mem->avail += intel_memory_region_free_pages(mem, blocks); + mutex_unlock(&mem->mm_lock); +} + +void +__intel_memory_region_put_block_buddy(struct i915_buddy_block *block) +{ + struct list_head blocks; + + INIT_LIST_HEAD(&blocks); + list_add(&block->link, &blocks); + __intel_memory_region_put_pages_buddy(block->private, &blocks); +} + +int +__intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags, + struct list_head *blocks) +{ + unsigned int min_order = 0; + unsigned long n_pages; + + GEM_BUG_ON(!IS_ALIGNED(size, mem->mm.chunk_size)); + GEM_BUG_ON(!list_empty(blocks)); + + if (flags & I915_ALLOC_MIN_PAGE_SIZE) { + min_order = ilog2(mem->min_page_size) - + ilog2(mem->mm.chunk_size); + } + + if (flags & I915_ALLOC_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_order = ilog2(size) - ilog2(mem->mm.chunk_size); + } + + if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size) + return -E2BIG; + + n_pages = size >> ilog2(mem->mm.chunk_size); + + mutex_lock(&mem->mm_lock); + + do { + struct i915_buddy_block *block; + unsigned int order; + + order = fls(n_pages) - 1; + GEM_BUG_ON(order > mem->mm.max_order); + GEM_BUG_ON(order < min_order); + + do { + block = i915_buddy_alloc(&mem->mm, order); + if (!IS_ERR(block)) + break; + + if (order-- == min_order) + goto err_free_blocks; + } while (1); + + n_pages -= BIT(order); + + block->private = mem; + list_add(&block->link, blocks); + + if (!n_pages) + break; + } while (1); + + mem->avail -= size; + mutex_unlock(&mem->mm_lock); + return 0; + +err_free_blocks: + intel_memory_region_free_pages(mem, blocks); + mutex_unlock(&mem->mm_lock); + return -ENXIO; +} + +struct i915_buddy_block * +__intel_memory_region_get_block_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct i915_buddy_block *block; + LIST_HEAD(blocks); + int ret; + + ret = __intel_memory_region_get_pages_buddy(mem, size, flags, &blocks); + if (ret) + return ERR_PTR(ret); + + block = list_first_entry(&blocks, typeof(*block), link); + list_del_init(&block->link); + return block; +} + +int intel_memory_region_init_buddy(struct intel_memory_region *mem) +{ + return i915_buddy_init(&mem->mm, resource_size(&mem->region), + PAGE_SIZE); +} + +void intel_memory_region_release_buddy(struct intel_memory_region *mem) +{ + i915_buddy_fini(&mem->mm); +} + +struct intel_memory_region * +intel_memory_region_create(struct drm_i915_private 
*i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start, + const struct intel_memory_region_ops *ops) +{ + struct intel_memory_region *mem; + int err; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return ERR_PTR(-ENOMEM); + + mem->i915 = i915; + mem->region = (struct resource)DEFINE_RES_MEM(start, size); + mem->io_start = io_start; + mem->min_page_size = min_page_size; + mem->ops = ops; + mem->total = size; + mem->avail = mem->total; + + mutex_init(&mem->objects.lock); + INIT_LIST_HEAD(&mem->objects.list); + INIT_LIST_HEAD(&mem->objects.purgeable); + + mutex_init(&mem->mm_lock); + + if (ops->init) { + err = ops->init(mem); + if (err) + goto err_free; + } + + kref_init(&mem->kref); + return mem; + +err_free: + kfree(mem); + return ERR_PTR(err); +} + +void intel_memory_region_set_name(struct intel_memory_region *mem, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vsnprintf(mem->name, sizeof(mem->name), fmt, ap); + va_end(ap); +} + +static void __intel_memory_region_destroy(struct kref *kref) +{ + struct intel_memory_region *mem = + container_of(kref, typeof(*mem), kref); + + if (mem->ops->release) + mem->ops->release(mem); + + mutex_destroy(&mem->mm_lock); + mutex_destroy(&mem->objects.lock); + kfree(mem); +} + +struct intel_memory_region * +intel_memory_region_get(struct intel_memory_region *mem) +{ + kref_get(&mem->kref); + return mem; +} + +void intel_memory_region_put(struct intel_memory_region *mem) +{ + kref_put(&mem->kref, __intel_memory_region_destroy); +} + +/* Global memory region registration -- only slight layer inversions! */ + +int intel_memory_regions_hw_probe(struct drm_i915_private *i915) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) { + struct intel_memory_region *mem = ERR_PTR(-ENODEV); + u32 type; + + if (!HAS_REGION(i915, BIT(i))) + continue; + + type = MEMORY_TYPE_FROM_REGION(intel_region_map[i]); + switch (type) { + case INTEL_MEMORY_SYSTEM: + mem = i915_gem_shmem_setup(i915); + break; + case INTEL_MEMORY_STOLEN: + mem = i915_gem_stolen_setup(i915); + break; + case INTEL_MEMORY_LOCAL: + mem = intel_setup_fake_lmem(i915); + break; + } + + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + DRM_ERROR("Failed to setup region(%d) type=%d\n", err, type); + goto out_cleanup; + } + + mem->id = intel_region_map[i]; + mem->type = type; + mem->instance = MEMORY_INSTANCE_FROM_REGION(intel_region_map[i]); + + i915->mm.regions[i] = mem; + } + + return 0; + +out_cleanup: + intel_memory_regions_driver_release(i915); + return err; +} + +void intel_memory_regions_driver_release(struct drm_i915_private *i915) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) { + struct intel_memory_region *region = + fetch_and_zero(&i915->mm.regions[i]); + + if (region) + intel_memory_region_put(region); + } +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_memory_region.c" +#include "selftests/mock_region.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h new file mode 100644 index 000000000000..232490d89a83 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_MEMORY_REGION_H__ +#define __INTEL_MEMORY_REGION_H__ + +#include <linux/kref.h> +#include <linux/ioport.h> +#include <linux/mutex.h> +#include <linux/io-mapping.h> +#include <drm/drm_mm.h> + +#include 
"i915_buddy.h" + +struct drm_i915_private; +struct drm_i915_gem_object; +struct intel_memory_region; +struct sg_table; + +/** + * Base memory type + */ +enum intel_memory_type { + INTEL_MEMORY_SYSTEM = 0, + INTEL_MEMORY_LOCAL, + INTEL_MEMORY_STOLEN, +}; + +enum intel_region_id { + INTEL_REGION_SMEM = 0, + INTEL_REGION_LMEM, + INTEL_REGION_STOLEN, + INTEL_REGION_UNKNOWN, /* Should be last */ +}; + +#define REGION_SMEM BIT(INTEL_REGION_SMEM) +#define REGION_LMEM BIT(INTEL_REGION_LMEM) +#define REGION_STOLEN BIT(INTEL_REGION_STOLEN) + +#define INTEL_MEMORY_TYPE_SHIFT 16 + +#define MEMORY_TYPE_FROM_REGION(r) (ilog2((r) >> INTEL_MEMORY_TYPE_SHIFT)) +#define MEMORY_INSTANCE_FROM_REGION(r) (ilog2((r) & 0xffff)) + +#define I915_ALLOC_MIN_PAGE_SIZE BIT(0) +#define I915_ALLOC_CONTIGUOUS BIT(1) + +#define for_each_memory_region(mr, i915, id) \ + for (id = 0; id < ARRAY_SIZE((i915)->mm.regions); id++) \ + for_each_if((mr) = (i915)->mm.regions[id]) + +/** + * Memory regions encoded as type | instance + */ +extern const u32 intel_region_map[]; + +struct intel_memory_region_ops { + unsigned int flags; + + int (*init)(struct intel_memory_region *mem); + void (*release)(struct intel_memory_region *mem); + + struct drm_i915_gem_object * + (*create_object)(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); +}; + +struct intel_memory_region { + struct drm_i915_private *i915; + + const struct intel_memory_region_ops *ops; + + struct io_mapping iomap; + struct resource region; + + /* For fake LMEM */ + struct drm_mm_node fake_mappable; + + struct i915_buddy_mm mm; + struct mutex mm_lock; + + struct kref kref; + + resource_size_t io_start; + resource_size_t min_page_size; + resource_size_t total; + resource_size_t avail; + + unsigned int type; + unsigned int instance; + unsigned int id; + char name[8]; + + dma_addr_t remap_addr; + + struct { + struct mutex lock; /* Protects access to objects */ + struct list_head list; + struct list_head purgeable; + } objects; +}; + +int intel_memory_region_init_buddy(struct intel_memory_region *mem); +void intel_memory_region_release_buddy(struct intel_memory_region *mem); + +int __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags, + struct list_head *blocks); +struct i915_buddy_block * +__intel_memory_region_get_block_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); +void __intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, + struct list_head *blocks); +void __intel_memory_region_put_block_buddy(struct i915_buddy_block *block); + +struct intel_memory_region * +intel_memory_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start, + const struct intel_memory_region_ops *ops); + +struct intel_memory_region * +intel_memory_region_get(struct intel_memory_region *mem); +void intel_memory_region_put(struct intel_memory_region *mem); + +int intel_memory_regions_hw_probe(struct drm_i915_private *i915); +void intel_memory_regions_driver_release(struct drm_i915_private *i915); +struct intel_memory_region * +intel_memory_region_by_type(struct drm_i915_private *i915, + enum intel_memory_type mem_type); + +__printf(2, 3) void +intel_memory_region_set_name(struct intel_memory_region *mem, + const char *fmt, ...); + +#endif diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c new file mode 100644 index 
000000000000..4ed60e1f01db --- /dev/null +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2019 Intel Corporation. + */ + +#include "i915_drv.h" +#include "intel_pch.h" + +/* Map PCH device id to PCH type, or PCH_NONE if unknown. */ +static enum intel_pch +intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) +{ + switch (id) { + case INTEL_PCH_IBX_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found Ibex Peak PCH\n"); + WARN_ON(!IS_GEN(dev_priv, 5)); + return PCH_IBX; + case INTEL_PCH_CPT_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found CougarPoint PCH\n"); + WARN_ON(!IS_GEN(dev_priv, 6) && !IS_IVYBRIDGE(dev_priv)); + return PCH_CPT; + case INTEL_PCH_PPT_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found PantherPoint PCH\n"); + WARN_ON(!IS_GEN(dev_priv, 6) && !IS_IVYBRIDGE(dev_priv)); + /* PantherPoint is CPT compatible */ + return PCH_CPT; + case INTEL_PCH_LPT_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found LynxPoint PCH\n"); + WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); + WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)); + return PCH_LPT; + case INTEL_PCH_LPT_LP_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found LynxPoint LP PCH\n"); + WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); + WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv)); + return PCH_LPT; + case INTEL_PCH_WPT_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint PCH\n"); + WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); + WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)); + /* WildcatPoint is LPT compatible */ + return PCH_LPT; + case INTEL_PCH_WPT_LP_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint LP PCH\n"); + WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); + WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv)); + /* WildcatPoint is LPT compatible */ + return PCH_LPT; + case INTEL_PCH_SPT_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint PCH\n"); + WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); + return PCH_SPT; + case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint LP PCH\n"); + WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv)); + return PCH_SPT; + case INTEL_PCH_KBP_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found Kaby Lake PCH (KBP)\n"); + WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv)); + /* KBP is SPT compatible */ + return PCH_SPT; + case INTEL_PCH_CNP_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake PCH (CNP)\n"); + WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); + return PCH_CNP; + case INTEL_PCH_CNP_LP_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, + "Found Cannon Lake LP PCH (CNP-LP)\n"); + WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); + return PCH_CNP; + case INTEL_PCH_CMP_DEVICE_ID_TYPE: + case INTEL_PCH_CMP2_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found Comet Lake PCH (CMP)\n"); + WARN_ON(!IS_COFFEELAKE(dev_priv)); + /* CometPoint is CNP Compatible */ + return PCH_CNP; + case INTEL_PCH_CMP_V_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found Comet Lake V PCH (CMP-V)\n"); + WARN_ON(!IS_COFFEELAKE(dev_priv)); + /* Comet Lake V PCH is based on KBP, which is SPT compatible */ + return PCH_SPT; + case INTEL_PCH_ICP_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found Ice Lake PCH\n"); + WARN_ON(!IS_ICELAKE(dev_priv)); + return 
PCH_ICP; + case INTEL_PCH_MCC_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found Mule Creek Canyon PCH\n"); + WARN_ON(!IS_ELKHARTLAKE(dev_priv)); + return PCH_MCC; + case INTEL_PCH_TGP_DEVICE_ID_TYPE: + case INTEL_PCH_TGP2_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found Tiger Lake LP PCH\n"); + WARN_ON(!IS_TIGERLAKE(dev_priv)); + return PCH_TGP; + case INTEL_PCH_JSP_DEVICE_ID_TYPE: + case INTEL_PCH_JSP2_DEVICE_ID_TYPE: + drm_dbg_kms(&dev_priv->drm, "Found Jasper Lake PCH\n"); + WARN_ON(!IS_ELKHARTLAKE(dev_priv)); + return PCH_JSP; + default: + return PCH_NONE; + } +} + +static bool intel_is_virt_pch(unsigned short id, + unsigned short svendor, unsigned short sdevice) +{ + return (id == INTEL_PCH_P2X_DEVICE_ID_TYPE || + id == INTEL_PCH_P3X_DEVICE_ID_TYPE || + (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE && + svendor == PCI_SUBVENDOR_ID_REDHAT_QUMRANET && + sdevice == PCI_SUBDEVICE_ID_QEMU)); +} + +static unsigned short +intel_virt_detect_pch(const struct drm_i915_private *dev_priv) +{ + unsigned short id = 0; + + /* + * In a virtualized passthrough environment we can be in a + * setup where the ISA bridge is not able to be passed through. + * In this case, a south bridge can be emulated and we have to + * make an educated guess as to which PCH is really there. + */ + + if (IS_TIGERLAKE(dev_priv)) + id = INTEL_PCH_TGP_DEVICE_ID_TYPE; + else if (IS_ELKHARTLAKE(dev_priv)) + id = INTEL_PCH_MCC_DEVICE_ID_TYPE; + else if (IS_ICELAKE(dev_priv)) + id = INTEL_PCH_ICP_DEVICE_ID_TYPE; + else if (IS_CANNONLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) + id = INTEL_PCH_CNP_DEVICE_ID_TYPE; + else if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) + id = INTEL_PCH_SPT_DEVICE_ID_TYPE; + else if (IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)) + id = INTEL_PCH_LPT_LP_DEVICE_ID_TYPE; + else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + id = INTEL_PCH_LPT_DEVICE_ID_TYPE; + else if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) + id = INTEL_PCH_CPT_DEVICE_ID_TYPE; + else if (IS_GEN(dev_priv, 5)) + id = INTEL_PCH_IBX_DEVICE_ID_TYPE; + + if (id) + drm_dbg_kms(&dev_priv->drm, "Assuming PCH ID %04x\n", id); + else + drm_dbg_kms(&dev_priv->drm, "Assuming no PCH\n"); + + return id; +} + +void intel_detect_pch(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pch = NULL; + + /* + * The reason to probe ISA bridge instead of Dev31:Fun0 is to + * make graphics device passthrough work easy for VMM, that only + * need to expose ISA bridge to let driver know the real hardware + * underneath. This is a requirement from virtualization team. + * + * In some virtualized environments (e.g. XEN), there is irrelevant + * ISA bridge in the system. To work reliably, we should scan trhough + * all the ISA bridge devices and check for the first match, instead + * of only checking the first one. 
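The loop that follows only ever looks at the upper bits of the LPC/ISA bridge's PCI device id: the raw id is masked with INTEL_PCH_DEVICE_ID_MASK (0xff80) and compared against the *_DEVICE_ID_TYPE values, so one table entry covers a whole family of SKUs. A standalone illustration (the 0x9d43 id is just an example value, not taken from this patch):

#include <stdio.h>

#define INTEL_PCH_DEVICE_ID_MASK        0xff80
#define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00

int main(void)
{
        unsigned short pci_id = 0x9d43; /* example LPC bridge id */
        unsigned short id = pci_id & INTEL_PCH_DEVICE_ID_MASK;

        printf("0x%04x masks to 0x%04x -> %s\n", pci_id, id,
               id == INTEL_PCH_SPT_LP_DEVICE_ID_TYPE ? "PCH_SPT" : "unknown");
        return 0;
}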
+ */ + while ((pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, pch))) { + unsigned short id; + enum intel_pch pch_type; + + if (pch->vendor != PCI_VENDOR_ID_INTEL) + continue; + + id = pch->device & INTEL_PCH_DEVICE_ID_MASK; + + pch_type = intel_pch_type(dev_priv, id); + if (pch_type != PCH_NONE) { + dev_priv->pch_type = pch_type; + dev_priv->pch_id = id; + break; + } else if (intel_is_virt_pch(id, pch->subsystem_vendor, + pch->subsystem_device)) { + id = intel_virt_detect_pch(dev_priv); + pch_type = intel_pch_type(dev_priv, id); + + /* Sanity check virtual PCH id */ + if (WARN_ON(id && pch_type == PCH_NONE)) + id = 0; + + dev_priv->pch_type = pch_type; + dev_priv->pch_id = id; + break; + } + } + + /* + * Use PCH_NOP (PCH but no South Display) for PCH platforms without + * display. + */ + if (pch && !HAS_DISPLAY(dev_priv)) { + drm_dbg_kms(&dev_priv->drm, + "Display disabled, reverting to NOP PCH\n"); + dev_priv->pch_type = PCH_NOP; + dev_priv->pch_id = 0; + } + + if (!pch) + drm_dbg_kms(&dev_priv->drm, "No PCH found.\n"); + + pci_dev_put(pch); +} diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h new file mode 100644 index 000000000000..3053d1ce398b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2019 Intel Corporation. + */ + +#ifndef __INTEL_PCH__ +#define __INTEL_PCH__ + +struct drm_i915_private; + +/* + * Sorted by south display engine compatibility. + * If the new PCH comes with a south display engine that is not + * inherited from the latest item, please do not add it to the + * end. Instead, add it right after its "parent" PCH. + */ +enum intel_pch { + PCH_NOP = -1, /* PCH without south display */ + PCH_NONE = 0, /* No PCH present */ + PCH_IBX, /* Ibexpeak PCH */ + PCH_CPT, /* Cougarpoint/Pantherpoint PCH */ + PCH_LPT, /* Lynxpoint/Wildcatpoint PCH */ + PCH_SPT, /* Sunrisepoint/Kaby Lake PCH */ + PCH_CNP, /* Cannon/Comet Lake PCH */ + PCH_ICP, /* Ice Lake PCH */ + PCH_JSP, /* Jasper Lake PCH */ + PCH_MCC, /* Mule Creek Canyon PCH */ + PCH_TGP, /* Tiger Lake PCH */ +}; + +#define INTEL_PCH_DEVICE_ID_MASK 0xff80 +#define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 +#define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 +#define INTEL_PCH_PPT_DEVICE_ID_TYPE 0x1e00 +#define INTEL_PCH_LPT_DEVICE_ID_TYPE 0x8c00 +#define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE 0x9c00 +#define INTEL_PCH_WPT_DEVICE_ID_TYPE 0x8c80 +#define INTEL_PCH_WPT_LP_DEVICE_ID_TYPE 0x9c80 +#define INTEL_PCH_SPT_DEVICE_ID_TYPE 0xA100 +#define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 +#define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280 +#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 +#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 +#define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 +#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 +#define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380 +#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 +#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 +#define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080 +#define INTEL_PCH_TGP2_DEVICE_ID_TYPE 0x4380 +#define INTEL_PCH_JSP_DEVICE_ID_TYPE 0x4D80 +#define INTEL_PCH_JSP2_DEVICE_ID_TYPE 0x3880 +#define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 +#define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 +#define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ + +#define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) +#define INTEL_PCH_ID(dev_priv) ((dev_priv)->pch_id) +#define HAS_PCH_JSP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_JSP) +#define HAS_PCH_MCC(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_MCC) +#define 
HAS_PCH_TGP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_TGP) +#define HAS_PCH_ICP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) +#define HAS_PCH_CNP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CNP) +#define HAS_PCH_SPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_SPT) +#define HAS_PCH_LPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT) +#define HAS_PCH_LPT_LP(dev_priv) \ + (INTEL_PCH_ID(dev_priv) == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE || \ + INTEL_PCH_ID(dev_priv) == INTEL_PCH_WPT_LP_DEVICE_ID_TYPE) +#define HAS_PCH_LPT_H(dev_priv) \ + (INTEL_PCH_ID(dev_priv) == INTEL_PCH_LPT_DEVICE_ID_TYPE || \ + INTEL_PCH_ID(dev_priv) == INTEL_PCH_WPT_DEVICE_ID_TYPE) +#define HAS_PCH_CPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CPT) +#define HAS_PCH_IBX(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_IBX) +#define HAS_PCH_NOP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_NOP) +#define HAS_PCH_SPLIT(dev_priv) (INTEL_PCH_TYPE(dev_priv) != PCH_NONE) + +void intel_detect_pch(struct drm_i915_private *dev_priv); + +#endif /* __INTEL_PCH__ */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d9a7a13ce32a..bd2d30ecc030 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -25,7 +25,6 @@ * */ -#include <linux/cpufreq.h> #include <linux/module.h> #include <linux/pm_runtime.h> @@ -34,36 +33,19 @@ #include <drm/drm_plane_helper.h> #include "display/intel_atomic.h" +#include "display/intel_display_types.h" #include "display/intel_fbc.h" #include "display/intel_sprite.h" +#include "gt/intel_llc.h" + #include "i915_drv.h" #include "i915_irq.h" -#include "intel_drv.h" +#include "i915_trace.h" #include "intel_pm.h" #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" -/** - * DOC: RC6 - * - * RC6 is a special power stage which allows the GPU to enter an very - * low-voltage mode when idle, using down to 0V while at this stage. This - * stage is entered automatically when the GPU is idle when RC6 support is - * enabled, and as soon as new workload arises GPU wakes up automatically as well. - * - * There are different RC6 modes available in Intel GPU, which differentiate - * among each other with the latency required to enter and leave RC6 and - * voltage consumed by the GPU in different states. - * - * The combination of the following flags define which states GPU is allowed - * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and - * RC6pp is deepest RC6. Their support by hardware varies according to the - * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one - * which brings the most power savings; deeper states save more power, but - * require higher latency to switch to and wake up. - */ - static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) { if (HAS_LLC(dev_priv)) { @@ -125,6 +107,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) */ I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* + * Lower the display internal timeout. + * This is needed to avoid any hard hangs when DSI port PLL + * is off and a MMIO access is attempted by any privilege + * application, using batch buffers or any other means. 
+ */ + I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); } static void glk_init_clock_gating(struct drm_i915_private *dev_priv) @@ -150,7 +140,7 @@ static void glk_init_clock_gating(struct drm_i915_private *dev_priv) } -static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) +static void pnv_get_mem_freq(struct drm_i915_private *dev_priv) { u32 tmp; @@ -188,7 +178,7 @@ static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0; } -static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) +static void ilk_get_mem_freq(struct drm_i915_private *dev_priv) { u16 ddrpll, csipll; @@ -209,14 +199,12 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) dev_priv->mem_freq = 1600; break; default: - DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n", - ddrpll & 0xff); + drm_dbg(&dev_priv->drm, "unknown memory frequency 0x%02x\n", + ddrpll & 0xff); dev_priv->mem_freq = 0; break; } - dev_priv->ips.r_t = dev_priv->mem_freq; - switch (csipll & 0x3ff) { case 0x00c: dev_priv->fsb_freq = 3200; @@ -240,19 +228,11 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) dev_priv->fsb_freq = 6400; break; default: - DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n", - csipll & 0x3ff); + drm_dbg(&dev_priv->drm, "unknown fsb frequency 0x%04x\n", + csipll & 0x3ff); dev_priv->fsb_freq = 0; break; } - - if (dev_priv->fsb_freq == 3200) { - dev_priv->ips.c_m = 0; - } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) { - dev_priv->ips.c_m = 1; - } else { - dev_priv->ips.c_m = 2; - } } static const struct cxsr_latency cxsr_latency_table[] = { @@ -334,7 +314,8 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) - DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); + drm_err(&dev_priv->drm, + "timed out waiting for Punit DDR DVFS request\n"); vlv_punit_put(dev_priv); } @@ -403,9 +384,9 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl trace_intel_memory_cxsr(dev_priv, was_enabled, enable); - DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n", - enableddisabled(enable), - enableddisabled(was_enabled)); + drm_dbg_kms(&dev_priv->drm, "memory self-refresh is %s (was %s)\n", + enableddisabled(enable), + enableddisabled(was_enabled)); return was_enabled; } @@ -483,7 +464,7 @@ static const int pessimal_latency_ns = 5000; static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; enum pipe pipe = crtc->pipe; @@ -530,8 +511,8 @@ static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, if (i9xx_plane == PLANE_B) size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size; - DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", - dsparb, plane_name(i9xx_plane), size); + drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } @@ -547,8 +528,8 @@ static int i830_get_fifo_size(struct drm_i915_private *dev_priv, size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size; size >>= 1; /* Convert to cachelines */ - DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", - dsparb, plane_name(i9xx_plane), 
size); + drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } @@ -562,41 +543,45 @@ static int i845_get_fifo_size(struct drm_i915_private *dev_priv, size = dsparb & 0x7f; size >>= 2; /* Convert to cachelines */ - DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", - dsparb, plane_name(i9xx_plane), size); + drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } /* Pineview has different values for various configs */ -static const struct intel_watermark_params pineview_display_wm = { +static const struct intel_watermark_params pnv_display_wm = { .fifo_size = PINEVIEW_DISPLAY_FIFO, .max_wm = PINEVIEW_MAX_WM, .default_wm = PINEVIEW_DFT_WM, .guard_size = PINEVIEW_GUARD_WM, .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, }; -static const struct intel_watermark_params pineview_display_hplloff_wm = { + +static const struct intel_watermark_params pnv_display_hplloff_wm = { .fifo_size = PINEVIEW_DISPLAY_FIFO, .max_wm = PINEVIEW_MAX_WM, .default_wm = PINEVIEW_DFT_HPLLOFF_WM, .guard_size = PINEVIEW_GUARD_WM, .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, }; -static const struct intel_watermark_params pineview_cursor_wm = { + +static const struct intel_watermark_params pnv_cursor_wm = { .fifo_size = PINEVIEW_CURSOR_FIFO, .max_wm = PINEVIEW_CURSOR_MAX_WM, .default_wm = PINEVIEW_CURSOR_DFT_WM, .guard_size = PINEVIEW_CURSOR_GUARD_WM, .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, }; -static const struct intel_watermark_params pineview_cursor_hplloff_wm = { + +static const struct intel_watermark_params pnv_cursor_hplloff_wm = { .fifo_size = PINEVIEW_CURSOR_FIFO, .max_wm = PINEVIEW_CURSOR_MAX_WM, .default_wm = PINEVIEW_CURSOR_DFT_WM, .guard_size = PINEVIEW_CURSOR_GUARD_WM, .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i965_cursor_wm_info = { .fifo_size = I965_CURSOR_FIFO, .max_wm = I965_CURSOR_MAX_WM, @@ -604,6 +589,7 @@ static const struct intel_watermark_params i965_cursor_wm_info = { .guard_size = 2, .cacheline_size = I915_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i945_wm_info = { .fifo_size = I945_FIFO_SIZE, .max_wm = I915_MAX_WM, @@ -611,6 +597,7 @@ static const struct intel_watermark_params i945_wm_info = { .guard_size = 2, .cacheline_size = I915_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i915_wm_info = { .fifo_size = I915_FIFO_SIZE, .max_wm = I915_MAX_WM, @@ -618,6 +605,7 @@ static const struct intel_watermark_params i915_wm_info = { .guard_size = 2, .cacheline_size = I915_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i830_a_wm_info = { .fifo_size = I855GM_FIFO_SIZE, .max_wm = I915_MAX_WM, @@ -625,6 +613,7 @@ static const struct intel_watermark_params i830_a_wm_info = { .guard_size = 2, .cacheline_size = I830_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i830_bc_wm_info = { .fifo_size = I855GM_FIFO_SIZE, .max_wm = I915_MAX_WM/2, @@ -632,6 +621,7 @@ static const struct intel_watermark_params i830_bc_wm_info = { .guard_size = 2, .cacheline_size = I830_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i845_wm_info = { .fifo_size = I830_FIFO_SIZE, .max_wm = I915_MAX_WM, @@ -814,10 +804,10 @@ static int intel_wm_num_levels(struct drm_i915_private *dev_priv) static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct 
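The *_wm_info tables above are consumed by intel_calculate_wm() (not visible in this hunk). Roughly, the driver estimates how many FIFO cachelines the display drains while a memory request is outstanding and programs the watermark as the FIFO space left over, clamped to max_wm and falling back to default_wm when nothing is left. The standalone sketch below only illustrates that shape; the numbers are made up and the rounding is not the driver's.

#include <stdio.h>

struct wm_params {
        int fifo_size;
        int max_wm;
        int default_wm;
        int guard_size;
        int cacheline_size;
};

static int calc_wm(int clock_khz, int cpp, int latency_ns,
                   const struct wm_params *p)
{
        /* bytes the display fetches while the request is outstanding */
        long long bytes = (long long)clock_khz * cpp * latency_ns / 1000000;
        int entries = (int)((bytes + p->cacheline_size - 1) / p->cacheline_size) +
                      p->guard_size;
        int wm = p->fifo_size - entries;

        if (wm > p->max_wm)
                wm = p->max_wm;
        if (wm <= 0)
                wm = p->default_wm;
        return wm;
}

int main(void)
{
        const struct wm_params p = {
                .fifo_size = 512, .max_wm = 0x1ff, .default_wm = 0x3f,
                .guard_size = 2, .cacheline_size = 64,
        };

        /* 148.5 MHz pixel clock, 4 bytes per pixel, 5 us latency */
        printf("wm = %d\n", calc_wm(148500, 4, 5000, &p));
        return 0;
}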
intel_plane *plane = to_intel_plane(plane_state->uapi.plane); /* FIXME check the 'enable' instead */ - if (!crtc_state->base.active) + if (!crtc_state->hw.active) return false; /* @@ -829,9 +819,28 @@ static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state, * around this problem with the watermark code. */ if (plane->id == PLANE_CURSOR) - return plane_state->base.fb != NULL; + return plane_state->hw.fb != NULL; else - return plane_state->base.visible; + return plane_state->uapi.visible; +} + +static bool intel_crtc_active(struct intel_crtc *crtc) +{ + /* Be paranoid as we can arrive here with only partial + * state retrieved from the hardware during setup. + * + * We can ditch the adjusted_mode.crtc_clock check as soon + * as Haswell has gained clock readout/fastboot support. + * + * We can ditch the crtc->primary->state->fb check as soon as we can + * properly reconstruct framebuffers. + * + * FIXME: The intel_crtc->active here should be switched to + * crtc->state->active once we have proper CRTC states wired up + * for atomic. + */ + return crtc->active && crtc->base.primary->state->fb && + crtc->config->hw.adjusted_mode.crtc_clock; } static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv) @@ -849,7 +858,7 @@ static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv) return enabled; } -static void pineview_update_wm(struct intel_crtc *unused_crtc) +static void pnv_update_wm(struct intel_crtc *unused_crtc) { struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); struct intel_crtc *crtc; @@ -862,7 +871,8 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) dev_priv->fsb_freq, dev_priv->mem_freq); if (!latency) { - DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n"); + drm_dbg_kms(&dev_priv->drm, + "Unknown FSB/MEM found, disable CxSR\n"); intel_set_memory_cxsr(dev_priv, false); return; } @@ -870,25 +880,25 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) crtc = single_enabled_crtc(dev_priv); if (crtc) { const struct drm_display_mode *adjusted_mode = - &crtc->config->base.adjusted_mode; + &crtc->config->hw.adjusted_mode; const struct drm_framebuffer *fb = crtc->base.primary->state->fb; int cpp = fb->format->cpp[0]; int clock = adjusted_mode->crtc_clock; /* Display SR */ - wm = intel_calculate_wm(clock, &pineview_display_wm, - pineview_display_wm.fifo_size, + wm = intel_calculate_wm(clock, &pnv_display_wm, + pnv_display_wm.fifo_size, cpp, latency->display_sr); reg = I915_READ(DSPFW1); reg &= ~DSPFW_SR_MASK; reg |= FW_WM(wm, SR); I915_WRITE(DSPFW1, reg); - DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg); + drm_dbg_kms(&dev_priv->drm, "DSPFW1 register is %x\n", reg); /* cursor SR */ - wm = intel_calculate_wm(clock, &pineview_cursor_wm, - pineview_display_wm.fifo_size, + wm = intel_calculate_wm(clock, &pnv_cursor_wm, + pnv_display_wm.fifo_size, 4, latency->cursor_sr); reg = I915_READ(DSPFW3); reg &= ~DSPFW_CURSOR_SR_MASK; @@ -896,8 +906,8 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(DSPFW3, reg); /* Display HPLL off SR */ - wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm, - pineview_display_hplloff_wm.fifo_size, + wm = intel_calculate_wm(clock, &pnv_display_hplloff_wm, + pnv_display_hplloff_wm.fifo_size, cpp, latency->display_hpll_disable); reg = I915_READ(DSPFW3); reg &= ~DSPFW_HPLL_SR_MASK; @@ -905,14 +915,14 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(DSPFW3, reg); /* cursor HPLL off SR */ - wm = 
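Each DSPFW update in pnv_update_wm() is the same read-modify-write: fetch the packed register, clear one watermark field, insert the new value via FW_WM(). A self-contained illustration of that mask-and-shift pattern (the field offset and width here are arbitrary, not the real DSPFW1 layout):

#include <stdio.h>
#include <stdint.h>

#define SR_SHIFT 23                     /* arbitrary example field */
#define SR_MASK  (0x1ffu << SR_SHIFT)

int main(void)
{
        uint32_t reg = 0x0aa55a5a;      /* pretend this was read from DSPFW1 */
        uint32_t wm = 42;

        reg &= ~SR_MASK;                        /* drop the old SR watermark */
        reg |= (wm << SR_SHIFT) & SR_MASK;      /* insert the new one */

        printf("DSPFW1 <- 0x%08x\n", reg);
        return 0;
}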
intel_calculate_wm(clock, &pineview_cursor_hplloff_wm, - pineview_display_hplloff_wm.fifo_size, + wm = intel_calculate_wm(clock, &pnv_cursor_hplloff_wm, + pnv_display_hplloff_wm.fifo_size, 4, latency->cursor_hpll_disable); reg = I915_READ(DSPFW3); reg &= ~DSPFW_HPLL_CURSOR_MASK; reg |= FW_WM(wm, HPLL_CURSOR); I915_WRITE(DSPFW3, reg); - DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg); + drm_dbg_kms(&dev_priv->drm, "DSPFW3 register is %x\n", reg); intel_set_memory_cxsr(dev_priv, true); } else { @@ -1103,10 +1113,10 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, int level) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; unsigned int latency = dev_priv->wm.pri_latency[level] * 10; unsigned int clock, htotal, cpp, width, wm; @@ -1116,6 +1126,8 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state, if (!intel_wm_plane_visible(crtc_state, plane_state)) return 0; + cpp = plane_state->hw.fb->format->cpp[0]; + /* * Not 100% sure which way ELK should go here as the * spec only says CL/CTG should assume 32bpp and BW @@ -1129,17 +1141,12 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state, */ if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY && level != G4X_WM_LEVEL_NORMAL) - cpp = 4; - else - cpp = plane_state->base.fb->format->cpp[0]; + cpp = max(cpp, 4u); clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; - if (plane->id == PLANE_CURSOR) - width = plane_state->base.crtc_w; - else - width = drm_rect_width(&plane_state->base.dst); + width = drm_rect_width(&plane_state->uapi.dst); if (plane->id == PLANE_CURSOR) { wm = intel_wm_method2(clock, htotal, width, cpp, latency); @@ -1166,7 +1173,7 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state, static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state, int level, enum plane_id plane_id, u16 value) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); bool dirty = false; for (; level < intel_wm_num_levels(dev_priv); level++) { @@ -1182,7 +1189,7 @@ static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state, static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state, int level, u16 value) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); bool dirty = false; /* NORMAL level doesn't have an FBC watermark */ @@ -1198,14 +1205,15 @@ static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state, return dirty; } -static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, +static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, u32 pri_val); static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); int num_levels = 
intel_wm_num_levels(to_i915(plane->base.dev)); enum plane_id plane_id = plane->id; bool dirty = false; @@ -1258,16 +1266,18 @@ static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, out: if (dirty) { - DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n", - plane->base.name, - crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id], - crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id], - crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]); + drm_dbg_kms(&dev_priv->drm, + "%s watermarks: normal=%d, SR=%d, HPLL=%d\n", + plane->base.name, + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id], + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id], + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]); if (plane_id == PLANE_PRIMARY) - DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n", - crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc, - crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc); + drm_dbg_kms(&dev_priv->drm, + "FBC watermarks: SR=%d, HPLL=%d\n", + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc, + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc); } return dirty; @@ -1284,7 +1294,7 @@ static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); if (level > dev_priv->wm.max_level) return false; @@ -1322,12 +1332,12 @@ static void g4x_invalidate_wms(struct intel_crtc *crtc, static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_atomic_state *state = - to_intel_atomic_state(crtc_state->base.state); + to_intel_atomic_state(crtc_state->uapi.state); struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal; - int num_active_planes = hweight32(crtc_state->active_planes & - ~BIT(PLANE_CURSOR)); + int num_active_planes = hweight8(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); const struct g4x_pipe_wm *raw; const struct intel_plane_state *old_plane_state; const struct intel_plane_state *new_plane_state; @@ -1339,8 +1349,8 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { - if (new_plane_state->base.crtc != &crtc->base && - old_plane_state->base.crtc != &crtc->base) + if (new_plane_state->hw.crtc != &crtc->base && + old_plane_state->hw.crtc != &crtc->base) continue; if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state)) @@ -1411,17 +1421,17 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate; const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal; struct intel_atomic_state *intel_state = - to_intel_atomic_state(new_crtc_state->base.state); + to_intel_atomic_state(new_crtc_state->uapi.state); const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(intel_state, crtc); const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal; enum plane_id plane_id; - if 
(!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) { + if (!new_crtc_state->hw.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) { *intermediate = *optimal; intermediate->cxsr = false; @@ -1489,7 +1499,7 @@ static void g4x_merge_wm(struct drm_i915_private *dev_priv, struct g4x_wm_values *wm) { struct intel_crtc *crtc; - int num_active_crtcs = 0; + int num_active_pipes = 0; wm->cxsr = true; wm->hpll_en = true; @@ -1508,10 +1518,10 @@ static void g4x_merge_wm(struct drm_i915_private *dev_priv, if (!wm_state->fbc_en) wm->fbc_en = false; - num_active_crtcs++; + num_active_pipes++; } - if (num_active_crtcs != 1) { + if (num_active_pipes != 1) { wm->cxsr = false; wm->hpll_en = false; wm->fbc_en = false; @@ -1551,10 +1561,11 @@ static void g4x_program_watermarks(struct drm_i915_private *dev_priv) } static void g4x_initial_watermarks(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); mutex_lock(&dev_priv->wm.wm_mutex); crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate; @@ -1563,16 +1574,17 @@ static void g4x_initial_watermarks(struct intel_atomic_state *state, } static void g4x_optimize_watermarks(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); if (!crtc_state->wm.need_postvbl_update) return; mutex_lock(&dev_priv->wm.wm_mutex); - intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal; + crtc->wm.active.g4x = crtc_state->wm.g4x.optimal; g4x_program_watermarks(dev_priv); mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -1612,10 +1624,10 @@ static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, int level) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; unsigned int clock, htotal, cpp, width, wm; if (dev_priv->wm.pri_latency[level] == 0) @@ -1624,7 +1636,7 @@ static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, if (!intel_wm_plane_visible(crtc_state, plane_state)) return 0; - cpp = plane_state->base.fb->format->cpp[0]; + cpp = plane_state->hw.fb->format->cpp[0]; clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; width = crtc_state->pipe_src_w; @@ -1653,12 +1665,12 @@ static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes) static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; unsigned int 
active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); - int num_active_planes = hweight32(active_planes); + int num_active_planes = hweight8(active_planes); const int fifo_size = 511; int fifo_extra, fifo_left = fifo_size; int sprite0_fifo_extra = 0; @@ -1765,7 +1777,7 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, int level, enum plane_id plane_id, u16 value) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); int num_levels = intel_wm_num_levels(dev_priv); bool dirty = false; @@ -1782,7 +1794,8 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); enum plane_id plane_id = plane->id; int num_levels = intel_wm_num_levels(to_i915(plane->base.dev)); int level; @@ -1810,11 +1823,12 @@ static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, out: if (dirty) - DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n", - plane->base.name, - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); + drm_dbg_kms(&dev_priv->drm, + "%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n", + plane->base.name, + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); return dirty; } @@ -1840,16 +1854,16 @@ static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_atomic_state *state = - to_intel_atomic_state(crtc_state->base.state); + to_intel_atomic_state(crtc_state->uapi.state); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; const struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; - int num_active_planes = hweight32(crtc_state->active_planes & - ~BIT(PLANE_CURSOR)); - bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base); + int num_active_planes = hweight8(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); + bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->uapi); const struct intel_plane_state *old_plane_state; const struct intel_plane_state *new_plane_state; struct intel_plane *plane; @@ -1860,8 +1874,8 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { - if (new_plane_state->base.crtc != &crtc->base && - old_plane_state->base.crtc != &crtc->base) + if (new_plane_state->hw.crtc != &crtc->base && + old_plane_state->hw.crtc != &crtc->base) continue; if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state)) @@ -1908,7 +1922,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) for (level = 0; level < wm_state->num_levels; 
level++) { const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; - const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; + const int sr_fifo_size = INTEL_NUM_PIPES(dev_priv) * 512 - 1; if (!vlv_raw_crtc_wm_is_valid(crtc_state, level)) break; @@ -1946,11 +1960,12 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV) static void vlv_atomic_update_fifo(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_uncore *uncore = &dev_priv->uncore; + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); const struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; int sprite0_start, sprite1_start, fifo_size; @@ -2044,17 +2059,17 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate; const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal; struct intel_atomic_state *intel_state = - to_intel_atomic_state(new_crtc_state->base.state); + to_intel_atomic_state(new_crtc_state->uapi.state); const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(intel_state, crtc); const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal; int level; - if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) { + if (!new_crtc_state->hw.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) { *intermediate = *optimal; intermediate->cxsr = false; @@ -2097,7 +2112,7 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { struct intel_crtc *crtc; - int num_active_crtcs = 0; + int num_active_pipes = 0; wm->level = dev_priv->wm.max_level; wm->cxsr = true; @@ -2111,14 +2126,14 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, if (!wm_state->cxsr) wm->cxsr = false; - num_active_crtcs++; + num_active_pipes++; wm->level = min_t(int, wm->level, wm_state->num_levels - 1); } - if (num_active_crtcs != 1) + if (num_active_pipes != 1) wm->cxsr = false; - if (num_active_crtcs > 1) + if (num_active_pipes > 1) wm->level = VLV_WM_LEVEL_PM2; for_each_intel_crtc(&dev_priv->drm, crtc) { @@ -2170,10 +2185,11 @@ static void vlv_program_watermarks(struct drm_i915_private *dev_priv) } static void vlv_initial_watermarks(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); mutex_lock(&dev_priv->wm.wm_mutex); crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate; @@ -2182,16 +2198,17 @@ static void vlv_initial_watermarks(struct intel_atomic_state *state, } static void vlv_optimize_watermarks(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) + struct intel_crtc *crtc) { - struct drm_i915_private 
*dev_priv = to_i915(crtc_state->base.crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); if (!crtc_state->wm.need_postvbl_update) return; mutex_lock(&dev_priv->wm.wm_mutex); - intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; vlv_program_watermarks(dev_priv); mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -2210,7 +2227,7 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) /* self-refresh has much higher latency */ static const int sr_latency_ns = 12000; const struct drm_display_mode *adjusted_mode = - &crtc->config->base.adjusted_mode; + &crtc->config->hw.adjusted_mode; const struct drm_framebuffer *fb = crtc->base.primary->state->fb; int clock = adjusted_mode->crtc_clock; @@ -2226,8 +2243,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) if (srwm < 0) srwm = 1; srwm &= 0x1ff; - DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n", - entries, srwm); + drm_dbg_kms(&dev_priv->drm, + "self-refresh entries: %d, wm: %d\n", + entries, srwm); entries = intel_wm_method2(clock, htotal, crtc->base.cursor->state->crtc_w, 4, @@ -2240,8 +2258,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) if (cursor_sr > i965_cursor_wm_info.max_wm) cursor_sr = i965_cursor_wm_info.max_wm; - DRM_DEBUG_KMS("self-refresh watermark: display plane %d " - "cursor %d\n", srwm, cursor_sr); + drm_dbg_kms(&dev_priv->drm, + "self-refresh watermark: display plane %d " + "cursor %d\n", srwm, cursor_sr); cxsr_enabled = true; } else { @@ -2250,8 +2269,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) intel_set_memory_cxsr(dev_priv, false); } - DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n", - srwm); + drm_dbg_kms(&dev_priv->drm, + "Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n", + srwm); /* 965 has limitations... 
*/ I915_WRITE(DSPFW1, FW_WM(srwm, SR) | @@ -2291,7 +2311,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = - &crtc->config->base.adjusted_mode; + &crtc->config->hw.adjusted_mode; const struct drm_framebuffer *fb = crtc->base.primary->state->fb; int cpp; @@ -2318,7 +2338,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = - &crtc->config->base.adjusted_mode; + &crtc->config->hw.adjusted_mode; const struct drm_framebuffer *fb = crtc->base.primary->state->fb; int cpp; @@ -2341,7 +2361,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) planeb_wm = wm_info->max_wm; } - DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm); + drm_dbg_kms(&dev_priv->drm, + "FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm); if (IS_I915GM(dev_priv) && enabled) { struct drm_i915_gem_object *obj; @@ -2366,7 +2387,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) /* self-refresh has much higher latency */ static const int sr_latency_ns = 6000; const struct drm_display_mode *adjusted_mode = - &enabled->config->base.adjusted_mode; + &enabled->config->hw.adjusted_mode; const struct drm_framebuffer *fb = enabled->base.primary->state->fb; int clock = adjusted_mode->crtc_clock; @@ -2383,7 +2404,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) entries = intel_wm_method2(clock, htotal, hdisplay, cpp, sr_latency_ns / 100); entries = DIV_ROUND_UP(entries, wm_info->cacheline_size); - DRM_DEBUG_KMS("self-refresh entries: %d\n", entries); + drm_dbg_kms(&dev_priv->drm, + "self-refresh entries: %d\n", entries); srwm = wm_info->fifo_size - entries; if (srwm < 0) srwm = 1; @@ -2395,8 +2417,9 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(FW_BLC_SELF, srwm & 0x3f); } - DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", - planea_wm, planeb_wm, cwm, srwm); + drm_dbg_kms(&dev_priv->drm, + "Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", + planea_wm, planeb_wm, cwm, srwm); fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f); fwater_hi = (cwm & 0x1f); @@ -2424,7 +2447,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) if (crtc == NULL) return; - adjusted_mode = &crtc->config->base.adjusted_mode; + adjusted_mode = &crtc->config->hw.adjusted_mode; planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, &i845_wm_info, dev_priv->display.get_fifo_size(dev_priv, PLANE_A), @@ -2432,7 +2455,8 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) fwater_lo = I915_READ(FW_BLC) & ~0xfff; fwater_lo |= (3<<8) | planea_wm; - DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm); + drm_dbg_kms(&dev_priv->drm, + "Setting FIFO watermarks - A: %d\n", planea_wm); I915_WRITE(FW_BLC, fwater_lo); } @@ -2493,8 +2517,8 @@ struct ilk_wm_maximums { * For both WM_PIPE and WM_LP. * mem_value must be in 0.1us units. 
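The ilk_compute_pri_wm() change that follows keeps the existing scheme: two independent estimates of the FIFO footprint are computed and the smaller one wins. Method 1 is a pure bandwidth bound (bytes fetched during the memory latency); method 2 also accounts for line-buffered fetching via crtc_htotal and the plane width. The standalone sketch below mirrors that min-of-two shape with latency given in 0.1 us units as in the comment above; the exact scaling and rounding are simplified, not the driver's.

#include <stdio.h>

static unsigned int method1(unsigned int pixel_rate_khz, unsigned int cpp,
                            unsigned int latency)
{
        /* bytes fetched during the latency window */
        return (unsigned long long)pixel_rate_khz * cpp * latency / 10000;
}

static unsigned int method2(unsigned int pixel_rate_khz, unsigned int htotal,
                            unsigned int width, unsigned int cpp,
                            unsigned int latency)
{
        /* whole lines started during the latency window, times bytes per line */
        unsigned int lines = (unsigned long long)pixel_rate_khz * latency /
                             (htotal * 10000ull) + 1;
        return lines * width * cpp;
}

int main(void)
{
        /* 148.5 MHz pixel clock, 2200 px htotal, 1920 px wide plane, 32bpp, 5 us */
        unsigned int m1 = method1(148500, 4, 50);
        unsigned int m2 = method2(148500, 2200, 1920, 4, 50);

        printf("method1=%u method2=%u -> wm=%u\n", m1, m2, m1 < m2 ? m1 : m2);
        return 0;
}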
*/ -static u32 ilk_compute_pri_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, +static u32 ilk_compute_pri_wm(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, u32 mem_value, bool is_lp) { u32 method1, method2; @@ -2503,19 +2527,19 @@ static u32 ilk_compute_pri_wm(const struct intel_crtc_state *cstate, if (mem_value == 0) return U32_MAX; - if (!intel_wm_plane_visible(cstate, pstate)) + if (!intel_wm_plane_visible(crtc_state, plane_state)) return 0; - cpp = pstate->base.fb->format->cpp[0]; + cpp = plane_state->hw.fb->format->cpp[0]; - method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); + method1 = ilk_wm_method1(crtc_state->pixel_rate, cpp, mem_value); if (!is_lp) return method1; - method2 = ilk_wm_method2(cstate->pixel_rate, - cstate->base.adjusted_mode.crtc_htotal, - drm_rect_width(&pstate->base.dst), + method2 = ilk_wm_method2(crtc_state->pixel_rate, + crtc_state->hw.adjusted_mode.crtc_htotal, + drm_rect_width(&plane_state->uapi.dst), cpp, mem_value); return min(method1, method2); @@ -2525,8 +2549,8 @@ static u32 ilk_compute_pri_wm(const struct intel_crtc_state *cstate, * For both WM_PIPE and WM_LP. * mem_value must be in 0.1us units. */ -static u32 ilk_compute_spr_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, +static u32 ilk_compute_spr_wm(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, u32 mem_value) { u32 method1, method2; @@ -2535,15 +2559,15 @@ static u32 ilk_compute_spr_wm(const struct intel_crtc_state *cstate, if (mem_value == 0) return U32_MAX; - if (!intel_wm_plane_visible(cstate, pstate)) + if (!intel_wm_plane_visible(crtc_state, plane_state)) return 0; - cpp = pstate->base.fb->format->cpp[0]; + cpp = plane_state->hw.fb->format->cpp[0]; - method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); - method2 = ilk_wm_method2(cstate->pixel_rate, - cstate->base.adjusted_mode.crtc_htotal, - drm_rect_width(&pstate->base.dst), + method1 = ilk_wm_method1(crtc_state->pixel_rate, cpp, mem_value); + method2 = ilk_wm_method2(crtc_state->pixel_rate, + crtc_state->hw.adjusted_mode.crtc_htotal, + drm_rect_width(&plane_state->uapi.dst), cpp, mem_value); return min(method1, method2); } @@ -2552,8 +2576,8 @@ static u32 ilk_compute_spr_wm(const struct intel_crtc_state *cstate, * For both WM_PIPE and WM_LP. * mem_value must be in 0.1us units. */ -static u32 ilk_compute_cur_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, +static u32 ilk_compute_cur_wm(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, u32 mem_value) { int cpp; @@ -2561,29 +2585,31 @@ static u32 ilk_compute_cur_wm(const struct intel_crtc_state *cstate, if (mem_value == 0) return U32_MAX; - if (!intel_wm_plane_visible(cstate, pstate)) + if (!intel_wm_plane_visible(crtc_state, plane_state)) return 0; - cpp = pstate->base.fb->format->cpp[0]; + cpp = plane_state->hw.fb->format->cpp[0]; - return ilk_wm_method2(cstate->pixel_rate, - cstate->base.adjusted_mode.crtc_htotal, - pstate->base.crtc_w, cpp, mem_value); + return ilk_wm_method2(crtc_state->pixel_rate, + crtc_state->hw.adjusted_mode.crtc_htotal, + drm_rect_width(&plane_state->uapi.dst), + cpp, mem_value); } /* Only for WM_LP. 
*/ -static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, +static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, u32 pri_val) { int cpp; - if (!intel_wm_plane_visible(cstate, pstate)) + if (!intel_wm_plane_visible(crtc_state, plane_state)) return 0; - cpp = pstate->base.fb->format->cpp[0]; + cpp = plane_state->hw.fb->format->cpp[0]; - return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp); + return ilk_wm_fbc(pri_val, drm_rect_width(&plane_state->uapi.dst), + cpp); } static unsigned int @@ -2647,7 +2673,7 @@ static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv, /* HSW allows LP1+ watermarks even with multiple pipes */ if (level == 0 || config->num_pipes_active > 1) { - fifo_size /= INTEL_INFO(dev_priv)->num_pipes; + fifo_size /= INTEL_NUM_PIPES(dev_priv); /* * For some reason the non self refresh @@ -2752,7 +2778,7 @@ static bool ilk_validate_wm_level(int level, static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, const struct intel_crtc *intel_crtc, int level, - struct intel_crtc_state *cstate, + struct intel_crtc_state *crtc_state, const struct intel_plane_state *pristate, const struct intel_plane_state *sprstate, const struct intel_plane_state *curstate, @@ -2770,30 +2796,30 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, } if (pristate) { - result->pri_val = ilk_compute_pri_wm(cstate, pristate, + result->pri_val = ilk_compute_pri_wm(crtc_state, pristate, pri_latency, level); - result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val); + result->fbc_val = ilk_compute_fbc_wm(crtc_state, pristate, result->pri_val); } if (sprstate) - result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency); + result->spr_val = ilk_compute_spr_wm(crtc_state, sprstate, spr_latency); if (curstate) - result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency); + result->cur_val = ilk_compute_cur_wm(crtc_state, curstate, cur_latency); result->enable = true; } static u32 -hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) +hsw_compute_linetime_wm(const struct intel_crtc_state *crtc_state) { const struct intel_atomic_state *intel_state = - to_intel_atomic_state(cstate->base.state); + to_intel_atomic_state(crtc_state->uapi.state); const struct drm_display_mode *adjusted_mode = - &cstate->base.adjusted_mode; + &crtc_state->hw.adjusted_mode; u32 linetime, ips_linetime; - if (!cstate->base.active) + if (!crtc_state->hw.active) return 0; if (WARN_ON(adjusted_mode->crtc_clock == 0)) return 0; @@ -2829,7 +2855,8 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, &val, NULL); if (ret) { - DRM_ERROR("SKL Mailbox read error = %d\n", ret); + drm_err(&dev_priv->drm, + "SKL Mailbox read error = %d\n", ret); return; } @@ -2847,7 +2874,8 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val, NULL); if (ret) { - DRM_ERROR("SKL Mailbox read error = %d\n", ret); + drm_err(&dev_priv->drm, + "SKL Mailbox read error = %d\n", ret); return; } @@ -2965,8 +2993,9 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv, unsigned int latency = wm[level]; if (latency == 0) { - DRM_DEBUG_KMS("%s WM%d latency not provided\n", - name, level); + drm_dbg_kms(&dev_priv->drm, + "%s WM%d latency not provided\n", + name, level); continue; } @@ -2979,9 +3008,9 @@ static void intel_print_wm_latency(struct 
drm_i915_private *dev_priv, else if (level > 0) latency *= 5; - DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", - name, level, wm[level], - latency / 10, latency % 10); + drm_dbg_kms(&dev_priv->drm, + "%s WM%d latency %u (%u.%u usec)\n", name, level, + wm[level], latency / 10, latency % 10); } } @@ -3015,7 +3044,8 @@ static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv) if (!changed) return; - DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n"); + drm_dbg_kms(&dev_priv->drm, + "WM latency values increased to avoid potential underruns\n"); intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); @@ -3043,7 +3073,8 @@ static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv) dev_priv->wm.spr_latency[3] = 0; dev_priv->wm.cur_latency[3] = 0; - DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n"); + drm_dbg_kms(&dev_priv->drm, + "LP3 watermarks disabled due to potential for lost interrupts\n"); intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); @@ -3093,7 +3124,7 @@ static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv, /* At least LP0 must be valid */ if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) { - DRM_DEBUG_KMS("LP0 watermark invalid\n"); + drm_dbg_kms(&dev_priv->drm, "LP0 watermark invalid\n"); return false; } @@ -3101,40 +3132,36 @@ static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv, } /* Compute new watermarks for the pipe */ -static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) +static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) { - struct drm_atomic_state *state = cstate->base.state; - struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_pipe_wm *pipe_wm; - struct drm_device *dev = state->dev; - const struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_plane *plane; - const struct drm_plane_state *plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; const struct intel_plane_state *pristate = NULL; const struct intel_plane_state *sprstate = NULL; const struct intel_plane_state *curstate = NULL; int level, max_level = ilk_wm_max_level(dev_priv), usable_level; struct ilk_wm_maximums max; - pipe_wm = &cstate->wm.ilk.optimal; - - drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) { - const struct intel_plane_state *ps = to_intel_plane_state(plane_state); + pipe_wm = &crtc_state->wm.ilk.optimal; - if (plane->type == DRM_PLANE_TYPE_PRIMARY) - pristate = ps; - else if (plane->type == DRM_PLANE_TYPE_OVERLAY) - sprstate = ps; - else if (plane->type == DRM_PLANE_TYPE_CURSOR) - curstate = ps; + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) + pristate = plane_state; + else if (plane->base.type == DRM_PLANE_TYPE_OVERLAY) + sprstate = plane_state; + else if (plane->base.type == DRM_PLANE_TYPE_CURSOR) + curstate = plane_state; } - pipe_wm->pipe_enabled = cstate->base.active; + pipe_wm->pipe_enabled = 
crtc_state->hw.active; if (sprstate) { - pipe_wm->sprites_enabled = sprstate->base.visible; - pipe_wm->sprites_scaled = sprstate->base.visible && - (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 || - drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16); + pipe_wm->sprites_enabled = sprstate->uapi.visible; + pipe_wm->sprites_scaled = sprstate->uapi.visible && + (drm_rect_width(&sprstate->uapi.dst) != drm_rect_width(&sprstate->uapi.src) >> 16 || + drm_rect_height(&sprstate->uapi.dst) != drm_rect_height(&sprstate->uapi.src) >> 16); } usable_level = max_level; @@ -3148,11 +3175,11 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) usable_level = 0; memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm)); - ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, + ilk_compute_wm_level(dev_priv, intel_crtc, 0, crtc_state, pristate, sprstate, curstate, &pipe_wm->wm[0]); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - pipe_wm->linetime = hsw_compute_linetime_wm(cstate); + pipe_wm->linetime = hsw_compute_linetime_wm(crtc_state); if (!ilk_validate_pipe_wm(dev_priv, pipe_wm)) return -EINVAL; @@ -3162,7 +3189,7 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) for (level = 1; level <= usable_level; level++) { struct intel_wm_level *wm = &pipe_wm->wm[level]; - ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, + ilk_compute_wm_level(dev_priv, intel_crtc, level, crtc_state, pristate, sprstate, curstate, wm); /* @@ -3186,11 +3213,11 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) */ static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate) { - struct intel_crtc *intel_crtc = to_intel_crtc(newstate->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(newstate->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; struct intel_atomic_state *intel_state = - to_intel_atomic_state(newstate->base.state); + to_intel_atomic_state(newstate->uapi.state); const struct intel_crtc_state *oldstate = intel_atomic_get_old_crtc_state(intel_state, intel_crtc); const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal; @@ -3202,7 +3229,7 @@ static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate) * and after the vblank. */ *a = newstate->wm.ilk.optimal; - if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base) || + if (!newstate->hw.active || drm_atomic_crtc_needs_modeset(&newstate->uapi) || intel_state->skip_intermediate_wm) return 0; @@ -3612,10 +3639,8 @@ static void ilk_write_wm_values(struct drm_i915_private *dev_priv, dev_priv->wm.hw = *results; } -bool ilk_disable_lp_wm(struct drm_device *dev) +bool ilk_disable_lp_wm(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } @@ -3653,10 +3678,47 @@ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) static bool intel_has_sagv(struct drm_i915_private *dev_priv) { + /* HACK! 
*/ + if (IS_GEN(dev_priv, 12)) + return false; + return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) && dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED; } +static void +skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) >= 12) { + u32 val = 0; + int ret; + + ret = sandybridge_pcode_read(dev_priv, + GEN12_PCODE_READ_SAGV_BLOCK_TIME_US, + &val, NULL); + if (!ret) { + dev_priv->sagv_block_time_us = val; + return; + } + + drm_dbg(&dev_priv->drm, "Couldn't read SAGV block time!\n"); + } else if (IS_GEN(dev_priv, 11)) { + dev_priv->sagv_block_time_us = 10; + return; + } else if (IS_GEN(dev_priv, 10)) { + dev_priv->sagv_block_time_us = 20; + return; + } else if (IS_GEN(dev_priv, 9)) { + dev_priv->sagv_block_time_us = 30; + return; + } else { + MISSING_CASE(INTEL_GEN(dev_priv)); + } + + /* Default to an unusable block time */ + dev_priv->sagv_block_time_us = -1; +} + /* * SAGV dynamically adjusts the system agent voltage and clock frequencies * depending on power and performance requirements. The display engine access @@ -3679,7 +3741,7 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_ENABLED) return 0; - DRM_DEBUG_KMS("Enabling SAGV\n"); + drm_dbg_kms(&dev_priv->drm, "Enabling SAGV\n"); ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); @@ -3690,11 +3752,11 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { - DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + drm_dbg(&dev_priv->drm, "No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to enable SAGV\n"); + drm_err(&dev_priv->drm, "Failed to enable SAGV\n"); return ret; } @@ -3713,7 +3775,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_DISABLED) return 0; - DRM_DEBUG_KMS("Disabling SAGV\n"); + drm_dbg_kms(&dev_priv->drm, "Disabling SAGV\n"); /* bspec says to keep retrying for at least 1 ms */ ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_DISABLE, @@ -3724,11 +3786,11 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) * don't actually have SAGV. 
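The skl_setup_sagv_block_time() helper added in this hunk caches the SAGV block time once per device instead of recomputing it on every atomic check: gen12+ asks pcode for the value, older generations use fixed per-generation constants, and anything unknown ends up with an unusable default. A minimal standalone sketch of that selection, with the pcode mailbox replaced by a stub (this is not the i915 API, just an illustration):

#include <stdint.h>

/* Stand-in for the GEN12_PCODE_READ_SAGV_BLOCK_TIME_US mailbox query;
 * purely illustrative, not the driver's interface. */
static int read_sagv_block_time(uint32_t *val)
{
        *val = 0;
        return -1;              /* pretend firmware did not answer */
}

/* Per-generation SAGV block time in microseconds, -1 if unusable. */
static int sagv_block_time_us(int gen)
{
        if (gen >= 12) {
                uint32_t val;

                if (read_sagv_block_time(&val) == 0)
                        return (int)val;        /* firmware-provided value */
                return -1;                      /* read failed: treat as unusable */
        }
        if (gen == 11)
                return 10;
        if (gen == 10)
                return 20;
        if (gen == 9)
                return 30;
        return -1;                              /* unknown generation */
}

Caching the value up front lets the SAGV checks below compare against dev_priv->sagv_block_time_us directly rather than re-deriving a constant per commit.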
*/ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { - DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + drm_dbg(&dev_priv->drm, "No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to disable SAGV (%d)\n", ret); + drm_err(&dev_priv->drm, "Failed to disable SAGV (%d)\n", ret); return ret; } @@ -3736,52 +3798,43 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) return 0; } -bool intel_can_enable_sagv(struct drm_atomic_state *state) +bool intel_can_enable_sagv(struct intel_atomic_state *state) { - struct drm_device *dev = state->dev; + struct drm_device *dev = state->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct intel_crtc *crtc; struct intel_plane *plane; - struct intel_crtc_state *cstate; + struct intel_crtc_state *crtc_state; enum pipe pipe; int level, latency; - int sagv_block_time_us; if (!intel_has_sagv(dev_priv)) return false; - if (IS_GEN(dev_priv, 9)) - sagv_block_time_us = 30; - else if (IS_GEN(dev_priv, 10)) - sagv_block_time_us = 20; - else - sagv_block_time_us = 10; - /* * If there are no active CRTCs, no additional checks need be performed */ - if (hweight32(intel_state->active_crtcs) == 0) + if (hweight8(state->active_pipes) == 0) return true; /* * SKL+ workaround: bspec recommends we disable SAGV when we have * more then one pipe enabled */ - if (hweight32(intel_state->active_crtcs) > 1) + if (hweight8(state->active_pipes) > 1) return false; /* Since we're now guaranteed to only have one active CRTC... */ - pipe = ffs(intel_state->active_crtcs) - 1; + pipe = ffs(state->active_pipes) - 1; crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - cstate = to_intel_crtc_state(crtc->base.state); + crtc_state = to_intel_crtc_state(crtc->base.state); - if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) + if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) return false; for_each_intel_plane_on_crtc(dev, crtc, plane) { struct skl_plane_wm *wm = - &cstate->wm.skl.optimal.planes[plane->id]; + &crtc_state->wm.skl.optimal.planes[plane->id]; /* Skip this plane if it's not enabled */ if (!wm->wm[0].plane_en) @@ -3804,7 +3857,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) * incur memory latencies higher than sagv_block_time_us we * can't enable SAGV. 
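intel_can_enable_sagv() now takes the intel_atomic_state directly and compares each enabled plane's effective latency against the cached dev_priv->sagv_block_time_us. A simplified, self-contained model of that gating decision follows; plain arrays and a popcount stand in for the driver's plane state and hweight8(active_pipes):

#include <stdbool.h>
#include <stdint.h>

/*
 * Simplified SAGV gate: allow it only when at most one pipe is active,
 * the mode is not interlaced, and every enabled plane's memory latency
 * is long enough to cover the SAGV block time. A negative latency
 * marks a disabled plane here; the driver walks skl_plane_wm levels
 * instead.
 */
static bool can_enable_sagv(uint8_t active_pipes, bool interlaced,
                            const int *plane_latency_us, int nplanes,
                            int sagv_block_time_us)
{
        int i;

        if (__builtin_popcount(active_pipes) == 0)
                return true;            /* no pipes active: nothing to constrain */
        if (__builtin_popcount(active_pipes) > 1)
                return false;           /* bspec: keep SAGV off with multiple pipes */
        if (interlaced)
                return false;

        for (i = 0; i < nplanes; i++) {
                if (plane_latency_us[i] < 0)
                        continue;       /* plane not enabled */
                if (plane_latency_us[i] < sagv_block_time_us)
                        return false;   /* latency can't hide a SAGV block */
        }

        return true;
}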
*/ - if (latency < sagv_block_time_us) + if (latency < dev_priv->sagv_block_time_us) return false; } @@ -3812,7 +3865,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) } static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv, - const struct intel_crtc_state *cstate, + const struct intel_crtc_state *crtc_state, const u64 total_data_rate, const int num_active, struct skl_ddb_allocation *ddb) @@ -3826,7 +3879,7 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv, if (INTEL_GEN(dev_priv) < 11) return ddb_size - 4; /* 4 blocks for bypass path allocation */ - adjusted_mode = &cstate->base.adjusted_mode; + adjusted_mode = &crtc_state->hw.adjusted_mode; total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode); /* @@ -3849,35 +3902,34 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv, static void skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, - const struct intel_crtc_state *cstate, + const struct intel_crtc_state *crtc_state, const u64 total_data_rate, struct skl_ddb_allocation *ddb, struct skl_ddb_entry *alloc, /* out */ int *num_active /* out */) { - struct drm_atomic_state *state = cstate->base.state; + struct drm_atomic_state *state = crtc_state->uapi.state; struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_crtc *for_crtc = cstate->base.crtc; - const struct drm_crtc_state *crtc_state; - const struct drm_crtc *crtc; + struct drm_crtc *for_crtc = crtc_state->uapi.crtc; + const struct intel_crtc *crtc; u32 pipe_width = 0, total_width = 0, width_before_pipe = 0; enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe; u16 ddb_size; u32 i; - if (WARN_ON(!state) || !cstate->base.active) { + if (WARN_ON(!state) || !crtc_state->hw.active) { alloc->start = 0; alloc->end = 0; - *num_active = hweight32(dev_priv->active_crtcs); + *num_active = hweight8(dev_priv->active_pipes); return; } if (intel_state->active_pipe_changes) - *num_active = hweight32(intel_state->active_crtcs); + *num_active = hweight8(intel_state->active_pipes); else - *num_active = hweight32(dev_priv->active_crtcs); + *num_active = hweight8(dev_priv->active_pipes); - ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate, + ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate, *num_active, ddb); /* @@ -3902,16 +3954,15 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, * framebuffer, So instead of allocating DDB equally among pipes * distribute DDB based on resolution/width of the display. 
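skl_ddb_get_pipe_allocation_limits(), reworked above to iterate intel_crtc_state and use hweight8(active_pipes), sums the hdisplay of every enabled pipe and hands each pipe a DDB slice proportional to its width. A rough sketch of that kind of width-proportional carve-up (types and field names are illustrative, not the driver's):

#include <stdint.h>

struct ddb_entry {
        uint16_t start;
        uint16_t end;
};

/*
 * Width-proportional DDB slice: a pipe preceded by width_before_pipe
 * pixels of other active pipes, out of total_width pixels overall,
 * gets the matching fraction of the ddb_size blocks.
 */
static struct ddb_entry ddb_slice(uint16_t ddb_size,
                                  uint32_t width_before_pipe,
                                  uint32_t pipe_width,
                                  uint32_t total_width)
{
        struct ddb_entry alloc = { 0, 0 };

        if (total_width == 0)
                return alloc;           /* no active pipes: empty slice */

        alloc.start = ddb_size * width_before_pipe / total_width;
        alloc.end = ddb_size * (width_before_pipe + pipe_width) / total_width;

        return alloc;
}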
*/ - for_each_new_crtc_in_state(state, crtc, crtc_state, i) { - const struct drm_display_mode *adjusted_mode; + for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) { + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + enum pipe pipe = crtc->pipe; int hdisplay, vdisplay; - enum pipe pipe; - if (!crtc_state->enable) + if (!crtc_state->hw.enable) continue; - pipe = to_intel_crtc(crtc)->pipe; - adjusted_mode = &crtc_state->adjusted_mode; drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay); total_width += hdisplay; @@ -3930,7 +3981,7 @@ static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state, u64 modifier, unsigned int rotation, u32 plane_pixel_rate, struct skl_wm_params *wp, int color_plane); -static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, +static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, int level, const struct skl_wm_params *wp, const struct skl_wm_level *result_prev, @@ -3940,7 +3991,7 @@ static unsigned int skl_cursor_allocation(const struct intel_crtc_state *crtc_state, int num_active) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); int level, max_level = ilk_wm_max_level(dev_priv); struct skl_wm_level wm = {}; int ret, min_ddb_alloc = 0; @@ -4007,7 +4058,8 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv, val = I915_READ(PLANE_BUF_CFG(pipe, plane_id)); val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id)); - if (is_planar_yuv_format(fourcc)) + if (fourcc && + drm_format_info_is_yuv_semiplanar(drm_format_info(fourcc))) swap(val, val2); skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val); @@ -4062,38 +4114,27 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, * Caller should take care of dividing & rounding off the value. */ static uint_fixed_16_16_t -skl_plane_downscale_amount(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate) +skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(pstate->base.plane); u32 src_w, src_h, dst_w, dst_h; uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; uint_fixed_16_16_t downscale_h, downscale_w; - if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) + if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state))) return u32_to_fixed16(0); - /* n.b., src is 16.16 fixed point, dst is whole integer */ - if (plane->id == PLANE_CURSOR) { - /* - * Cursors only support 0/180 degree rotation, - * hence no need to account for rotation here. - */ - src_w = pstate->base.src_w >> 16; - src_h = pstate->base.src_h >> 16; - dst_w = pstate->base.crtc_w; - dst_h = pstate->base.crtc_h; - } else { - /* - * Src coordinates are already rotated by 270 degrees for - * the 90/270 degree plane rotation cases (to match the - * GTT mapping), hence no need to account for rotation here. - */ - src_w = drm_rect_width(&pstate->base.src) >> 16; - src_h = drm_rect_height(&pstate->base.src) >> 16; - dst_w = drm_rect_width(&pstate->base.dst); - dst_h = drm_rect_height(&pstate->base.dst); - } + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + * + * n.b., src is 16.16 fixed point, dst is whole integer. 
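The skl_plane_downscale_amount() simplification around this point drops the cursor special case and computes one 16.16 fixed-point ratio per axis from the already-rotated source rectangle. A tiny self-contained model of that arithmetic, with minimal stand-in helpers for uint_fixed_16_16_t (assumptions, not the driver's helpers):

#include <stdint.h>

/* Minimal 16.16 fixed-point helpers standing in for uint_fixed_16_16_t
 * and its div/mul/max operations; illustrative only. */
typedef uint32_t fix16;

static fix16 fix16_one(void)                   { return 1u << 16; }
static fix16 fix16_div(uint32_t a, uint32_t b) { return (fix16)(((uint64_t)a << 16) / b); }
static fix16 fix16_mul(fix16 a, fix16 b)       { return (fix16)(((uint64_t)a * b) >> 16); }
static fix16 fix16_max(fix16 a, fix16 b)       { return a > b ? a : b; }

/*
 * Plane downscale amount: per-axis src/dst ratio, clamped to at least
 * 1.0 so upscaling never lowers the estimate, then multiplied across
 * both axes. src_w/src_h are whole pixels (already shifted down from
 * the 16.16 source rectangle); only visible planes reach this path, so
 * dst_w/dst_h are non-zero.
 */
static fix16 downscale_amount(uint32_t src_w, uint32_t src_h,
                              uint32_t dst_w, uint32_t dst_h)
{
        fix16 w = fix16_max(fix16_div(src_w, dst_w), fix16_one());
        fix16 h = fix16_max(fix16_div(src_h, dst_h), fix16_one());

        return fix16_mul(w, h);
}

The clamp to 1.0 is what keeps an upscaled plane from reporting a lower data rate than an unscaled one.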
+ */ + src_w = drm_rect_width(&plane_state->uapi.src) >> 16; + src_h = drm_rect_height(&plane_state->uapi.src) >> 16; + dst_w = drm_rect_width(&plane_state->uapi.dst); + dst_h = drm_rect_height(&plane_state->uapi.dst); fp_w_ratio = div_fixed16(src_w, dst_w); fp_h_ratio = div_fixed16(src_h, dst_h); @@ -4103,121 +4144,26 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, return mul_fixed16(downscale_w, downscale_h); } -static uint_fixed_16_16_t -skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) -{ - uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1); - - if (!crtc_state->base.enable) - return pipe_downscale; - - if (crtc_state->pch_pfit.enabled) { - u32 src_w, src_h, dst_w, dst_h; - u32 pfit_size = crtc_state->pch_pfit.size; - uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; - uint_fixed_16_16_t downscale_h, downscale_w; - - src_w = crtc_state->pipe_src_w; - src_h = crtc_state->pipe_src_h; - dst_w = pfit_size >> 16; - dst_h = pfit_size & 0xffff; - - if (!dst_w || !dst_h) - return pipe_downscale; - - fp_w_ratio = div_fixed16(src_w, dst_w); - fp_h_ratio = div_fixed16(src_h, dst_h); - downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1)); - downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1)); - - pipe_downscale = mul_fixed16(downscale_w, downscale_h); - } - - return pipe_downscale; -} - -int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, - struct intel_crtc_state *cstate) -{ - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - struct drm_crtc_state *crtc_state = &cstate->base; - struct drm_atomic_state *state = crtc_state->state; - struct drm_plane *plane; - const struct drm_plane_state *pstate; - struct intel_plane_state *intel_pstate; - int crtc_clock, dotclk; - u32 pipe_max_pixel_rate; - uint_fixed_16_16_t pipe_downscale; - uint_fixed_16_16_t max_downscale = u32_to_fixed16(1); - - if (!cstate->base.enable) - return 0; - - drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { - uint_fixed_16_16_t plane_downscale; - uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8); - int bpp; - - if (!intel_wm_plane_visible(cstate, - to_intel_plane_state(pstate))) - continue; - - if (WARN_ON(!pstate->fb)) - return -EINVAL; - - intel_pstate = to_intel_plane_state(pstate); - plane_downscale = skl_plane_downscale_amount(cstate, - intel_pstate); - bpp = pstate->fb->format->cpp[0] * 8; - if (bpp == 64) - plane_downscale = mul_fixed16(plane_downscale, - fp_9_div_8); - - max_downscale = max_fixed16(plane_downscale, max_downscale); - } - pipe_downscale = skl_pipe_downscale_amount(cstate); - - pipe_downscale = mul_fixed16(pipe_downscale, max_downscale); - - crtc_clock = crtc_state->adjusted_mode.crtc_clock; - dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk; - - if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) - dotclk *= 2; - - pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale); - - if (pipe_max_pixel_rate < crtc_clock) { - DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n"); - return -EINVAL; - } - - return 0; -} - static u64 -skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, - const struct intel_plane_state *intel_pstate, - const int plane) +skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + int color_plane) { - struct intel_plane *intel_plane = - to_intel_plane(intel_pstate->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + const struct 
drm_framebuffer *fb = plane_state->hw.fb; u32 data_rate; u32 width = 0, height = 0; - struct drm_framebuffer *fb; - u32 format; uint_fixed_16_16_t down_scale_amount; u64 rate; - if (!intel_pstate->base.visible) + if (!plane_state->uapi.visible) return 0; - fb = intel_pstate->base.fb; - format = fb->format->format; - - if (intel_plane->id == PLANE_CURSOR) + if (plane->id == PLANE_CURSOR) return 0; - if (plane == 1 && !is_planar_yuv_format(format)) + + if (color_plane == 1 && + !intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier)) return 0; /* @@ -4225,55 +4171,50 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, * the 90/270 degree plane rotation cases (to match the * GTT mapping), hence no need to account for rotation here. */ - width = drm_rect_width(&intel_pstate->base.src) >> 16; - height = drm_rect_height(&intel_pstate->base.src) >> 16; + width = drm_rect_width(&plane_state->uapi.src) >> 16; + height = drm_rect_height(&plane_state->uapi.src) >> 16; /* UV plane does 1/2 pixel sub-sampling */ - if (plane == 1 && is_planar_yuv_format(format)) { + if (color_plane == 1) { width /= 2; height /= 2; } data_rate = width * height; - down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate); + down_scale_amount = skl_plane_downscale_amount(crtc_state, plane_state); rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount); - rate *= fb->format->cpp[plane]; + rate *= fb->format->cpp[color_plane]; return rate; } static u64 -skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, +skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, u64 *plane_data_rate, u64 *uv_plane_data_rate) { - struct drm_crtc_state *cstate = &intel_cstate->base; - struct drm_atomic_state *state = cstate->state; - struct drm_plane *plane; - const struct drm_plane_state *pstate; + struct drm_atomic_state *state = crtc_state->uapi.state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; u64 total_data_rate = 0; if (WARN_ON(!state)) return 0; /* Calculate and cache data rate for each plane */ - drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) { - enum plane_id plane_id = to_intel_plane(plane)->id; + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + enum plane_id plane_id = plane->id; u64 rate; - const struct intel_plane_state *intel_pstate = - to_intel_plane_state(pstate); /* packed/y */ - rate = skl_plane_relative_data_rate(intel_cstate, - intel_pstate, 0); + rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0); plane_data_rate[plane_id] = rate; total_data_rate += rate; /* uv-plane */ - rate = skl_plane_relative_data_rate(intel_cstate, - intel_pstate, 1); + rate = skl_plane_relative_data_rate(crtc_state, plane_state, 1); uv_plane_data_rate[plane_id] = rate; total_data_rate += rate; } @@ -4282,28 +4223,23 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, } static u64 -icl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, +icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, u64 *plane_data_rate) { - struct drm_crtc_state *cstate = &intel_cstate->base; - struct drm_atomic_state *state = cstate->state; - struct drm_plane *plane; - const struct drm_plane_state *pstate; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; u64 total_data_rate = 0; - if (WARN_ON(!state)) + if (WARN_ON(!crtc_state->uapi.state)) return 0; /* Calculate and cache data rate for each plane */ - 
drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) { - const struct intel_plane_state *intel_pstate = - to_intel_plane_state(pstate); - enum plane_id plane_id = to_intel_plane(plane)->id; + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + enum plane_id plane_id = plane->id; u64 rate; - if (!intel_pstate->linked_plane) { - rate = skl_plane_relative_data_rate(intel_cstate, - intel_pstate, 0); + if (!plane_state->planar_linked_plane) { + rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0); plane_data_rate[plane_id] = rate; total_data_rate += rate; } else { @@ -4311,23 +4247,21 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, /* * The slave plane might not iterate in - * drm_atomic_crtc_state_for_each_plane_state(), + * intel_atomic_crtc_state_for_each_plane_state(), * and needs the master plane state which may be * NULL if we try get_new_plane_state(), so we * always calculate from the master. */ - if (intel_pstate->slave) + if (plane_state->planar_slave) continue; /* Y plane rate is calculated on the slave */ - rate = skl_plane_relative_data_rate(intel_cstate, - intel_pstate, 0); - y_plane_id = intel_pstate->linked_plane->id; + rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0); + y_plane_id = plane_state->planar_linked_plane->id; plane_data_rate[y_plane_id] = rate; total_data_rate += rate; - rate = skl_plane_relative_data_rate(intel_cstate, - intel_pstate, 1); + rate = skl_plane_relative_data_rate(crtc_state, plane_state, 1); plane_data_rate[plane_id] = rate; total_data_rate += rate; } @@ -4337,14 +4271,14 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, } static int -skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, +skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state, struct skl_ddb_allocation *ddb /* out */) { - struct drm_atomic_state *state = cstate->base.state; - struct drm_crtc *crtc = cstate->base.crtc; + struct drm_atomic_state *state = crtc_state->uapi.state; + struct drm_crtc *crtc = crtc_state->uapi.crtc; struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; + struct skl_ddb_entry *alloc = &crtc_state->wm.skl.ddb; u16 alloc_size, start = 0; u16 total[I915_MAX_PLANES] = {}; u16 uv_total[I915_MAX_PLANES] = {}; @@ -4357,40 +4291,40 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, int level; /* Clear the partitioning for disabled planes. 
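The skl_plane_relative_data_rate() path above, and the skl/icl aggregators built on it, reduce to: visible source pixels (halved per axis for the UV plane of a semi-planar format), scaled by the downscale amount and by bytes per pixel. A rough standalone model of that calculation, with the fixed-point downscale passed in as a plain ratio for brevity:

#include <stdint.h>

/*
 * Relative data rate of one colour plane: visible source pixels (the
 * UV plane of a semi-planar format is 2x2 subsampled), times the
 * downscale amount, times bytes per pixel. Cursors contribute 0 in
 * the driver; that case is omitted here.
 */
static uint64_t plane_data_rate(uint32_t src_w, uint32_t src_h,
                                uint32_t cpp, int is_uv_plane,
                                double downscale /* >= 1.0 */)
{
        uint64_t pixels;

        if (is_uv_plane) {              /* UV plane does 1/2 pixel sub-sampling */
                src_w /= 2;
                src_h /= 2;
        }

        pixels = (uint64_t)src_w * src_h;

        return (uint64_t)(pixels * downscale) * cpp;
}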
*/ - memset(cstate->wm.skl.plane_ddb_y, 0, sizeof(cstate->wm.skl.plane_ddb_y)); - memset(cstate->wm.skl.plane_ddb_uv, 0, sizeof(cstate->wm.skl.plane_ddb_uv)); + memset(crtc_state->wm.skl.plane_ddb_y, 0, sizeof(crtc_state->wm.skl.plane_ddb_y)); + memset(crtc_state->wm.skl.plane_ddb_uv, 0, sizeof(crtc_state->wm.skl.plane_ddb_uv)); if (WARN_ON(!state)) return 0; - if (!cstate->base.active) { + if (!crtc_state->hw.active) { alloc->start = alloc->end = 0; return 0; } if (INTEL_GEN(dev_priv) >= 11) total_data_rate = - icl_get_total_relative_data_rate(cstate, + icl_get_total_relative_data_rate(crtc_state, plane_data_rate); else total_data_rate = - skl_get_total_relative_data_rate(cstate, + skl_get_total_relative_data_rate(crtc_state, plane_data_rate, uv_plane_data_rate); - skl_ddb_get_pipe_allocation_limits(dev_priv, cstate, total_data_rate, + skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state, total_data_rate, ddb, alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); if (alloc_size == 0) return 0; /* Allocate fixed number of blocks for cursor. */ - total[PLANE_CURSOR] = skl_cursor_allocation(cstate, num_active); + total[PLANE_CURSOR] = skl_cursor_allocation(crtc_state, num_active); alloc_size -= total[PLANE_CURSOR]; - cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].start = + crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR].start = alloc->end - total[PLANE_CURSOR]; - cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end; + crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end; if (total_data_rate == 0) return 0; @@ -4403,11 +4337,11 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, blocks = 0; for_each_plane_id_on_crtc(intel_crtc, plane_id) { const struct skl_plane_wm *wm = - &cstate->wm.skl.optimal.planes[plane_id]; + &crtc_state->wm.skl.optimal.planes[plane_id]; if (plane_id == PLANE_CURSOR) { - if (WARN_ON(wm->wm[level].min_ddb_alloc > - total[PLANE_CURSOR])) { + if (wm->wm[level].min_ddb_alloc > total[PLANE_CURSOR]) { + WARN_ON(wm->wm[level].min_ddb_alloc != U16_MAX); blocks = U32_MAX; break; } @@ -4425,9 +4359,10 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, } if (level < 0) { - DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations"); - DRM_DEBUG_KMS("minimum required %d/%d\n", blocks, - alloc_size); + drm_dbg_kms(&dev_priv->drm, + "Requested display configuration exceeds system DDB limitations"); + drm_dbg_kms(&dev_priv->drm, "minimum required %d/%d\n", + blocks, alloc_size); return -EINVAL; } @@ -4438,7 +4373,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, */ for_each_plane_id_on_crtc(intel_crtc, plane_id) { const struct skl_plane_wm *wm = - &cstate->wm.skl.optimal.planes[plane_id]; + &crtc_state->wm.skl.optimal.planes[plane_id]; u64 rate; u16 extra; @@ -4477,9 +4412,9 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, start = alloc->start; for_each_plane_id_on_crtc(intel_crtc, plane_id) { struct skl_ddb_entry *plane_alloc = - &cstate->wm.skl.plane_ddb_y[plane_id]; + &crtc_state->wm.skl.plane_ddb_y[plane_id]; struct skl_ddb_entry *uv_plane_alloc = - &cstate->wm.skl.plane_ddb_uv[plane_id]; + &crtc_state->wm.skl.plane_ddb_uv[plane_id]; if (plane_id == PLANE_CURSOR) continue; @@ -4510,7 +4445,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, for (level++; level <= ilk_wm_max_level(dev_priv); level++) { for_each_plane_id_on_crtc(intel_crtc, plane_id) { struct skl_plane_wm *wm = - &cstate->wm.skl.optimal.planes[plane_id]; + &crtc_state->wm.skl.optimal.planes[plane_id]; /* * We only disable the watermarks for 
each plane if @@ -4547,7 +4482,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, */ for_each_plane_id_on_crtc(intel_crtc, plane_id) { struct skl_plane_wm *wm = - &cstate->wm.skl.optimal.planes[plane_id]; + &crtc_state->wm.skl.optimal.planes[plane_id]; if (wm->trans_wm.plane_res_b >= total[plane_id]) memset(&wm->trans_wm, 0, sizeof(wm->trans_wm)); @@ -4599,43 +4534,43 @@ skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency, } static uint_fixed_16_16_t -intel_get_linetime_us(const struct intel_crtc_state *cstate) +intel_get_linetime_us(const struct intel_crtc_state *crtc_state) { u32 pixel_rate; u32 crtc_htotal; uint_fixed_16_16_t linetime_us; - if (!cstate->base.active) + if (!crtc_state->hw.active) return u32_to_fixed16(0); - pixel_rate = cstate->pixel_rate; + pixel_rate = crtc_state->pixel_rate; if (WARN_ON(pixel_rate == 0)) return u32_to_fixed16(0); - crtc_htotal = cstate->base.adjusted_mode.crtc_htotal; + crtc_htotal = crtc_state->hw.adjusted_mode.crtc_htotal; linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate); return linetime_us; } static u32 -skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate) +skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { u64 adjusted_pixel_rate; uint_fixed_16_16_t downscale_amount; /* Shouldn't reach here on disabled planes... */ - if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) + if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state))) return 0; /* * Adjusted plane pixel rate is just the pipe's adjusted pixel rate * with additional adjustments for plane-specific scaling. */ - adjusted_pixel_rate = cstate->pixel_rate; - downscale_amount = skl_plane_downscale_amount(cstate, pstate); + adjusted_pixel_rate = crtc_state->pixel_rate; + downscale_amount = skl_plane_downscale_amount(crtc_state, plane_state); return mul_round_up_u32_fixed16(adjusted_pixel_rate, downscale_amount); @@ -4648,13 +4583,15 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, u32 plane_pixel_rate, struct skl_wm_params *wp, int color_plane) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 interm_pbpl; /* only planar format has two planes */ - if (color_plane == 1 && !is_planar_yuv_format(format->format)) { - DRM_DEBUG_KMS("Non planar format have single plane\n"); + if (color_plane == 1 && + !intel_format_info_is_yuv_semiplanar(format, modifier)) { + drm_dbg_kms(&dev_priv->drm, + "Non planar format have single plane\n"); return -EINVAL; } @@ -4665,7 +4602,7 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED; wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS || modifier == I915_FORMAT_MOD_Yf_TILED_CCS; - wp->is_planar = is_planar_yuv_format(format->format); + wp->is_planar = intel_format_info_is_yuv_semiplanar(format, modifier); wp->width = width; if (color_plane == 1 && wp->is_planar) @@ -4737,24 +4674,19 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, struct skl_wm_params *wp, int color_plane) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); - const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_framebuffer *fb = plane_state->hw.fb; int width; - if (plane->id == PLANE_CURSOR) { - 
width = plane_state->base.crtc_w; - } else { - /* - * Src coordinates are already rotated by 270 degrees for - * the 90/270 degree plane rotation cases (to match the - * GTT mapping), hence no need to account for rotation here. - */ - width = drm_rect_width(&plane_state->base.src) >> 16; - } + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + */ + width = drm_rect_width(&plane_state->uapi.src) >> 16; return skl_compute_wm_params(crtc_state, width, fb->format, fb->modifier, - plane_state->base.rotation, + plane_state->hw.rotation, skl_adjusted_plane_pixel_rate(crtc_state, plane_state), wp, color_plane); } @@ -4768,13 +4700,13 @@ static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level) return level > 0; } -static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, +static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, int level, const struct skl_wm_params *wp, const struct skl_wm_level *result_prev, struct skl_wm_level *result /* out */) { - struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); u32 latency = dev_priv->wm.skl_latency[level]; uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; @@ -4800,14 +4732,14 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, wp->cpp, latency, wp->dbuf_block_size); method2 = skl_wm_method2(wp->plane_pixel_rate, - cstate->base.adjusted_mode.crtc_htotal, + crtc_state->hw.adjusted_mode.crtc_htotal, latency, wp->plane_blocks_per_line); if (wp->y_tiled) { selected_result = max_fixed16(method2, wp->y_tile_minimum); } else { - if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal / + if ((wp->cpp * crtc_state->hw.adjusted_mode.crtc_htotal / wp->dbuf_block_size < 1) && (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) { selected_result = method2; @@ -4894,18 +4826,18 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, } static void -skl_compute_wm_levels(const struct intel_crtc_state *cstate, +skl_compute_wm_levels(const struct intel_crtc_state *crtc_state, const struct skl_wm_params *wm_params, struct skl_wm_level *levels) { - struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); int level, max_level = ilk_wm_max_level(dev_priv); struct skl_wm_level *result_prev = &levels[0]; for (level = 0; level <= max_level; level++) { struct skl_wm_level *result = &levels[level]; - skl_compute_plane_wm(cstate, level, wm_params, + skl_compute_plane_wm(crtc_state, level, wm_params, result_prev, result); result_prev = result; @@ -4913,14 +4845,14 @@ skl_compute_wm_levels(const struct intel_crtc_state *cstate, } static u32 -skl_compute_linetime_wm(const struct intel_crtc_state *cstate) +skl_compute_linetime_wm(const struct intel_crtc_state *crtc_state) { - struct drm_atomic_state *state = cstate->base.state; + struct drm_atomic_state *state = crtc_state->uapi.state; struct drm_i915_private *dev_priv = to_i915(state->dev); uint_fixed_16_16_t linetime_us; u32 linetime_wm; - linetime_us = intel_get_linetime_us(cstate); + linetime_us = intel_get_linetime_us(crtc_state); linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); /* Display WA #1135: BXT:ALL GLK:ALL */ @@ -4930,11 
+4862,11 @@ skl_compute_linetime_wm(const struct intel_crtc_state *cstate) return linetime_wm; } -static void skl_compute_transition_wm(const struct intel_crtc_state *cstate, +static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state, const struct skl_wm_params *wp, struct skl_plane_wm *wm) { - struct drm_device *dev = cstate->base.crtc->dev; + struct drm_device *dev = crtc_state->uapi.crtc->dev; const struct drm_i915_private *dev_priv = to_i915(dev); u16 trans_min, trans_y_tile_min; const u16 trans_amount = 10; /* This is configurable amount */ @@ -5032,8 +4964,8 @@ static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state, static int skl_build_plane_wm(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); - const struct drm_framebuffer *fb = plane_state->base.fb; + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + const struct drm_framebuffer *fb = plane_state->hw.fb; enum plane_id plane_id = plane->id; int ret; @@ -5058,16 +4990,16 @@ static int skl_build_plane_wm(struct intel_crtc_state *crtc_state, static int icl_build_plane_wm(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - enum plane_id plane_id = to_intel_plane(plane_state->base.plane)->id; + enum plane_id plane_id = to_intel_plane(plane_state->uapi.plane)->id; int ret; /* Watermarks calculated in master */ - if (plane_state->slave) + if (plane_state->planar_slave) return 0; - if (plane_state->linked_plane) { - const struct drm_framebuffer *fb = plane_state->base.fb; - enum plane_id y_plane_id = plane_state->linked_plane->id; + if (plane_state->planar_linked_plane) { + const struct drm_framebuffer *fb = plane_state->hw.fb; + enum plane_id y_plane_id = plane_state->planar_linked_plane->id; WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state)); WARN_ON(!fb->format->is_yuv || @@ -5092,13 +5024,12 @@ static int icl_build_plane_wm(struct intel_crtc_state *crtc_state, return 0; } -static int skl_build_pipe_wm(struct intel_crtc_state *cstate) +static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); - struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; - struct drm_crtc_state *crtc_state = &cstate->base; - struct drm_plane *plane; - const struct drm_plane_state *pstate; + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; int ret; /* @@ -5107,19 +5038,18 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate) */ memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes)); - drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { - const struct intel_plane_state *intel_pstate = - to_intel_plane_state(pstate); + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, + crtc_state) { if (INTEL_GEN(dev_priv) >= 11) - ret = icl_build_plane_wm(cstate, intel_pstate); + ret = icl_build_plane_wm(crtc_state, plane_state); else - ret = skl_build_plane_wm(cstate, intel_pstate); + ret = skl_build_plane_wm(crtc_state, plane_state); if (ret) return ret; } - pipe_wm->linetime = skl_compute_linetime_wm(cstate); + pipe_wm->linetime = skl_compute_linetime_wm(crtc_state); return 0; } @@ -5269,25 +5199,12 @@ bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb, return false; } -static u32 
-pipes_modified(struct intel_atomic_state *state) -{ - struct intel_crtc *crtc; - struct intel_crtc_state *cstate; - u32 i, ret = 0; - - for_each_new_intel_crtc_in_state(state, crtc, cstate, i) - ret |= drm_crtc_mask(&crtc->base); - - return ret; -} - static int skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *new_crtc_state) { - struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->base.state); - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->uapi.state); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_plane *plane; @@ -5353,7 +5270,7 @@ skl_print_wm_changes(struct intel_atomic_state *state) struct intel_crtc *crtc; int i; - if ((drm_debug & DRM_UT_KMS) == 0) + if (!drm_debug_enabled(DRM_UT_KMS)) return; for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, @@ -5373,10 +5290,11 @@ skl_print_wm_changes(struct intel_atomic_state *state) if (skl_ddb_entry_equal(old, new)) continue; - DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n", - plane->base.base.id, plane->base.name, - old->start, old->end, new->start, new->end, - skl_ddb_entry_size(old), skl_ddb_entry_size(new)); + drm_dbg_kms(&dev_priv->drm, + "[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n", + plane->base.base.id, plane->base.name, + old->start, old->end, new->start, new->end, + skl_ddb_entry_size(old), skl_ddb_entry_size(new)); } for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { @@ -5389,104 +5307,99 @@ skl_print_wm_changes(struct intel_atomic_state *state) if (skl_plane_wm_equals(dev_priv, old_wm, new_wm)) continue; - DRM_DEBUG_KMS("[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm" - " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n", - plane->base.base.id, plane->base.name, - enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en), - enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en), - enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en), - enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en), - enast(old_wm->trans_wm.plane_en), - enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en), - enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en), - enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en), - enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en), - enast(new_wm->trans_wm.plane_en)); - - DRM_DEBUG_KMS("[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" + drm_dbg_kms(&dev_priv->drm, + "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm" + " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n", + plane->base.base.id, plane->base.name, + enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en), + enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en), + enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en), + enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en), + enast(old_wm->trans_wm.plane_en), + enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en), + enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en), + enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en), + enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en), + enast(new_wm->trans_wm.plane_en)); + + drm_dbg_kms(&dev_priv->drm, + 
"[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n", - plane->base.base.id, plane->base.name, - enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l, - enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l, - enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l, - enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l, - enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l, - enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l, - enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l, - enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l, - enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l, - - enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l, - enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l, - enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l, - enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l, - enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l, - enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l, - enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l, - enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l, - enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l); - - DRM_DEBUG_KMS("[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", - plane->base.base.id, plane->base.name, - old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b, - old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b, - old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b, - old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b, - old_wm->trans_wm.plane_res_b, - new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b, - new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b, - new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b, - new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b, - new_wm->trans_wm.plane_res_b); - - DRM_DEBUG_KMS("[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", - plane->base.base.id, plane->base.name, - old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc, - old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc, - old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc, - old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc, - old_wm->trans_wm.min_ddb_alloc, - new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc, - new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc, - new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc, - new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc, - new_wm->trans_wm.min_ddb_alloc); + plane->base.base.id, plane->base.name, + enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l, + enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l, + enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l, + enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l, + enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l, + enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l, + enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l, + enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l, + enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l, + + enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l, + enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l, + enast(new_wm->wm[2].ignore_lines), 
new_wm->wm[2].plane_res_l, + enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l, + enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l, + enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l, + enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l, + enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l, + enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l); + + drm_dbg_kms(&dev_priv->drm, + "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + plane->base.base.id, plane->base.name, + old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b, + old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b, + old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b, + old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b, + old_wm->trans_wm.plane_res_b, + new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b, + new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b, + new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b, + new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b, + new_wm->trans_wm.plane_res_b); + + drm_dbg_kms(&dev_priv->drm, + "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + plane->base.base.id, plane->base.name, + old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc, + old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc, + old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc, + old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc, + old_wm->trans_wm.min_ddb_alloc, + new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc, + new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc, + new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc, + new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc, + new_wm->trans_wm.min_ddb_alloc); } } } -static int -skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) +static int intel_add_all_pipes(struct intel_atomic_state *state) { - struct drm_device *dev = state->base.dev; - const struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; - struct intel_crtc_state *crtc_state; - u32 realloc_pipes = pipes_modified(state); - int ret, i; - /* - * When we distrust bios wm we always need to recompute to set the - * expected DDB allocations for each CRTC. - */ - if (dev_priv->wm.distrust_bios_wm) - (*changed) = true; + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; - /* - * If this transaction isn't actually touching any CRTC's, don't - * bother with watermark calculation. Note that if we pass this - * test, we're guaranteed to hold at least one CRTC state mutex, - * which means we can safely use values like dev_priv->active_crtcs - * since any racing commits that want to update them would need to - * hold _all_ CRTC state mutexes. - */ - for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) - (*changed) = true; + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } - if (!*changed) - return 0; + return 0; +} + +static int +skl_ddb_add_affected_pipes(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + int ret; /* * If this is our first atomic update following hardware readout, @@ -5495,21 +5408,21 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) * ensure a full DDB recompute. 
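The new intel_add_all_pipes() helper above replaces the old realloc_pipes mask juggling: whenever the DDB must be recomputed globally (for example when the BIOS-programmed watermarks are distrusted after hardware readout), every CRTC's state is simply pulled into the atomic commit. A stripped-down sketch of that first-commit handling, with fake types and a stub in place of intel_atomic_get_crtc_state():

#include <stdbool.h>
#include <stdint.h>

struct wm_state {
        bool distrust_bios_wm;          /* set after hardware readout */
        uint8_t pipe_mask;              /* all pipes present on the platform */
        uint8_t active_pipe_changes;
};

/* Stub for "pull every CRTC state into the commit"; illustrative only. */
static int add_all_pipes(void)
{
        return 0;
}

/*
 * If the BIOS watermarks can't be trusted, mark every pipe as changed
 * so the whole DDB allocation is recomputed on this commit, then add
 * all pipes so their locks are held for the reallocation.
 */
static int ddb_add_affected_pipes(struct wm_state *wm)
{
        if (wm->distrust_bios_wm)
                wm->active_pipe_changes = wm->pipe_mask;

        if (wm->active_pipe_changes)
                return add_all_pipes(); /* driver propagates PTR_ERR() here */

        return 0;
}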
*/ if (dev_priv->wm.distrust_bios_wm) { - ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, state->base.acquire_ctx); if (ret) return ret; - state->active_pipe_changes = ~0; + state->active_pipe_changes = INTEL_INFO(dev_priv)->pipe_mask; /* - * We usually only initialize state->active_crtcs if we + * We usually only initialize state->active_pipes if we * we're doing a modeset; make sure this field is always * initialized during the sanitization process that happens * on the first commit too. */ if (!state->modeset) - state->active_crtcs = dev_priv->active_crtcs; + state->active_pipes = dev_priv->active_pipes; } /* @@ -5526,18 +5439,11 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) * to grab the lock on *all* CRTC's. */ if (state->active_pipe_changes || state->modeset) { - realloc_pipes = ~0; - state->wm_results.dirty_pipes = ~0; - } + state->wm_results.dirty_pipes = INTEL_INFO(dev_priv)->pipe_mask; - /* - * We're not recomputing for the pipes not included in the commit, so - * make sure we start with the current state. - */ - for_each_intel_crtc_mask(dev, crtc, realloc_pipes) { - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); + ret = intel_add_all_pipes(state); + if (ret) + return ret; } return 0; @@ -5587,7 +5493,7 @@ static int skl_wm_add_affected_planes(struct intel_atomic_state *state, * power well the hardware state will go out of sync * with the software state. */ - if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->base) && + if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi) && skl_plane_wm_equals(dev_priv, &old_crtc_state->wm.skl.optimal.planes[plane_id], &new_crtc_state->wm.skl.optimal.planes[plane_id])) @@ -5610,14 +5516,13 @@ skl_compute_wm(struct intel_atomic_state *state) struct intel_crtc_state *new_crtc_state; struct intel_crtc_state *old_crtc_state; struct skl_ddb_values *results = &state->wm_results; - bool changed = false; int ret, i; /* Clear all dirty flags */ results->dirty_pipes = 0; - ret = skl_ddb_add_affected_pipes(state, &changed); - if (ret || !changed) + ret = skl_ddb_add_affected_pipes(state); + if (ret) return ret; /* @@ -5639,7 +5544,7 @@ skl_compute_wm(struct intel_atomic_state *state) if (!skl_pipe_wm_equals(crtc, &old_crtc_state->wm.skl.optimal, &new_crtc_state->wm.skl.optimal)) - results->dirty_pipes |= drm_crtc_mask(&crtc->base); + results->dirty_pipes |= BIT(crtc->pipe); } ret = skl_compute_ddb(state); @@ -5652,34 +5557,35 @@ skl_compute_wm(struct intel_atomic_state *state) } static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state, - struct intel_crtc_state *cstate) + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc); - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; enum pipe pipe = crtc->pipe; - if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base))) + if ((state->wm_results.dirty_pipes & BIT(crtc->pipe)) == 0) return; I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime); } static void skl_initial_wm(struct intel_atomic_state *state, - struct intel_crtc_state *cstate) + struct intel_crtc *crtc) { - struct 
intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); - struct drm_device *dev = intel_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct skl_ddb_values *results = &state->wm_results; - if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0) + if ((results->dirty_pipes & BIT(crtc->pipe)) == 0) return; mutex_lock(&dev_priv->wm.wm_mutex); - if (cstate->base.active_changed) - skl_atomic_update_crtc_wm(state, cstate); + if (crtc_state->uapi.active_changed) + skl_atomic_update_crtc_wm(state, crtc); mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -5735,28 +5641,31 @@ static void ilk_program_watermarks(struct drm_i915_private *dev_priv) } static void ilk_initial_watermarks(struct intel_atomic_state *state, - struct intel_crtc_state *cstate) + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); mutex_lock(&dev_priv->wm.wm_mutex); - intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate; + crtc->wm.active.ilk = crtc_state->wm.ilk.intermediate; ilk_program_watermarks(dev_priv); mutex_unlock(&dev_priv->wm.wm_mutex); } static void ilk_optimize_watermarks(struct intel_atomic_state *state, - struct intel_crtc_state *cstate) + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + + if (!crtc_state->wm.need_postvbl_update) + return; mutex_lock(&dev_priv->wm.wm_mutex); - if (cstate->wm.need_postvbl_update) { - intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal; - ilk_program_watermarks(dev_priv); - } + crtc->wm.active.ilk = crtc_state->wm.ilk.optimal; + ilk_program_watermarks(dev_priv); mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -5812,19 +5721,19 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) struct skl_ddb_values *hw = &dev_priv->wm.skl_hw; struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; struct intel_crtc *crtc; - struct intel_crtc_state *cstate; + struct intel_crtc_state *crtc_state; skl_ddb_get_hw_state(dev_priv, ddb); for_each_intel_crtc(&dev_priv->drm, crtc) { - cstate = to_intel_crtc_state(crtc->base.state); + crtc_state = to_intel_crtc_state(crtc->base.state); - skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal); + skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal); if (crtc->active) - hw->dirty_pipes |= drm_crtc_mask(&crtc->base); + hw->dirty_pipes |= BIT(crtc->pipe); } - if (dev_priv->active_crtcs) { + if (dev_priv->active_pipes) { /* Fully recompute DDB on first atomic commit */ dev_priv->wm.distrust_bios_wm = true; } @@ -5835,8 +5744,8 @@ static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc) struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct ilk_wm_values *hw = &dev_priv->wm.hw; - struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->base.state); - struct intel_pipe_wm *active = &cstate->wm.ilk.optimal; + struct intel_crtc_state *crtc_state = 
to_intel_crtc_state(crtc->base.state); + struct intel_pipe_wm *active = &crtc_state->wm.ilk.optimal; enum pipe pipe = crtc->pipe; static const i915_reg_t wm0_pipe_reg[] = { [PIPE_A] = WM0_PIPEA_ILK, @@ -6057,19 +5966,22 @@ void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv) crtc_state->wm.g4x.optimal = *active; crtc_state->wm.g4x.intermediate = *active; - DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n", - pipe_name(pipe), - wm->pipe[pipe].plane[PLANE_PRIMARY], - wm->pipe[pipe].plane[PLANE_CURSOR], - wm->pipe[pipe].plane[PLANE_SPRITE0]); + drm_dbg_kms(&dev_priv->drm, + "Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n", + pipe_name(pipe), + wm->pipe[pipe].plane[PLANE_PRIMARY], + wm->pipe[pipe].plane[PLANE_CURSOR], + wm->pipe[pipe].plane[PLANE_SPRITE0]); } - DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n", - wm->sr.plane, wm->sr.cursor, wm->sr.fbc); - DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n", - wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc); - DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n", - yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en)); + drm_dbg_kms(&dev_priv->drm, + "Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n", + wm->sr.plane, wm->sr.cursor, wm->sr.fbc); + drm_dbg_kms(&dev_priv->drm, + "Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n", + wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc); + drm_dbg_kms(&dev_priv->drm, "Initial SR=%s HPLL=%s FBC=%s\n", + yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en)); } void g4x_wm_sanitize(struct drm_i915_private *dev_priv) @@ -6090,7 +6002,7 @@ void g4x_wm_sanitize(struct drm_i915_private *dev_priv) enum plane_id plane_id = plane->id; int level; - if (plane_state->base.visible) + if (plane_state->uapi.visible) continue; for (level = 0; level < 3; level++) { @@ -6161,8 +6073,9 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) { - DRM_DEBUG_KMS("Punit not acking DDR DVFS request, " - "assuming DDR DVFS is disabled\n"); + drm_dbg_kms(&dev_priv->drm, + "Punit not acking DDR DVFS request, " + "assuming DDR DVFS is disabled\n"); dev_priv->wm.max_level = VLV_WM_LEVEL_PM5; } else { val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); @@ -6213,16 +6126,18 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) crtc_state->wm.vlv.optimal = *active; crtc_state->wm.vlv.intermediate = *active; - DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", - pipe_name(pipe), - wm->pipe[pipe].plane[PLANE_PRIMARY], - wm->pipe[pipe].plane[PLANE_CURSOR], - wm->pipe[pipe].plane[PLANE_SPRITE0], - wm->pipe[pipe].plane[PLANE_SPRITE1]); + drm_dbg_kms(&dev_priv->drm, + "Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", + pipe_name(pipe), + wm->pipe[pipe].plane[PLANE_PRIMARY], + wm->pipe[pipe].plane[PLANE_CURSOR], + wm->pipe[pipe].plane[PLANE_SPRITE0], + wm->pipe[pipe].plane[PLANE_SPRITE1]); } - DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", - wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); + drm_dbg_kms(&dev_priv->drm, + "Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", + wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); } void vlv_wm_sanitize(struct drm_i915_private *dev_priv) @@ -6245,7 +6160,7 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv) enum plane_id plane_id = plane->id; int level; - if 
(plane_state->base.visible) + if (plane_state->uapi.visible) continue; for (level = 0; level < wm_state->num_levels; level++) { @@ -6402,2377 +6317,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv) intel_enable_ipc(dev_priv); } -/* - * Lock protecting IPS related data structures - */ -DEFINE_SPINLOCK(mchdev_lock); - -bool ironlake_set_drps(struct drm_i915_private *i915, u8 val) -{ - struct intel_uncore *uncore = &i915->uncore; - u16 rgvswctl; - - lockdep_assert_held(&mchdev_lock); - - rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); - if (rgvswctl & MEMCTL_CMD_STS) { - DRM_DEBUG("gpu busy, RCS change rejected\n"); - return false; /* still busy with another command */ - } - - rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | - (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; - intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); - intel_uncore_posting_read16(uncore, MEMSWCTL); - - rgvswctl |= MEMCTL_CMD_STS; - intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); - - return true; -} - -static void ironlake_enable_drps(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u32 rgvmodectl; - u8 fmax, fmin, fstart, vstart; - - spin_lock_irq(&mchdev_lock); - - rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); - - /* Enable temp reporting */ - intel_uncore_write16(uncore, PMMISC, I915_READ(PMMISC) | MCPPCE_EN); - intel_uncore_write16(uncore, TSC1, I915_READ(TSC1) | TSE); - - /* 100ms RC evaluation intervals */ - intel_uncore_write(uncore, RCUPEI, 100000); - intel_uncore_write(uncore, RCDNEI, 100000); - - /* Set max/min thresholds to 90ms and 80ms respectively */ - intel_uncore_write(uncore, RCBMAXAVG, 90000); - intel_uncore_write(uncore, RCBMINAVG, 80000); - - intel_uncore_write(uncore, MEMIHYST, 1); - - /* Set up min, max, and cur for interrupt handling */ - fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; - fmin = (rgvmodectl & MEMMODE_FMIN_MASK); - fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> - MEMMODE_FSTART_SHIFT; - - vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & - PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; - - dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ - dev_priv->ips.fstart = fstart; - - dev_priv->ips.max_delay = fstart; - dev_priv->ips.min_delay = fmin; - dev_priv->ips.cur_delay = fstart; - - DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", - fmax, fmin, fstart); - - intel_uncore_write(uncore, - MEMINTREN, - MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); - - /* - * Interrupts will be enabled in ironlake_irq_postinstall - */ - - intel_uncore_write(uncore, VIDSTART, vstart); - intel_uncore_posting_read(uncore, VIDSTART); - - rgvmodectl |= MEMMODE_SWMODE_EN; - intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); - - if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & - MEMCTL_CMD_STS) == 0, 10)) - DRM_ERROR("stuck trying to change perf mode\n"); - mdelay(1); - - ironlake_set_drps(dev_priv, fstart); - - dev_priv->ips.last_count1 = - intel_uncore_read(uncore, DMIEC) + - intel_uncore_read(uncore, DDREC) + - intel_uncore_read(uncore, CSIEC); - dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); - dev_priv->ips.last_count2 = intel_uncore_read(uncore, GFXEC); - dev_priv->ips.last_time2 = ktime_get_raw_ns(); - - spin_unlock_irq(&mchdev_lock); -} - -static void ironlake_disable_drps(struct drm_i915_private *i915) -{ - struct intel_uncore *uncore = &i915->uncore; - u16 rgvswctl; - - spin_lock_irq(&mchdev_lock); - - rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); - - /* Ack interrupts, disable EFC interrupt */ - 
intel_uncore_write(uncore, - MEMINTREN, - intel_uncore_read(uncore, MEMINTREN) & - ~MEMINT_EVAL_CHG_EN); - intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); - intel_uncore_write(uncore, - DEIER, - intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); - intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); - intel_uncore_write(uncore, - DEIMR, - intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); - - /* Go back to the starting frequency */ - ironlake_set_drps(i915, i915->ips.fstart); - mdelay(1); - rgvswctl |= MEMCTL_CMD_STS; - intel_uncore_write(uncore, MEMSWCTL, rgvswctl); - mdelay(1); - - spin_unlock_irq(&mchdev_lock); -} - -/* There's a funny hw issue where the hw returns all 0 when reading from - * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value - * ourselves, instead of doing a rmw cycle (which might result in us clearing - * all limits and the gpu stuck at whatever frequency it is at atm). - */ -static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 limits; - - /* Only set the down limit when we've reached the lowest level to avoid - * getting more interrupts, otherwise leave this clear. This prevents a - * race in the hw when coming out of rc6: There's a tiny window where - * the hw runs at the minimal clock before selecting the desired - * frequency, if the down threshold expires in that window we will not - * receive a down interrupt. */ - if (INTEL_GEN(dev_priv) >= 9) { - limits = (rps->max_freq_softlimit) << 23; - if (val <= rps->min_freq_softlimit) - limits |= (rps->min_freq_softlimit) << 14; - } else { - limits = rps->max_freq_softlimit << 24; - if (val <= rps->min_freq_softlimit) - limits |= rps->min_freq_softlimit << 16; - } - - return limits; -} - -static void rps_set_power(struct drm_i915_private *dev_priv, int new_power) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 threshold_up = 0, threshold_down = 0; /* in % */ - u32 ei_up = 0, ei_down = 0; - - lockdep_assert_held(&rps->power.mutex); - - if (new_power == rps->power.mode) - return; - - /* Note the units here are not exactly 1us, but 1280ns. */ - switch (new_power) { - case LOW_POWER: - /* Upclock if more than 95% busy over 16ms */ - ei_up = 16000; - threshold_up = 95; - - /* Downclock if less than 85% busy over 32ms */ - ei_down = 32000; - threshold_down = 85; - break; - - case BETWEEN: - /* Upclock if more than 90% busy over 13ms */ - ei_up = 13000; - threshold_up = 90; - - /* Downclock if less than 75% busy over 32ms */ - ei_down = 32000; - threshold_down = 75; - break; - - case HIGH_POWER: - /* Upclock if more than 85% busy over 10ms */ - ei_up = 10000; - threshold_up = 85; - - /* Downclock if less than 60% busy over 32ms */ - ei_down = 32000; - threshold_down = 60; - break; - } - - /* When byt can survive without system hang with dynamic - * sw freq adjustments, this restriction can be lifted. - */ - if (IS_VALLEYVIEW(dev_priv)) - goto skip_hw_write; - - I915_WRITE(GEN6_RP_UP_EI, - GT_INTERVAL_FROM_US(dev_priv, ei_up)); - I915_WRITE(GEN6_RP_UP_THRESHOLD, - GT_INTERVAL_FROM_US(dev_priv, - ei_up * threshold_up / 100)); - - I915_WRITE(GEN6_RP_DOWN_EI, - GT_INTERVAL_FROM_US(dev_priv, ei_down)); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, - GT_INTERVAL_FROM_US(dev_priv, - ei_down * threshold_down / 100)); - - I915_WRITE(GEN6_RP_CONTROL, - (INTEL_GEN(dev_priv) > 9 ? 
0 : GEN6_RP_MEDIA_TURBO) | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - -skip_hw_write: - rps->power.mode = new_power; - rps->power.up_threshold = threshold_up; - rps->power.down_threshold = threshold_down; -} - -static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - int new_power; - - new_power = rps->power.mode; - switch (rps->power.mode) { - case LOW_POWER: - if (val > rps->efficient_freq + 1 && - val > rps->cur_freq) - new_power = BETWEEN; - break; - - case BETWEEN: - if (val <= rps->efficient_freq && - val < rps->cur_freq) - new_power = LOW_POWER; - else if (val >= rps->rp0_freq && - val > rps->cur_freq) - new_power = HIGH_POWER; - break; - - case HIGH_POWER: - if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && - val < rps->cur_freq) - new_power = BETWEEN; - break; - } - /* Max/min bins are special */ - if (val <= rps->min_freq_softlimit) - new_power = LOW_POWER; - if (val >= rps->max_freq_softlimit) - new_power = HIGH_POWER; - - mutex_lock(&rps->power.mutex); - if (rps->power.interactive) - new_power = HIGH_POWER; - rps_set_power(dev_priv, new_power); - mutex_unlock(&rps->power.mutex); -} - -void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive) -{ - struct intel_rps *rps = &i915->gt_pm.rps; - - if (INTEL_GEN(i915) < 6) - return; - - mutex_lock(&rps->power.mutex); - if (interactive) { - if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake)) - rps_set_power(i915, HIGH_POWER); - } else { - GEM_BUG_ON(!rps->power.interactive); - rps->power.interactive--; - } - mutex_unlock(&rps->power.mutex); -} - -static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 mask = 0; - - /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ - if (val > rps->min_freq_softlimit) - mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; - if (val < rps->max_freq_softlimit) - mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; - - mask &= dev_priv->pm_rps_events; - - return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); -} - -/* gen6_set_rps is called to update the frequency request, but should also be - * called when the range (min_delay and max_delay) is modified so that we can - * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ -static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* min/max delay may still have been modified so be sure to - * write the limits value. - */ - if (val != rps->cur_freq) { - gen6_set_rps_thresholds(dev_priv, val); - - if (INTEL_GEN(dev_priv) >= 9) - I915_WRITE(GEN6_RPNSWREQ, - GEN9_FREQUENCY(val)); - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(val)); - else - I915_WRITE(GEN6_RPNSWREQ, - GEN6_FREQUENCY(val) | - GEN6_OFFSET(0) | - GEN6_AGGRESSIVE_TURBO); - } - - /* Make sure we continue to get interrupts - * until we hit the minimum or maximum frequencies. 
- */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - - rps->cur_freq = val; - trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); - - return 0; -} - -static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) -{ - int err; - - if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), - "Odd GPU freq value\n")) - val &= ~1; - - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - - if (val != dev_priv->gt_pm.rps.cur_freq) { - vlv_punit_get(dev_priv); - err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - vlv_punit_put(dev_priv); - if (err) - return err; - - gen6_set_rps_thresholds(dev_priv, val); - } - - dev_priv->gt_pm.rps.cur_freq = val; - trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); - - return 0; -} - -/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down - * - * * If Gfx is Idle, then - * 1. Forcewake Media well. - * 2. Request idle freq. - * 3. Release Forcewake of Media well. -*/ -static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val = rps->idle_freq; - int err; - - if (rps->cur_freq <= val) - return; - - /* The punit delays the write of the frequency and voltage until it - * determines the GPU is awake. During normal usage we don't want to - * waste power changing the frequency if the GPU is sleeping (rc6). - * However, the GPU and driver is now idle and we do not want to delay - * switching to minimum voltage (reducing power whilst idle) as we do - * not expect to be woken in the near future and so must flush the - * change by waking the device. - * - * We choose to take the media powerwell (either would do to trick the - * punit into committing the voltage change) as that takes a lot less - * power than the render powerwell. - */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_MEDIA); - err = valleyview_set_rps(dev_priv, val); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_MEDIA); - - if (err) - DRM_ERROR("Failed to set RPS for idle\n"); -} - -void gen6_rps_busy(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - mutex_lock(&rps->lock); - if (rps->enabled) { - u8 freq; - - if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) - gen6_rps_reset_ei(dev_priv); - I915_WRITE(GEN6_PMINTRMSK, - gen6_rps_pm_mask(dev_priv, rps->cur_freq)); - - gen6_enable_rps_interrupts(dev_priv); - - /* Use the user's desired frequency as a guide, but for better - * performance, jump directly to RPe as our starting frequency. - */ - freq = max(rps->cur_freq, - rps->efficient_freq); - - if (intel_set_rps(dev_priv, - clamp(freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit))) - DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); - } - mutex_unlock(&rps->lock); -} - -void gen6_rps_idle(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* Flush our bottom-half so that it does not race with us - * setting the idle frequency and so that it is bounded by - * our rpm wakeref. And then disable the interrupts to stop any - * futher RPS reclocking whilst we are asleep. 
- */ - gen6_disable_rps_interrupts(dev_priv); - - mutex_lock(&rps->lock); - if (rps->enabled) { - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_set_rps_idle(dev_priv); - else - gen6_set_rps(dev_priv, rps->idle_freq); - rps->last_adj = 0; - I915_WRITE(GEN6_PMINTRMSK, - gen6_sanitize_rps_pm_mask(dev_priv, ~0)); - } - mutex_unlock(&rps->lock); -} - -void gen6_rps_boost(struct i915_request *rq) -{ - struct intel_rps *rps = &rq->i915->gt_pm.rps; - unsigned long flags; - bool boost; - - /* This is intentionally racy! We peek at the state here, then - * validate inside the RPS worker. - */ - if (!rps->enabled) - return; - - if (i915_request_signaled(rq)) - return; - - /* Serializes with i915_request_retire() */ - boost = false; - spin_lock_irqsave(&rq->lock, flags); - if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) { - boost = !atomic_fetch_inc(&rps->num_waiters); - rq->waitboost = true; - } - spin_unlock_irqrestore(&rq->lock, flags); - if (!boost) - return; - - if (READ_ONCE(rps->cur_freq) < rps->boost_freq) - schedule_work(&rps->work); - - atomic_inc(&rps->boosts); -} - -int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - int err; - - lockdep_assert_held(&rps->lock); - GEM_BUG_ON(val > rps->max_freq); - GEM_BUG_ON(val < rps->min_freq); - - if (!rps->enabled) { - rps->cur_freq = val; - return 0; - } - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - err = valleyview_set_rps(dev_priv, val); - else - err = gen6_set_rps(dev_priv, val); - - return err; -} - -static void gen9_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); - I915_WRITE(GEN9_PG_ENABLE, 0); -} - -static void gen9_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void gen6_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); -} - -static void gen6_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RPNSWREQ, 1 << 31); - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); -} - -static void cherryview_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) -{ - /* We're doing forcewake before Disabling RC6, - * This what the BIOS expects when going into suspend */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - I915_WRITE(GEN6_RC_CONTROL, 0); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void valleyview_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) -{ - bool enable_rc6 = true; - unsigned long rc6_ctx_base; - u32 rc_ctl; - int rc_sw_target; - - rc_ctl = I915_READ(GEN6_RC_CONTROL); - rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >> - RC_SW_TARGET_STATE_SHIFT; - DRM_DEBUG_DRIVER("BIOS enabled RC states: " - "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", - onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), - onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), - rc_sw_target); - - if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) { - DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); - enable_rc6 = false; - } - - /* - * The exact context size is not known for BXT, so assume a page size - * for this check. 
- */ - rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; - if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) && - (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) { - DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); - enable_rc6 = false; - } - - if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) { - DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN8_PUSHBUS_CONTROL) || - !I915_READ(GEN8_PUSHBUS_ENABLE) || - !I915_READ(GEN8_PUSHBUS_SHIFT)) { - DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN6_GFXPAUSE)) { - DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN8_MISC_CTRL0)) { - DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); - enable_rc6 = false; - } - - return enable_rc6; -} - -static bool sanitize_rc6(struct drm_i915_private *i915) -{ - struct intel_device_info *info = mkwrite_device_info(i915); - - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(i915)) { - info->has_rc6 = 0; - info->has_rps = false; - } - - if (info->has_rc6 && - IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) { - DRM_INFO("RC6 disabled by BIOS\n"); - info->has_rc6 = 0; - } - - /* - * We assume that we do not have any deep rc6 levels if we don't have - * have the previous rc6 level supported, i.e. we use HAS_RC6() - * as the initial coarse check for rc6 in general, moving on to - * progressively finer/deeper levels. - */ - if (!info->has_rc6 && info->has_rc6p) - info->has_rc6p = 0; - - return info->has_rc6; -} - -static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* All of these values are in units of 50MHz */ - - /* static values from HW: RP0 > RP1 > RPn (min_freq) */ - if (IS_GEN9_LP(dev_priv)) { - u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 16) & 0xff; - rps->rp1_freq = (rp_state_cap >> 8) & 0xff; - rps->min_freq = (rp_state_cap >> 0) & 0xff; - } else { - u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 0) & 0xff; - rps->rp1_freq = (rp_state_cap >> 8) & 0xff; - rps->min_freq = (rp_state_cap >> 16) & 0xff; - } - /* hw_max = RP0 until we check for overclocking */ - rps->max_freq = rps->rp0_freq; - - rps->efficient_freq = rps->rp1_freq; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - u32 ddcc_status = 0; - - if (sandybridge_pcode_read(dev_priv, - HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, - &ddcc_status, NULL) == 0) - rps->efficient_freq = - clamp_t(u8, - ((ddcc_status >> 8) & 0xff), - rps->min_freq, - rps->max_freq); - } - - if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - /* Store the frequency values in 16.66 MHZ units, which is - * the natural hardware unit for SKL - */ - rps->rp0_freq *= GEN9_FREQ_SCALER; - rps->rp1_freq *= GEN9_FREQ_SCALER; - rps->min_freq *= GEN9_FREQ_SCALER; - rps->max_freq *= GEN9_FREQ_SCALER; - rps->efficient_freq *= GEN9_FREQ_SCALER; - } -} - -static void reset_rps(struct drm_i915_private *dev_priv, - int (*set)(struct drm_i915_private *, u8)) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u8 freq = rps->cur_freq; - - /* force a 
reset */ - rps->power.mode = -1; - rps->cur_freq = -1; - - if (set(dev_priv, freq)) - DRM_ERROR("Failed to reset RPS to initial values\n"); -} - -/* See the Gen9_GT_PM_Programming_Guide doc for the below */ -static void gen9_enable_rps(struct drm_i915_private *dev_priv) -{ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Program defaults and thresholds for RPS */ - if (IS_GEN(dev_priv, 9)) - I915_WRITE(GEN6_RC_VIDEO_FREQ, - GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); - - /* 1 second timeout*/ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, - GT_INTERVAL_FROM_US(dev_priv, 1000000)); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); - - /* Leaning on the below call to gen6_set_rps to program/setup the - * Up/Down EI & threshold registers, as well as the RP_CONTROL, - * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ - reset_rps(dev_priv, gen6_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen11_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* - * 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have. - */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - if (HAS_GUC(dev_priv)) - I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); - - I915_WRITE(GEN6_RC_SLEEP, 0); - - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ - - /* - * 2c: Program Coarse Power Gating Policies. - * - * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we - * use instead is a more conservative estimate for the maximum time - * it takes us to service a CS interrupt and submit a new ELSP - that - * is the time which the GPU is idle waiting for the CPU to select the - * next request to execute. If the idle hysteresis is less than that - * interrupt service latency, the hardware will automatically gate - * the power well and we will then incur the wake up cost on top of - * the service latency. A similar guide from intel_pstate is that we - * do not want the enable hysteresis to less than the wakeup latency. - * - * igt/gem_exec_nop/sequential provides a rough estimate for the - * service latency, and puts it around 10us for Broadwell (and other - * big core) and around 40us for Broxton (and other low power cores). - * [Note that for legacy ringbuffer submission, this is less than 1us!] - * However, the wakeup latency on Broxton is closer to 100us. To be - * conservative, we have to factor in a context switch on top (due - * to ksoftirqd). - */ - I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); - - /* 3a: Enable RC6 */ - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1)); - - /* 3b: Enable Coarse Power Gating only when RC6 is enabled. 
*/ - I915_WRITE(GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen9_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 rc6_mode; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - if (INTEL_GEN(dev_priv) >= 10) { - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - } else if (IS_SKYLAKE(dev_priv)) { - /* - * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only - * when CPG is enabled - */ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); - } else { - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); - } - - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - if (HAS_GUC(dev_priv)) - I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); - - I915_WRITE(GEN6_RC_SLEEP, 0); - - /* - * 2c: Program Coarse Power Gating Policies. - * - * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we - * use instead is a more conservative estimate for the maximum time - * it takes us to service a CS interrupt and submit a new ELSP - that - * is the time which the GPU is idle waiting for the CPU to select the - * next request to execute. If the idle hysteresis is less than that - * interrupt service latency, the hardware will automatically gate - * the power well and we will then incur the wake up cost on top of - * the service latency. A similar guide from intel_pstate is that we - * do not want the enable hysteresis to less than the wakeup latency. - * - * igt/gem_exec_nop/sequential provides a rough estimate for the - * service latency, and puts it around 10us for Broadwell (and other - * big core) and around 40us for Broxton (and other low power cores). - * [Note that for legacy ringbuffer submission, this is less than 1us!] - * However, the wakeup latency on Broxton is closer to 100us. To be - * conservative, we have to factor in a context switch on top (due - * to ksoftirqd). - */ - I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); - - /* 3a: Enable RC6 */ - I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - - /* WaRsUseTimeoutMode:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0)) - rc6_mode = GEN7_RC_CTL_TO_MODE; - else - rc6_mode = GEN6_RC_CTL_EI_MODE(1); - - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - rc6_mode); - - /* - * 3b: Enable Coarse Power Gating only when RC6 is enabled. - * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6. 
- */ - if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) - I915_WRITE(GEN9_PG_ENABLE, 0); - else - I915_WRITE(GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen8_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - I915_WRITE(GEN6_RC_SLEEP, 0); - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ - - /* 3: Enable RC6 */ - - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - GEN6_RC_CTL_RC6_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen8_enable_rps(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 1 Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(rps->rp1_freq)); - I915_WRITE(GEN6_RC_VIDEO_FREQ, - HSW_FREQUENCY(rps->rp1_freq)); - /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ - - /* Docs recommend 900MHz, and 300 MHz respectively */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - rps->max_freq_softlimit << 24 | - rps->min_freq_softlimit << 16); - - I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ - I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ - I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? 
*/ - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - /* 2: Enable RPS */ - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - - reset_rps(dev_priv, gen6_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen6_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 rc6vids, rc6_mask; - u32 gtfifodbg; - int ret; - - I915_WRITE(GEN6_RC_STATE, 0); - - /* Clear the DBG now so we don't confuse earlier errors */ - gtfifodbg = I915_READ(GTFIFODBG); - if (gtfifodbg) { - DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* disable the counters and set deterministic thresholds */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); - I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - I915_WRITE(GEN6_RC_SLEEP, 0); - I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); - if (IS_IVYBRIDGE(dev_priv)) - I915_WRITE(GEN6_RC6_THRESHOLD, 125000); - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); - I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); - I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ - - /* We don't use those on Haswell */ - rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - if (HAS_RC6p(dev_priv)) - rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; - if (HAS_RC6pp(dev_priv)) - rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; - I915_WRITE(GEN6_RC_CONTROL, - rc6_mask | - GEN6_RC_CTL_EI_MODE(1) | - GEN6_RC_CTL_HW_ENABLE); - - rc6vids = 0; - ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, - &rc6vids, NULL); - if (IS_GEN(dev_priv, 6) && ret) { - DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); - } else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { - DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", - GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); - rc6vids &= 0xffff00; - rc6vids |= GEN6_ENCODE_RC6_VID(450); - ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); - if (ret) - DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); - } - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen6_enable_rps(struct drm_i915_private *dev_priv) -{ - /* Here begins a magic sequence of register writes to enable - * auto-downclocking. - * - * Perhaps there might be some value in exposing these to - * userspace... 
- */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Power down if completely idle for over 50ms */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - reset_rps(dev_priv, gen6_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - const int min_freq = 15; - const int scaling_factor = 180; - unsigned int gpu_freq; - unsigned int max_ia_freq, min_ring_freq; - unsigned int max_gpu_freq, min_gpu_freq; - struct cpufreq_policy *policy; - - lockdep_assert_held(&rps->lock); - - if (rps->max_freq <= rps->min_freq) - return; - - policy = cpufreq_cpu_get(0); - if (policy) { - max_ia_freq = policy->cpuinfo.max_freq; - cpufreq_cpu_put(policy); - } else { - /* - * Default to measured freq if none found, PCU will ensure we - * don't go over - */ - max_ia_freq = tsc_khz; - } - - /* Convert from kHz to MHz */ - max_ia_freq /= 1000; - - min_ring_freq = I915_READ(DCLK) & 0xf; - /* convert DDR frequency from units of 266.6MHz to bandwidth */ - min_ring_freq = mult_frac(min_ring_freq, 8, 3); - - min_gpu_freq = rps->min_freq; - max_gpu_freq = rps->max_freq; - if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - /* Convert GT frequency to 50 HZ units */ - min_gpu_freq /= GEN9_FREQ_SCALER; - max_gpu_freq /= GEN9_FREQ_SCALER; - } - - /* - * For each potential GPU frequency, load a ring frequency we'd like - * to use for memory access. We do this by specifying the IA frequency - * the PCU should use as a reference to determine the ring frequency. - */ - for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) { - const int diff = max_gpu_freq - gpu_freq; - unsigned int ia_freq = 0, ring_freq = 0; - - if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - /* - * ring_freq = 2 * GT. ring_freq is in 100MHz units - * No floor required for ring frequency on SKL. - */ - ring_freq = gpu_freq; - } else if (INTEL_GEN(dev_priv) >= 8) { - /* max(2 * GT, DDR). NB: GT is 50MHz units */ - ring_freq = max(min_ring_freq, gpu_freq); - } else if (IS_HASWELL(dev_priv)) { - ring_freq = mult_frac(gpu_freq, 5, 4); - ring_freq = max(min_ring_freq, ring_freq); - /* leave ia_freq as the default, chosen by cpufreq */ - } else { - /* On older processors, there is no separate ring - * clock domain, so in order to boost the bandwidth - * of the ring, we need to upclock the CPU (ia_freq). - * - * For GPU frequencies less than 750MHz, - * just use the lowest ring freq. 
- */ - if (gpu_freq < min_freq) - ia_freq = 800; - else - ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); - ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); - } - - sandybridge_pcode_write(dev_priv, - GEN6_PCODE_WRITE_MIN_FREQ_TABLE, - ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | - ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | - gpu_freq); - } -} - -static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp0; - - val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); - - switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) { - case 8: - /* (2 * 4) config */ - rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); - break; - case 12: - /* (2 * 6) config */ - rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT); - break; - case 16: - /* (2 * 8) config */ - default: - /* Setting (2 * 8) Min RP0 for any other combination */ - rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT); - break; - } - - rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK); - - return rp0; -} - -static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpe; - - val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); - rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; - - return rpe; -} - -static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp1; - - val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); - rp1 = (val & FB_GFX_FREQ_FUSE_MASK); - - return rp1; -} - -static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpn; - - val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); - rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & - FB_GFX_FREQ_FUSE_MASK); - - return rpn; -} - -static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp1; - - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); - - rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; - - return rp1; -} - -static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp0; - - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); - - rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; - /* Clamp to max */ - rp0 = min_t(u32, rp0, 0xea); - - return rp0; -} - -static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpe; - - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); - rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); - rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; - - return rpe; -} - -static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) -{ - u32 val; - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; - /* - * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value - * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on - * a BYT-M B0 the above register contains 0xbf. Moreover when setting - * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 - * to make sure it matches what Punit accepts. - */ - return max_t(u32, val, 0xc0); -} - -/* Check that the pctx buffer wasn't move under us. */ -static void valleyview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - - WARN_ON(pctx_addr != dev_priv->dsm.start + - dev_priv->vlv_pctx->stolen->start); -} - - -/* Check that the pcbr address is not empty. 
*/ -static void cherryview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - - WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); -} - -static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) -{ - resource_size_t pctx_paddr, paddr; - resource_size_t pctx_size = 32*1024; - u32 pcbr; - - pcbr = I915_READ(VLV_PCBR); - if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { - DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - paddr = dev_priv->dsm.end + 1 - pctx_size; - GEM_BUG_ON(paddr > U32_MAX); - - pctx_paddr = (paddr & (~4095)); - I915_WRITE(VLV_PCBR, pctx_paddr); - } - - DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); -} - -static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *pctx; - resource_size_t pctx_paddr; - resource_size_t pctx_size = 24*1024; - u32 pcbr; - - pcbr = I915_READ(VLV_PCBR); - if (pcbr) { - /* BIOS set it up already, grab the pre-alloc'd space */ - resource_size_t pcbr_offset; - - pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start; - pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv, - pcbr_offset, - I915_GTT_OFFSET_NONE, - pctx_size); - goto out; - } - - DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - - /* - * From the Gunit register HAS: - * The Gfx driver is expected to program this register and ensure - * proper allocation within Gfx stolen memory. For example, this - * register should be programmed such than the PCBR range does not - * overlap with other ranges, such as the frame buffer, protected - * memory, or any other relevant ranges. - */ - pctx = i915_gem_object_create_stolen(dev_priv, pctx_size); - if (!pctx) { - DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); - goto out; - } - - GEM_BUG_ON(range_overflows_t(u64, - dev_priv->dsm.start, - pctx->stolen->start, - U32_MAX)); - pctx_paddr = dev_priv->dsm.start + pctx->stolen->start; - I915_WRITE(VLV_PCBR, pctx_paddr); - -out: - DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); - dev_priv->vlv_pctx = pctx; -} - -static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *pctx; - - pctx = fetch_and_zero(&dev_priv->vlv_pctx); - if (pctx) - i915_gem_object_put(pctx); -} - -static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) -{ - dev_priv->gt_pm.rps.gpll_ref_freq = - vlv_get_cck_clock(dev_priv, "GPLL ref", - CCK_GPLL_CLOCK_CONTROL, - dev_priv->czclk_freq); - - DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", - dev_priv->gt_pm.rps.gpll_ref_freq); -} - -static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val; - - valleyview_setup_pctx(dev_priv); - - vlv_iosf_sb_get(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); - - vlv_init_gpll_ref_freq(dev_priv); - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - switch ((val >> 6) & 3) { - case 0: - case 1: - dev_priv->mem_freq = 800; - break; - case 2: - dev_priv->mem_freq = 1066; - break; - case 3: - dev_priv->mem_freq = 1333; - break; - } - DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - - rps->max_freq = valleyview_rps_max_freq(dev_priv); - rps->rp0_freq = rps->max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->max_freq), - rps->max_freq); - - rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - 
intel_gpu_freq(dev_priv, rps->efficient_freq), - rps->efficient_freq); - - rps->rp1_freq = valleyview_rps_guar_freq(dev_priv); - DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->rp1_freq), - rps->rp1_freq); - - rps->min_freq = valleyview_rps_min_freq(dev_priv); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->min_freq), - rps->min_freq); - - vlv_iosf_sb_put(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); -} - -static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val; - - cherryview_setup_pctx(dev_priv); - - vlv_iosf_sb_get(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); - - vlv_init_gpll_ref_freq(dev_priv); - - val = vlv_cck_read(dev_priv, CCK_FUSE_REG); - - switch ((val >> 2) & 0x7) { - case 3: - dev_priv->mem_freq = 2000; - break; - default: - dev_priv->mem_freq = 1600; - break; - } - DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - - rps->max_freq = cherryview_rps_max_freq(dev_priv); - rps->rp0_freq = rps->max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->max_freq), - rps->max_freq); - - rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->efficient_freq), - rps->efficient_freq); - - rps->rp1_freq = cherryview_rps_guar_freq(dev_priv); - DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->rp1_freq), - rps->rp1_freq); - - rps->min_freq = cherryview_rps_min_freq(dev_priv); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->min_freq), - rps->min_freq); - - vlv_iosf_sb_put(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); - - WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | - rps->min_freq) & 1, - "Odd GPU freq values\n"); -} - -static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) -{ - valleyview_cleanup_pctx(dev_priv); -} - -static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 gtfifodbg, rc6_mode, pcbr; - - gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | - GT_FIFO_FREE_ENTRIES_CHV); - if (gtfifodbg) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - cherryview_check_pctx(dev_priv); - - /* 1a & 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Disable RC states. 
*/ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2a: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - I915_WRITE(GEN6_RC_SLEEP, 0); - - /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ - I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); - - /* Allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - /* For now we assume BIOS is allocating and populating the PCBR */ - pcbr = I915_READ(VLV_PCBR); - - /* 3: Enable RC6 */ - rc6_mode = 0; - if (pcbr >> VLV_PCBR_ADDR_SHIFT) - rc6_mode = GEN7_RC_CTL_TO_MODE; - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void cherryview_enable_rps(struct drm_i915_private *dev_priv) -{ - u32 val; - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 1: Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - /* 2: Enable RPS */ - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - - /* Setting Fixed Bias */ - vlv_punit_get(dev_priv); - - val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; - vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - - vlv_punit_put(dev_priv); - - /* RPS code assumes GPLL is used */ - WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); - - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - - reset_rps(dev_priv, valleyview_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 gtfifodbg; - - valleyview_check_pctx(dev_priv); - - gtfifodbg = I915_READ(GTFIFODBG); - if (gtfifodbg) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Disable RC states. 
*/ - I915_WRITE(GEN6_RC_CONTROL, 0); - - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - - /* Allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC0_COUNT_EN | - VLV_RENDER_RC0_COUNT_EN | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - I915_WRITE(GEN6_RC_CONTROL, - GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void valleyview_enable_rps(struct drm_i915_private *dev_priv) -{ - u32 val; - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_CONT); - - vlv_punit_get(dev_priv); - - /* Setting Fixed Bias */ - val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; - vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - - vlv_punit_put(dev_priv); - - /* RPS code assumes GPLL is used */ - WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); - - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - - reset_rps(dev_priv, valleyview_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static unsigned long intel_pxfreq(u32 vidfreq) -{ - unsigned long freq; - int div = (vidfreq & 0x3f0000) >> 16; - int post = (vidfreq & 0x3000) >> 12; - int pre = (vidfreq & 0x7); - - if (!pre) - return 0; - - freq = ((div * 133333) / ((1<<post) * pre)); - - return freq; -} - -static const struct cparams { - u16 i; - u16 t; - u16 m; - u16 c; -} cparams[] = { - { 1, 1333, 301, 28664 }, - { 1, 1066, 294, 24460 }, - { 1, 800, 294, 25192 }, - { 0, 1333, 276, 27605 }, - { 0, 1066, 276, 27605 }, - { 0, 800, 231, 23784 }, -}; - -static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) -{ - u64 total_count, diff, ret; - u32 count1, count2, count3, m = 0, c = 0; - unsigned long now = jiffies_to_msecs(jiffies), diff1; - int i; - - lockdep_assert_held(&mchdev_lock); - - diff1 = now - dev_priv->ips.last_time1; - - /* Prevent division-by-zero if we are asking too fast. - * Also, we don't get interesting results if we are polling - * faster than once in 10ms, so just return the saved value - * in such cases. 
- */ - if (diff1 <= 10) - return dev_priv->ips.chipset_power; - - count1 = I915_READ(DMIEC); - count2 = I915_READ(DDREC); - count3 = I915_READ(CSIEC); - - total_count = count1 + count2 + count3; - - /* FIXME: handle per-counter overflow */ - if (total_count < dev_priv->ips.last_count1) { - diff = ~0UL - dev_priv->ips.last_count1; - diff += total_count; - } else { - diff = total_count - dev_priv->ips.last_count1; - } - - for (i = 0; i < ARRAY_SIZE(cparams); i++) { - if (cparams[i].i == dev_priv->ips.c_m && - cparams[i].t == dev_priv->ips.r_t) { - m = cparams[i].m; - c = cparams[i].c; - break; - } - } - - diff = div_u64(diff, diff1); - ret = ((m * diff) + c); - ret = div_u64(ret, 10); - - dev_priv->ips.last_count1 = total_count; - dev_priv->ips.last_time1 = now; - - dev_priv->ips.chipset_power = ret; - - return ret; -} - -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - unsigned long val = 0; - - if (!IS_GEN(dev_priv, 5)) - return 0; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - val = __i915_chipset_val(dev_priv); - spin_unlock_irq(&mchdev_lock); - } - - return val; -} - -unsigned long i915_mch_val(struct drm_i915_private *i915) -{ - unsigned long m, x, b; - u32 tsfs; - - tsfs = intel_uncore_read(&i915->uncore, TSFS); - - m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); - x = intel_uncore_read8(&i915->uncore, TR1); - - b = tsfs & TSFS_INTR_MASK; - - return ((m * x) / 127) - b; -} - -static int _pxvid_to_vd(u8 pxvid) -{ - if (pxvid == 0) - return 0; - - if (pxvid >= 8 && pxvid < 31) - pxvid = 31; - - return (pxvid + 2) * 125; -} - -static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) -{ - const int vd = _pxvid_to_vd(pxvid); - const int vm = vd - 1125; - - if (INTEL_INFO(dev_priv)->is_mobile) - return vm > 0 ? vm : 0; - - return vd; -} - -static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) -{ - u64 now, diff, diffms; - u32 count; - - lockdep_assert_held(&mchdev_lock); - - now = ktime_get_raw_ns(); - diffms = now - dev_priv->ips.last_time2; - do_div(diffms, NSEC_PER_MSEC); - - /* Don't divide by 0 */ - if (!diffms) - return; - - count = I915_READ(GFXEC); - - if (count < dev_priv->ips.last_count2) { - diff = ~0UL - dev_priv->ips.last_count2; - diff += count; - } else { - diff = count - dev_priv->ips.last_count2; - } - - dev_priv->ips.last_count2 = count; - dev_priv->ips.last_time2 = now; - - /* More magic constants... 
*/ - diff = diff * 1181; - diff = div_u64(diff, diffms * 10); - dev_priv->ips.gfx_power = diff; -} - -void i915_update_gfx_val(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - - if (!IS_GEN(dev_priv, 5)) - return; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - __i915_update_gfx_val(dev_priv); - spin_unlock_irq(&mchdev_lock); - } -} - -static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) -{ - unsigned long t, corr, state1, corr2, state2; - u32 pxvid, ext_v; - - lockdep_assert_held(&mchdev_lock); - - pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq)); - pxvid = (pxvid >> 24) & 0x7f; - ext_v = pvid_to_extvid(dev_priv, pxvid); - - state1 = ext_v; - - t = i915_mch_val(dev_priv); - - /* Revel in the empirically derived constants */ - - /* Correction factor in 1/100000 units */ - if (t > 80) - corr = ((t * 2349) + 135940); - else if (t >= 50) - corr = ((t * 964) + 29317); - else /* < 50 */ - corr = ((t * 301) + 1004); - - corr = corr * ((150142 * state1) / 10000 - 78642); - corr /= 100000; - corr2 = (corr * dev_priv->ips.corr); - - state2 = (corr2 * state1) / 10000; - state2 /= 100; /* convert to mW */ - - __i915_update_gfx_val(dev_priv); - - return dev_priv->ips.gfx_power + state2; -} - -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - unsigned long val = 0; - - if (!IS_GEN(dev_priv, 5)) - return 0; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - val = __i915_gfx_val(dev_priv); - spin_unlock_irq(&mchdev_lock); - } - - return val; -} - -static struct drm_i915_private __rcu *i915_mch_dev; - -static struct drm_i915_private *mchdev_get(void) -{ - struct drm_i915_private *i915; - - rcu_read_lock(); - i915 = rcu_dereference(i915_mch_dev); - if (!kref_get_unless_zero(&i915->drm.ref)) - i915 = NULL; - rcu_read_unlock(); - - return i915; -} - -/** - * i915_read_mch_val - return value for IPS use - * - * Calculate and return a value for the IPS driver to use when deciding whether - * we have thermal and power headroom to increase CPU or GPU power budget. - */ -unsigned long i915_read_mch_val(void) -{ - struct drm_i915_private *i915; - unsigned long chipset_val = 0; - unsigned long graphics_val = 0; - intel_wakeref_t wakeref; - - i915 = mchdev_get(); - if (!i915) - return 0; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - chipset_val = __i915_chipset_val(i915); - graphics_val = __i915_gfx_val(i915); - spin_unlock_irq(&mchdev_lock); - } - - drm_dev_put(&i915->drm); - return chipset_val + graphics_val; -} -EXPORT_SYMBOL_GPL(i915_read_mch_val); - -/** - * i915_gpu_raise - raise GPU frequency limit - * - * Raise the limit; IPS indicates we have thermal headroom. - */ -bool i915_gpu_raise(void) -{ - struct drm_i915_private *i915; - - i915 = mchdev_get(); - if (!i915) - return false; - - spin_lock_irq(&mchdev_lock); - if (i915->ips.max_delay > i915->ips.fmax) - i915->ips.max_delay--; - spin_unlock_irq(&mchdev_lock); - - drm_dev_put(&i915->drm); - return true; -} -EXPORT_SYMBOL_GPL(i915_gpu_raise); - -/** - * i915_gpu_lower - lower GPU frequency limit - * - * IPS indicates we're close to a thermal limit, so throttle back the GPU - * frequency maximum. 
- */ -bool i915_gpu_lower(void) -{ - struct drm_i915_private *i915; - - i915 = mchdev_get(); - if (!i915) - return false; - - spin_lock_irq(&mchdev_lock); - if (i915->ips.max_delay < i915->ips.min_delay) - i915->ips.max_delay++; - spin_unlock_irq(&mchdev_lock); - - drm_dev_put(&i915->drm); - return true; -} -EXPORT_SYMBOL_GPL(i915_gpu_lower); - -/** - * i915_gpu_busy - indicate GPU business to IPS - * - * Tell the IPS driver whether or not the GPU is busy. - */ -bool i915_gpu_busy(void) -{ - struct drm_i915_private *i915; - bool ret; - - i915 = mchdev_get(); - if (!i915) - return false; - - ret = i915->gt.awake; - - drm_dev_put(&i915->drm); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_busy); - -/** - * i915_gpu_turbo_disable - disable graphics turbo - * - * Disable graphics turbo by resetting the max frequency and setting the - * current frequency to the default. - */ -bool i915_gpu_turbo_disable(void) -{ - struct drm_i915_private *i915; - bool ret; - - i915 = mchdev_get(); - if (!i915) - return false; - - spin_lock_irq(&mchdev_lock); - i915->ips.max_delay = i915->ips.fstart; - ret = ironlake_set_drps(i915, i915->ips.fstart); - spin_unlock_irq(&mchdev_lock); - - drm_dev_put(&i915->drm); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); - -/** - * Tells the intel_ips driver that the i915 driver is now loaded, if - * IPS got loaded first. - * - * This awkward dance is so that neither module has to depend on the - * other in order for IPS to do the appropriate communication of - * GPU turbo limits to i915. - */ -static void -ips_ping_for_i915_load(void) -{ - void (*link)(void); - - link = symbol_get(ips_link_to_i915_driver); - if (link) { - link(); - symbol_put(ips_link_to_i915_driver); - } -} - -void intel_gpu_ips_init(struct drm_i915_private *dev_priv) -{ - /* We only register the i915 ips part with intel-ips once everything is - * set up, to avoid intel-ips sneaking in and reading bogus values. 
*/ - rcu_assign_pointer(i915_mch_dev, dev_priv); - - ips_ping_for_i915_load(); -} - -void intel_gpu_ips_teardown(void) -{ - rcu_assign_pointer(i915_mch_dev, NULL); -} - -static void intel_init_emon(struct drm_i915_private *dev_priv) -{ - u32 lcfuse; - u8 pxw[16]; - int i; - - /* Disable to program */ - I915_WRITE(ECR, 0); - POSTING_READ(ECR); - - /* Program energy weights for various events */ - I915_WRITE(SDEW, 0x15040d00); - I915_WRITE(CSIEW0, 0x007f0000); - I915_WRITE(CSIEW1, 0x1e220004); - I915_WRITE(CSIEW2, 0x04000004); - - for (i = 0; i < 5; i++) - I915_WRITE(PEW(i), 0); - for (i = 0; i < 3; i++) - I915_WRITE(DEW(i), 0); - - /* Program P-state weights to account for frequency power adjustment */ - for (i = 0; i < 16; i++) { - u32 pxvidfreq = I915_READ(PXVFREQ(i)); - unsigned long freq = intel_pxfreq(pxvidfreq); - unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> - PXVFREQ_PX_SHIFT; - unsigned long val; - - val = vid * vid; - val *= (freq / 1000); - val *= 255; - val /= (127*127*900); - if (val > 0xff) - DRM_ERROR("bad pxval: %ld\n", val); - pxw[i] = val; - } - /* Render standby states get 0 weight */ - pxw[14] = 0; - pxw[15] = 0; - - for (i = 0; i < 4; i++) { - u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | - (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); - I915_WRITE(PXW(i), val); - } - - /* Adjust magic regs to magic values (more experimental results) */ - I915_WRITE(OGW0, 0); - I915_WRITE(OGW1, 0); - I915_WRITE(EG0, 0x00007f00); - I915_WRITE(EG1, 0x0000000e); - I915_WRITE(EG2, 0x000e0000); - I915_WRITE(EG3, 0x68000300); - I915_WRITE(EG4, 0x42000000); - I915_WRITE(EG5, 0x00140031); - I915_WRITE(EG6, 0); - I915_WRITE(EG7, 0); - - for (i = 0; i < 8; i++) - I915_WRITE(PXWL(i), 0); - - /* Enable PMON + select events */ - I915_WRITE(ECR, 0x80000019); - - lcfuse = I915_READ(LCFUSE02); - - dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); -} - -void intel_init_gt_powersave(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* - * RPM depends on RC6 to save restore the GT HW context, so make RC6 a - * requirement. 
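The per-P-state weight programmed into the PXW registers by intel_init_emon() above is plain integer arithmetic; a worked example may help. The input numbers below are hypothetical, only the formula is taken from the code:

/*
 * With vid = 64 and freq = 400000 (so freq / 1000 == 400):
 *
 *	val = 64 * 64;			//   4096
 *	val *= 400;			//   1638400
 *	val *= 255;			//   417792000
 *	val /= 127 * 127 * 900;		//   417792000 / 14516100 == 28
 *
 * so pxw[i] == 28.  Values above 0xff are reported via DRM_ERROR and then
 * truncated when stored into the u8 array, and slots 14/15 (render standby
 * states) are forced to zero before the bytes are packed into PXW(0..3).
 */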
- */ - if (!sanitize_rc6(dev_priv)) { - DRM_INFO("RC6 disabled, disabling runtime PM support\n"); - pm_runtime_get(&dev_priv->drm.pdev->dev); - } - - /* Initialize RPS limits (for userspace) */ - if (IS_CHERRYVIEW(dev_priv)) - cherryview_init_gt_powersave(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_init_gt_powersave(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_init_rps_frequencies(dev_priv); - - /* Derive initial user preferences/limits from the hardware limits */ - rps->max_freq_softlimit = rps->max_freq; - rps->min_freq_softlimit = rps->min_freq; - - /* After setting max-softlimit, find the overclock max freq */ - if (IS_GEN(dev_priv, 6) || - IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { - u32 params = 0; - - sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, - ¶ms, NULL); - if (params & BIT(31)) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", - (rps->max_freq & 0xff) * 50, - (params & 0xff) * 50); - rps->max_freq = params & 0xff; - } - } - - /* Finally allow us to boost to max by default */ - rps->boost_freq = rps->max_freq; - rps->idle_freq = rps->min_freq; - rps->cur_freq = rps->idle_freq; -} - -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (IS_VALLEYVIEW(dev_priv)) - valleyview_cleanup_gt_powersave(dev_priv); - - if (!HAS_RC6(dev_priv)) - pm_runtime_put(&dev_priv->drm.pdev->dev); -} - -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) -{ - dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ - dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ - intel_disable_gt_powersave(dev_priv); - - if (INTEL_GEN(dev_priv) >= 11) - gen11_reset_rps_interrupts(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_reset_rps_interrupts(dev_priv); -} - -static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) -{ - lockdep_assert_held(&i915->gt_pm.rps.lock); - - if (!i915->gt_pm.llc_pstate.enabled) - return; - - /* Currently there is no HW configuration to be done to disable. 
*/ - - i915->gt_pm.llc_pstate.enabled = false; -} - -static void intel_disable_rc6(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (!dev_priv->gt_pm.rc6.enabled) - return; - - if (INTEL_GEN(dev_priv) >= 9) - gen9_disable_rc6(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rc6(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_disable_rc6(dev_priv); - - dev_priv->gt_pm.rc6.enabled = false; -} - -static void intel_disable_rps(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (!dev_priv->gt_pm.rps.enabled) - return; - - if (INTEL_GEN(dev_priv) >= 9) - gen9_disable_rps(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rps(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rps(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_disable_rps(dev_priv); - else if (IS_IRONLAKE_M(dev_priv)) - ironlake_disable_drps(dev_priv); - - dev_priv->gt_pm.rps.enabled = false; -} - -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) -{ - mutex_lock(&dev_priv->gt_pm.rps.lock); - - intel_disable_rc6(dev_priv); - intel_disable_rps(dev_priv); - if (HAS_LLC(dev_priv)) - intel_disable_llc_pstate(dev_priv); - - mutex_unlock(&dev_priv->gt_pm.rps.lock); -} - -static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) -{ - lockdep_assert_held(&i915->gt_pm.rps.lock); - - if (i915->gt_pm.llc_pstate.enabled) - return; - - gen6_update_ring_freq(i915); - - i915->gt_pm.llc_pstate.enabled = true; -} - -static void intel_enable_rc6(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (dev_priv->gt_pm.rc6.enabled) - return; - - if (IS_CHERRYVIEW(dev_priv)) - cherryview_enable_rc6(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 11) - gen11_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 9) - gen9_enable_rc6(dev_priv); - else if (IS_BROADWELL(dev_priv)) - gen8_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_enable_rc6(dev_priv); - - dev_priv->gt_pm.rc6.enabled = true; -} - -static void intel_enable_rps(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - lockdep_assert_held(&rps->lock); - - if (rps->enabled) - return; - - if (IS_CHERRYVIEW(dev_priv)) { - cherryview_enable_rps(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv)) { - valleyview_enable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 9) { - gen9_enable_rps(dev_priv); - } else if (IS_BROADWELL(dev_priv)) { - gen8_enable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 6) { - gen6_enable_rps(dev_priv); - } else if (IS_IRONLAKE_M(dev_priv)) { - ironlake_enable_drps(dev_priv); - intel_init_emon(dev_priv); - } - - WARN_ON(rps->max_freq < rps->min_freq); - WARN_ON(rps->idle_freq > rps->max_freq); - - WARN_ON(rps->efficient_freq < rps->min_freq); - WARN_ON(rps->efficient_freq > rps->max_freq); - - rps->enabled = true; -} - -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) -{ - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) - return; - - mutex_lock(&dev_priv->gt_pm.rps.lock); - - if (HAS_RC6(dev_priv)) - intel_enable_rc6(dev_priv); - if (HAS_RPS(dev_priv)) - intel_enable_rps(dev_priv); - if (HAS_LLC(dev_priv)) - intel_enable_llc_pstate(dev_priv); - - 
mutex_unlock(&dev_priv->gt_pm.rps.lock); -} - static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) { /* @@ -8870,7 +6414,7 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) { - int pipe; + enum pipe pipe; u32 val; /* @@ -8892,7 +6436,6 @@ static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; if (dev_priv->vbt.fdi_rx_polarity_inverted) val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; - val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; I915_WRITE(TRANS_CHICKEN2(pipe), val); @@ -8910,8 +6453,9 @@ static void gen6_check_mch_setup(struct drm_i915_private *dev_priv) tmp = I915_READ(MCH_SSKPD); if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) - DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", - tmp); + drm_dbg_kms(&dev_priv->drm, + "Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", + tmp); } static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) @@ -9088,6 +6632,37 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) /* WaEnable32PlaneMode:icl */ I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE)); + + /* + * Wa_1408615072:icl,ehl (vsunit) + * Wa_1407596294:icl,ehl (hsunit) + */ + intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE, + 0, VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); + + /* Wa_1407352427:icl,ehl */ + intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE2, + 0, PSDUNIT_CLKGATE_DIS); +} + +static void tgl_init_clock_gating(struct drm_i915_private *dev_priv) +{ + u32 vd_pg_enable = 0; + unsigned int i; + + /* Wa_1408615072:tgl */ + intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE2, + 0, VSUNIT_CLKGATE_DIS_TGL); + + /* This is not a WA. Enable VD HCP & MFX_ENC powergate */ + for (i = 0; i < I915_MAX_VCS; i++) { + if (HAS_ENGINE(dev_priv, _VCS(i))) + vd_pg_enable |= VDN_HCP_POWERGATE_ENABLE(i) | + VDN_MFX_POWERGATE_ENABLE(i); + } + + I915_WRITE(POWERGATE_ENABLE, + I915_READ(POWERGATE_ENABLE) | vd_pg_enable); } static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) @@ -9181,9 +6756,6 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv) static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) { - /* The GTT cache must be disabled if the system is using 2M pages. */ - bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv, - I915_GTT_PAGE_SIZE_2M); enum pipe pipe; /* WaSwitchSolVfFArbitrationPriority:bdw */ @@ -9216,9 +6788,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) /* WaProgramL3SqcReg1Default:bdw */ gen8_set_l3sqc_credits(dev_priv, 30, 2); - /* WaGttCachingOffByDefault:bdw */ - I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); - /* WaKVMNotificationOnConfigChange:bdw */ I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); @@ -9483,12 +7052,6 @@ static void chv_init_clock_gating(struct drm_i915_private *dev_priv) * LSQC Setting Recommendations. */ gen8_set_l3sqc_credits(dev_priv, 38, 2); - - /* - * GTT cache may not work with big pages, so if those - * are ever enabled GTT cache may need to be disabled. 
- */ - I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); } static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) @@ -9607,7 +7170,8 @@ void intel_suspend_hw(struct drm_i915_private *dev_priv) static void nop_init_clock_gating(struct drm_i915_private *dev_priv) { - DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n"); + drm_dbg_kms(&dev_priv->drm, + "No clock gating settings or workarounds applied.\n"); } /** @@ -9621,7 +7185,9 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv) */ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { - if (IS_GEN(dev_priv, 11)) + if (IS_GEN(dev_priv, 12)) + dev_priv->display.init_clock_gating = tgl_init_clock_gating; + else if (IS_GEN(dev_priv, 11)) dev_priv->display.init_clock_gating = icl_init_clock_gating; else if (IS_CANNONLAKE(dev_priv)) dev_priv->display.init_clock_gating = cnl_init_clock_gating; @@ -9672,9 +7238,12 @@ void intel_init_pm(struct drm_i915_private *dev_priv) { /* For cxsr */ if (IS_PINEVIEW(dev_priv)) - i915_pineview_get_mem_freq(dev_priv); + pnv_get_mem_freq(dev_priv); else if (IS_GEN(dev_priv, 5)) - i915_ironlake_get_mem_freq(dev_priv); + ilk_get_mem_freq(dev_priv); + + if (intel_has_sagv(dev_priv)) + skl_setup_sagv_block_time(dev_priv); /* For FIFO watermark updates */ if (INTEL_GEN(dev_priv) >= 9) { @@ -9697,8 +7266,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.optimize_watermarks = ilk_optimize_watermarks; } else { - DRM_DEBUG_KMS("Failed to read display plane latency. " - "Disable CxSR\n"); + drm_dbg_kms(&dev_priv->drm, + "Failed to read display plane latency. " + "Disable CxSR\n"); } } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_setup_wm_latency(dev_priv); @@ -9718,7 +7288,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->is_ddr3, dev_priv->fsb_freq, dev_priv->mem_freq)) { - DRM_INFO("failed to find known CxSR latency " + drm_info(&dev_priv->drm, + "failed to find known CxSR latency " "(found ddr%s fsb freq %d, mem freq %d), " "disabling CxSR\n", (dev_priv->is_ddr3 == 1) ? 
"3" : "2", @@ -9727,14 +7298,14 @@ void intel_init_pm(struct drm_i915_private *dev_priv) intel_set_memory_cxsr(dev_priv, false); dev_priv->display.update_wm = NULL; } else - dev_priv->display.update_wm = pineview_update_wm; + dev_priv->display.update_wm = pnv_update_wm; } else if (IS_GEN(dev_priv, 4)) { dev_priv->display.update_wm = i965_update_wm; } else if (IS_GEN(dev_priv, 3)) { dev_priv->display.update_wm = i9xx_update_wm; dev_priv->display.get_fifo_size = i9xx_get_fifo_size; } else if (IS_GEN(dev_priv, 2)) { - if (INTEL_INFO(dev_priv)->num_pipes == 1) { + if (INTEL_NUM_PIPES(dev_priv) == 1) { dev_priv->display.update_wm = i845_update_wm; dev_priv->display.get_fifo_size = i845_get_fifo_size; } else { @@ -9742,221 +7313,13 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.get_fifo_size = i830_get_fifo_size; } } else { - DRM_ERROR("unexpected fall-through in intel_init_pm\n"); + drm_err(&dev_priv->drm, + "unexpected fall-through in %s\n", __func__); } } -static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* - * N = val - 0xb7 - * Slow = Fast = GPLL ref * N - */ - return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); -} - -static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; -} - -static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* - * N = val / 2 - * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 - */ - return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); -} - -static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* CHV needs even values */ - return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; -} - -int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) -{ - if (INTEL_GEN(dev_priv) >= 9) - return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, - GEN9_FREQ_SCALER); - else if (IS_CHERRYVIEW(dev_priv)) - return chv_gpu_freq(dev_priv, val); - else if (IS_VALLEYVIEW(dev_priv)) - return byt_gpu_freq(dev_priv, val); - else - return val * GT_FREQUENCY_MULTIPLIER; -} - -int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) -{ - if (INTEL_GEN(dev_priv) >= 9) - return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, - GT_FREQUENCY_MULTIPLIER); - else if (IS_CHERRYVIEW(dev_priv)) - return chv_freq_opcode(dev_priv, val); - else if (IS_VALLEYVIEW(dev_priv)) - return byt_freq_opcode(dev_priv, val); - else - return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); -} - void intel_pm_setup(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->gt_pm.rps.lock); - mutex_init(&dev_priv->gt_pm.rps.power.mutex); - - atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); - dev_priv->runtime_pm.suspended = false; atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } - -static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, - const i915_reg_t reg) -{ - u32 lower, upper, tmp; - int loop = 2; - - /* - * The register accessed do not need forcewake. We borrow - * uncore lock to prevent concurrent access to range reg. - */ - lockdep_assert_held(&dev_priv->uncore.lock); - - /* - * vlv and chv residency counters are 40 bits in width. - * With a control bit, we can choose between upper or lower - * 32bit window into this counter. 
- * - * Although we always use the counter in high-range mode elsewhere, - * userspace may attempt to read the value before rc6 is initialised, - * before we have set the default VLV_COUNTER_CONTROL value. So always - * set the high bit to be safe. - */ - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); - upper = I915_READ_FW(reg); - do { - tmp = upper; - - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); - lower = I915_READ_FW(reg); - - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); - upper = I915_READ_FW(reg); - } while (upper != tmp && --loop); - - /* - * Everywhere else we always use VLV_COUNTER_CONTROL with the - * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set - * now. - */ - - return lower | (u64)upper << 8; -} - -u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, - const i915_reg_t reg) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u64 time_hw, prev_hw, overflow_hw; - unsigned int fw_domains; - unsigned long flags; - unsigned int i; - u32 mul, div; - - if (!HAS_RC6(dev_priv)) - return 0; - - /* - * Store previous hw counter values for counter wrap-around handling. - * - * There are only four interesting registers and they live next to each - * other so we can use the relative address, compared to the smallest - * one as the index into driver storage. - */ - i = (i915_mmio_reg_offset(reg) - - i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); - if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency))) - return 0; - - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); - - spin_lock_irqsave(&uncore->lock, flags); - intel_uncore_forcewake_get__locked(uncore, fw_domains); - - /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - mul = 1000000; - div = dev_priv->czclk_freq; - overflow_hw = BIT_ULL(40); - time_hw = vlv_residency_raw(dev_priv, reg); - } else { - /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ - if (IS_GEN9_LP(dev_priv)) { - mul = 10000; - div = 12; - } else { - mul = 1280; - div = 1; - } - - overflow_hw = BIT_ULL(32); - time_hw = intel_uncore_read_fw(uncore, reg); - } - - /* - * Counter wrap handling. - * - * But relying on a sufficient frequency of queries otherwise counters - * can still wrap. - */ - prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i]; - dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw; - - /* RC6 delta from last sample. */ - if (time_hw >= prev_hw) - time_hw -= prev_hw; - else - time_hw += overflow_hw - prev_hw; - - /* Add delta to RC6 extended raw driver copy. 
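The prev_hw/overflow_hw logic above guards against the residency counter wrapping between samples; per the code, the raw counter is 40 bits wide on VLV/CHV and 32 bits elsewhere, and queries are assumed frequent enough that it wraps at most once. A minimal sketch of that wrap handling (counter_delta is a hypothetical helper name):

#include <linux/bits.h>
#include <linux/types.h>

static u64 counter_delta(u64 now, u64 prev, unsigned int bits)
{
	u64 limit = BIT_ULL(bits);	/* e.g. BIT_ULL(40) on VLV/CHV, BIT_ULL(32) elsewhere */

	/* if the counter wrapped once, add the missing span back in */
	return now >= prev ? now - prev : now + (limit - prev);
}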
*/ - time_hw += dev_priv->gt_pm.rc6.cur_residency[i]; - dev_priv->gt_pm.rc6.cur_residency[i] = time_hw; - - intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irqrestore(&uncore->lock, flags); - - return mul_u64_u32_div(time_hw, mul, div); -} - -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, - i915_reg_t reg) -{ - return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000); -} - -u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat) -{ - u32 cagf; - - if (INTEL_GEN(dev_priv) >= 9) - cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; - else - cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; - - return cagf; -} diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 1b489fa399e1..c06c6a846d9a 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -10,10 +10,10 @@ #include "i915_reg.h" -struct drm_atomic_state; struct drm_device; struct drm_i915_private; struct i915_request; +struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; struct intel_plane; @@ -29,16 +29,6 @@ void intel_update_watermarks(struct intel_crtc *crtc); void intel_init_pm(struct drm_i915_private *dev_priv); void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv); void intel_pm_setup(struct drm_i915_private *dev_priv); -void intel_gpu_ips_init(struct drm_i915_private *dev_priv); -void intel_gpu_ips_teardown(void); -void intel_init_gt_powersave(struct drm_i915_private *dev_priv); -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); -void gen6_rps_busy(struct drm_i915_private *dev_priv); -void gen6_rps_idle(struct drm_i915_private *dev_priv); -void gen6_rps_boost(struct i915_request *rq); void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv); void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv); void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv); @@ -52,7 +42,7 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, struct skl_pipe_wm *out); void g4x_wm_sanitize(struct drm_i915_private *dev_priv); void vlv_wm_sanitize(struct drm_i915_private *dev_priv); -bool intel_can_enable_sagv(struct drm_atomic_state *state); +bool intel_can_enable_sagv(struct intel_atomic_state *state); int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); bool skl_wm_level_equals(const struct skl_wm_level *l1, @@ -64,27 +54,10 @@ void skl_write_plane_wm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state); void skl_write_cursor_wm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state); -bool ilk_disable_lp_wm(struct drm_device *dev); -int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, - struct intel_crtc_state *cstate); +bool ilk_disable_lp_wm(struct drm_i915_private *dev_priv); void intel_init_ipc(struct drm_i915_private *dev_priv); void intel_enable_ipc(struct drm_i915_private *dev_priv); -int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); -int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); -u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, i915_reg_t reg); -u64 intel_rc6_residency_us(struct 
drm_i915_private *dev_priv, i915_reg_t reg); - -u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1); - -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); -unsigned long i915_mch_val(struct drm_i915_private *dev_priv); -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); -void i915_update_gfx_val(struct drm_i915_private *dev_priv); - -bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); -int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); -void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive); bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); #endif /* __INTEL_PM_H__ */ diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c new file mode 100644 index 000000000000..14b59b899c9b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_region_lmem.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_memory_region.h" +#include "gem/i915_gem_lmem.h" +#include "gem/i915_gem_region.h" +#include "intel_region_lmem.h" + +static int init_fake_lmem_bar(struct intel_memory_region *mem) +{ + struct drm_i915_private *i915 = mem->i915; + struct i915_ggtt *ggtt = &i915->ggtt; + unsigned long n; + int ret; + + /* We want to 1:1 map the mappable aperture to our reserved region */ + + mem->fake_mappable.start = 0; + mem->fake_mappable.size = resource_size(&mem->region); + mem->fake_mappable.color = I915_COLOR_UNEVICTABLE; + + ret = drm_mm_reserve_node(&ggtt->vm.mm, &mem->fake_mappable); + if (ret) + return ret; + + mem->remap_addr = dma_map_resource(&i915->drm.pdev->dev, + mem->region.start, + mem->fake_mappable.size, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_FORCE_CONTIGUOUS); + if (dma_mapping_error(&i915->drm.pdev->dev, mem->remap_addr)) { + drm_mm_remove_node(&mem->fake_mappable); + return -EINVAL; + } + + for (n = 0; n < mem->fake_mappable.size >> PAGE_SHIFT; ++n) { + ggtt->vm.insert_page(&ggtt->vm, + mem->remap_addr + (n << PAGE_SHIFT), + n << PAGE_SHIFT, + I915_CACHE_NONE, 0); + } + + mem->region = (struct resource)DEFINE_RES_MEM(mem->remap_addr, + mem->fake_mappable.size); + + return 0; +} + +static void release_fake_lmem_bar(struct intel_memory_region *mem) +{ + if (!drm_mm_node_allocated(&mem->fake_mappable)) + return; + + drm_mm_remove_node(&mem->fake_mappable); + + dma_unmap_resource(&mem->i915->drm.pdev->dev, + mem->remap_addr, + mem->fake_mappable.size, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_FORCE_CONTIGUOUS); +} + +static void +region_lmem_release(struct intel_memory_region *mem) +{ + release_fake_lmem_bar(mem); + io_mapping_fini(&mem->iomap); + intel_memory_region_release_buddy(mem); +} + +static int +region_lmem_init(struct intel_memory_region *mem) +{ + int ret; + + if (i915_modparams.fake_lmem_start) { + ret = init_fake_lmem_bar(mem); + GEM_BUG_ON(ret); + } + + if (!io_mapping_init_wc(&mem->iomap, + mem->io_start, + resource_size(&mem->region))) + return -EIO; + + ret = intel_memory_region_init_buddy(mem); + if (ret) + io_mapping_fini(&mem->iomap); + + intel_memory_region_set_name(mem, "local"); + + return ret; +} + +const struct intel_memory_region_ops intel_region_lmem_ops = { + .init = region_lmem_init, + .release = region_lmem_release, + .create_object = __i915_gem_lmem_object_create, +}; + +struct intel_memory_region * +intel_setup_fake_lmem(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + struct intel_memory_region *mem; + 
resource_size_t mappable_end; + resource_size_t io_start; + resource_size_t start; + + GEM_BUG_ON(i915_ggtt_has_aperture(&i915->ggtt)); + GEM_BUG_ON(!i915_modparams.fake_lmem_start); + + /* Your mappable aperture belongs to me now! */ + mappable_end = pci_resource_len(pdev, 2); + io_start = pci_resource_start(pdev, 2), + start = i915_modparams.fake_lmem_start; + + mem = intel_memory_region_create(i915, + start, + mappable_end, + PAGE_SIZE, + io_start, + &intel_region_lmem_ops); + if (!IS_ERR(mem)) { + drm_info(&i915->drm, "Intel graphics fake LMEM: %pR\n", + &mem->region); + drm_info(&i915->drm, + "Intel graphics fake LMEM IO start: %llx\n", + (u64)mem->io_start); + drm_info(&i915->drm, "Intel graphics fake LMEM size: %llx\n", + (u64)resource_size(&mem->region)); + } + + return mem; +} diff --git a/drivers/gpu/drm/i915/intel_region_lmem.h b/drivers/gpu/drm/i915/intel_region_lmem.h new file mode 100644 index 000000000000..213def7c7b8a --- /dev/null +++ b/drivers/gpu/drm/i915/intel_region_lmem.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_REGION_LMEM_H +#define __INTEL_REGION_LMEM_H + +struct drm_i915_private; + +extern const struct intel_memory_region_ops intel_region_lmem_ops; + +struct intel_memory_region * +intel_setup_fake_lmem(struct drm_i915_private *i915); + +#endif /* !__INTEL_REGION_LMEM_H */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8d1aebc3e857..ad719c9602af 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -27,11 +27,11 @@ */ #include <linux/pm_runtime.h> -#include <linux/vgaarb.h> #include <drm/drm_print.h> #include "i915_drv.h" +#include "i915_trace.h" /** * DOC: runtime pm @@ -592,7 +592,7 @@ void intel_runtime_pm_disable(struct intel_runtime_pm *rpm) pm_runtime_put(kdev); } -void intel_runtime_pm_cleanup(struct intel_runtime_pm *rpm) +void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm) { int count = atomic_read(&rpm->wakeref_count); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h index 2ee8f9522e05..ae64ff14c642 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.h +++ b/drivers/gpu/drm/i915/intel_runtime_pm.h @@ -173,7 +173,7 @@ enable_rpm_wakeref_asserts(struct intel_runtime_pm *rpm) void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm); void intel_runtime_pm_enable(struct intel_runtime_pm *rpm); void intel_runtime_pm_disable(struct intel_runtime_pm *rpm); -void intel_runtime_pm_cleanup(struct intel_runtime_pm *rpm); +void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm); intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm); intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm); diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index a115625e980c..cbfb7171d62d 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -24,10 +24,8 @@ #include <asm/iosf_mbi.h> -#include "intel_sideband.h" - #include "i915_drv.h" -#include "intel_drv.h" +#include "intel_sideband.h" /* * IOSF sideband, see VLV2_SidebandMsg_HAS.docx and @@ -107,8 +105,8 @@ static int vlv_sideband_rw(struct drm_i915_private *i915, if (intel_wait_for_register(uncore, VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0, 5)) { - DRM_DEBUG_DRIVER("IOSF sideband idle wait (%s) timed out\n", - is_read ? 
"read" : "write"); + drm_dbg(&i915->drm, "IOSF sideband idle wait (%s) timed out\n", + is_read ? "read" : "write"); return -EAGAIN; } @@ -131,8 +129,8 @@ static int vlv_sideband_rw(struct drm_i915_private *i915, *val = intel_uncore_read_fw(uncore, VLV_IOSF_DATA); err = 0; } else { - DRM_DEBUG_DRIVER("IOSF sideband finish wait (%s) timed out\n", - is_read ? "read" : "write"); + drm_dbg(&i915->drm, "IOSF sideband finish wait (%s) timed out\n", + is_read ? "read" : "write"); err = -ETIMEDOUT; } @@ -285,7 +283,8 @@ static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg, if (intel_wait_for_register_fw(uncore, SBI_CTL_STAT, SBI_BUSY, 0, 100)) { - DRM_ERROR("timeout waiting for SBI to become ready\n"); + drm_err(&i915->drm, + "timeout waiting for SBI to become ready\n"); return -EBUSY; } @@ -303,12 +302,13 @@ static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg, if (__intel_wait_for_register_fw(uncore, SBI_CTL_STAT, SBI_BUSY, 0, 100, 100, &cmd)) { - DRM_ERROR("timeout waiting for SBI to complete read\n"); + drm_err(&i915->drm, + "timeout waiting for SBI to complete read\n"); return -ETIMEDOUT; } if (cmd & SBI_RESPONSE_FAIL) { - DRM_ERROR("error during SBI read of reg %x\n", reg); + drm_err(&i915->drm, "error during SBI read of reg %x\n", reg); return -ENXIO; } @@ -428,8 +428,9 @@ int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, mutex_unlock(&i915->sb_lock); if (err) { - DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n", - mbox, __builtin_return_address(0), err); + drm_dbg(&i915->drm, + "warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n", + mbox, __builtin_return_address(0), err); } return err; @@ -449,8 +450,9 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *i915, mutex_unlock(&i915->sb_lock); if (err) { - DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n", - val, mbox, __builtin_return_address(0), err); + drm_dbg(&i915->drm, + "warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n", + val, mbox, __builtin_return_address(0), err); } return err; @@ -521,7 +523,8 @@ int skl_pcode_request(struct drm_i915_private *i915, u32 mbox, u32 request, * requests, and for any quirks of the PCODE firmware that delays * the request completion. */ - DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); + drm_dbg_kms(&i915->drm, + "PCODE timeout, retrying with preemption disabled\n"); WARN_ON_ONCE(timeout_base_ms > 3); preempt_disable(); ret = wait_for_atomic(COND, 50); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c deleted file mode 100644 index ae45651ac73c..000000000000 --- a/drivers/gpu/drm/i915/intel_uc.c +++ /dev/null @@ -1,561 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "gt/intel_reset.h" -#include "intel_uc.h" -#include "intel_guc.h" -#include "intel_guc_ads.h" -#include "intel_guc_submission.h" -#include "i915_drv.h" - -static void guc_free_load_err_log(struct intel_guc *guc); - -/* Reset GuC providing us with fresh state for both GuC and HuC. - */ -static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) -{ - int ret; - u32 guc_status; - - ret = intel_reset_guc(dev_priv); - if (ret) { - DRM_ERROR("Failed to reset GuC, ret = %d\n", ret); - return ret; - } - - guc_status = I915_READ(GUC_STATUS); - WARN(!(guc_status & GS_MIA_IN_RESET), - "GuC status: 0x%x, MIA core expected to be in reset\n", - guc_status); - - return ret; -} - -static int __get_platform_enable_guc(struct drm_i915_private *i915) -{ - struct intel_uc_fw *guc_fw = &i915->guc.fw; - struct intel_uc_fw *huc_fw = &i915->huc.fw; - int enable_guc = 0; - - /* Default is to use HuC if we know GuC and HuC firmwares */ - if (intel_uc_fw_is_selected(guc_fw) && intel_uc_fw_is_selected(huc_fw)) - enable_guc |= ENABLE_GUC_LOAD_HUC; - - /* Any platform specific fine-tuning can be done here */ - - return enable_guc; -} - -static int __get_default_guc_log_level(struct drm_i915_private *i915) -{ - int guc_log_level; - - if (!HAS_GUC(i915) || !intel_uc_is_using_guc(i915)) - guc_log_level = GUC_LOG_LEVEL_DISABLED; - else if (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || - IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - guc_log_level = GUC_LOG_LEVEL_MAX; - else - guc_log_level = GUC_LOG_LEVEL_NON_VERBOSE; - - /* Any platform specific fine-tuning can be done here */ - - return guc_log_level; -} - -/** - * sanitize_options_early - sanitize uC related modparam options - * @i915: device private - * - * In case of "enable_guc" option this function will attempt to modify - * it only if it was initially set to "auto(-1)". Default value for this - * modparam varies between platforms and it is hardcoded in driver code. - * Any other modparam value is only monitored against availability of the - * related hardware or firmware definitions. - * - * In case of "guc_log_level" option this function will attempt to modify - * it only if it was initially set to "auto(-1)" or if initial value was - * "enable(1..4)" on platforms without the GuC. Default value for this - * modparam varies between platforms and is usually set to "disable(0)" - * unless GuC is enabled on given platform and the driver is compiled with - * debug config when this modparam will default to "enable(1..4)". 
- */ -static void sanitize_options_early(struct drm_i915_private *i915) -{ - struct intel_uc_fw *guc_fw = &i915->guc.fw; - struct intel_uc_fw *huc_fw = &i915->huc.fw; - - /* A negative value means "use platform default" */ - if (i915_modparams.enable_guc < 0) - i915_modparams.enable_guc = __get_platform_enable_guc(i915); - - DRM_DEBUG_DRIVER("enable_guc=%d (submission:%s huc:%s)\n", - i915_modparams.enable_guc, - yesno(intel_uc_is_using_guc_submission(i915)), - yesno(intel_uc_is_using_huc(i915))); - - /* Verify GuC firmware availability */ - if (intel_uc_is_using_guc(i915) && !intel_uc_fw_is_selected(guc_fw)) { - DRM_WARN("Incompatible option detected: %s=%d, %s!\n", - "enable_guc", i915_modparams.enable_guc, - !HAS_GUC(i915) ? "no GuC hardware" : - "no GuC firmware"); - } - - /* Verify HuC firmware availability */ - if (intel_uc_is_using_huc(i915) && !intel_uc_fw_is_selected(huc_fw)) { - DRM_WARN("Incompatible option detected: %s=%d, %s!\n", - "enable_guc", i915_modparams.enable_guc, - !HAS_HUC(i915) ? "no HuC hardware" : - "no HuC firmware"); - } - - /* XXX: GuC submission is unavailable for now */ - if (intel_uc_is_using_guc_submission(i915)) { - DRM_INFO("Incompatible option detected: %s=%d, %s!\n", - "enable_guc", i915_modparams.enable_guc, - "GuC submission not supported"); - DRM_INFO("Switching to non-GuC submission mode!\n"); - i915_modparams.enable_guc &= ~ENABLE_GUC_SUBMISSION; - } - - /* A negative value means "use platform/config default" */ - if (i915_modparams.guc_log_level < 0) - i915_modparams.guc_log_level = - __get_default_guc_log_level(i915); - - if (i915_modparams.guc_log_level > 0 && !intel_uc_is_using_guc(i915)) { - DRM_WARN("Incompatible option detected: %s=%d, %s!\n", - "guc_log_level", i915_modparams.guc_log_level, - !HAS_GUC(i915) ? "no GuC hardware" : - "GuC not enabled"); - i915_modparams.guc_log_level = 0; - } - - if (i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX) { - DRM_WARN("Incompatible option detected: %s=%d, %s!\n", - "guc_log_level", i915_modparams.guc_log_level, - "verbosity too high"); - i915_modparams.guc_log_level = GUC_LOG_LEVEL_MAX; - } - - DRM_DEBUG_DRIVER("guc_log_level=%d (enabled:%s, verbose:%s, verbosity:%d)\n", - i915_modparams.guc_log_level, - yesno(i915_modparams.guc_log_level), - yesno(GUC_LOG_LEVEL_IS_VERBOSE(i915_modparams.guc_log_level)), - GUC_LOG_LEVEL_TO_VERBOSITY(i915_modparams.guc_log_level)); - - /* Make sure that sanitization was done */ - GEM_BUG_ON(i915_modparams.enable_guc < 0); - GEM_BUG_ON(i915_modparams.guc_log_level < 0); -} - -void intel_uc_init_early(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - struct intel_huc *huc = &i915->huc; - - intel_guc_init_early(guc); - intel_huc_init_early(huc); - - sanitize_options_early(i915); -} - -void intel_uc_cleanup_early(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - - guc_free_load_err_log(guc); -} - -/** - * intel_uc_init_mmio - setup uC MMIO access - * @i915: device private - * - * Setup minimal state necessary for MMIO accesses later in the - * initialization sequence. 
- */ -void intel_uc_init_mmio(struct drm_i915_private *i915) -{ - intel_guc_init_send_regs(&i915->guc); -} - -static void guc_capture_load_err_log(struct intel_guc *guc) -{ - if (!guc->log.vma || !intel_guc_log_get_level(&guc->log)) - return; - - if (!guc->load_err_log) - guc->load_err_log = i915_gem_object_get(guc->log.vma->obj); - - return; -} - -static void guc_free_load_err_log(struct intel_guc *guc) -{ - if (guc->load_err_log) - i915_gem_object_put(guc->load_err_log); -} - -static void guc_reset_interrupts(struct intel_guc *guc) -{ - guc->interrupts.reset(guc_to_i915(guc)); -} - -static void guc_enable_interrupts(struct intel_guc *guc) -{ - guc->interrupts.enable(guc_to_i915(guc)); -} - -static void guc_disable_interrupts(struct intel_guc *guc) -{ - guc->interrupts.disable(guc_to_i915(guc)); -} - -static int guc_enable_communication(struct intel_guc *guc) -{ - guc_enable_interrupts(guc); - - return intel_guc_ct_enable(&guc->ct); -} - -static void guc_stop_communication(struct intel_guc *guc) -{ - intel_guc_ct_stop(&guc->ct); - - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; -} - -static void guc_disable_communication(struct intel_guc *guc) -{ - intel_guc_ct_disable(&guc->ct); - - guc_disable_interrupts(guc); - - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; -} - -int intel_uc_init_misc(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - struct intel_huc *huc = &i915->huc; - int ret; - - if (!USES_GUC(i915)) - return 0; - - ret = intel_guc_init_misc(guc); - if (ret) - return ret; - - if (USES_HUC(i915)) { - ret = intel_huc_init_misc(huc); - if (ret) - goto err_guc; - } - - return 0; - -err_guc: - intel_guc_fini_misc(guc); - return ret; -} - -void intel_uc_fini_misc(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - struct intel_huc *huc = &i915->huc; - - if (!USES_GUC(i915)) - return; - - if (USES_HUC(i915)) - intel_huc_fini_misc(huc); - - intel_guc_fini_misc(guc); -} - -int intel_uc_init(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - struct intel_huc *huc = &i915->huc; - int ret; - - if (!USES_GUC(i915)) - return 0; - - if (!HAS_GUC(i915)) - return -ENODEV; - - /* XXX: GuC submission is unavailable for now */ - GEM_BUG_ON(USES_GUC_SUBMISSION(i915)); - - ret = intel_guc_init(guc); - if (ret) - return ret; - - if (USES_HUC(i915)) { - ret = intel_huc_init(huc); - if (ret) - goto err_guc; - } - - if (USES_GUC_SUBMISSION(i915)) { - /* - * This is stuff we need to have available at fw load time - * if we are planning to enable submission later - */ - ret = intel_guc_submission_init(guc); - if (ret) - goto err_huc; - } - - return 0; - -err_huc: - if (USES_HUC(i915)) - intel_huc_fini(huc); -err_guc: - intel_guc_fini(guc); - return ret; -} - -void intel_uc_fini(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - - if (!USES_GUC(i915)) - return; - - GEM_BUG_ON(!HAS_GUC(i915)); - - if (USES_GUC_SUBMISSION(i915)) - intel_guc_submission_fini(guc); - - if (USES_HUC(i915)) - intel_huc_fini(&i915->huc); - - intel_guc_fini(guc); -} - -static void __uc_sanitize(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - struct intel_huc *huc = &i915->huc; - - GEM_BUG_ON(!HAS_GUC(i915)); - - intel_huc_sanitize(huc); - intel_guc_sanitize(guc); - - __intel_uc_reset_hw(i915); -} - -void intel_uc_sanitize(struct drm_i915_private *i915) -{ - if (!USES_GUC(i915)) - return; - - __uc_sanitize(i915); -} - -int 
intel_uc_init_hw(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - struct intel_huc *huc = &i915->huc; - int ret, attempts; - - if (!USES_GUC(i915)) - return 0; - - GEM_BUG_ON(!HAS_GUC(i915)); - - guc_reset_interrupts(guc); - - /* WaEnableuKernelHeaderValidFix:skl */ - /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ - if (IS_GEN(i915, 9)) - attempts = 3; - else - attempts = 1; - - while (attempts--) { - /* - * Always reset the GuC just before (re)loading, so - * that the state and timing are fairly predictable - */ - ret = __intel_uc_reset_hw(i915); - if (ret) - goto err_out; - - if (USES_HUC(i915)) { - ret = intel_huc_fw_upload(huc); - if (ret) - goto err_out; - } - - intel_guc_ads_reset(guc); - intel_guc_init_params(guc); - ret = intel_guc_fw_upload(guc); - if (ret == 0) - break; - - DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and " - "retry %d more time(s)\n", ret, attempts); - } - - /* Did we succeded or run out of retries? */ - if (ret) - goto err_log_capture; - - ret = guc_enable_communication(guc); - if (ret) - goto err_log_capture; - - if (USES_HUC(i915)) { - ret = intel_huc_auth(huc); - if (ret) - goto err_communication; - } - - ret = intel_guc_sample_forcewake(guc); - if (ret) - goto err_communication; - - if (USES_GUC_SUBMISSION(i915)) { - ret = intel_guc_submission_enable(guc); - if (ret) - goto err_communication; - } - - dev_info(i915->drm.dev, "GuC firmware version %u.%u\n", - guc->fw.major_ver_found, guc->fw.minor_ver_found); - dev_info(i915->drm.dev, "GuC submission %s\n", - enableddisabled(USES_GUC_SUBMISSION(i915))); - dev_info(i915->drm.dev, "HuC %s\n", - enableddisabled(USES_HUC(i915))); - - return 0; - - /* - * We've failed to load the firmware :( - */ -err_communication: - guc_disable_communication(guc); -err_log_capture: - guc_capture_load_err_log(guc); -err_out: - __uc_sanitize(i915); - - /* - * Note that there is no fallback as either user explicitly asked for - * the GuC or driver default option was to run with the GuC enabled. - */ - if (GEM_WARN_ON(ret == -EIO)) - ret = -EINVAL; - - dev_err(i915->drm.dev, "GuC initialization failed %d\n", ret); - return ret; -} - -void intel_uc_fini_hw(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - - if (!intel_guc_is_loaded(guc)) - return; - - GEM_BUG_ON(!HAS_GUC(i915)); - - if (USES_GUC_SUBMISSION(i915)) - intel_guc_submission_disable(guc); - - guc_disable_communication(guc); - __uc_sanitize(i915); -} - -/** - * intel_uc_reset_prepare - Prepare for reset - * @i915: device private - * - * Preparing for full gpu reset. 
- */ -void intel_uc_reset_prepare(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - - if (!intel_guc_is_loaded(guc)) - return; - - guc_stop_communication(guc); - __uc_sanitize(i915); -} - -void intel_uc_runtime_suspend(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - int err; - - if (!intel_guc_is_loaded(guc)) - return; - - err = intel_guc_suspend(guc); - if (err) - DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); - - guc_disable_communication(guc); -} - -void intel_uc_suspend(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - intel_wakeref_t wakeref; - - if (!intel_guc_is_loaded(guc)) - return; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - intel_uc_runtime_suspend(i915); -} - -int intel_uc_resume(struct drm_i915_private *i915) -{ - struct intel_guc *guc = &i915->guc; - int err; - - if (!intel_guc_is_loaded(guc)) - return 0; - - guc_enable_communication(guc); - - err = intel_guc_resume(guc); - if (err) { - DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err); - return err; - } - - return 0; -} diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h deleted file mode 100644 index 3ea06c87dfcd..000000000000 --- a/drivers/gpu/drm/i915/intel_uc.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ -#ifndef _INTEL_UC_H_ -#define _INTEL_UC_H_ - -#include "intel_guc.h" -#include "intel_huc.h" -#include "i915_params.h" - -void intel_uc_init_early(struct drm_i915_private *dev_priv); -void intel_uc_cleanup_early(struct drm_i915_private *dev_priv); -void intel_uc_init_mmio(struct drm_i915_private *dev_priv); -int intel_uc_init_misc(struct drm_i915_private *dev_priv); -void intel_uc_fini_misc(struct drm_i915_private *dev_priv); -void intel_uc_sanitize(struct drm_i915_private *dev_priv); -int intel_uc_init_hw(struct drm_i915_private *dev_priv); -void intel_uc_fini_hw(struct drm_i915_private *dev_priv); -int intel_uc_init(struct drm_i915_private *dev_priv); -void intel_uc_fini(struct drm_i915_private *dev_priv); -void intel_uc_reset_prepare(struct drm_i915_private *i915); -void intel_uc_suspend(struct drm_i915_private *i915); -void intel_uc_runtime_suspend(struct drm_i915_private *i915); -int intel_uc_resume(struct drm_i915_private *dev_priv); - -static inline bool intel_uc_is_using_guc(struct drm_i915_private *i915) -{ - GEM_BUG_ON(i915_modparams.enable_guc < 0); - return i915_modparams.enable_guc > 0; -} - -static inline bool intel_uc_is_using_guc_submission(struct drm_i915_private *i915) -{ - GEM_BUG_ON(i915_modparams.enable_guc < 0); - return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION; -} - -static inline bool intel_uc_is_using_huc(struct drm_i915_private *i915) -{ - GEM_BUG_ON(i915_modparams.enable_guc < 0); - return i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC; -} - -#endif diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c deleted file mode 100644 index f342ddd47df8..000000000000 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright © 2016-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include <linux/bitfield.h> -#include <linux/firmware.h> -#include <drm/drm_print.h> - -#include "intel_uc_fw.h" -#include "i915_drv.h" - -/** - * intel_uc_fw_fetch - fetch uC firmware - * - * @dev_priv: device private - * @uc_fw: uC firmware - * - * Fetch uC firmware into GEM obj. 
- */ -void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, - struct intel_uc_fw *uc_fw) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_gem_object *obj; - const struct firmware *fw = NULL; - struct uc_css_header *css; - size_t size; - int err; - - if (!uc_fw->path) { - dev_info(dev_priv->drm.dev, - "%s: No firmware was defined for %s!\n", - intel_uc_fw_type_repr(uc_fw->type), - intel_platform_name(INTEL_INFO(dev_priv)->platform)); - return; - } - - DRM_DEBUG_DRIVER("%s fw fetch %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); - - uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; - DRM_DEBUG_DRIVER("%s fw fetch %s\n", - intel_uc_fw_type_repr(uc_fw->type), - intel_uc_fw_status_repr(uc_fw->fetch_status)); - - err = request_firmware(&fw, uc_fw->path, &pdev->dev); - if (err) { - DRM_DEBUG_DRIVER("%s fw request_firmware err=%d\n", - intel_uc_fw_type_repr(uc_fw->type), err); - goto fail; - } - - DRM_DEBUG_DRIVER("%s fw size %zu ptr %p\n", - intel_uc_fw_type_repr(uc_fw->type), fw->size, fw); - - /* Check the size of the blob before examining buffer contents */ - if (fw->size < sizeof(struct uc_css_header)) { - DRM_WARN("%s: Unexpected firmware size (%zu, min %zu)\n", - intel_uc_fw_type_repr(uc_fw->type), - fw->size, sizeof(struct uc_css_header)); - err = -ENODATA; - goto fail; - } - - css = (struct uc_css_header *)fw->data; - - /* Firmware bits always start from header */ - uc_fw->header_offset = 0; - uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - - css->key_size_dw - css->exponent_size_dw) * - sizeof(u32); - - if (uc_fw->header_size != sizeof(struct uc_css_header)) { - DRM_WARN("%s: Mismatched firmware header definition\n", - intel_uc_fw_type_repr(uc_fw->type)); - err = -ENOEXEC; - goto fail; - } - - /* then, uCode */ - uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; - uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); - - /* now RSA */ - if (css->key_size_dw != UOS_RSA_SCRATCH_COUNT) { - DRM_WARN("%s: Mismatched firmware RSA key size (%u)\n", - intel_uc_fw_type_repr(uc_fw->type), css->key_size_dw); - err = -ENOEXEC; - goto fail; - } - uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); - - /* At least, it should have header, uCode and RSA. Size of all three. 
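The offset arithmetic above implies the following overall blob layout (a summary of the deleted code, with sizes in bytes derived from the dword fields of struct uc_css_header); the fetch code then checks that fw->size covers all three regions:

/*
 *	+---------------------------+  offset 0
 *	| CSS header                |  header_size = (header_size_dw - modulus_size_dw
 *	|                           |    - key_size_dw - exponent_size_dw) * sizeof(u32)
 *	+---------------------------+  ucode_offset = header_size
 *	| uCode                     |  ucode_size = (size_dw - header_size_dw) * sizeof(u32)
 *	+---------------------------+  rsa_offset = ucode_offset + ucode_size
 *	| RSA signature             |  rsa_size = key_size_dw * sizeof(u32)
 *	+---------------------------+
 */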
*/ - size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; - if (fw->size < size) { - DRM_WARN("%s: Truncated firmware (%zu, expected %zu)\n", - intel_uc_fw_type_repr(uc_fw->type), fw->size, size); - err = -ENOEXEC; - goto fail; - } - - /* Get version numbers from the CSS header */ - switch (uc_fw->type) { - case INTEL_UC_FW_TYPE_GUC: - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MINOR, - css->sw_version); - break; - - case INTEL_UC_FW_TYPE_HUC: - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MINOR, - css->sw_version); - break; - - default: - MISSING_CASE(uc_fw->type); - break; - } - - DRM_DEBUG_DRIVER("%s fw version %u.%u (wanted %u.%u)\n", - intel_uc_fw_type_repr(uc_fw->type), - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - - if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { - DRM_NOTE("%s: Skipping firmware version check\n", - intel_uc_fw_type_repr(uc_fw->type)); - } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - DRM_NOTE("%s: Wrong firmware version (%u.%u, required %u.%u)\n", - intel_uc_fw_type_repr(uc_fw->type), - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - err = -ENOEXEC; - goto fail; - } - - obj = i915_gem_object_create_shmem_from_data(dev_priv, - fw->data, fw->size); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - DRM_DEBUG_DRIVER("%s fw object_create err=%d\n", - intel_uc_fw_type_repr(uc_fw->type), err); - goto fail; - } - - uc_fw->obj = obj; - uc_fw->size = fw->size; - uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; - DRM_DEBUG_DRIVER("%s fw fetch %s\n", - intel_uc_fw_type_repr(uc_fw->type), - intel_uc_fw_status_repr(uc_fw->fetch_status)); - - release_firmware(fw); - return; - -fail: - uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; - DRM_DEBUG_DRIVER("%s fw fetch %s\n", - intel_uc_fw_type_repr(uc_fw->type), - intel_uc_fw_status_repr(uc_fw->fetch_status)); - - DRM_WARN("%s: Failed to fetch firmware %s (error %d)\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - DRM_INFO("%s: Firmware can be downloaded from %s\n", - intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); - - release_firmware(fw); /* OK even if fw is NULL */ -} - -static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw) -{ - struct drm_i915_gem_object *obj = uc_fw->obj; - struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; - struct i915_vma dummy = { - .node.start = intel_uc_fw_ggtt_offset(uc_fw), - .node.size = obj->base.size, - .pages = obj->mm.pages, - .vm = &ggtt->vm, - }; - - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size); - - /* uc_fw->obj cache domains were not controlled across suspend */ - drm_clflush_sg(dummy.pages); - - ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); -} - -static void intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw) -{ - struct drm_i915_gem_object *obj = uc_fw->obj; - struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; - u64 start = intel_uc_fw_ggtt_offset(uc_fw); - - ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size); -} - -/** - * intel_uc_fw_upload - load uC firmware using custom loader - * @uc_fw: uC firmware - * @xfer: custom uC firmware loader function - * - * Loads uC firmware using custom 
loader and updates internal flags. - * - * Return: 0 on success, non-zero on failure. - */ -int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, - int (*xfer)(struct intel_uc_fw *uc_fw)) -{ - int err; - - DRM_DEBUG_DRIVER("%s fw load %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); - - if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return -ENOEXEC; - - uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; - DRM_DEBUG_DRIVER("%s fw load %s\n", - intel_uc_fw_type_repr(uc_fw->type), - intel_uc_fw_status_repr(uc_fw->load_status)); - - /* Call custom loader */ - intel_uc_fw_ggtt_bind(uc_fw); - err = xfer(uc_fw); - intel_uc_fw_ggtt_unbind(uc_fw); - if (err) - goto fail; - - uc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; - DRM_DEBUG_DRIVER("%s fw load %s\n", - intel_uc_fw_type_repr(uc_fw->type), - intel_uc_fw_status_repr(uc_fw->load_status)); - - DRM_INFO("%s: Loaded firmware %s (version %u.%u)\n", - intel_uc_fw_type_repr(uc_fw->type), - uc_fw->path, - uc_fw->major_ver_found, uc_fw->minor_ver_found); - - return 0; - -fail: - uc_fw->load_status = INTEL_UC_FIRMWARE_FAIL; - DRM_DEBUG_DRIVER("%s fw load %s\n", - intel_uc_fw_type_repr(uc_fw->type), - intel_uc_fw_status_repr(uc_fw->load_status)); - - DRM_WARN("%s: Failed to load firmware %s (error %d)\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - - return err; -} - -int intel_uc_fw_init(struct intel_uc_fw *uc_fw) -{ - int err; - - if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return -ENOEXEC; - - err = i915_gem_object_pin_pages(uc_fw->obj); - if (err) - DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n", - intel_uc_fw_type_repr(uc_fw->type), err); - - return err; -} - -void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) -{ - if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return; - - i915_gem_object_unpin_pages(uc_fw->obj); -} - -u32 intel_uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw) -{ - struct drm_i915_private *i915 = to_i915(uc_fw->obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; - struct drm_mm_node *node = &ggtt->uc_fw; - - GEM_BUG_ON(!node->allocated); - GEM_BUG_ON(upper_32_bits(node->start)); - GEM_BUG_ON(upper_32_bits(node->start + node->size - 1)); - - return lower_32_bits(node->start); -} - -/** - * intel_uc_fw_cleanup_fetch - cleanup uC firmware - * - * @uc_fw: uC firmware - * - * Cleans up uC firmware by releasing the firmware GEM obj. - */ -void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw) -{ - struct drm_i915_gem_object *obj; - - obj = fetch_and_zero(&uc_fw->obj); - if (obj) - i915_gem_object_put(obj); - - uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; -} - -/** - * intel_uc_fw_dump - dump information about uC firmware - * @uc_fw: uC firmware - * @p: the &drm_printer - * - * Pretty printer for uC firmware. 
- */ -void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) -{ - drm_printf(p, "%s firmware: %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); - drm_printf(p, "\tstatus: fetch %s, load %s\n", - intel_uc_fw_status_repr(uc_fw->fetch_status), - intel_uc_fw_status_repr(uc_fw->load_status)); - drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, - uc_fw->major_ver_found, uc_fw->minor_ver_found); - drm_printf(p, "\theader: offset %u, size %u\n", - uc_fw->header_offset, uc_fw->header_size); - drm_printf(p, "\tuCode: offset %u, size %u\n", - uc_fw->ucode_offset, uc_fw->ucode_size); - drm_printf(p, "\tRSA: offset %u, size %u\n", - uc_fw->rsa_offset, uc_fw->rsa_size); -} diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h deleted file mode 100644 index ff98f8661d72..000000000000 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright © 2014-2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef _INTEL_UC_FW_H_ -#define _INTEL_UC_FW_H_ - -struct drm_printer; -struct drm_i915_private; - -/* Home of GuC, HuC and DMC firmwares */ -#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" - -enum intel_uc_fw_status { - INTEL_UC_FIRMWARE_FAIL = -1, - INTEL_UC_FIRMWARE_NONE = 0, - INTEL_UC_FIRMWARE_PENDING, - INTEL_UC_FIRMWARE_SUCCESS -}; - -enum intel_uc_fw_type { - INTEL_UC_FW_TYPE_GUC, - INTEL_UC_FW_TYPE_HUC -}; - -/* - * This structure encapsulates all the data needed during the process - * of fetching, caching, and loading the firmware image into the uC. - */ -struct intel_uc_fw { - const char *path; - size_t size; - struct drm_i915_gem_object *obj; - enum intel_uc_fw_status fetch_status; - enum intel_uc_fw_status load_status; - - /* - * The firmware build process will generate a version header file with major and - * minor version defined. The versions are built into CSS header of firmware. - * i915 kernel driver set the minimal firmware version required per platform. 
- */ - u16 major_ver_wanted; - u16 minor_ver_wanted; - u16 major_ver_found; - u16 minor_ver_found; - - enum intel_uc_fw_type type; - u32 header_size; - u32 header_offset; - u32 rsa_size; - u32 rsa_offset; - u32 ucode_size; - u32 ucode_offset; -}; - -static inline -const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) -{ - switch (status) { - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; - case INTEL_UC_FIRMWARE_NONE: - return "NONE"; - case INTEL_UC_FIRMWARE_PENDING: - return "PENDING"; - case INTEL_UC_FIRMWARE_SUCCESS: - return "SUCCESS"; - } - return "<invalid>"; -} - -static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) -{ - switch (type) { - case INTEL_UC_FW_TYPE_GUC: - return "GuC"; - case INTEL_UC_FW_TYPE_HUC: - return "HuC"; - } - return "uC"; -} - -static inline -void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, - enum intel_uc_fw_type type) -{ - uc_fw->path = NULL; - uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; - uc_fw->load_status = INTEL_UC_FIRMWARE_NONE; - uc_fw->type = type; -} - -static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw) -{ - return uc_fw->path != NULL; -} - -static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw) -{ - return uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS; -} - -static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw) -{ - if (intel_uc_fw_is_loaded(uc_fw)) - uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; -} - -/** - * intel_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded. - * @uc_fw: uC firmware. - * - * Get the size of the firmware and header that will be uploaded to WOPCM. - * - * Return: Upload firmware size, or zero on firmware fetch failure. - */ -static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) -{ - if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - return uc_fw->header_size + uc_fw->ucode_size; -} - -void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, - struct intel_uc_fw *uc_fw); -void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); -int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, - int (*xfer)(struct intel_uc_fw *uc_fw)); -int intel_uc_fw_init(struct intel_uc_fw *uc_fw); -void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); -u32 intel_uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw); -void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); - -#endif diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index da33aa672c3d..5f2cf6f43b8b 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -25,8 +25,8 @@ #include <asm/iosf_mbi.h> #include "i915_drv.h" +#include "i915_trace.h" #include "i915_vgpu.h" -#include "intel_drv.h" #include "intel_pm.h" #define FORCEWAKE_ACK_TIMEOUT_MS 50 @@ -34,6 +34,32 @@ #define __raw_posting_read(...) 
((void)__raw_uncore_read32(__VA_ARGS__)) +void +intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug) +{ + spin_lock_init(&mmio_debug->lock); + mmio_debug->unclaimed_mmio_check = 1; +} + +static void mmio_debug_suspend(struct intel_uncore_mmio_debug *mmio_debug) +{ + lockdep_assert_held(&mmio_debug->lock); + + /* Save and disable mmio debugging for the user bypass */ + if (!mmio_debug->suspend_count++) { + mmio_debug->saved_mmio_check = mmio_debug->unclaimed_mmio_check; + mmio_debug->unclaimed_mmio_check = 0; + } +} + +static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug) +{ + lockdep_assert_held(&mmio_debug->lock); + + if (!--mmio_debug->suspend_count) + mmio_debug->unclaimed_mmio_check = mmio_debug->saved_mmio_check; +} + static const char * const forcewake_domain_names[] = { "render", "blitter", @@ -78,6 +104,8 @@ fw_domain_reset(const struct intel_uncore_forcewake_domain *d) static inline void fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) { + GEM_BUG_ON(d->uncore->fw_domains_timer & d->mask); + d->uncore->fw_domains_timer |= d->mask; d->wake_count++; hrtimer_start_range_ns(&d->timer, NSEC_PER_MSEC, @@ -322,7 +350,7 @@ static void __gen6_gt_wait_for_fifo(struct intel_uncore *uncore) /* On VLV, FIFO will be shared by both SW and HW. * So, we need to read the FREE_ENTRIES everytime */ - if (IS_VALLEYVIEW(uncore_to_i915(uncore))) + if (IS_VALLEYVIEW(uncore->i915)) n = fifo_free_entries(uncore); else n = uncore->fifo_count; @@ -331,7 +359,8 @@ static void __gen6_gt_wait_for_fifo(struct intel_uncore *uncore) if (wait_for_atomic((n = fifo_free_entries(uncore)) > GT_FIFO_NUM_RESERVED_ENTRIES, GT_FIFO_TIMEOUT_MS)) { - DRM_DEBUG("GT_FIFO timeout, entries: %u\n", n); + drm_dbg(&uncore->i915->drm, + "GT_FIFO timeout, entries: %u\n", n); return; } } @@ -344,7 +373,7 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) { struct intel_uncore_forcewake_domain *domain = container_of(timer, struct intel_uncore_forcewake_domain, timer); - struct intel_uncore *uncore = forcewake_domain_to_uncore(domain); + struct intel_uncore *uncore = domain->uncore; unsigned long irqflags; assert_rpm_device_not_suspended(uncore->rpm); @@ -353,9 +382,10 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) return HRTIMER_RESTART; spin_lock_irqsave(&uncore->lock, irqflags); - if (WARN_ON(domain->wake_count == 0)) - domain->wake_count++; + uncore->fw_domains_timer &= ~domain->mask; + + GEM_BUG_ON(!domain->wake_count); if (--domain->wake_count == 0) uncore->funcs.force_wake_put(uncore, domain->mask); @@ -403,7 +433,7 @@ intel_uncore_forcewake_reset(struct intel_uncore *uncore) break; if (--retry_count == 0) { - DRM_ERROR("Timed out waiting for forcewake timers to finish\n"); + drm_err(&uncore->i915->drm, "Timed out waiting for forcewake timers to finish\n"); break; } @@ -461,7 +491,7 @@ gen6_check_for_fifo_debug(struct intel_uncore *uncore) fifodbg = __raw_uncore_read32(uncore, GTFIFODBG); if (unlikely(fifodbg)) { - DRM_DEBUG_DRIVER("GTFIFODBG = 0x08%x\n", fifodbg); + drm_dbg(&uncore->i915->drm, "GTFIFODBG = 0x08%x\n", fifodbg); __raw_uncore_write32(uncore, GTFIFODBG, fifodbg); } @@ -473,6 +503,11 @@ check_for_unclaimed_mmio(struct intel_uncore *uncore) { bool ret = false; + lockdep_assert_held(&uncore->debug->lock); + + if (uncore->debug->suspend_count) + return false; + if (intel_uncore_has_fpga_dbg_unclaimed(uncore)) ret |= fpga_check_for_unclaimed_mmio(uncore); @@ -485,15 +520,13 @@ check_for_unclaimed_mmio(struct intel_uncore *uncore) return ret; } 
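
The mmio_debug_suspend()/mmio_debug_resume() helpers added above keep a nesting counter in the shared intel_uncore_mmio_debug state: only the first suspend saves and disables the unclaimed-mmio check, and only the final resume restores it. Below is a minimal standalone sketch of that save/restore pattern, for illustration only; it uses a pthread mutex in place of the kernel spinlock and invented names (mmio_debug_sketch, debug_suspend, debug_resume) that are not part of this patch.

/*
 * Standalone illustration (not kernel code) of the nesting counter used by
 * mmio_debug_suspend()/mmio_debug_resume(): the first suspend saves the
 * current unclaimed-mmio check setting and clears it, and only the last
 * resume restores the saved value.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

struct mmio_debug_sketch {
        pthread_mutex_t lock;           /* stand-in for the kernel spinlock */
        int unclaimed_mmio_check;
        int saved_mmio_check;
        unsigned int suspend_count;
};

static void debug_suspend(struct mmio_debug_sketch *d)
{
        pthread_mutex_lock(&d->lock);
        if (!d->suspend_count++) {      /* first user: save and disable */
                d->saved_mmio_check = d->unclaimed_mmio_check;
                d->unclaimed_mmio_check = 0;
        }
        pthread_mutex_unlock(&d->lock);
}

static void debug_resume(struct mmio_debug_sketch *d)
{
        pthread_mutex_lock(&d->lock);
        assert(d->suspend_count);
        if (!--d->suspend_count)        /* last user: restore */
                d->unclaimed_mmio_check = d->saved_mmio_check;
        pthread_mutex_unlock(&d->lock);
}

int main(void)
{
        struct mmio_debug_sketch d = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .unclaimed_mmio_check = 1,
        };

        debug_suspend(&d);              /* outer user bypass */
        debug_suspend(&d);              /* nested bypass: no further save */
        debug_resume(&d);               /* still suspended */
        debug_resume(&d);               /* last resume restores the check */
        printf("check restored to %d\n", d.unclaimed_mmio_check);
        return 0;
}

The counter is what makes nested user bypasses safe: an inner suspend/resume pair leaves the check disabled until the outermost user has finished.
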
-static void __intel_uncore_early_sanitize(struct intel_uncore *uncore, - unsigned int restore_forcewake) +static void forcewake_early_sanitize(struct intel_uncore *uncore, + unsigned int restore_forcewake) { - /* clear out unclaimed reg detection bit */ - if (check_for_unclaimed_mmio(uncore)) - DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); + GEM_BUG_ON(!intel_uncore_has_forcewake(uncore)); /* WaDisableShadowRegForCpd:chv */ - if (IS_CHERRYVIEW(uncore_to_i915(uncore))) { + if (IS_CHERRYVIEW(uncore->i915)) { __raw_uncore_write32(uncore, GTFIFOCTL, __raw_uncore_read32(uncore, GTFIFOCTL) | GT_FIFO_CTL_BLOCK_ALL_POLICY_STALL | @@ -515,6 +548,9 @@ static void __intel_uncore_early_sanitize(struct intel_uncore *uncore, void intel_uncore_suspend(struct intel_uncore *uncore) { + if (!intel_uncore_has_forcewake(uncore)) + return; + iosf_mbi_punit_acquire(); iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( &uncore->pmic_bus_access_nb); @@ -526,21 +562,24 @@ void intel_uncore_resume_early(struct intel_uncore *uncore) { unsigned int restore_forcewake; + if (intel_uncore_unclaimed_mmio(uncore)) + drm_dbg(&uncore->i915->drm, "unclaimed mmio detected on resume, clearing\n"); + + if (!intel_uncore_has_forcewake(uncore)) + return; + restore_forcewake = fetch_and_zero(&uncore->fw_domains_saved); - __intel_uncore_early_sanitize(uncore, restore_forcewake); + forcewake_early_sanitize(uncore, restore_forcewake); iosf_mbi_register_pmic_bus_access_notifier(&uncore->pmic_bus_access_nb); } void intel_uncore_runtime_resume(struct intel_uncore *uncore) { - iosf_mbi_register_pmic_bus_access_notifier(&uncore->pmic_bus_access_nb); -} + if (!intel_uncore_has_forcewake(uncore)) + return; -void intel_uncore_sanitize(struct drm_i915_private *dev_priv) -{ - /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_sanitize_gt_powersave(dev_priv); + iosf_mbi_register_pmic_bus_access_notifier(&uncore->pmic_bus_access_nb); } static void __intel_uncore_forcewake_get(struct intel_uncore *uncore, @@ -601,17 +640,11 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore, void intel_uncore_forcewake_user_get(struct intel_uncore *uncore) { spin_lock_irq(&uncore->lock); - if (!uncore->user_forcewake.count++) { + if (!uncore->user_forcewake_count++) { intel_uncore_forcewake_get__locked(uncore, FORCEWAKE_ALL); - - /* Save and disable mmio debugging for the user bypass */ - uncore->user_forcewake.saved_mmio_check = - uncore->unclaimed_mmio_check; - uncore->user_forcewake.saved_mmio_debug = - i915_modparams.mmio_debug; - - uncore->unclaimed_mmio_check = 0; - i915_modparams.mmio_debug = 0; + spin_lock(&uncore->debug->lock); + mmio_debug_suspend(uncore->debug); + spin_unlock(&uncore->debug->lock); } spin_unlock_irq(&uncore->lock); } @@ -626,15 +659,14 @@ void intel_uncore_forcewake_user_get(struct intel_uncore *uncore) void intel_uncore_forcewake_user_put(struct intel_uncore *uncore) { spin_lock_irq(&uncore->lock); - if (!--uncore->user_forcewake.count) { - if (intel_uncore_unclaimed_mmio(uncore)) - dev_info(uncore_to_i915(uncore)->drm.dev, - "Invalid mmio detected during user access\n"); + if (!--uncore->user_forcewake_count) { + spin_lock(&uncore->debug->lock); + mmio_debug_resume(uncore->debug); - uncore->unclaimed_mmio_check = - uncore->user_forcewake.saved_mmio_check; - i915_modparams.mmio_debug = - uncore->user_forcewake.saved_mmio_debug; + if (check_for_unclaimed_mmio(uncore)) + dev_info(uncore->i915->drm.dev, + "Invalid mmio detected during user access\n"); + 
spin_unlock(&uncore->debug->lock); intel_uncore_forcewake_put__locked(uncore, FORCEWAKE_ALL); } @@ -669,8 +701,7 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, fw_domains &= uncore->fw_domains; for_each_fw_domain_masked(domain, fw_domains, uncore, tmp) { - if (WARN_ON(domain->wake_count == 0)) - continue; + GEM_BUG_ON(!domain->wake_count); if (--domain->wake_count) { domain->active = true; @@ -734,23 +765,47 @@ void assert_forcewakes_inactive(struct intel_uncore *uncore) void assert_forcewakes_active(struct intel_uncore *uncore, enum forcewake_domains fw_domains) { + struct intel_uncore_forcewake_domain *domain; + unsigned int tmp; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) + return; + if (!uncore->funcs.force_wake_get) return; + spin_lock_irq(&uncore->lock); + assert_rpm_wakelock_held(uncore->rpm); fw_domains &= uncore->fw_domains; WARN(fw_domains & ~uncore->fw_domains_active, "Expected %08x fw_domains to be active, but %08x are off\n", fw_domains, fw_domains & ~uncore->fw_domains_active); + + /* + * Check that the caller has an explicit wakeref and we don't mistake + * it for the auto wakeref. + */ + for_each_fw_domain_masked(domain, fw_domains, uncore, tmp) { + unsigned int actual = READ_ONCE(domain->wake_count); + unsigned int expect = 1; + + if (uncore->fw_domains_timer & domain->mask) + expect++; /* pending automatic release */ + + if (WARN(actual < expect, + "Expected domain %d to be held awake by caller, count=%d\n", + domain->id, actual)) + break; + } + + spin_unlock_irq(&uncore->lock); } /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000) -#define GEN11_NEEDS_FORCE_WAKE(reg) \ - ((reg) < 0x40000 || ((reg) >= 0x1c0000 && (reg) < 0x1dc000)) - #define __gen6_reg_read_fw_domains(uncore, offset) \ ({ \ enum forcewake_domains __fwd; \ @@ -846,12 +901,10 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { }) #define __gen11_fwtable_reg_read_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - if (GEN11_NEEDS_FORCE_WAKE((offset))) \ - __fwd = find_fw_domain(uncore, offset); \ - __fwd; \ -}) + find_fw_domain(uncore, offset) + +#define __gen12_fwtable_reg_read_fw_domains(uncore, offset) \ + find_fw_domain(uncore, offset) /* *Must* be sorted by offset! See intel_shadow_table_check(). 
*/ static const i915_reg_t gen8_shadowed_regs[] = { @@ -878,6 +931,20 @@ static const i915_reg_t gen11_shadowed_regs[] = { /* TODO: Other registers are not yet used */ }; +static const i915_reg_t gen12_shadowed_regs[] = { + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + /* TODO: Other registers are not yet used */ +}; + static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) { u32 offset = i915_mmio_reg_offset(*reg); @@ -900,6 +967,13 @@ static bool is_gen##x##_shadowed(u32 offset) \ __is_genX_shadowed(8) __is_genX_shadowed(11) +__is_genX_shadowed(12) + +static enum forcewake_domains +gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) +{ + return FORCEWAKE_RENDER; +} #define __gen8_reg_write_fw_domains(uncore, offset) \ ({ \ @@ -942,8 +1016,18 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { #define __gen11_fwtable_reg_write_fw_domains(uncore, offset) \ ({ \ enum forcewake_domains __fwd = 0; \ - if (GEN11_NEEDS_FORCE_WAKE((offset)) && !is_gen11_shadowed(offset)) \ - __fwd = find_fw_domain(uncore, offset); \ + const u32 __offset = (offset); \ + if (!is_gen11_shadowed(__offset)) \ + __fwd = find_fw_domain(uncore, __offset); \ + __fwd; \ +}) + +#define __gen12_fwtable_reg_write_fw_domains(uncore, offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + const u32 __offset = (offset); \ + if (!is_gen12_shadowed(__offset)) \ + __fwd = find_fw_domain(uncore, __offset); \ __fwd; \ }) @@ -1002,9 +1086,51 @@ static const struct intel_forcewake_range __gen11_fw_ranges[] = { GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL), GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xb480, 0xdeff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xdf00, 0xe8ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xe900, 0x16dff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x16e00, 0x19fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x1a000, 0x243ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x40000, 0x1bffff, 0), + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), + GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), + GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), + GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), + GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1) +}; + +/* *Must* be sorted by offset ranges! See intel_fw_table_check(). 
*/ +static const struct intel_forcewake_range __gen12_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */ + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8160, 0x82ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8500, 0x8bff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8c00, 0x8cff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8d00, 0x93ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL), + GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), GEN_FW_RANGE(0xb480, 0xdfff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0xe000, 0xe8ff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0xe900, 0x243ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xe900, 0x147ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x14800, 0x148ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x14900, 0x19fff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1a000, 0x1a7ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x1a800, 0x1afff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1b000, 0x1bfff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x1c000, 0x243ff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER), GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x40000, 0x1bffff, 0), @@ -1049,7 +1175,16 @@ unclaimed_reg_debug(struct intel_uncore *uncore, if (likely(!i915_modparams.mmio_debug)) return; + /* interrupts are disabled and re-enabled around uncore->lock usage */ + lockdep_assert_held(&uncore->lock); + + if (before) + spin_lock(&uncore->debug->lock); + __unclaimed_reg_debug(uncore, reg, read, before); + + if (!before) + spin_unlock(&uncore->debug->lock); } #define GEN2_READ_HEADER(x) \ @@ -1123,8 +1258,7 @@ static noinline void ___force_wake_auto(struct intel_uncore *uncore, static inline void __force_wake_auto(struct intel_uncore *uncore, enum forcewake_domains fw_domains) { - if (WARN_ON(!fw_domains)) - return; + GEM_BUG_ON(!fw_domains); /* Turn on all requested but inactive supported forcewake domains. 
*/ fw_domains &= uncore->fw_domains; @@ -1145,26 +1279,24 @@ func##_read##x(struct intel_uncore *uncore, i915_reg_t reg, bool trace) { \ val = __raw_uncore_read##x(uncore, reg); \ GEN6_READ_FOOTER; \ } -#define __gen6_read(x) __gen_read(gen6, x) -#define __fwtable_read(x) __gen_read(fwtable, x) -#define __gen11_fwtable_read(x) __gen_read(gen11_fwtable, x) - -__gen11_fwtable_read(8) -__gen11_fwtable_read(16) -__gen11_fwtable_read(32) -__gen11_fwtable_read(64) -__fwtable_read(8) -__fwtable_read(16) -__fwtable_read(32) -__fwtable_read(64) -__gen6_read(8) -__gen6_read(16) -__gen6_read(32) -__gen6_read(64) - -#undef __gen11_fwtable_read -#undef __fwtable_read -#undef __gen6_read + +#define __gen_reg_read_funcs(func) \ +static enum forcewake_domains \ +func##_reg_read_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) { \ + return __##func##_reg_read_fw_domains(uncore, i915_mmio_reg_offset(reg)); \ +} \ +\ +__gen_read(func, 8) \ +__gen_read(func, 16) \ +__gen_read(func, 32) \ +__gen_read(func, 64) + +__gen_reg_read_funcs(gen12_fwtable); +__gen_reg_read_funcs(gen11_fwtable); +__gen_reg_read_funcs(fwtable); +__gen_reg_read_funcs(gen6); + +#undef __gen_reg_read_funcs #undef GEN6_READ_FOOTER #undef GEN6_READ_HEADER @@ -1225,6 +1357,9 @@ gen6_write##x(struct intel_uncore *uncore, i915_reg_t reg, u##x val, bool trace) __raw_uncore_write##x(uncore, reg, val); \ GEN6_WRITE_FOOTER; \ } +__gen6_write(8) +__gen6_write(16) +__gen6_write(32) #define __gen_write(func, x) \ static void \ @@ -1237,38 +1372,34 @@ func##_write##x(struct intel_uncore *uncore, i915_reg_t reg, u##x val, bool trac __raw_uncore_write##x(uncore, reg, val); \ GEN6_WRITE_FOOTER; \ } -#define __gen8_write(x) __gen_write(gen8, x) -#define __fwtable_write(x) __gen_write(fwtable, x) -#define __gen11_fwtable_write(x) __gen_write(gen11_fwtable, x) - -__gen11_fwtable_write(8) -__gen11_fwtable_write(16) -__gen11_fwtable_write(32) -__fwtable_write(8) -__fwtable_write(16) -__fwtable_write(32) -__gen8_write(8) -__gen8_write(16) -__gen8_write(32) -__gen6_write(8) -__gen6_write(16) -__gen6_write(32) -#undef __gen11_fwtable_write -#undef __fwtable_write -#undef __gen8_write -#undef __gen6_write +#define __gen_reg_write_funcs(func) \ +static enum forcewake_domains \ +func##_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) { \ + return __##func##_reg_write_fw_domains(uncore, i915_mmio_reg_offset(reg)); \ +} \ +\ +__gen_write(func, 8) \ +__gen_write(func, 16) \ +__gen_write(func, 32) + +__gen_reg_write_funcs(gen12_fwtable); +__gen_reg_write_funcs(gen11_fwtable); +__gen_reg_write_funcs(fwtable); +__gen_reg_write_funcs(gen8); + +#undef __gen_reg_write_funcs #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define ASSIGN_WRITE_MMIO_VFUNCS(uncore, x) \ +#define ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, x) \ do { \ (uncore)->funcs.mmio_writeb = x##_write8; \ (uncore)->funcs.mmio_writew = x##_write16; \ (uncore)->funcs.mmio_writel = x##_write32; \ } while (0) -#define ASSIGN_READ_MMIO_VFUNCS(uncore, x) \ +#define ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, x) \ do { \ (uncore)->funcs.mmio_readb = x##_read8; \ (uncore)->funcs.mmio_readw = x##_read16; \ @@ -1276,24 +1407,39 @@ do { \ (uncore)->funcs.mmio_readq = x##_read64; \ } while (0) +#define ASSIGN_WRITE_MMIO_VFUNCS(uncore, x) \ +do { \ + ASSIGN_RAW_WRITE_MMIO_VFUNCS((uncore), x); \ + (uncore)->funcs.write_fw_domains = x##_reg_write_fw_domains; \ +} while (0) + +#define ASSIGN_READ_MMIO_VFUNCS(uncore, x) \ +do { \ + ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, x); \ + (uncore)->funcs.read_fw_domains = 
x##_reg_read_fw_domains; \ +} while (0) -static void fw_domain_init(struct intel_uncore *uncore, - enum forcewake_domain_id domain_id, - i915_reg_t reg_set, - i915_reg_t reg_ack) +static int __fw_domain_init(struct intel_uncore *uncore, + enum forcewake_domain_id domain_id, + i915_reg_t reg_set, + i915_reg_t reg_ack) { struct intel_uncore_forcewake_domain *d; - if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT)) - return; + GEM_BUG_ON(domain_id >= FW_DOMAIN_ID_COUNT); + GEM_BUG_ON(uncore->fw_domain[domain_id]); - d = &uncore->fw_domain[domain_id]; + if (i915_inject_probe_failure(uncore->i915)) + return -ENOMEM; - WARN_ON(d->wake_count); + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; WARN_ON(!i915_mmio_reg_valid(reg_set)); WARN_ON(!i915_mmio_reg_valid(reg_ack)); + d->uncore = uncore; d->wake_count = 0; d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set); d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack); @@ -1310,7 +1456,6 @@ static void fw_domain_init(struct intel_uncore *uncore, BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX0)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX1)); - d->mask = BIT(domain_id); hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -1319,6 +1464,10 @@ static void fw_domain_init(struct intel_uncore *uncore, uncore->fw_domains |= BIT(domain_id); fw_domain_reset(d); + + uncore->fw_domain[domain_id] = d; + + return 0; } static void fw_domain_fini(struct intel_uncore *uncore, @@ -1326,30 +1475,41 @@ static void fw_domain_fini(struct intel_uncore *uncore, { struct intel_uncore_forcewake_domain *d; - if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT)) - return; + GEM_BUG_ON(domain_id >= FW_DOMAIN_ID_COUNT); - d = &uncore->fw_domain[domain_id]; + d = fetch_and_zero(&uncore->fw_domain[domain_id]); + if (!d) + return; + uncore->fw_domains &= ~BIT(domain_id); WARN_ON(d->wake_count); WARN_ON(hrtimer_cancel(&d->timer)); - memset(d, 0, sizeof(*d)); + kfree(d); +} - uncore->fw_domains &= ~BIT(domain_id); +static void intel_uncore_fw_domains_fini(struct intel_uncore *uncore) +{ + struct intel_uncore_forcewake_domain *d; + int tmp; + + for_each_fw_domain(d, uncore, tmp) + fw_domain_fini(uncore, d->id); } -static void intel_uncore_fw_domains_init(struct intel_uncore *uncore) +static int intel_uncore_fw_domains_init(struct intel_uncore *uncore) { - struct drm_i915_private *i915 = uncore_to_i915(uncore); + struct drm_i915_private *i915 = uncore->i915; + int ret = 0; - if (!intel_uncore_has_forcewake(uncore)) - return; + GEM_BUG_ON(!intel_uncore_has_forcewake(uncore)); + +#define fw_domain_init(uncore__, id__, set__, ack__) \ + (ret ?: (ret = __fw_domain_init((uncore__), (id__), (set__), (ack__)))) if (INTEL_GEN(i915) >= 11) { int i; - uncore->funcs.force_wake_get = - fw_domains_get_with_fallback; + uncore->funcs.force_wake_get = fw_domains_get_with_fallback; uncore->funcs.force_wake_put = fw_domains_put; fw_domain_init(uncore, FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER_GEN9, @@ -1357,6 +1517,7 @@ static void intel_uncore_fw_domains_init(struct intel_uncore *uncore) fw_domain_init(uncore, FW_DOMAIN_ID_BLITTER, FORCEWAKE_BLITTER_GEN9, FORCEWAKE_ACK_BLITTER_GEN9); + for (i = 0; i < I915_MAX_VCS; i++) { if (!HAS_ENGINE(i915, _VCS(i))) continue; @@ -1374,8 +1535,7 @@ static void intel_uncore_fw_domains_init(struct intel_uncore *uncore) FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i)); } } else if (IS_GEN_RANGE(i915, 9, 10)) { - uncore->funcs.force_wake_get = - fw_domains_get_with_fallback; + uncore->funcs.force_wake_get = 
fw_domains_get_with_fallback; uncore->funcs.force_wake_put = fw_domains_put; fw_domain_init(uncore, FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER_GEN9, @@ -1424,8 +1584,10 @@ static void intel_uncore_fw_domains_init(struct intel_uncore *uncore) __raw_uncore_write32(uncore, FORCEWAKE, 0); __raw_posting_read(uncore, ECOBUS); - fw_domain_init(uncore, FW_DOMAIN_ID_RENDER, - FORCEWAKE_MT, FORCEWAKE_MT_ACK); + ret = __fw_domain_init(uncore, FW_DOMAIN_ID_RENDER, + FORCEWAKE_MT, FORCEWAKE_MT_ACK); + if (ret) + goto out; spin_lock_irq(&uncore->lock); fw_domains_get_with_thread_status(uncore, FORCEWAKE_RENDER); @@ -1434,8 +1596,9 @@ static void intel_uncore_fw_domains_init(struct intel_uncore *uncore) spin_unlock_irq(&uncore->lock); if (!(ecobus & FORCEWAKE_MT_ENABLE)) { - DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n"); - DRM_INFO("when using vblank-synced partial screen updates.\n"); + drm_info(&i915->drm, "No MT forcewake available on Ivybridge, this can result in issues\n"); + drm_info(&i915->drm, "when using vblank-synced partial screen updates.\n"); + fw_domain_fini(uncore, FW_DOMAIN_ID_RENDER); fw_domain_init(uncore, FW_DOMAIN_ID_RENDER, FORCEWAKE, FORCEWAKE_ACK); } @@ -1447,8 +1610,16 @@ static void intel_uncore_fw_domains_init(struct intel_uncore *uncore) FORCEWAKE, FORCEWAKE_ACK); } +#undef fw_domain_init + /* All future platforms are expected to require complex power gating */ - WARN_ON(uncore->fw_domains == 0); + WARN_ON(!ret && uncore->fw_domains == 0); + +out: + if (ret) + intel_uncore_fw_domains_fini(uncore); + + return ret; } #define ASSIGN_FW_DOMAINS_TABLE(uncore, d) \ @@ -1493,7 +1664,7 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb, static int uncore_mmio_setup(struct intel_uncore *uncore) { - struct drm_i915_private *i915 = uncore_to_i915(uncore); + struct drm_i915_private *i915 = uncore->i915; struct pci_dev *pdev = i915->drm.pdev; int mmio_bar; int mmio_size; @@ -1513,8 +1684,7 @@ static int uncore_mmio_setup(struct intel_uncore *uncore) mmio_size = 2 * 1024 * 1024; uncore->regs = pci_iomap(pdev, mmio_bar, mmio_size); if (uncore->regs == NULL) { - DRM_ERROR("failed to map registers\n"); - + drm_err(&i915->drm, "failed to map registers\n"); return -EIO; } @@ -1523,49 +1693,46 @@ static int uncore_mmio_setup(struct intel_uncore *uncore) static void uncore_mmio_cleanup(struct intel_uncore *uncore) { - struct drm_i915_private *i915 = uncore_to_i915(uncore); - struct pci_dev *pdev = i915->drm.pdev; + struct pci_dev *pdev = uncore->i915->drm.pdev; pci_iounmap(pdev, uncore->regs); } -void intel_uncore_init_early(struct intel_uncore *uncore) +void intel_uncore_init_early(struct intel_uncore *uncore, + struct drm_i915_private *i915) { spin_lock_init(&uncore->lock); + uncore->i915 = i915; + uncore->rpm = &i915->runtime_pm; + uncore->debug = &i915->mmio_debug; } -int intel_uncore_init_mmio(struct intel_uncore *uncore) +static void uncore_raw_init(struct intel_uncore *uncore) { - struct drm_i915_private *i915 = uncore_to_i915(uncore); - int ret; - - ret = uncore_mmio_setup(uncore); - if (ret) - return ret; + GEM_BUG_ON(intel_uncore_has_forcewake(uncore)); - i915_check_vgpu(i915); - - if (INTEL_GEN(i915) > 5 && !intel_vgpu_active(i915)) - uncore->flags |= UNCORE_HAS_FORCEWAKE; + if (IS_GEN(uncore->i915, 5)) { + ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, gen5); + ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, gen5); + } else { + ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, gen2); + ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, gen2); + } +} - intel_uncore_fw_domains_init(uncore); 
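
uncore_raw_init() above, together with uncore_forcewake_init() and the reworked intel_uncore_init_mmio() later in this hunk, split accessor setup into two paths: raw gen2/gen5 accessors when the device has no forcewake, and the table-driven accessors plus the new read_fw_domains/write_fw_domains vfuncs otherwise. The following is a cut-down sketch of that dispatch, with invented types and names (fake_uncore and friends) standing in for the real structures; it is not code from this patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_uncore;

struct fake_uncore_funcs {
        uint32_t (*mmio_readl)(struct fake_uncore *u, uint32_t reg);
        unsigned int (*read_fw_domains)(struct fake_uncore *u, uint32_t reg);
};

struct fake_uncore {
        bool has_forcewake;
        struct fake_uncore_funcs funcs;
        uint32_t regs[64];              /* pretend MMIO space */
};

static uint32_t raw_readl(struct fake_uncore *u, uint32_t reg)
{
        return u->regs[reg % 64];       /* no forcewake bookkeeping at all */
}

static uint32_t fwtable_readl(struct fake_uncore *u, uint32_t reg)
{
        /* the real accessor wakes the domains from the range table first */
        return u->regs[reg % 64];
}

static unsigned int fwtable_read_fw_domains(struct fake_uncore *u, uint32_t reg)
{
        (void)u;
        return reg < 0x40000 ? 1u : 0u; /* toy stand-in for the range tables */
}

static void fake_init_mmio(struct fake_uncore *u)
{
        if (!u->has_forcewake) {
                /* uncore_raw_init() analogue: raw accessors, no fw vfuncs */
                u->funcs.mmio_readl = raw_readl;
        } else {
                /* uncore_forcewake_init() analogue: table-driven accessors */
                u->funcs.mmio_readl = fwtable_readl;
                u->funcs.read_fw_domains = fwtable_read_fw_domains;
        }
}

static unsigned int fake_forcewake_for_reg(struct fake_uncore *u, uint32_t reg)
{
        if (!u->has_forcewake)
                return 0;               /* no-forcewake devices need none */
        return u->funcs.read_fw_domains(u, reg);
}

int main(void)
{
        struct fake_uncore u = { .has_forcewake = true };

        fake_init_mmio(&u);
        printf("reg 0x2030 -> fw domains 0x%x, value 0x%x\n",
               fake_forcewake_for_reg(&u, 0x2030),
               u.funcs.mmio_readl(&u, 0x2030));
        return 0;
}

Keeping the fw-domain lookup behind per-platform vfuncs is what lets a later hunk drop the if-ladder in intel_uncore_forcewake_for_read()/write() and have intel_uncore_forcewake_for_reg() simply call uncore->funcs.read_fw_domains()/write_fw_domains().
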
- __intel_uncore_early_sanitize(uncore, 0); +static int uncore_forcewake_init(struct intel_uncore *uncore) +{ + struct drm_i915_private *i915 = uncore->i915; + int ret; - uncore->unclaimed_mmio_check = 1; - uncore->pmic_bus_access_nb.notifier_call = - i915_pmic_bus_access_notifier; + GEM_BUG_ON(!intel_uncore_has_forcewake(uncore)); - uncore->rpm = &i915->runtime_pm; + ret = intel_uncore_fw_domains_init(uncore); + if (ret) + return ret; + forcewake_early_sanitize(uncore, 0); - if (!intel_uncore_has_forcewake(uncore)) { - if (IS_GEN(i915, 5)) { - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen5); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen5); - } else { - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen2); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen2); - } - } else if (IS_GEN_RANGE(i915, 6, 7)) { + if (IS_GEN_RANGE(i915, 6, 7)) { ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); if (IS_VALLEYVIEW(i915)) { @@ -1579,7 +1746,6 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) ASSIGN_FW_DOMAINS_TABLE(uncore, __chv_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else { ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen8); ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); @@ -1588,12 +1754,48 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else { + } else if (IS_GEN(i915, 11)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + } else { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable); } + uncore->pmic_bus_access_nb.notifier_call = i915_pmic_bus_access_notifier; + iosf_mbi_register_pmic_bus_access_notifier(&uncore->pmic_bus_access_nb); + + return 0; +} + +int intel_uncore_init_mmio(struct intel_uncore *uncore) +{ + struct drm_i915_private *i915 = uncore->i915; + int ret; + + ret = uncore_mmio_setup(uncore); + if (ret) + return ret; + + if (INTEL_GEN(i915) > 5 && !intel_vgpu_active(i915)) + uncore->flags |= UNCORE_HAS_FORCEWAKE; + + if (!intel_uncore_has_forcewake(uncore)) { + uncore_raw_init(uncore); + } else { + ret = uncore_forcewake_init(uncore); + if (ret) + goto out_mmio_cleanup; + } + + /* make sure fw funcs are set if and only if we have fw*/ + GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != !!uncore->funcs.force_wake_get); + GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != !!uncore->funcs.force_wake_put); + GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != !!uncore->funcs.read_fw_domains); + GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != !!uncore->funcs.write_fw_domains); + if (HAS_FPGA_DBG_UNCLAIMED(i915)) uncore->flags |= UNCORE_HAS_FPGA_DBG_UNCLAIMED; @@ -1603,9 +1805,16 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) if (IS_GEN_RANGE(i915, 6, 7)) uncore->flags |= UNCORE_HAS_FIFO; - iosf_mbi_register_pmic_bus_access_notifier(&uncore->pmic_bus_access_nb); + /* clear out unclaimed reg detection bit */ + if (intel_uncore_unclaimed_mmio(uncore)) + drm_dbg(&i915->drm, "unclaimed mmio detected on uncore init, clearing\n"); return 0; + +out_mmio_cleanup: + uncore_mmio_cleanup(uncore); + + return ret; } /* @@ -1615,45 +1824,46 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) */ void intel_uncore_prune_mmio_domains(struct intel_uncore *uncore) { - struct drm_i915_private *i915 = uncore_to_i915(uncore); + struct drm_i915_private 
*i915 = uncore->i915; + enum forcewake_domains fw_domains = uncore->fw_domains; + enum forcewake_domain_id domain_id; + int i; - if (INTEL_GEN(i915) >= 11) { - enum forcewake_domains fw_domains = uncore->fw_domains; - enum forcewake_domain_id domain_id; - int i; + if (!intel_uncore_has_forcewake(uncore) || INTEL_GEN(i915) < 11) + return; - for (i = 0; i < I915_MAX_VCS; i++) { - domain_id = FW_DOMAIN_ID_MEDIA_VDBOX0 + i; + for (i = 0; i < I915_MAX_VCS; i++) { + domain_id = FW_DOMAIN_ID_MEDIA_VDBOX0 + i; - if (HAS_ENGINE(i915, _VCS(i))) - continue; + if (HAS_ENGINE(i915, _VCS(i))) + continue; - if (fw_domains & BIT(domain_id)) - fw_domain_fini(uncore, domain_id); - } + if (fw_domains & BIT(domain_id)) + fw_domain_fini(uncore, domain_id); + } - for (i = 0; i < I915_MAX_VECS; i++) { - domain_id = FW_DOMAIN_ID_MEDIA_VEBOX0 + i; + for (i = 0; i < I915_MAX_VECS; i++) { + domain_id = FW_DOMAIN_ID_MEDIA_VEBOX0 + i; - if (HAS_ENGINE(i915, _VECS(i))) - continue; + if (HAS_ENGINE(i915, _VECS(i))) + continue; - if (fw_domains & BIT(domain_id)) - fw_domain_fini(uncore, domain_id); - } + if (fw_domains & BIT(domain_id)) + fw_domain_fini(uncore, domain_id); } } void intel_uncore_fini_mmio(struct intel_uncore *uncore) { - /* Paranoia: make sure we have disabled everything before we exit. */ - intel_uncore_sanitize(uncore_to_i915(uncore)); + if (intel_uncore_has_forcewake(uncore)) { + iosf_mbi_punit_acquire(); + iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( + &uncore->pmic_bus_access_nb); + intel_uncore_forcewake_reset(uncore); + intel_uncore_fw_domains_fini(uncore); + iosf_mbi_punit_release(); + } - iosf_mbi_punit_acquire(); - iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( - &uncore->pmic_bus_access_nb); - intel_uncore_forcewake_reset(uncore); - iosf_mbi_punit_release(); uncore_mmio_cleanup(uncore); } @@ -1665,7 +1875,7 @@ static const struct reg_whitelist { } reg_read_whitelist[] = { { .offset_ldw = RING_TIMESTAMP(RENDER_RING_BASE), .offset_udw = RING_TIMESTAMP_UDW(RENDER_RING_BASE), - .gen_mask = INTEL_GEN_MASK(4, 11), + .gen_mask = INTEL_GEN_MASK(4, 12), .size = 8 } }; @@ -1749,7 +1959,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, * wish to wait without holding forcewake for the duration (i.e. you expect * the wait to be slow). * - * Returns 0 if the register matches the desired condition, or -ETIMEOUT. + * Return: 0 if the register matches the desired condition, or -ETIMEDOUT. */ int __intel_wait_for_register_fw(struct intel_uncore *uncore, i915_reg_t reg, @@ -1797,7 +2007,7 @@ int __intel_wait_for_register_fw(struct intel_uncore *uncore, * * Otherwise, the wait will timeout after @timeout_ms milliseconds. * - * Returns 0 if the register matches the desired condition, or -ETIMEOUT. + * Return: 0 if the register matches the desired condition, or -ETIMEDOUT. 
*/ int __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t reg, @@ -1841,7 +2051,13 @@ int __intel_wait_for_register(struct intel_uncore *uncore, bool intel_uncore_unclaimed_mmio(struct intel_uncore *uncore) { - return check_for_unclaimed_mmio(uncore); + bool ret; + + spin_lock_irq(&uncore->debug->lock); + ret = check_for_unclaimed_mmio(uncore); + spin_unlock_irq(&uncore->debug->lock); + + return ret; } bool @@ -1849,84 +2065,29 @@ intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore) { bool ret = false; - spin_lock_irq(&uncore->lock); + spin_lock_irq(&uncore->debug->lock); - if (unlikely(uncore->unclaimed_mmio_check <= 0)) + if (unlikely(uncore->debug->unclaimed_mmio_check <= 0)) goto out; - if (unlikely(intel_uncore_unclaimed_mmio(uncore))) { + if (unlikely(check_for_unclaimed_mmio(uncore))) { if (!i915_modparams.mmio_debug) { - DRM_DEBUG("Unclaimed register detected, " - "enabling oneshot unclaimed register reporting. " - "Please use i915.mmio_debug=N for more information.\n"); + drm_dbg(&uncore->i915->drm, + "Unclaimed register detected, " + "enabling oneshot unclaimed register reporting. " + "Please use i915.mmio_debug=N for more information.\n"); i915_modparams.mmio_debug++; } - uncore->unclaimed_mmio_check--; + uncore->debug->unclaimed_mmio_check--; ret = true; } out: - spin_unlock_irq(&uncore->lock); + spin_unlock_irq(&uncore->debug->lock); return ret; } -static enum forcewake_domains -intel_uncore_forcewake_for_read(struct intel_uncore *uncore, - i915_reg_t reg) -{ - struct drm_i915_private *i915 = uncore_to_i915(uncore); - u32 offset = i915_mmio_reg_offset(reg); - enum forcewake_domains fw_domains; - - if (INTEL_GEN(i915) >= 11) { - fw_domains = __gen11_fwtable_reg_read_fw_domains(uncore, offset); - } else if (HAS_FWTABLE(i915)) { - fw_domains = __fwtable_reg_read_fw_domains(uncore, offset); - } else if (INTEL_GEN(i915) >= 6) { - fw_domains = __gen6_reg_read_fw_domains(uncore, offset); - } else { - /* on devices with FW we expect to hit one of the above cases */ - if (intel_uncore_has_forcewake(uncore)) - MISSING_CASE(INTEL_GEN(i915)); - - fw_domains = 0; - } - - WARN_ON(fw_domains & ~uncore->fw_domains); - - return fw_domains; -} - -static enum forcewake_domains -intel_uncore_forcewake_for_write(struct intel_uncore *uncore, - i915_reg_t reg) -{ - struct drm_i915_private *i915 = uncore_to_i915(uncore); - u32 offset = i915_mmio_reg_offset(reg); - enum forcewake_domains fw_domains; - - if (INTEL_GEN(i915) >= 11) { - fw_domains = __gen11_fwtable_reg_write_fw_domains(uncore, offset); - } else if (HAS_FWTABLE(i915) && !IS_VALLEYVIEW(i915)) { - fw_domains = __fwtable_reg_write_fw_domains(uncore, offset); - } else if (IS_GEN(i915, 8)) { - fw_domains = __gen8_reg_write_fw_domains(uncore, offset); - } else if (IS_GEN_RANGE(i915, 6, 7)) { - fw_domains = FORCEWAKE_RENDER; - } else { - /* on devices with FW we expect to hit one of the above cases */ - if (intel_uncore_has_forcewake(uncore)) - MISSING_CASE(INTEL_GEN(i915)); - - fw_domains = 0; - } - - WARN_ON(fw_domains & ~uncore->fw_domains); - - return fw_domains; -} - /** * intel_uncore_forcewake_for_reg - which forcewake domains are needed to access * a register @@ -1953,10 +2114,12 @@ intel_uncore_forcewake_for_reg(struct intel_uncore *uncore, return 0; if (op & FW_REG_READ) - fw_domains = intel_uncore_forcewake_for_read(uncore, reg); + fw_domains = uncore->funcs.read_fw_domains(uncore, reg); if (op & FW_REG_WRITE) - fw_domains |= intel_uncore_forcewake_for_write(uncore, reg); + fw_domains |= 
uncore->funcs.write_fw_domains(uncore, reg); + + WARN_ON(fw_domains & ~uncore->fw_domains); return fw_domains; } diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 804a0faacc91..dcfa243892c6 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -36,6 +36,13 @@ struct drm_i915_private; struct intel_runtime_pm; struct intel_uncore; +struct intel_uncore_mmio_debug { + spinlock_t lock; /** lock is also taken in irq contexts. */ + int unclaimed_mmio_check; + int saved_mmio_check; + u32 suspend_count; +}; + enum forcewake_domain_id { FW_DOMAIN_ID_RENDER = 0, FW_DOMAIN_ID_BLITTER, @@ -70,6 +77,11 @@ struct intel_uncore_funcs { void (*force_wake_put)(struct intel_uncore *uncore, enum forcewake_domains domains); + enum forcewake_domains (*read_fw_domains)(struct intel_uncore *uncore, + i915_reg_t r); + enum forcewake_domains (*write_fw_domains)(struct intel_uncore *uncore, + i915_reg_t r); + u8 (*mmio_readb)(struct intel_uncore *uncore, i915_reg_t r, bool trace); u16 (*mmio_readw)(struct intel_uncore *uncore, @@ -97,6 +109,7 @@ struct intel_forcewake_range { struct intel_uncore { void __iomem *regs; + struct drm_i915_private *i915; struct intel_runtime_pm *rpm; spinlock_t lock; /** lock is also taken in irq contexts. */ @@ -117,9 +130,11 @@ struct intel_uncore { enum forcewake_domains fw_domains; enum forcewake_domains fw_domains_active; + enum forcewake_domains fw_domains_timer; enum forcewake_domains fw_domains_saved; /* user domains saved for S3 */ struct intel_uncore_forcewake_domain { + struct intel_uncore *uncore; enum forcewake_domain_id id; enum forcewake_domains mask; unsigned int wake_count; @@ -127,32 +142,21 @@ struct intel_uncore { struct hrtimer timer; u32 __iomem *reg_set; u32 __iomem *reg_ack; - } fw_domain[FW_DOMAIN_ID_COUNT]; + } *fw_domain[FW_DOMAIN_ID_COUNT]; - struct { - unsigned int count; + unsigned int user_forcewake_count; - int saved_mmio_check; - int saved_mmio_debug; - } user_forcewake; - - int unclaimed_mmio_check; + struct intel_uncore_mmio_debug *debug; }; /* Iterate over initialised fw domains */ #define for_each_fw_domain_masked(domain__, mask__, uncore__, tmp__) \ - for (tmp__ = (mask__); \ - tmp__ ? 
(domain__ = &(uncore__)->fw_domain[__mask_next_bit(tmp__)]), 1 : 0;) + for (tmp__ = (mask__); tmp__ ;) \ + for_each_if(domain__ = (uncore__)->fw_domain[__mask_next_bit(tmp__)]) #define for_each_fw_domain(domain__, uncore__, tmp__) \ for_each_fw_domain_masked(domain__, (uncore__)->fw_domains, uncore__, tmp__) -static inline struct intel_uncore * -forcewake_domain_to_uncore(const struct intel_uncore_forcewake_domain *d) -{ - return container_of(d, struct intel_uncore, fw_domain[d->id]); -} - static inline bool intel_uncore_has_forcewake(const struct intel_uncore *uncore) { @@ -177,8 +181,10 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore) return uncore->flags & UNCORE_HAS_FIFO; } -void intel_uncore_sanitize(struct drm_i915_private *dev_priv); -void intel_uncore_init_early(struct intel_uncore *uncore); +void +intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug); +void intel_uncore_init_early(struct intel_uncore *uncore, + struct drm_i915_private *i915); int intel_uncore_init_mmio(struct intel_uncore *uncore); void intel_uncore_prune_mmio_domains(struct intel_uncore *uncore); bool intel_uncore_unclaimed_mmio(struct intel_uncore *uncore); @@ -372,23 +378,35 @@ intel_uncore_read64_2x32(struct intel_uncore *uncore, static inline void intel_uncore_rmw(struct intel_uncore *uncore, i915_reg_t reg, u32 clear, u32 set) { - u32 val; + u32 old, val; - val = intel_uncore_read(uncore, reg); - val &= ~clear; - val |= set; - intel_uncore_write(uncore, reg, val); + old = intel_uncore_read(uncore, reg); + val = (old & ~clear) | set; + if (val != old) + intel_uncore_write(uncore, reg, val); } static inline void intel_uncore_rmw_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clear, u32 set) { - u32 val; + u32 old, val; + + old = intel_uncore_read_fw(uncore, reg); + val = (old & ~clear) | set; + if (val != old) + intel_uncore_write_fw(uncore, reg, val); +} + +static inline int intel_uncore_write_and_verify(struct intel_uncore *uncore, + i915_reg_t reg, u32 val, + u32 mask, u32 expected_val) +{ + u32 reg_val; + + intel_uncore_write(uncore, reg, val); + reg_val = intel_uncore_read(uncore, reg); - val = intel_uncore_read_fw(uncore, reg); - val &= ~clear; - val |= set; - intel_uncore_write_fw(uncore, reg, val); + return (reg_val & mask) != expected_val ? 
-EINVAL : 0; } #define raw_reg_read(base, reg) \ diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 3db6fa682823..8fbf6f4d3f26 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -4,25 +4,25 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/wait_bit.h> + #include "intel_runtime_pm.h" -#include "i915_gem.h" +#include "intel_wakeref.h" -static void rpm_get(struct intel_runtime_pm *rpm, struct intel_wakeref *wf) +static void rpm_get(struct intel_wakeref *wf) { - wf->wakeref = intel_runtime_pm_get(rpm); + wf->wakeref = intel_runtime_pm_get(wf->rpm); } -static void rpm_put(struct intel_runtime_pm *rpm, struct intel_wakeref *wf) +static void rpm_put(struct intel_wakeref *wf) { intel_wakeref_t wakeref = fetch_and_zero(&wf->wakeref); - intel_runtime_pm_put(rpm, wakeref); - GEM_BUG_ON(!wakeref); + intel_runtime_pm_put(wf->rpm, wakeref); + INTEL_WAKEREF_BUG_ON(!wakeref); } -int __intel_wakeref_get_first(struct intel_runtime_pm *rpm, - struct intel_wakeref *wf, - int (*fn)(struct intel_wakeref *wf)) +int __intel_wakeref_get_first(struct intel_wakeref *wf) { /* * Treat get/put as different subclasses, as we may need to run @@ -34,11 +34,11 @@ int __intel_wakeref_get_first(struct intel_runtime_pm *rpm, if (!atomic_read(&wf->count)) { int err; - rpm_get(rpm, wf); + rpm_get(wf); - err = fn(wf); + err = wf->ops->get(wf); if (unlikely(err)) { - rpm_put(rpm, wf); + rpm_put(wf); mutex_unlock(&wf->mutex); return err; } @@ -48,30 +48,79 @@ int __intel_wakeref_get_first(struct intel_runtime_pm *rpm, atomic_inc(&wf->count); mutex_unlock(&wf->mutex); + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); return 0; } -int __intel_wakeref_put_last(struct intel_runtime_pm *rpm, - struct intel_wakeref *wf, - int (*fn)(struct intel_wakeref *wf)) +static void ____intel_wakeref_put_last(struct intel_wakeref *wf) { - int err; + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); + if (unlikely(!atomic_dec_and_test(&wf->count))) + goto unlock; + + /* ops->put() must reschedule its own release on error/deferral */ + if (likely(!wf->ops->put(wf))) { + rpm_put(wf); + wake_up_var(&wf->wakeref); + } - err = fn(wf); - if (likely(!err)) - rpm_put(rpm, wf); - else - atomic_inc(&wf->count); +unlock: mutex_unlock(&wf->mutex); +} + +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags) +{ + INTEL_WAKEREF_BUG_ON(work_pending(&wf->work)); - return err; + /* Assume we are not in process context and so cannot sleep. 
*/ + if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) { + schedule_work(&wf->work); + return; + } + + ____intel_wakeref_put_last(wf); } -void __intel_wakeref_init(struct intel_wakeref *wf, struct lock_class_key *key) +static void __intel_wakeref_put_work(struct work_struct *wrk) { - __mutex_init(&wf->mutex, "wakeref", key); + struct intel_wakeref *wf = container_of(wrk, typeof(*wf), work); + + if (atomic_add_unless(&wf->count, -1, 1)) + return; + + mutex_lock(&wf->mutex); + ____intel_wakeref_put_last(wf); +} + +void __intel_wakeref_init(struct intel_wakeref *wf, + struct intel_runtime_pm *rpm, + const struct intel_wakeref_ops *ops, + struct intel_wakeref_lockclass *key) +{ + wf->rpm = rpm; + wf->ops = ops; + + __mutex_init(&wf->mutex, "wakeref.mutex", &key->mutex); atomic_set(&wf->count, 0); wf->wakeref = 0; + + INIT_WORK(&wf->work, __intel_wakeref_put_work); + lockdep_init_map(&wf->work.lockdep_map, "wakeref.work", &key->work, 0); +} + +int intel_wakeref_wait_for_idle(struct intel_wakeref *wf) +{ + int err; + + might_sleep(); + + err = wait_var_event_killable(&wf->wakeref, + !intel_wakeref_is_active(wf)); + if (err) + return err; + + intel_wakeref_unlock_wait(wf); + return 0; } static void wakeref_auto_timeout(struct timer_list *t) @@ -115,7 +164,7 @@ void intel_wakeref_auto(struct intel_wakeref_auto *wf, unsigned long timeout) if (!refcount_inc_not_zero(&wf->count)) { spin_lock_irqsave(&wf->lock, flags); if (!refcount_inc_not_zero(&wf->count)) { - GEM_BUG_ON(wf->wakeref); + INTEL_WAKEREF_BUG_ON(wf->wakeref); wf->wakeref = intel_runtime_pm_get_if_in_use(wf->rpm); refcount_set(&wf->count, 1); } @@ -134,5 +183,5 @@ void intel_wakeref_auto(struct intel_wakeref_auto *wf, unsigned long timeout) void intel_wakeref_auto_fini(struct intel_wakeref_auto *wf) { intel_wakeref_auto(wf, 0); - GEM_BUG_ON(wf->wakeref); + INTEL_WAKEREF_BUG_ON(wf->wakeref); } diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 38275310b196..7d1e676b71ef 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -8,41 +8,63 @@ #define INTEL_WAKEREF_H #include <linux/atomic.h> +#include <linux/bits.h> +#include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/refcount.h> #include <linux/stackdepot.h> #include <linux/timer.h> +#include <linux/workqueue.h> + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) +#define INTEL_WAKEREF_BUG_ON(expr) BUG_ON(expr) +#else +#define INTEL_WAKEREF_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) +#endif struct intel_runtime_pm; +struct intel_wakeref; typedef depot_stack_handle_t intel_wakeref_t; +struct intel_wakeref_ops { + int (*get)(struct intel_wakeref *wf); + int (*put)(struct intel_wakeref *wf); +}; + struct intel_wakeref { atomic_t count; struct mutex mutex; + intel_wakeref_t wakeref; + + struct intel_runtime_pm *rpm; + const struct intel_wakeref_ops *ops; + + struct work_struct work; +}; + +struct intel_wakeref_lockclass { + struct lock_class_key mutex; + struct lock_class_key work; }; void __intel_wakeref_init(struct intel_wakeref *wf, - struct lock_class_key *key); -#define intel_wakeref_init(wf) do { \ - static struct lock_class_key __key; \ + struct intel_runtime_pm *rpm, + const struct intel_wakeref_ops *ops, + struct intel_wakeref_lockclass *key); +#define intel_wakeref_init(wf, rpm, ops) do { \ + static struct intel_wakeref_lockclass __key; \ \ - __intel_wakeref_init((wf), &__key); \ + __intel_wakeref_init((wf), (rpm), (ops), &__key); \ } while (0) -int __intel_wakeref_get_first(struct 
intel_runtime_pm *rpm, - struct intel_wakeref *wf, - int (*fn)(struct intel_wakeref *wf)); -int __intel_wakeref_put_last(struct intel_runtime_pm *rpm, - struct intel_wakeref *wf, - int (*fn)(struct intel_wakeref *wf)); +int __intel_wakeref_get_first(struct intel_wakeref *wf); +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags); /** * intel_wakeref_get: Acquire the wakeref - * @i915: the drm_i915_private device * @wf: the wakeref - * @fn: callback for acquired the wakeref, called only on first acquire. * * Acquire a hold on the wakeref. The first user to do so, will acquire * the runtime pm wakeref and then call the @fn underneath the wakeref @@ -55,17 +77,32 @@ int __intel_wakeref_put_last(struct intel_runtime_pm *rpm, * code otherwise. */ static inline int -intel_wakeref_get(struct intel_runtime_pm *rpm, - struct intel_wakeref *wf, - int (*fn)(struct intel_wakeref *wf)) +intel_wakeref_get(struct intel_wakeref *wf) { + might_sleep(); if (unlikely(!atomic_inc_not_zero(&wf->count))) - return __intel_wakeref_get_first(rpm, wf, fn); + return __intel_wakeref_get_first(wf); return 0; } /** + * __intel_wakeref_get: Acquire the wakeref, again + * @wf: the wakeref + * + * Increment the wakeref counter, only valid if it is already held by + * the caller. + * + * See intel_wakeref_get(). + */ +static inline void +__intel_wakeref_get(struct intel_wakeref *wf) +{ + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); + atomic_inc(&wf->count); +} + +/** * intel_wakeref_get_if_in_use: Acquire the wakeref * @wf: the wakeref * @@ -81,10 +118,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf) } /** - * intel_wakeref_put: Release the wakeref - * @i915: the drm_i915_private device + * intel_wakeref_put_flags: Release the wakeref * @wf: the wakeref - * @fn: callback for releasing the wakeref, called only on final release. + * @flags: control flags * * Release our hold on the wakeref. When there are no more users, * the runtime pm wakeref will be released after the @fn callback is called @@ -96,15 +132,26 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf) * Returns: 0 if the wakeref was released successfully, or a negative error * code otherwise. */ -static inline int -intel_wakeref_put(struct intel_runtime_pm *rpm, - struct intel_wakeref *wf, - int (*fn)(struct intel_wakeref *wf)) +static inline void +__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags) +#define INTEL_WAKEREF_PUT_ASYNC BIT(0) { - if (atomic_dec_and_mutex_lock(&wf->count, &wf->mutex)) - return __intel_wakeref_put_last(rpm, wf, fn); + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); + if (unlikely(!atomic_add_unless(&wf->count, -1, 1))) + __intel_wakeref_put_last(wf, flags); +} - return 0; +static inline void +intel_wakeref_put(struct intel_wakeref *wf) +{ + might_sleep(); + __intel_wakeref_put(wf, 0); +} + +static inline void +intel_wakeref_put_async(struct intel_wakeref *wf) +{ + __intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC); } /** @@ -136,17 +183,57 @@ intel_wakeref_unlock(struct intel_wakeref *wf) } /** - * intel_wakeref_active: Query whether the wakeref is currently held + * intel_wakeref_unlock_wait: Wait until the active callback is complete + * @wf: the wakeref + * + * Waits for the active callback (under the @wf->mutex or another CPU) is + * complete. 
+ */ +static inline void +intel_wakeref_unlock_wait(struct intel_wakeref *wf) +{ + mutex_lock(&wf->mutex); + mutex_unlock(&wf->mutex); + flush_work(&wf->work); +} + +/** + * intel_wakeref_is_active: Query whether the wakeref is currently held * @wf: the wakeref * * Returns: true if the wakeref is currently held. */ static inline bool -intel_wakeref_active(struct intel_wakeref *wf) +intel_wakeref_is_active(const struct intel_wakeref *wf) { return READ_ONCE(wf->wakeref); } +/** + * __intel_wakeref_defer_park: Defer the current park callback + * @wf: the wakeref + */ +static inline void +__intel_wakeref_defer_park(struct intel_wakeref *wf) +{ + lockdep_assert_held(&wf->mutex); + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count)); + atomic_set_release(&wf->count, 1); +} + +/** + * intel_wakeref_wait_for_idle: Wait until the wakeref is idle + * @wf: the wakeref + * + * Wait for the earlier asynchronous release of the wakeref. Note + * this will wait for any third party as well, so make sure you only wait + * when you have control over the wakeref and trust no one else is acquiring + * it. + * + * Return: 0 on success, error code if killed. + */ +int intel_wakeref_wait_for_idle(struct intel_wakeref *wf); + struct intel_wakeref_auto { struct intel_runtime_pm *rpm; struct timer_list timer; diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c index 7b4ba84b9fb8..2bb9f9f9a50a 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.c +++ b/drivers/gpu/drm/i915/intel_wopcm.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2017-2018 Intel Corporation + * Copyright © 2017-2019 Intel Corporation */ #include "intel_wopcm.h" @@ -64,6 +63,11 @@ #define GEN9_GUC_FW_RESERVED SZ_128K #define GEN9_GUC_WOPCM_OFFSET (GUC_WOPCM_RESERVED + GEN9_GUC_FW_RESERVED) +static inline struct drm_i915_private *wopcm_to_i915(struct intel_wopcm *wopcm) +{ + return container_of(wopcm, struct drm_i915_private, wopcm); +} + /** * intel_wopcm_init_early() - Early initialization of the WOPCM. * @wopcm: pointer to intel_wopcm. @@ -74,7 +78,7 @@ void intel_wopcm_init_early(struct intel_wopcm *wopcm) { struct drm_i915_private *i915 = wopcm_to_i915(wopcm); - if (!HAS_GUC(i915)) + if (!HAS_GT_UC(i915)) return; if (INTEL_GEN(i915) >= 11) @@ -82,7 +86,7 @@ void intel_wopcm_init_early(struct intel_wopcm *wopcm) else wopcm->size = GEN9_WOPCM_SIZE; - DRM_DEBUG_DRIVER("WOPCM size: %uKiB\n", wopcm->size / 1024); + DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "WOPCM: %uK\n", wopcm->size / 1024); } static inline u32 context_reserved_size(struct drm_i915_private *i915) @@ -95,7 +99,8 @@ static inline u32 context_reserved_size(struct drm_i915_private *i915) return 0; } -static inline int gen9_check_dword_gap(u32 guc_wopcm_base, u32 guc_wopcm_size) +static inline bool gen9_check_dword_gap(struct drm_i915_private *i915, + u32 guc_wopcm_base, u32 guc_wopcm_size) { u32 offset; @@ -107,16 +112,18 @@ static inline int gen9_check_dword_gap(u32 guc_wopcm_base, u32 guc_wopcm_size) offset = guc_wopcm_base + GEN9_GUC_WOPCM_OFFSET; if (offset > guc_wopcm_size || (guc_wopcm_size - offset) < sizeof(u32)) { - DRM_ERROR("GuC WOPCM size %uKiB is too small. 
%uKiB needed.\n", - guc_wopcm_size / 1024, - (u32)(offset + sizeof(u32)) / 1024); - return -E2BIG; + dev_err(i915->drm.dev, + "WOPCM: invalid GuC region size: %uK < %uK\n", + guc_wopcm_size / SZ_1K, + (u32)(offset + sizeof(u32)) / SZ_1K); + return false; } - return 0; + return true; } -static inline int gen9_check_huc_fw_fits(u32 guc_wopcm_size, u32 huc_fw_size) +static inline bool gen9_check_huc_fw_fits(struct drm_i915_private *i915, + u32 guc_wopcm_size, u32 huc_fw_size) { /* * On Gen9 & CNL A0, hardware requires the total available GuC WOPCM @@ -124,29 +131,81 @@ static inline int gen9_check_huc_fw_fits(u32 guc_wopcm_size, u32 huc_fw_size) * firmware uploading would fail. */ if (huc_fw_size > guc_wopcm_size - GUC_WOPCM_RESERVED) { - DRM_ERROR("HuC FW (%uKiB) won't fit in GuC WOPCM (%uKiB).\n", - huc_fw_size / 1024, - (guc_wopcm_size - GUC_WOPCM_RESERVED) / 1024); - return -E2BIG; + dev_err(i915->drm.dev, "WOPCM: no space for %s: %uK < %uK\n", + intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), + (guc_wopcm_size - GUC_WOPCM_RESERVED) / SZ_1K, + huc_fw_size / 1024); + return false; } - return 0; + return true; +} + +static inline bool check_hw_restrictions(struct drm_i915_private *i915, + u32 guc_wopcm_base, u32 guc_wopcm_size, + u32 huc_fw_size) +{ + if (IS_GEN(i915, 9) && !gen9_check_dword_gap(i915, guc_wopcm_base, + guc_wopcm_size)) + return false; + + if ((IS_GEN(i915, 9) || + IS_CNL_REVID(i915, CNL_REVID_A0, CNL_REVID_A0)) && + !gen9_check_huc_fw_fits(i915, guc_wopcm_size, huc_fw_size)) + return false; + + return true; } -static inline int check_hw_restriction(struct drm_i915_private *i915, - u32 guc_wopcm_base, u32 guc_wopcm_size, - u32 huc_fw_size) +static inline bool __check_layout(struct drm_i915_private *i915, u32 wopcm_size, + u32 guc_wopcm_base, u32 guc_wopcm_size, + u32 guc_fw_size, u32 huc_fw_size) { - int err = 0; + const u32 ctx_rsvd = context_reserved_size(i915); + u32 size; + + size = wopcm_size - ctx_rsvd; + if (unlikely(range_overflows(guc_wopcm_base, guc_wopcm_size, size))) { + dev_err(i915->drm.dev, + "WOPCM: invalid GuC region layout: %uK + %uK > %uK\n", + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K, + size / SZ_1K); + return false; + } + + size = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED; + if (unlikely(guc_wopcm_size < size)) { + dev_err(i915->drm.dev, "WOPCM: no space for %s: %uK < %uK\n", + intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), + guc_wopcm_size / SZ_1K, size / SZ_1K); + return false; + } - if (IS_GEN(i915, 9)) - err = gen9_check_dword_gap(guc_wopcm_base, guc_wopcm_size); + size = huc_fw_size + WOPCM_RESERVED_SIZE; + if (unlikely(guc_wopcm_base < size)) { + dev_err(i915->drm.dev, "WOPCM: no space for %s: %uK < %uK\n", + intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), + guc_wopcm_base / SZ_1K, size / SZ_1K); + return false; + } + + return check_hw_restrictions(i915, guc_wopcm_base, guc_wopcm_size, + huc_fw_size); +} - if (!err && - (IS_GEN(i915, 9) || IS_CNL_REVID(i915, CNL_REVID_A0, CNL_REVID_A0))) - err = gen9_check_huc_fw_fits(guc_wopcm_size, huc_fw_size); +static bool __wopcm_regs_locked(struct intel_uncore *uncore, + u32 *guc_wopcm_base, u32 *guc_wopcm_size) +{ + u32 reg_base = intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET); + u32 reg_size = intel_uncore_read(uncore, GUC_WOPCM_SIZE); + + if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) || + !(reg_base & GUC_WOPCM_OFFSET_VALID)) + return false; - return err; + *guc_wopcm_base = reg_base & GUC_WOPCM_OFFSET_MASK; + *guc_wopcm_size = reg_size & GUC_WOPCM_SIZE_MASK; + return true; } /** @@ -156,135 
+215,66 @@ static inline int check_hw_restriction(struct drm_i915_private *i915, * This function will partition WOPCM space based on GuC and HuC firmware sizes * and will allocate max remaining for use by GuC. This function will also * enforce platform dependent hardware restrictions on GuC WOPCM offset and - * size. It will fail the WOPCM init if any of these checks were failed, so that - * the following GuC firmware uploading would be aborted. - * - * Return: 0 on success, non-zero error code on failure. + * size. It will fail the WOPCM init if any of these checks fail, so that the + * following WOPCM registers setup and GuC firmware uploading would be aborted. */ -int intel_wopcm_init(struct intel_wopcm *wopcm) +void intel_wopcm_init(struct intel_wopcm *wopcm) { struct drm_i915_private *i915 = wopcm_to_i915(wopcm); - u32 guc_fw_size = intel_uc_fw_get_upload_size(&i915->guc.fw); - u32 huc_fw_size = intel_uc_fw_get_upload_size(&i915->huc.fw); + struct intel_gt *gt = &i915->gt; + u32 guc_fw_size = intel_uc_fw_get_upload_size(>->uc.guc.fw); + u32 huc_fw_size = intel_uc_fw_get_upload_size(>->uc.huc.fw); u32 ctx_rsvd = context_reserved_size(i915); u32 guc_wopcm_base; u32 guc_wopcm_size; - u32 guc_wopcm_rsvd; - int err; - if (!USES_GUC(i915)) - return 0; + if (!guc_fw_size) + return; GEM_BUG_ON(!wopcm->size); + GEM_BUG_ON(wopcm->guc.base); + GEM_BUG_ON(wopcm->guc.size); + GEM_BUG_ON(guc_fw_size >= wopcm->size); + GEM_BUG_ON(huc_fw_size >= wopcm->size); + GEM_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size); - if (i915_inject_load_failure()) - return -E2BIG; + if (i915_inject_probe_failure(i915)) + return; - if (guc_fw_size >= wopcm->size) { - DRM_ERROR("GuC FW (%uKiB) is too big to fit in WOPCM.", - guc_fw_size / 1024); - return -E2BIG; + if (__wopcm_regs_locked(gt->uncore, &guc_wopcm_base, &guc_wopcm_size)) { + DRM_DEV_DEBUG_DRIVER(i915->drm.dev, + "GuC WOPCM is already locked [%uK, %uK)\n", + guc_wopcm_base / SZ_1K, + guc_wopcm_size / SZ_1K); + goto check; } - if (huc_fw_size >= wopcm->size) { - DRM_ERROR("HuC FW (%uKiB) is too big to fit in WOPCM.", - huc_fw_size / 1024); - return -E2BIG; - } + /* + * Aligned value of guc_wopcm_base will determine available WOPCM space + * for HuC firmware and mandatory reserved area. + */ + guc_wopcm_base = huc_fw_size + WOPCM_RESERVED_SIZE; + guc_wopcm_base = ALIGN(guc_wopcm_base, GUC_WOPCM_OFFSET_ALIGNMENT); - guc_wopcm_base = ALIGN(huc_fw_size + WOPCM_RESERVED_SIZE, - GUC_WOPCM_OFFSET_ALIGNMENT); - if ((guc_wopcm_base + ctx_rsvd) >= wopcm->size) { - DRM_ERROR("GuC WOPCM base (%uKiB) is too big.\n", - guc_wopcm_base / 1024); - return -E2BIG; - } + /* + * Need to clamp guc_wopcm_base now to make sure the following math is + * correct. Formal check of whole WOPCM layout will be done below. + */ + guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd); - guc_wopcm_size = wopcm->size - guc_wopcm_base - ctx_rsvd; + /* Aligned remainings of usable WOPCM space can be assigned to GuC. 
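
A worked example of the partitioning performed just below, using made-up sizes purely for illustration (the real WOPCM_RESERVED_SIZE, GUC_WOPCM_OFFSET_ALIGNMENT and GUC_WOPCM_SIZE_MASK values are platform constants not quoted in this hunk):

	wopcm->size = 2048K, ctx_rsvd = 32K, huc_fw_size = 512K,
	WOPCM_RESERVED_SIZE = 16K, GUC_WOPCM_OFFSET_ALIGNMENT = 16K

	guc_wopcm_base = ALIGN(512K + 16K, 16K)   =  528K
	guc_wopcm_base = min(528K, 2048K - 32K)   =  528K
	guc_wopcm_size = 2048K - 32K - 528K       = 1488K (the size mask leaves it intact here)

	__check_layout() then confirms that 1488K covers the GuC image plus its
	reserved and stack areas, that 528K still covers the HuC image plus
	WOPCM_RESERVED_SIZE, and only on success commits wopcm->guc.base/size.
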
*/ + guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base; guc_wopcm_size &= GUC_WOPCM_SIZE_MASK; - DRM_DEBUG_DRIVER("Calculated GuC WOPCM Region: [%uKiB, %uKiB)\n", - guc_wopcm_base / 1024, guc_wopcm_size / 1024); + DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "Calculated GuC WOPCM [%uK, %uK)\n", + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K); - guc_wopcm_rsvd = GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED; - if ((guc_fw_size + guc_wopcm_rsvd) > guc_wopcm_size) { - DRM_ERROR("Need %uKiB WOPCM for GuC, %uKiB available.\n", - (guc_fw_size + guc_wopcm_rsvd) / 1024, - guc_wopcm_size / 1024); - return -E2BIG; +check: + if (__check_layout(i915, wopcm->size, guc_wopcm_base, guc_wopcm_size, + guc_fw_size, huc_fw_size)) { + wopcm->guc.base = guc_wopcm_base; + wopcm->guc.size = guc_wopcm_size; + GEM_BUG_ON(!wopcm->guc.base); + GEM_BUG_ON(!wopcm->guc.size); } - - err = check_hw_restriction(i915, guc_wopcm_base, guc_wopcm_size, - huc_fw_size); - if (err) - return err; - - wopcm->guc.base = guc_wopcm_base; - wopcm->guc.size = guc_wopcm_size; - - return 0; -} - -static inline int write_and_verify(struct drm_i915_private *dev_priv, - i915_reg_t reg, u32 val, u32 mask, - u32 locked_bit) -{ - u32 reg_val; - - GEM_BUG_ON(val & ~mask); - - I915_WRITE(reg, val); - - reg_val = I915_READ(reg); - - return (reg_val & mask) != (val | locked_bit) ? -EIO : 0; -} - -/** - * intel_wopcm_init_hw() - Setup GuC WOPCM registers. - * @wopcm: pointer to intel_wopcm. - * - * Setup the GuC WOPCM size and offset registers with the calculated values. It - * will verify the register values to make sure the registers are locked with - * correct values. - * - * Return: 0 on success. -EIO if registers were locked with incorrect values. - */ -int intel_wopcm_init_hw(struct intel_wopcm *wopcm) -{ - struct drm_i915_private *dev_priv = wopcm_to_i915(wopcm); - u32 huc_agent; - u32 mask; - int err; - - if (!USES_GUC(dev_priv)) - return 0; - - GEM_BUG_ON(!HAS_GUC(dev_priv)); - GEM_BUG_ON(!wopcm->guc.size); - GEM_BUG_ON(!wopcm->guc.base); - - err = write_and_verify(dev_priv, GUC_WOPCM_SIZE, wopcm->guc.size, - GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED, - GUC_WOPCM_SIZE_LOCKED); - if (err) - goto err_out; - - huc_agent = USES_HUC(dev_priv) ? HUC_LOADING_AGENT_GUC : 0; - mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent; - err = write_and_verify(dev_priv, DMA_GUC_WOPCM_OFFSET, - wopcm->guc.base | huc_agent, mask, - GUC_WOPCM_OFFSET_VALID); - if (err) - goto err_out; - - return 0; - -err_out: - DRM_ERROR("Failed to init WOPCM registers:\n"); - DRM_ERROR("DMA_GUC_WOPCM_OFFSET=%#x\n", - I915_READ(DMA_GUC_WOPCM_OFFSET)); - DRM_ERROR("GUC_WOPCM_SIZE=%#x\n", I915_READ(GUC_WOPCM_SIZE)); - - return err; } diff --git a/drivers/gpu/drm/i915/intel_wopcm.h b/drivers/gpu/drm/i915/intel_wopcm.h index 114401971520..17d6aa86008a 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.h +++ b/drivers/gpu/drm/i915/intel_wopcm.h @@ -25,6 +25,21 @@ struct intel_wopcm { }; /** + * intel_wopcm_guc_base() + * @wopcm: intel_wopcm structure + * + * Returns the base of the WOPCM shadowed region. + * + * Returns: + * 0 if GuC is not present or not in use. + * Otherwise, the GuC WOPCM base. 
+ */ +static inline u32 intel_wopcm_guc_base(struct intel_wopcm *wopcm) +{ + return wopcm->guc.base; +} + +/** * intel_wopcm_guc_size() * @wopcm: intel_wopcm structure * @@ -40,7 +55,6 @@ static inline u32 intel_wopcm_guc_size(struct intel_wopcm *wopcm) } void intel_wopcm_init_early(struct intel_wopcm *wopcm); -int intel_wopcm_init(struct intel_wopcm *wopcm); -int intel_wopcm_init_hw(struct intel_wopcm *wopcm); +void intel_wopcm_init(struct intel_wopcm *wopcm); #endif diff --git a/drivers/gpu/drm/i915/i915_oa_bdw.c b/drivers/gpu/drm/i915/oa/i915_oa_bdw.c index 4acdb94555b7..14da5c3b569d 100644 --- a/drivers/gpu/drm/i915/i915_oa_bdw.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_bdw.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -66,26 +65,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_bdw(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "d6de6f55-e526-4f79-a6a6-d7315c09044e", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "d6de6f55-e526-4f79-a6a6-d7315c09044e"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "d6de6f55-e526-4f79-a6a6-d7315c09044e"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_bdw.h b/drivers/gpu/drm/i915/oa/i915_oa_bdw.h new file mode 100644 index 000000000000..0cee3334f0a6 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_bdw.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel 
Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + */ + +#ifndef __I915_OA_BDW_H__ +#define __I915_OA_BDW_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_bdw(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_bxt.c b/drivers/gpu/drm/i915/oa/i915_oa_bxt.c index a44195c39923..3e785bafcf99 100644 --- a/drivers/gpu/drm/i915/i915_oa_bxt.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_bxt.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -64,26 +63,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_bxt(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "5ee72f5c-092f-421e-8b70-225f7c3e9612", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "5ee72f5c-092f-421e-8b70-225f7c3e9612"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "5ee72f5c-092f-421e-8b70-225f7c3e9612"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_bxt.h b/drivers/gpu/drm/i915/oa/i915_oa_bxt.h new file mode 100644 index 000000000000..0bdf391323ec --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_bxt.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_BXT_H__ +#define __I915_OA_BXT_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_bxt(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt2.c b/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.c index 7f60d51b8761..0ea86f70a06c 100644 --- a/drivers/gpu/drm/i915/i915_oa_cflgt2.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -65,26 +64,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_cflgt2(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "74fb4902-d3d3-4237-9e90-cbdc68d0a446", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "74fb4902-d3d3-4237-9e90-cbdc68d0a446"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "74fb4902-d3d3-4237-9e90-cbdc68d0a446"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h b/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h new file mode 100644 index 000000000000..6b862280ab78 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_CFLGT2_H__ +#define __I915_OA_CFLGT2_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_cflgt2(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt3.c b/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.c index a92c38e3a0ce..fc632dd890bf 100644 --- a/drivers/gpu/drm/i915/i915_oa_cflgt3.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -65,26 +64,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "577e8e2c-3fa0-4875-8743-3538d585e3b0", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "577e8e2c-3fa0-4875-8743-3538d585e3b0"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "577e8e2c-3fa0-4875-8743-3538d585e3b0"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h b/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h new file mode 100644 index 000000000000..4ca9d8f89b2f --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_CFLGT3_H__ +#define __I915_OA_CFLGT3_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_chv.c b/drivers/gpu/drm/i915/oa/i915_oa_chv.c index 71ec889a0114..6cd4e9921a8a 100644 --- a/drivers/gpu/drm/i915/i915_oa_chv.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_chv.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -65,26 +64,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_chv(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "4a534b07-cba3-414d-8d60-874830e883aa", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "4a534b07-cba3-414d-8d60-874830e883aa"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "4a534b07-cba3-414d-8d60-874830e883aa"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_chv.h b/drivers/gpu/drm/i915/oa/i915_oa_chv.h new file mode 100644 index 000000000000..3cac7bbc9c71 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_chv.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_CHV_H__ +#define __I915_OA_CHV_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_chv(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_cnl.c b/drivers/gpu/drm/i915/oa/i915_oa_cnl.c index 5c23d883d6c9..1041e8914993 100644 --- a/drivers/gpu/drm/i915/i915_oa_cnl.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_cnl.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -77,26 +76,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "db41edd4-d8e7-4730-ad11-b9a2d6833503", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "db41edd4-d8e7-4730-ad11-b9a2d6833503"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "db41edd4-d8e7-4730-ad11-b9a2d6833503"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cnl.h b/drivers/gpu/drm/i915/oa/i915_oa_cnl.h new file mode 100644 index 000000000000..db379f5fcbb9 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_cnl.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_CNL_H__ +#define __I915_OA_CNL_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_glk.c b/drivers/gpu/drm/i915/oa/i915_oa_glk.c index 4bdda66df7d2..bd15ebe9aeeb 100644 --- a/drivers/gpu/drm/i915/i915_oa_glk.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_glk.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -64,26 +63,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_glk(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "dd3fd789-e783-4204-8cd0-b671bbccb0cf", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "dd3fd789-e783-4204-8cd0-b671bbccb0cf"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "dd3fd789-e783-4204-8cd0-b671bbccb0cf"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_glk.h b/drivers/gpu/drm/i915/oa/i915_oa_glk.h new file mode 100644 index 000000000000..779f343efd11 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_glk.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_GLK_H__ +#define __I915_OA_GLK_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_glk(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.c b/drivers/gpu/drm/i915/oa/i915_oa_hsw.c index cc6526fdd2bd..133721a8619f 100644 --- a/drivers/gpu/drm/i915/i915_oa_hsw.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_hsw.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -94,26 +93,26 @@ show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *b void i915_perf_load_test_config_hsw(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "403d8832-1a27-4aa6-a64e-f5389ce7b212", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_render_basic; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_render_basic); + dev_priv->perf.test_config.mux_regs = mux_config_render_basic; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_render_basic); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_render_basic; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_render_basic); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_render_basic; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_render_basic); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_render_basic; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_render_basic); + dev_priv->perf.test_config.flex_regs = flex_eu_config_render_basic; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_render_basic); - dev_priv->perf.oa.test_config.sysfs_metric.name = "403d8832-1a27-4aa6-a64e-f5389ce7b212"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "403d8832-1a27-4aa6-a64e-f5389ce7b212"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_render_basic_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_render_basic_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_hsw.h b/drivers/gpu/drm/i915/oa/i915_oa_hsw.h new file mode 100644 index 000000000000..ba97f732f136 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_hsw.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_HSW_H__ +#define __I915_OA_HSW_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_hsw(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_icl.c b/drivers/gpu/drm/i915/oa/i915_oa_icl.c index baa51427a543..2d92041b754f 100644 --- a/drivers/gpu/drm/i915/i915_oa_icl.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_icl.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -74,26 +73,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "a291665e-244b-4b76-9b9a-01de9d3c8068", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "a291665e-244b-4b76-9b9a-01de9d3c8068"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "a291665e-244b-4b76-9b9a-01de9d3c8068"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_icl.h b/drivers/gpu/drm/i915/oa/i915_oa_icl.h new file mode 100644 index 000000000000..5c64112d720e --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_icl.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_ICL_H__ +#define __I915_OA_ICL_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt2.c b/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.c index 168e49ab0d4d..1c3a67c9cfe0 100644 --- a/drivers/gpu/drm/i915/i915_oa_kblgt2.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -65,26 +64,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_kblgt2(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "baa3c7e4-52b6-4b85-801e-465a94b746dd", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "baa3c7e4-52b6-4b85-801e-465a94b746dd"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "baa3c7e4-52b6-4b85-801e-465a94b746dd"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h b/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h new file mode 100644 index 000000000000..810532fa6b63 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_KBLGT2_H__ +#define __I915_OA_KBLGT2_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_kblgt2(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt3.c b/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.c index 6ffa553c388e..ebbe5a9c9fdc 100644 --- a/drivers/gpu/drm/i915/i915_oa_kblgt3.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -65,26 +64,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_kblgt3(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "f1792f32-6db2-4b50-b4b2-557128f1688d", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "f1792f32-6db2-4b50-b4b2-557128f1688d"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "f1792f32-6db2-4b50-b4b2-557128f1688d"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h b/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h new file mode 100644 index 000000000000..13d70456fabd --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_KBLGT3_H__ +#define __I915_OA_KBLGT3_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_kblgt3(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt2.c b/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.c index 7ce6ee851d43..1bc359ed34e8 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt2.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -64,26 +63,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_sklgt2(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "1651949f-0ac0-4cb1-a06f-dafd74a407d1", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "1651949f-0ac0-4cb1-a06f-dafd74a407d1"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "1651949f-0ac0-4cb1-a06f-dafd74a407d1"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h b/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h new file mode 100644 index 000000000000..fda70c51a6ec --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_SKLGT2_H__ +#define __I915_OA_SKLGT2_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_sklgt2(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt3.c b/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.c index 086ca2631e1c..6e352f881310 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt3.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -65,26 +64,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_sklgt3(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "2b985803-d3c9-4629-8a4f-634bfecba0e8", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "2b985803-d3c9-4629-8a4f-634bfecba0e8"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "2b985803-d3c9-4629-8a4f-634bfecba0e8"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h b/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h new file mode 100644 index 000000000000..df74eba5799e --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_SKLGT3_H__ +#define __I915_OA_SKLGT3_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_sklgt3(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt4.c b/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.c index b291a6eb8a87..8f345115a306 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt4.c +++ b/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation + * Copyright © 2018-2019 Intel Corporation * * Autogenerated file by GPU Top : https://github.com/rib/gputop * DO NOT EDIT manually! @@ -65,26 +64,26 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_sklgt4(struct drm_i915_private *dev_priv) { - strlcpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.test_config.uuid, "882fa433-1f4a-4a67-a962-c741888fe5f5", - sizeof(dev_priv->perf.oa.test_config.uuid)); - dev_priv->perf.oa.test_config.id = 1; + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; - dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; - dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); - dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; - dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); - dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; - dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); - dev_priv->perf.oa.test_config.sysfs_metric.name = "882fa433-1f4a-4a67-a962-c741888fe5f5"; - dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + dev_priv->perf.test_config.sysfs_metric.name = "882fa433-1f4a-4a67-a962-c741888fe5f5"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; - dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; - dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; - dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; } diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h b/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h new file mode 100644 index 000000000000..378ab7ab78d5 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018-2019 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ */ + +#ifndef __I915_OA_SKLGT4_H__ +#define __I915_OA_SKLGT4_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_sklgt4(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.c b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c new file mode 100644 index 000000000000..a29d93707345 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2018 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + */ + +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "i915_oa_tgl.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0xD920), 0x00000000 }, + { _MMIO(0xD900), 0x00000000 }, + { _MMIO(0xD904), 0xF0800000 }, + { _MMIO(0xD910), 0x00000000 }, + { _MMIO(0xD914), 0xF0800000 }, + { _MMIO(0xDC40), 0x00FF0000 }, + { _MMIO(0xD940), 0x00000004 }, + { _MMIO(0xD944), 0x0000FFFF }, + { _MMIO(0xDC00), 0x00000004 }, + { _MMIO(0xDC04), 0x0000FFFF }, + { _MMIO(0xD948), 0x00000003 }, + { _MMIO(0xD94C), 0x0000FFFF }, + { _MMIO(0xDC08), 0x00000003 }, + { _MMIO(0xDC0C), 0x0000FFFF }, + { _MMIO(0xD950), 0x00000007 }, + { _MMIO(0xD954), 0x0000FFFF }, + { _MMIO(0xDC10), 0x00000007 }, + { _MMIO(0xDC14), 0x0000FFFF }, + { _MMIO(0xD958), 0x00100002 }, + { _MMIO(0xD95C), 0x0000FFF7 }, + { _MMIO(0xDC18), 0x00100002 }, + { _MMIO(0xDC1C), 0x0000FFF7 }, + { _MMIO(0xD960), 0x00100002 }, + { _MMIO(0xD964), 0x0000FFCF }, + { _MMIO(0xDC20), 0x00100002 }, + { _MMIO(0xDC24), 0x0000FFCF }, + { _MMIO(0xD968), 0x00100082 }, + { _MMIO(0xD96C), 0x0000FFEF }, + { _MMIO(0xDC28), 0x00100082 }, + { _MMIO(0xDC2C), 0x0000FFEF }, + { _MMIO(0xD970), 0x001000C2 }, + { _MMIO(0xD974), 0x0000FFE7 }, + { _MMIO(0xDC30), 0x001000C2 }, + { _MMIO(0xDC34), 0x0000FFE7 }, + { _MMIO(0xD978), 0x00100001 }, + { _MMIO(0xD97C), 0x0000FFE7 }, + { _MMIO(0xDC38), 0x00100001 }, + { _MMIO(0xDC3C), 0x0000FFE7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x0D04), 0x00000200 }, + { _MMIO(0x9840), 0x00000000 }, + { _MMIO(0x9884), 0x00000000 }, + { _MMIO(0x9888), 0x280E0000 }, + { _MMIO(0x9888), 0x1E0E0147 }, + { _MMIO(0x9888), 0x180E0000 }, + { _MMIO(0x9888), 0x160E0000 }, + { _MMIO(0x9888), 0x1E0F1000 }, + { _MMIO(0x9888), 0x1E104000 }, + { _MMIO(0x9888), 0x2E020100 }, + { _MMIO(0x9888), 0x2C030004 }, + { _MMIO(0x9888), 0x38003000 }, + { _MMIO(0x9888), 0x1E0A8000 }, + { _MMIO(0x9884), 0x00000003 }, + { _MMIO(0x9888), 0x49110000 }, + { _MMIO(0x9888), 0x5D101400 }, + { _MMIO(0x9888), 0x1D140020 }, + { _MMIO(0x9888), 0x1D1103A3 }, + { _MMIO(0x9888), 0x01110000 }, + { _MMIO(0x9888), 0x61111000 }, + { _MMIO(0x9888), 0x1F128000 }, + { _MMIO(0x9888), 0x17100000 }, + { _MMIO(0x9888), 0x55100630 }, + { _MMIO(0x9888), 0x57100000 }, + { _MMIO(0x9888), 0x31100000 }, + { _MMIO(0x9884), 0x00000003 }, + { _MMIO(0x9888), 0x65100002 }, + { _MMIO(0x9884), 0x00000000 }, + { _MMIO(0x9888), 0x42000001 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv) +{ + strlcpy(dev_priv->perf.test_config.uuid, + "80a833f0-2504-4321-8894-e9277844ce7b", + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; + + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len 
= ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.test_config.sysfs_metric.name = "80a833f0-2504-4321-8894-e9277844ce7b"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; + + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; + + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.h b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h new file mode 100644 index 000000000000..4c25f0be825c --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + */ + +#ifndef __I915_OA_TGL_H__ +#define __I915_OA_TGL_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index c0b3537a5fa6..ef572a0c2566 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -4,7 +4,10 @@ * Copyright © 2018 Intel Corporation */ +#include <linux/kref.h> + #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" #include "i915_selftest.h" @@ -13,42 +16,90 @@ struct live_active { struct i915_active base; + struct kref ref; bool retired; }; -static void __live_active_retire(struct i915_active *base) +static void __live_get(struct live_active *active) +{ + kref_get(&active->ref); +} + +static void __live_free(struct live_active *active) +{ + i915_active_fini(&active->base); + kfree(active); +} + +static void __live_release(struct kref *ref) +{ + struct live_active *active = container_of(ref, typeof(*active), ref); + + __live_free(active); +} + +static void __live_put(struct live_active *active) +{ + kref_put(&active->ref, __live_release); +} + +static int __live_active(struct i915_active *base) +{ + struct live_active *active = container_of(base, typeof(*active), base); + + __live_get(active); + return 0; +} + +static void __live_retire(struct i915_active *base) { struct live_active *active = container_of(base, typeof(*active), base); active->retired = true; + __live_put(active); +} + +static struct live_active *__live_alloc(struct drm_i915_private *i915) +{ + struct live_active *active; + + active = kzalloc(sizeof(*active), GFP_KERNEL); + if (!active) + return NULL; + + kref_init(&active->ref); + i915_active_init(&active->base, __live_active, __live_retire); + + return active; } -static int __live_active_setup(struct drm_i915_private *i915, - struct live_active *active) +static struct live_active * +__live_active_setup(struct drm_i915_private *i915) { struct intel_engine_cs *engine; struct i915_sw_fence *submit; - enum intel_engine_id id; + struct live_active *active; unsigned int count = 0; int err = 0; - submit = heap_fence_create(GFP_KERNEL); - if (!submit) - return -ENOMEM; + active = __live_alloc(i915); + if (!active) + return ERR_PTR(-ENOMEM); - 
i915_active_init(i915, &active->base, __live_active_retire); - active->retired = false; + submit = heap_fence_create(GFP_KERNEL); + if (!submit) { + kfree(active); + return ERR_PTR(-ENOMEM); + } - if (!i915_active_acquire(&active->base)) { - pr_err("First i915_active_acquire should report being idle\n"); - err = -EINVAL; + err = i915_active_acquire(&active->base); + if (err) goto out; - } - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { struct i915_request *rq; - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -58,8 +109,7 @@ static int __live_active_setup(struct drm_i915_private *i915, submit, GFP_KERNEL); if (err >= 0) - err = i915_active_ref(&active->base, - rq->fence.context, rq); + err = i915_active_add_request(&active->base, rq); i915_request_add(rq); if (err) { pr_err("Failed to track active ref!\n"); @@ -70,78 +120,84 @@ static int __live_active_setup(struct drm_i915_private *i915, } i915_active_release(&active->base); - if (active->retired && count) { + if (READ_ONCE(active->retired) && count) { pr_err("i915_active retired before submission!\n"); err = -EINVAL; } - if (active->base.count != count) { + if (atomic_read(&active->base.count) != count) { pr_err("i915_active not tracking all requests, found %d, expected %d\n", - active->base.count, count); + atomic_read(&active->base.count), count); err = -EINVAL; } out: i915_sw_fence_commit(submit); heap_fence_put(submit); + if (err) { + __live_put(active); + active = ERR_PTR(err); + } - return err; + return active; } static int live_active_wait(void *arg) { struct drm_i915_private *i915 = arg; - struct live_active active; - intel_wakeref_t wakeref; - int err; + struct live_active *active; + int err = 0; /* Check that we get a callback when requests retire upon waiting */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + active = __live_active_setup(i915); + if (IS_ERR(active)) + return PTR_ERR(active); - err = __live_active_setup(i915, &active); + i915_active_wait(&active->base); + if (!READ_ONCE(active->retired)) { + struct drm_printer p = drm_err_printer(__func__); - i915_active_wait(&active.base); - if (!active.retired) { pr_err("i915_active not retired after waiting!\n"); + i915_active_print(&active->base, &p); + err = -EINVAL; } - i915_active_fini(&active.base); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + __live_put(active); + + if (igt_flush_test(i915)) err = -EIO; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } static int live_active_retire(void *arg) { struct drm_i915_private *i915 = arg; - struct live_active active; - intel_wakeref_t wakeref; - int err; + struct live_active *active; + int err = 0; /* Check that we get a callback when requests are indirectly retired */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - err = __live_active_setup(i915, &active); + active = __live_active_setup(i915); + if (IS_ERR(active)) + return PTR_ERR(active); /* waits for & retires all requests */ - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; - if (!active.retired) { + if (!READ_ONCE(active->retired)) { + struct drm_printer p = drm_err_printer(__func__); + pr_err("i915_active not retired after flushing!\n"); + i915_active_print(&active->base, &p); + err = -EINVAL; } - i915_active_fini(&active.base); - 
intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + __live_put(active); + return err; } @@ -152,8 +208,86 @@ int i915_active_live_selftests(struct drm_i915_private *i915) SUBTEST(live_active_retire), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; return i915_subtests(tests, i915); } + +static struct intel_engine_cs *node_to_barrier(struct active_node *it) +{ + struct intel_engine_cs *engine; + + if (!is_barrier(&it->base)) + return NULL; + + engine = __barrier_to_engine(it); + smp_rmb(); /* serialise with add_active_barriers */ + if (!is_barrier(&it->base)) + return NULL; + + return engine; +} + +void i915_active_print(struct i915_active *ref, struct drm_printer *m) +{ + drm_printf(m, "active %pS:%pS\n", ref->active, ref->retire); + drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count)); + drm_printf(m, "\tpreallocated barriers? %s\n", + yesno(!llist_empty(&ref->preallocated_barriers))); + + if (i915_active_acquire_if_busy(ref)) { + struct active_node *it, *n; + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + struct intel_engine_cs *engine; + + engine = node_to_barrier(it); + if (engine) { + drm_printf(m, "\tbarrier: %s\n", engine->name); + continue; + } + + if (i915_active_fence_isset(&it->base)) { + drm_printf(m, + "\ttimeline: %llx\n", it->timeline); + continue; + } + } + + i915_active_release(ref); + } +} + +static void spin_unlock_wait(spinlock_t *lock) +{ + spin_lock_irq(lock); + spin_unlock_irq(lock); +} + +void i915_active_unlock_wait(struct i915_active *ref) +{ + if (i915_active_acquire_if_busy(ref)) { + struct active_node *it, *n; + + rcu_read_lock(); + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + struct dma_fence *f; + + /* Wait for all active callbacks */ + f = rcu_dereference(it->base.fence); + if (f) + spin_unlock_wait(f->lock); + } + rcu_read_unlock(); + + i915_active_release(ref); + } + + /* And wait for the retire callback */ + spin_lock_irq(&ref->tree_lock); + spin_unlock_irq(&ref->tree_lock); + + /* ... 
which may have been on a thread instead */ + flush_work(&ref->work); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c new file mode 100644 index 000000000000..1b856bae67b5 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c @@ -0,0 +1,724 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" +#include "i915_random.h" + +#define SZ_8G (1ULL << 33) + +static void __igt_dump_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block, + bool buddy) +{ + pr_err("block info: header=%llx, state=%u, order=%d, offset=%llx size=%llx root=%s buddy=%s\n", + block->header, + i915_buddy_block_state(block), + i915_buddy_block_order(block), + i915_buddy_block_offset(block), + i915_buddy_block_size(mm, block), + yesno(!block->parent), + yesno(buddy)); +} + +static void igt_dump_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *buddy; + + __igt_dump_block(mm, block, false); + + buddy = get_buddy(block); + if (buddy) + __igt_dump_block(mm, buddy, true); +} + +static int igt_check_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *buddy; + unsigned int block_state; + u64 block_size; + u64 offset; + int err = 0; + + block_state = i915_buddy_block_state(block); + + if (block_state != I915_BUDDY_ALLOCATED && + block_state != I915_BUDDY_FREE && + block_state != I915_BUDDY_SPLIT) { + pr_err("block state mismatch\n"); + err = -EINVAL; + } + + block_size = i915_buddy_block_size(mm, block); + offset = i915_buddy_block_offset(block); + + if (block_size < mm->chunk_size) { + pr_err("block size smaller than min size\n"); + err = -EINVAL; + } + + if (!is_power_of_2(block_size)) { + pr_err("block size not power of two\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(block_size, mm->chunk_size)) { + pr_err("block size not aligned to min size\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(offset, mm->chunk_size)) { + pr_err("block offset not aligned to min size\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(offset, block_size)) { + pr_err("block offset not aligned to block size\n"); + err = -EINVAL; + } + + buddy = get_buddy(block); + + if (!buddy && block->parent) { + pr_err("buddy has gone fishing\n"); + err = -EINVAL; + } + + if (buddy) { + if (i915_buddy_block_offset(buddy) != (offset ^ block_size)) { + pr_err("buddy has wrong offset\n"); + err = -EINVAL; + } + + if (i915_buddy_block_size(mm, buddy) != block_size) { + pr_err("buddy size mismatch\n"); + err = -EINVAL; + } + + if (i915_buddy_block_state(buddy) == block_state && + block_state == I915_BUDDY_FREE) { + pr_err("block and its buddy are free\n"); + err = -EINVAL; + } + } + + return err; +} + +static int igt_check_blocks(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 expected_size, + bool is_contiguous) +{ + struct i915_buddy_block *block; + struct i915_buddy_block *prev; + u64 total; + int err = 0; + + block = NULL; + prev = NULL; + total = 0; + + list_for_each_entry(block, blocks, link) { + err = igt_check_block(mm, block); + + if (!i915_buddy_block_is_allocated(block)) { + pr_err("block not allocated\n"), + err = -EINVAL; + } + + if (is_contiguous && prev) { + u64 prev_block_size; + u64 prev_offset; + u64 offset; + + prev_offset = i915_buddy_block_offset(prev); + prev_block_size = i915_buddy_block_size(mm, prev); + offset = i915_buddy_block_offset(block); + + if (offset != (prev_offset + 
prev_block_size)) { + pr_err("block offset mismatch\n"); + err = -EINVAL; + } + } + + if (err) + break; + + total += i915_buddy_block_size(mm, block); + prev = block; + } + + if (!err) { + if (total != expected_size) { + pr_err("size mismatch, expected=%llx, found=%llx\n", + expected_size, total); + err = -EINVAL; + } + return err; + } + + if (prev) { + pr_err("prev block, dump:\n"); + igt_dump_block(mm, prev); + } + + if (block) { + pr_err("bad block, dump:\n"); + igt_dump_block(mm, block); + } + + return err; +} + +static int igt_check_mm(struct i915_buddy_mm *mm) +{ + struct i915_buddy_block *root; + struct i915_buddy_block *prev; + unsigned int i; + u64 total; + int err = 0; + + if (!mm->n_roots) { + pr_err("n_roots is zero\n"); + return -EINVAL; + } + + if (mm->n_roots != hweight64(mm->size)) { + pr_err("n_roots mismatch, n_roots=%u, expected=%lu\n", + mm->n_roots, hweight64(mm->size)); + return -EINVAL; + } + + root = NULL; + prev = NULL; + total = 0; + + for (i = 0; i < mm->n_roots; ++i) { + struct i915_buddy_block *block; + unsigned int order; + + root = mm->roots[i]; + if (!root) { + pr_err("root(%u) is NULL\n", i); + err = -EINVAL; + break; + } + + err = igt_check_block(mm, root); + + if (!i915_buddy_block_is_free(root)) { + pr_err("root not free\n"); + err = -EINVAL; + } + + order = i915_buddy_block_order(root); + + if (!i) { + if (order != mm->max_order) { + pr_err("max order root missing\n"); + err = -EINVAL; + } + } + + if (prev) { + u64 prev_block_size; + u64 prev_offset; + u64 offset; + + prev_offset = i915_buddy_block_offset(prev); + prev_block_size = i915_buddy_block_size(mm, prev); + offset = i915_buddy_block_offset(root); + + if (offset != (prev_offset + prev_block_size)) { + pr_err("root offset mismatch\n"); + err = -EINVAL; + } + } + + block = list_first_entry_or_null(&mm->free_list[order], + struct i915_buddy_block, + link); + if (block != root) { + pr_err("root mismatch at order=%u\n", order); + err = -EINVAL; + } + + if (err) + break; + + prev = root; + total += i915_buddy_block_size(mm, root); + } + + if (!err) { + if (total != mm->size) { + pr_err("expected mm size=%llx, found=%llx\n", mm->size, + total); + err = -EINVAL; + } + return err; + } + + if (prev) { + pr_err("prev root(%u), dump:\n", i - 1); + igt_dump_block(mm, prev); + } + + if (root) { + pr_err("bad root(%u), dump:\n", i); + igt_dump_block(mm, root); + } + + return err; +} + +static void igt_mm_config(u64 *size, u64 *chunk_size) +{ + I915_RND_STATE(prng); + u64 s, ms; + + /* Nothing fancy, just try to get an interesting bit pattern */ + + prandom_seed_state(&prng, i915_selftest.random_seed); + + s = i915_prandom_u64_state(&prng) & (SZ_8G - 1); + ms = BIT_ULL(12 + (prandom_u32_state(&prng) % ilog2(s >> 12))); + s = max(s & -ms, ms); + + *chunk_size = ms; + *size = s; +} + +static int igt_buddy_alloc_smoke(void *arg) +{ + struct i915_buddy_mm mm; + int max_order; + u64 chunk_size; + u64 mm_size; + int err; + + igt_mm_config(&mm_size, &chunk_size); + + pr_info("buddy_init with size=%llx, chunk_size=%llx\n", mm_size, chunk_size); + + err = i915_buddy_init(&mm, mm_size, chunk_size); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + + for (max_order = mm.max_order; max_order >= 0; max_order--) { + struct i915_buddy_block *block; + int order; + LIST_HEAD(blocks); + u64 total; + + err = igt_check_mm(&mm); + if (err) { + pr_err("pre-mm check failed, abort\n"); + break; + } + + pr_info("filling from max_order=%u\n", max_order); + + order = max_order; + total = 0; + + do { +retry: + 
block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + err = PTR_ERR(block); + if (err == -ENOMEM) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + } else { + if (order--) { + err = 0; + goto retry; + } + + pr_err("buddy_alloc with order=%d failed(%d)\n", + order, err); + } + + break; + } + + list_add_tail(&block->link, &blocks); + + if (i915_buddy_block_order(block) != order) { + pr_err("buddy_alloc order mismatch\n"); + err = -EINVAL; + break; + } + + total += i915_buddy_block_size(&mm, block); + } while (total < mm.size); + + if (!err) + err = igt_check_blocks(&mm, &blocks, total, false); + + i915_buddy_free_list(&mm, &blocks); + + if (!err) { + err = igt_check_mm(&mm); + if (err) + pr_err("post-mm check failed\n"); + } + + if (err) + break; + + cond_resched(); + } + + if (err == -ENOMEM) + err = 0; + + i915_buddy_fini(&mm); + + return err; +} + +static int igt_buddy_alloc_pessimistic(void *arg) +{ + const unsigned int max_order = 16; + struct i915_buddy_block *block, *bn; + struct i915_buddy_mm mm; + unsigned int order; + LIST_HEAD(blocks); + int err; + + /* + * Create a pot-sized mm, then allocate one of each possible + * order within. This should leave the mm with exactly one + * page left. + */ + + err = i915_buddy_init(&mm, PAGE_SIZE << max_order, PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (order = 0; order < max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + + list_add_tail(&block->link, &blocks); + } + + /* And now the last remaining block available */ + block = i915_buddy_alloc(&mm, 0); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM on final alloc\n"); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &blocks); + + /* Should be completely full! */ + for (order = max_order; order--; ) { + block = i915_buddy_alloc(&mm, order); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at order %d, it should be full!", + order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + + block = list_last_entry(&blocks, typeof(*block), link); + list_del(&block->link); + i915_buddy_free(&mm, block); + + /* As we free in increasing size, we make available larger blocks */ + order = 1; + list_for_each_entry_safe(block, bn, &blocks, link) { + list_del(&block->link); + i915_buddy_free(&mm, block); + + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc (realloc) hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + i915_buddy_free(&mm, block); + order++; + } + + /* To confirm, now the whole mm should be available */ + block = i915_buddy_alloc(&mm, max_order); + if (IS_ERR(block)) { + pr_info("buddy_alloc (realloc) hit -ENOMEM with order=%d\n", + max_order); + err = PTR_ERR(block); + goto err; + } + i915_buddy_free(&mm, block); + +err: + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_optimistic(void *arg) +{ + const int max_order = 16; + struct i915_buddy_block *block; + struct i915_buddy_mm mm; + LIST_HEAD(blocks); + int order; + int err; + + /* + * Create a mm with one block of each order available, and + * try to allocate them all. 
+ */ + + err = i915_buddy_init(&mm, + PAGE_SIZE * ((1 << (max_order + 1)) - 1), + PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (order = 0; order <= max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + + list_add_tail(&block->link, &blocks); + } + + /* Should be completely full! */ + block = i915_buddy_alloc(&mm, 0); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded, it should be full!"); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + +err: + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_pathological(void *arg) +{ + const int max_order = 16; + struct i915_buddy_block *block; + struct i915_buddy_mm mm; + LIST_HEAD(blocks); + LIST_HEAD(holes); + int order, top; + int err; + + /* + * Create a pot-sized mm, then allocate one of each possible + * order within. This should leave the mm with exactly one + * page left. Free the largest block, then whittle down again. + * Eventually we will have a fully 50% fragmented mm. + */ + + err = i915_buddy_init(&mm, PAGE_SIZE << max_order, PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (top = max_order; top; top--) { + /* Make room by freeing the largest allocated block */ + block = list_first_entry_or_null(&blocks, typeof(*block), link); + if (block) { + list_del(&block->link); + i915_buddy_free(&mm, block); + } + + for (order = top; order--; ) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d, top=%d\n", + order, top); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &blocks); + } + + /* There should be one final page for this sub-allocation */ + block = i915_buddy_alloc(&mm, 0); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM for hole\n"); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &holes); + + block = i915_buddy_alloc(&mm, top); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!", + top, max_order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + + i915_buddy_free_list(&mm, &holes); + + /* Nothing larger than blocks of chunk_size now available */ + for (order = 1; order <= max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at order %d, it should be full!", + order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + +err: + list_splice_tail(&holes, &blocks); + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_range(void *arg) +{ + struct i915_buddy_mm mm; + unsigned long page_num; + LIST_HEAD(blocks); + u64 chunk_size; + u64 offset; + u64 size; + u64 rem; + int err; + + igt_mm_config(&size, &chunk_size); + + pr_info("buddy_init with size=%llx, chunk_size=%llx\n", size, chunk_size); + + err = i915_buddy_init(&mm, size, chunk_size); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + + err = igt_check_mm(&mm); + if (err) { + pr_err("pre-mm check failed, abort, abort, abort!\n"); + goto err_fini; + } + + rem = mm.size; + offset = 0; + + 
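/*
 * Editor's aside, not part of the patch above: a minimal userspace sketch of
 * the arithmetic the pessimistic/optimistic buddy selftests rely on.  A
 * pot-sized mm of PAGE_SIZE << max_order holds 2^max_order chunks; taking one
 * block of each order 0..max_order-1 uses 2^max_order - 1 of them, so exactly
 * one chunk remains.  The buddy of a block at `offset` with power-of-two
 * `size` lives at `offset ^ size`, which is the relation igt_check_block()
 * verifies.  Everything below is illustrative only.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned int max_order = 16;
	const uint64_t chunks = UINT64_C(1) << max_order; /* chunks in the pot */
	uint64_t used = 0;
	unsigned int order;

	/* one block of each order, as igt_buddy_alloc_pessimistic() does */
	for (order = 0; order < max_order; order++)
		used += UINT64_C(1) << order;
	assert(chunks - used == 1); /* exactly one chunk left over */

	/* buddy offset: flip the bit that corresponds to the block size */
	const uint64_t size = UINT64_C(1) << 12;  /* a 4KiB block */
	const uint64_t offset = 5 * size;         /* fifth block of that size */
	printf("buddy of %#llx is %#llx\n",
	       (unsigned long long)offset,
	       (unsigned long long)(offset ^ size));
	return 0;
}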
for_each_prime_number_from(page_num, 1, ULONG_MAX - 1) { + struct i915_buddy_block *block; + LIST_HEAD(tmp); + + size = min(page_num * mm.chunk_size, rem); + + err = i915_buddy_alloc_range(&mm, &tmp, offset, size); + if (err) { + if (err == -ENOMEM) { + pr_info("alloc_range hit -ENOMEM with size=%llx\n", + size); + } else { + pr_err("alloc_range with offset=%llx, size=%llx failed(%d)\n", + offset, size, err); + } + + break; + } + + block = list_first_entry_or_null(&tmp, + struct i915_buddy_block, + link); + if (!block) { + pr_err("alloc_range has no blocks\n"); + err = -EINVAL; + break; + } + + if (i915_buddy_block_offset(block) != offset) { + pr_err("alloc_range start offset mismatch, found=%llx, expected=%llx\n", + i915_buddy_block_offset(block), offset); + err = -EINVAL; + } + + if (!err) + err = igt_check_blocks(&mm, &tmp, size, true); + + list_splice_tail(&tmp, &blocks); + + if (err) + break; + + offset += size; + + rem -= size; + if (!rem) + break; + + cond_resched(); + } + + if (err == -ENOMEM) + err = 0; + + i915_buddy_free_list(&mm, &blocks); + + if (!err) { + err = igt_check_mm(&mm); + if (err) + pr_err("post-mm check failed\n"); + } + +err_fini: + i915_buddy_fini(&mm); + + return err; +} + +int i915_buddy_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_buddy_alloc_pessimistic), + SUBTEST(igt_buddy_alloc_optimistic), + SUBTEST(igt_buddy_alloc_pathological), + SUBTEST(igt_buddy_alloc_smoke), + SUBTEST(igt_buddy_alloc_range), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index c6a01a6e87f1..78f36faf2bbe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -8,29 +8,34 @@ #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "i915_selftest.h" #include "igt_flush_test.h" #include "mock_drm.h" -static int switch_to_context(struct drm_i915_private *i915, - struct i915_gem_context *ctx) +static int switch_to_context(struct i915_gem_context *ctx) { - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct i915_gem_engines_iter it; + struct intel_context *ce; + int err = 0; - for_each_engine(engine, i915, id) { + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct i915_request *rq; - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) - return PTR_ERR(rq); + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } i915_request_add(rq); } + i915_gem_context_unlock_engines(ctx); - return 0; + return err; } static void trash_stolen(struct drm_i915_private *i915) @@ -41,6 +46,10 @@ static void trash_stolen(struct drm_i915_private *i915) unsigned long page; u32 prng = 0x12345678; + /* XXX: fsck. needs some more thought... */ + if (!i915_ggtt_has_aperture(ggtt)) + return; + for (page = 0; page < size; page += PAGE_SIZE) { const dma_addr_t dma = i915->dsm.start + page; u32 __iomem *s; @@ -115,8 +124,9 @@ static void pm_resume(struct drm_i915_private *i915) * that runtime-pm just works. 
*/ with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - intel_gt_sanitize(i915, false); - i915_gem_sanitize(i915); + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(&i915->ggtt); + i915_gem_resume(i915); } } @@ -125,7 +135,7 @@ static int igt_gem_suspend(void *arg) { struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx; - struct drm_file *file; + struct file *file; int err; file = mock_file(i915); @@ -133,11 +143,9 @@ static int igt_gem_suspend(void *arg) return PTR_ERR(file); err = -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); ctx = live_context(i915, file); if (!IS_ERR(ctx)) - err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); + err = switch_to_context(ctx); if (err) goto out; @@ -152,13 +160,9 @@ static int igt_gem_suspend(void *arg) pm_resume(i915); - mutex_lock(&i915->drm.struct_mutex); - err = switch_to_context(i915, ctx); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); + err = switch_to_context(ctx); out: - mock_file_free(i915, file); + fput(file); return err; } @@ -166,7 +170,7 @@ static int igt_gem_hibernate(void *arg) { struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx; - struct drm_file *file; + struct file *file; int err; file = mock_file(i915); @@ -174,11 +178,9 @@ static int igt_gem_hibernate(void *arg) return PTR_ERR(file); err = -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); ctx = live_context(i915, file); if (!IS_ERR(ctx)) - err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); + err = switch_to_context(ctx); if (err) goto out; @@ -193,13 +195,9 @@ static int igt_gem_hibernate(void *arg) pm_resume(i915); - mutex_lock(&i915->drm.struct_mutex); - err = switch_to_context(i915, ctx); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); + err = switch_to_context(ctx); out: - mock_file_free(i915, file); + fput(file); return err; } @@ -210,8 +208,8 @@ int i915_gem_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_gem_hibernate), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index a3cb0aade6f1..06ef88510209 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -25,6 +25,7 @@ #include "gem/i915_gem_pm.h" #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_gt.h" #include "i915_selftest.h" @@ -42,31 +43,34 @@ static void quirk_add(struct drm_i915_gem_object *obj, list_add(&obj->st_link, objects); } -static int populate_ggtt(struct drm_i915_private *i915, - struct list_head *objects) +static int populate_ggtt(struct i915_ggtt *ggtt, struct list_head *objects) { unsigned long unbound, bound, count; struct drm_i915_gem_object *obj; - u64 size; count = 0; - for (size = 0; - size + I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; - size += I915_GTT_PAGE_SIZE) { + do { struct i915_vma *vma; - obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, + I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); - quirk_add(obj, objects); - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + if (vma == ERR_PTR(-ENOSPC)) + break; 
+ return PTR_ERR(vma); + } + quirk_add(obj, objects); count++; - } + } while (1); + pr_debug("Filled GGTT with %lu pages [%llu total]\n", + count, ggtt->vm.total / PAGE_SIZE); bound = 0; unbound = 0; @@ -92,7 +96,7 @@ static int populate_ggtt(struct drm_i915_private *i915, return -EINVAL; } - if (list_empty(&i915->ggtt.vm.bound_list)) { + if (list_empty(&ggtt->vm.bound_list)) { pr_err("No objects on the GGTT inactive list!\n"); return -EINVAL; } @@ -100,20 +104,16 @@ static int populate_ggtt(struct drm_i915_private *i915, return 0; } -static void unpin_ggtt(struct drm_i915_private *i915) +static void unpin_ggtt(struct i915_ggtt *ggtt) { - struct i915_ggtt *ggtt = &i915->ggtt; struct i915_vma *vma; - mutex_lock(&ggtt->vm.mutex); - list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); - mutex_unlock(&ggtt->vm.mutex); } -static void cleanup_objects(struct drm_i915_private *i915, - struct list_head *list) +static void cleanup_objects(struct i915_ggtt *ggtt, struct list_head *list) { struct drm_i915_gem_object *obj, *on; @@ -123,44 +123,44 @@ static void cleanup_objects(struct drm_i915_private *i915, i915_gem_object_put(obj); } - mutex_unlock(&i915->drm.struct_mutex); - - i915_gem_drain_freed_objects(i915); - - mutex_lock(&i915->drm.struct_mutex); + i915_gem_drain_freed_objects(ggtt->vm.i915); } static int igt_evict_something(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict one. */ - err = populate_ggtt(i915, &objects); + err = populate_ggtt(ggtt, &objects); if (err) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_something(&ggtt->vm, I915_GTT_PAGE_SIZE, 0, 0, 0, U64_MAX, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", err); goto cleanup; } - unpin_ggtt(i915); + unpin_ggtt(ggtt); /* Everything is unpinned, we should be able to evict something */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_something(&ggtt->vm, I915_GTT_PAGE_SIZE, 0, 0, 0, U64_MAX, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", err); @@ -168,13 +168,14 @@ static int igt_evict_something(void *arg) } cleanup: - cleanup_objects(i915, &objects); + cleanup_objects(ggtt, &objects); return err; } static int igt_overcommit(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; struct drm_i915_gem_object *obj; struct i915_vma *vma; LIST_HEAD(objects); @@ -184,11 +185,11 @@ static int igt_overcommit(void *arg) * We expect it to fail. 
*/ - err = populate_ggtt(i915, &objects); + err = populate_ggtt(ggtt, &objects); if (err) goto cleanup; - obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto cleanup; @@ -197,21 +198,21 @@ static int igt_overcommit(void *arg) quirk_add(obj, &objects); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); - if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { - pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); + if (vma != ERR_PTR(-ENOSPC)) { + pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR_OR_ZERO(vma)); err = -EINVAL; goto cleanup; } cleanup: - cleanup_objects(i915, &objects); + cleanup_objects(ggtt, &objects); return err; } static int igt_evict_for_vma(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; struct drm_mm_node target = { .start = 0, .size = 4096, @@ -221,22 +222,26 @@ static int igt_evict_for_vma(void *arg) /* Fill the GGTT with pinned objects and try to evict a range. */ - err = populate_ggtt(i915, &objects); + err = populate_ggtt(ggtt, &objects); if (err) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n", err); goto cleanup; } - unpin_ggtt(i915); + unpin_ggtt(ggtt); /* Everything is unpinned, we should be able to evict the node */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_for_node returned err=%d\n", err); @@ -244,7 +249,7 @@ static int igt_evict_for_vma(void *arg) } cleanup: - cleanup_objects(i915, &objects); + cleanup_objects(ggtt, &objects); return err; } @@ -257,8 +262,8 @@ static void mock_color_adjust(const struct drm_mm_node *node, static int igt_evict_for_cache_color(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; const unsigned long flags = PIN_OFFSET_FIXED; struct drm_mm_node target = { .start = I915_GTT_PAGE_SIZE * 2, @@ -270,14 +275,16 @@ static int igt_evict_for_cache_color(void *arg) LIST_HEAD(objects); int err; - /* Currently the use of color_adjust is limited to cache domains within - * the ggtt, and so the presence of mm.color_adjust is assumed to be - * i915_gtt_color_adjust throughout our driver, so using a mock color - * adjust will work just fine for our purposes. + /* + * Currently the use of color_adjust for the GGTT is limited to cache + * coloring and guard pages, and so the presence of mm.color_adjust for + * the GGTT is assumed to be i915_ggtt_color_adjust, hence using a mock + * color adjust will work just fine for our purposes. 
*/ ggtt->vm.mm.color_adjust = mock_color_adjust; + GEM_BUG_ON(!i915_vm_has_cache_coloring(&ggtt->vm)); - obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto cleanup; @@ -293,7 +300,7 @@ static int igt_evict_for_cache_color(void *arg) goto cleanup; } - obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto cleanup; @@ -313,7 +320,9 @@ static int igt_evict_for_cache_color(void *arg) i915_vma_unpin(vma); /* Remove just the second vma */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err); goto cleanup; @@ -324,7 +333,9 @@ static int igt_evict_for_cache_color(void *arg) */ target.color = I915_CACHE_L3_LLC; + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (!err) { pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err); err = -EINVAL; @@ -334,36 +345,40 @@ static int igt_evict_for_cache_color(void *arg) err = 0; cleanup: - unpin_ggtt(i915); - cleanup_objects(i915, &objects); + unpin_ggtt(ggtt); + cleanup_objects(ggtt, &objects); ggtt->vm.mm.color_adjust = NULL; return err; } static int igt_evict_vm(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict everything. */ - err = populate_ggtt(i915, &objects); + err = populate_ggtt(ggtt, &objects); if (err) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_vm(&ggtt->vm); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); goto cleanup; } - unpin_ggtt(i915); + unpin_ggtt(ggtt); + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_vm(&ggtt->vm); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); @@ -371,14 +386,16 @@ static int igt_evict_vm(void *arg) } cleanup: - cleanup_objects(i915, &objects); + cleanup_objects(ggtt, &objects); return err; } static int igt_evict_contexts(void *arg) { const u64 PRETEND_GGTT_SIZE = 16ull << 20; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; + struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; enum intel_engine_id id; struct reserved { @@ -404,14 +421,14 @@ static int igt_evict_contexts(void *arg) if (!HAS_FULL_PPGTT(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); /* Reserve a block so that we know we have enough to fit a few rq */ memset(&hole, 0, sizeof(hole)); - err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole, + mutex_lock(&ggtt->vm.mutex); + err = i915_gem_gtt_insert(&ggtt->vm, &hole, PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE, - 0, i915->ggtt.vm.total, + 0, ggtt->vm.total, PIN_NOEVICT); if (err) goto out_locked; @@ -421,15 +438,17 @@ static int igt_evict_contexts(void *arg) do { struct reserved *r; + mutex_unlock(&ggtt->vm.mutex); r = kcalloc(1, sizeof(*r), GFP_KERNEL); + mutex_lock(&ggtt->vm.mutex); if (!r) { err = -ENOMEM; goto out_locked; } - if 
(i915_gem_gtt_insert(&i915->ggtt.vm, &r->node, + if (i915_gem_gtt_insert(&ggtt->vm, &r->node, 1ul << 20, 0, I915_COLOR_UNEVICTABLE, - 0, i915->ggtt.vm.total, + 0, ggtt->vm.total, PIN_NOEVICT)) { kfree(r); break; @@ -441,13 +460,13 @@ static int igt_evict_contexts(void *arg) count++; } while (1); drm_mm_remove_node(&hole); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&ggtt->vm.mutex); pr_info("Filled GGTT with %lu 1MiB nodes\n", count); /* Overfill the GGTT with context objects and so try to evict one. */ - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_sw_fence fence; - struct drm_file *file; + struct file *file; file = mock_file(i915); if (IS_ERR(file)) { @@ -456,7 +475,6 @@ static int igt_evict_contexts(void *arg) } count = 0; - mutex_lock(&i915->drm.struct_mutex); onstack_fence_init(&fence); do { struct i915_request *rq; @@ -474,8 +492,8 @@ static int igt_evict_contexts(void *arg) if (IS_ERR(rq)) { /* When full, fail_if_busy will trigger EBUSY */ if (PTR_ERR(rq) != -EBUSY) { - pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n", - ctx->hw_id, engine->name, + pr_err("Unexpected error from request alloc (on %s): %d\n", + engine->name, (int)PTR_ERR(rq)); err = PTR_ERR(rq); } @@ -493,20 +511,18 @@ static int igt_evict_contexts(void *arg) count++; err = 0; } while(1); - mutex_unlock(&i915->drm.struct_mutex); - onstack_fence_fini(&fence); pr_info("Submitted %lu contexts/requests on %s\n", count, engine->name); - mock_file_free(i915, file); + fput(file); if (err) break; } - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&ggtt->vm.mutex); out_locked: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; while (reserved) { struct reserved *next = reserved->next; @@ -518,8 +534,8 @@ out_locked: } if (drm_mm_node_allocated(&hole)) drm_mm_remove_node(&hole); + mutex_unlock(&ggtt->vm.mutex); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -541,11 +557,8 @@ int i915_gem_evict_mock_selftests(void) if (!i915) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = i915_subtests(tests, i915); - - mutex_unlock(&i915->drm.struct_mutex); + err = i915_subtests(tests, &i915->gt); drm_dev_put(&i915->drm); return err; @@ -557,8 +570,8 @@ int i915_gem_evict_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_evict_contexts), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 1a60b9fe8221..b342bef5e7c9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -25,26 +25,21 @@ #include <linux/list_sort.h> #include <linux/prime_numbers.h> +#include "gem/i915_gem_context.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_context.h" #include "i915_random.h" #include "i915_selftest.h" #include "mock_drm.h" #include "mock_gem_device.h" +#include "mock_gtt.h" +#include "igt_flush_test.h" static void cleanup_freed_objects(struct drm_i915_private *i915) { - /* - * As we may hold onto the struct_mutex for inordinate lengths of - * time, the NMI khungtaskd detector may fire for the free objects - * worker. 
- */ - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - - mutex_lock(&i915->drm.struct_mutex); } static void fake_free_pages(struct drm_i915_gem_object *obj, @@ -88,8 +83,6 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) } GEM_BUG_ON(rem); - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, pages, sg_page_sizes); return 0; @@ -101,7 +94,6 @@ static void fake_put_pages(struct drm_i915_gem_object *obj, { fake_free_pages(obj, pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops fake_ops = { @@ -113,6 +105,7 @@ static const struct drm_i915_gem_object_ops fake_ops = { static struct drm_i915_gem_object * fake_dma_object(struct drm_i915_private *i915, u64 size) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -126,7 +119,9 @@ fake_dma_object(struct drm_i915_private *i915, u64 size) goto err; drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &fake_ops); + i915_gem_object_init(obj, &fake_ops, &lock_class); + + i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -157,7 +152,7 @@ static int igt_ppgtt_alloc(void *arg) if (!HAS_PPGTT(dev_priv)) return 0; - ppgtt = __ppgtt_create(dev_priv); + ppgtt = i915_ppgtt_create(&dev_priv->gt); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -208,22 +203,21 @@ static int igt_ppgtt_alloc(void *arg) } err_ppgtt_cleanup: - mutex_lock(&dev_priv->drm.struct_mutex); i915_vm_put(&ppgtt->vm); - mutex_unlock(&dev_priv->drm.struct_mutex); return err; } -static int lowlevel_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int lowlevel_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { I915_RND_STATE(seed_prng); + struct i915_vma *mock_vma; unsigned int size; - struct i915_vma mock_vma; - memset(&mock_vma, 0, sizeof(struct i915_vma)); + mock_vma = kzalloc(sizeof(*mock_vma), GFP_KERNEL); + if (!mock_vma) + return -ENOMEM; /* Keep creating larger objects until one cannot fit into the hole */ for (size = 12; (hole_end - hole_start) >> size; size++) { @@ -247,8 +241,10 @@ static int lowlevel_hole(struct drm_i915_private *i915, if (order) break; } while (count >>= 1); - if (!count) + if (!count) { + kfree(mock_vma); return -ENOMEM; + } GEM_BUG_ON(!order); GEM_BUG_ON(count * BIT_ULL(size) > vm->total); @@ -260,7 +256,7 @@ static int lowlevel_hole(struct drm_i915_private *i915, * memory. We expect to hit -ENOMEM. 
*/ - obj = fake_dma_object(i915, BIT_ULL(size)); + obj = fake_dma_object(vm->i915, BIT_ULL(size)); if (IS_ERR(obj)) { kfree(order); break; @@ -291,22 +287,24 @@ static int lowlevel_hole(struct drm_i915_private *i915, vm->allocate_va_range(vm, addr, BIT_ULL(size))) break; - mock_vma.pages = obj->mm.pages; - mock_vma.node.size = BIT_ULL(size); - mock_vma.node.start = addr; + mock_vma->pages = obj->mm.pages; + mock_vma->node.size = BIT_ULL(size); + mock_vma->node.start = addr; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref) + vm->insert_entries(vm, mock_vma, + I915_CACHE_NONE, 0); } count = n; i915_random_reorder(order, count, &prng); for (n = 0; n < count; n++) { u64 addr = hole_start + order[n] * BIT_ULL(size); + intel_wakeref_t wakeref; GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); - vm->clear_range(vm, addr, BIT_ULL(size)); + with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref) + vm->clear_range(vm, addr, BIT_ULL(size)); } i915_gem_object_unpin_pages(obj); @@ -314,9 +312,10 @@ static int lowlevel_hole(struct drm_i915_private *i915, kfree(order); - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); } + kfree(mock_vma); return 0; } @@ -341,8 +340,7 @@ static void close_object_list(struct list_head *objects, } } -static int fill_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int fill_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -375,7 +373,7 @@ static int fill_hole(struct drm_i915_private *i915, { } }, *p; - obj = fake_dma_object(i915, full_size); + obj = fake_dma_object(vm->i915, full_size); if (IS_ERR(obj)) break; @@ -543,7 +541,7 @@ static int fill_hole(struct drm_i915_private *i915, } close_object_list(&objects, vm); - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); } return 0; @@ -553,8 +551,7 @@ err: return err; } -static int walk_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int walk_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -576,7 +573,7 @@ static int walk_hole(struct drm_i915_private *i915, u64 addr; int err = 0; - obj = fake_dma_object(i915, size << PAGE_SHIFT); + obj = fake_dma_object(vm->i915, size << PAGE_SHIFT); if (IS_ERR(obj)) break; @@ -631,14 +628,13 @@ err_put: if (err) return err; - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); } return 0; } -static int pot_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int pot_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -652,7 +648,7 @@ static int pot_hole(struct drm_i915_private *i915, if (i915_is_ggtt(vm)) flags |= PIN_GLOBAL; - obj = i915_gem_object_create_internal(i915, 2 * I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -713,8 +709,7 @@ err_obj: return err; } -static int drunk_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int drunk_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -759,7 +754,7 @@ static int drunk_hole(struct drm_i915_private *i915, * memory. We expect to hit -ENOMEM. 
*/ - obj = fake_dma_object(i915, BIT_ULL(size)); + obj = fake_dma_object(vm->i915, BIT_ULL(size)); if (IS_ERR(obj)) { kfree(order); break; @@ -817,14 +812,13 @@ err_obj: if (err) return err; - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); } return 0; } -static int __shrink_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int __shrink_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -841,7 +835,7 @@ static int __shrink_hole(struct drm_i915_private *i915, u64 size = BIT_ULL(order++); size = min(size, hole_end - addr); - obj = fake_dma_object(i915, size); + obj = fake_dma_object(vm->i915, size); if (IS_ERR(obj)) { err = PTR_ERR(obj); break; @@ -877,6 +871,15 @@ static int __shrink_hole(struct drm_i915_private *i915, i915_vma_unpin(vma); addr += size; + /* + * Since we are injecting allocation faults at random intervals, + * wait for this allocation to complete before we change the + * faultinjection. + */ + err = i915_vma_sync(vma); + if (err) + break; + if (igt_timeout(end_time, "%s timed out at ofset %llx [%llx - %llx]\n", __func__, addr, hole_start, hole_end)) { @@ -886,12 +889,11 @@ static int __shrink_hole(struct drm_i915_private *i915, } close_object_list(&objects, vm); - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); return err; } -static int shrink_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int shrink_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -903,7 +905,7 @@ static int shrink_hole(struct drm_i915_private *i915, for_each_prime_number_from(prime, 0, ULONG_MAX - 1) { vm->fault_attr.interval = prime; - err = __shrink_hole(i915, vm, hole_start, hole_end, end_time); + err = __shrink_hole(vm, hole_start, hole_end, end_time); if (err) break; } @@ -913,8 +915,7 @@ static int shrink_hole(struct drm_i915_private *i915, return err; } -static int shrink_boom(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int shrink_boom(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -936,7 +937,7 @@ static int shrink_boom(struct drm_i915_private *i915, unsigned int size = sizes[i]; struct i915_vma *vma; - purge = fake_dma_object(i915, size); + purge = fake_dma_object(vm->i915, size); if (IS_ERR(purge)) return PTR_ERR(purge); @@ -953,7 +954,7 @@ static int shrink_boom(struct drm_i915_private *i915, /* Should now be ripe for purging */ i915_vma_unpin(vma); - explode = fake_dma_object(i915, size); + explode = fake_dma_object(vm->i915, size); if (IS_ERR(explode)) { err = PTR_ERR(explode); goto err_purge; @@ -979,7 +980,7 @@ static int shrink_boom(struct drm_i915_private *i915, i915_gem_object_put(explode); memset(&vm->fault_attr, 0, sizeof(vm->fault_attr)); - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); } return 0; @@ -993,14 +994,13 @@ err_purge: } static int exercise_ppgtt(struct drm_i915_private *dev_priv, - int (*func)(struct drm_i915_private *i915, - struct i915_address_space *vm, + int (*func)(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time)) { - struct drm_file *file; struct i915_ppgtt *ppgtt; IGT_TIMEOUT(end_time); + struct file *file; int err; if (!HAS_FULL_PPGTT(dev_priv)) @@ -1010,22 +1010,20 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&dev_priv->drm.struct_mutex); - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = 
i915_ppgtt_create(&dev_priv->gt); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); - goto out_unlock; + goto out_free; } GEM_BUG_ON(offset_in_page(ppgtt->vm.total)); - GEM_BUG_ON(ppgtt->vm.closed); + GEM_BUG_ON(!atomic_read(&ppgtt->vm.open)); - err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time); + err = func(&ppgtt->vm, 0, ppgtt->vm.total, end_time); i915_vm_put(&ppgtt->vm); -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); - mock_file_free(dev_priv, file); +out_free: + fput(file); return err; } @@ -1076,8 +1074,7 @@ static int sort_holes(void *priv, struct list_head *A, struct list_head *B) } static int exercise_ggtt(struct drm_i915_private *i915, - int (*func)(struct drm_i915_private *i915, - struct i915_address_space *vm, + int (*func)(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time)) { @@ -1087,7 +1084,6 @@ static int exercise_ggtt(struct drm_i915_private *i915, IGT_TIMEOUT(end_time); int err = 0; - mutex_lock(&i915->drm.struct_mutex); restart: list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes); drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) { @@ -1100,7 +1096,7 @@ restart: if (hole_start >= hole_end) continue; - err = func(i915, &ggtt->vm, hole_start, hole_end, end_time); + err = func(&ggtt->vm, hole_start, hole_end, end_time); if (err) break; @@ -1108,7 +1104,6 @@ restart: last = hole_end; goto restart; } - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1150,24 +1145,25 @@ static int igt_ggtt_page(void *arg) unsigned int *order, n; int err; - mutex_lock(&i915->drm.struct_mutex); + if (!i915_ggtt_has_aperture(ggtt)) + return 0; obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); err = i915_gem_object_pin_pages(obj); if (err) goto out_free; memset(&tmp, 0, sizeof(tmp)); + mutex_lock(&ggtt->vm.mutex); err = drm_mm_insert_node_in_range(&ggtt->vm.mm, &tmp, count * PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); + mutex_unlock(&ggtt->vm.mutex); if (err) goto out_unpin; @@ -1195,7 +1191,7 @@ static int igt_ggtt_page(void *arg) iowrite32(n, vaddr + n); io_mapping_unmap_atomic(vaddr); } - i915_gem_flush_ggtt_writes(i915); + intel_gt_flush_ggtt_writes(ggtt->vm.gt); i915_random_reorder(order, count, &prng); for (n = 0; n < count; n++) { @@ -1219,13 +1215,13 @@ static int igt_ggtt_page(void *arg) out_remove: ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size); intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(&tmp); + mutex_unlock(&ggtt->vm.mutex); out_unpin: i915_gem_object_unpin_pages(obj); out_free: i915_gem_object_put(obj); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1236,20 +1232,23 @@ static void track_vma_bind(struct i915_vma *vma) atomic_inc(&obj->bind_count); /* track for eviction later */ __i915_gem_object_pin_pages(obj); + GEM_BUG_ON(vma->pages); + atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); + __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; mutex_lock(&vma->vm->mutex); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); + list_add_tail(&vma->vm_link, &vma->vm->bound_list); mutex_unlock(&vma->vm->mutex); } static int exercise_mock(struct drm_i915_private *i915, - int (*func)(struct drm_i915_private *i915, - struct i915_address_space *vm, + int (*func)(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time)) { const u64 limit = 
totalram_pages() << PAGE_SHIFT; + struct i915_address_space *vm; struct i915_gem_context *ctx; IGT_TIMEOUT(end_time); int err; @@ -1258,7 +1257,9 @@ static int exercise_mock(struct drm_i915_private *i915, if (!ctx) return -ENOMEM; - err = func(i915, ctx->vm, 0, min(ctx->vm->total, limit), end_time); + vm = i915_gem_context_get_vm_rcu(ctx); + err = func(vm, 0, min(vm->total, limit), end_time); + i915_vm_put(vm); mock_context_close(ctx); return err; @@ -1296,6 +1297,7 @@ static int igt_gtt_reserve(void *arg) { struct i915_ggtt *ggtt = arg; struct drm_i915_gem_object *obj, *on; + I915_RND_STATE(prng); LIST_HEAD(objects); u64 total; int err = -ENODEV; @@ -1332,11 +1334,13 @@ static int igt_gtt_reserve(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1382,11 +1386,13 @@ static int igt_gtt_reserve(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1422,15 +1428,18 @@ static int igt_gtt_reserve(void *arg) goto out; } - offset = random_offset(0, ggtt->vm.total, - 2*I915_GTT_PAGE_SIZE, - I915_GTT_MIN_ALIGNMENT); + offset = igt_random_offset(&prng, + 0, ggtt->vm.total, + 2 * I915_GTT_PAGE_SIZE, + I915_GTT_MIN_ALIGNMENT); + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, offset, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1499,11 +1508,13 @@ static int igt_gtt_insert(void *arg) /* Check a couple of obviously invalid requests */ for (ii = invalid_insert; ii->size; ii++) { + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &tmp, ii->size, ii->alignment, I915_COLOR_UNEVICTABLE, ii->start, ii->end, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n", ii->size, ii->alignment, ii->start, ii->end, @@ -1539,10 +1550,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err == -ENOSPC) { /* maxed out the GGTT space */ i915_gem_object_put(obj); @@ -1597,10 +1610,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1644,10 +1659,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1691,13 +1708,10 @@ int i915_gem_gtt_mock_selftests(void) } mock_init_ggtt(i915, ggtt); - 
mutex_lock(&i915->drm.struct_mutex); err = i915_subtests(tests, ggtt); - mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); + mock_device_flush(i915); i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(ggtt); kfree(ggtt); out_put: @@ -1705,6 +1719,312 @@ out_put: return err; } +static int context_sync(struct intel_context *ce) +{ + struct i915_request *rq; + long timeout; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + timeout = i915_request_wait(rq, 0, HZ / 5); + i915_request_put(rq); + + return timeout < 0 ? -EIO : 0; +} + +static struct i915_request * +submit_batch(struct intel_context *ce, u64 addr) +{ + struct i915_request *rq; + int err; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + err = 0; + if (rq->engine->emit_init_breadcrumb) /* detect a hang */ + err = rq->engine->emit_init_breadcrumb(rq); + if (err == 0) + err = rq->engine->emit_bb_start(rq, addr, 0, 0); + + if (err == 0) + i915_request_get(rq); + i915_request_add(rq); + + return err ? ERR_PTR(err) : rq; +} + +static u32 *spinner(u32 *batch, int i) +{ + return batch + i * 64 / sizeof(*batch) + 4; +} + +static void end_spin(u32 *batch, int i) +{ + *spinner(batch, i) = MI_BATCH_BUFFER_END; + wmb(); +} + +static int igt_cs_tlb(void *arg) +{ + const unsigned int count = PAGE_SIZE / 64; + const unsigned int chunk_size = count * PAGE_SIZE; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *bbe, *act, *out; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct i915_vma *vma; + I915_RND_STATE(prng); + struct file *file; + unsigned int i; + u32 *result; + u32 *batch; + int err = 0; + + /* + * Our mission here is to fool the hardware to execute something + * from scratch as it has not seen the batch move (due to missing + * the TLB invalidate). 
+ */ + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + vm = i915_gem_context_get_vm_rcu(ctx); + if (i915_is_ggtt(vm)) + goto out_vm; + + /* Create two pages; dummy we prefill the TLB, and intended */ + bbe = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(bbe)) { + err = PTR_ERR(bbe); + goto out_vm; + } + + batch = i915_gem_object_pin_map(bbe, I915_MAP_WC); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put_bbe; + } + memset32(batch, MI_BATCH_BUFFER_END, PAGE_SIZE / sizeof(u32)); + i915_gem_object_flush_map(bbe); + i915_gem_object_unpin_map(bbe); + + act = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(act)) { + err = PTR_ERR(act); + goto out_put_bbe; + } + + /* Track the execution of each request by writing into different slot */ + batch = i915_gem_object_pin_map(act, I915_MAP_WC); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put_act; + } + for (i = 0; i < count; i++) { + u32 *cs = batch + i * 64 / sizeof(*cs); + u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32); + + GEM_BUG_ON(INTEL_GEN(i915) < 6); + cs[0] = MI_STORE_DWORD_IMM_GEN4; + if (INTEL_GEN(i915) >= 8) { + cs[1] = lower_32_bits(addr); + cs[2] = upper_32_bits(addr); + cs[3] = i; + cs[4] = MI_NOOP; + cs[5] = MI_BATCH_BUFFER_START_GEN8; + } else { + cs[1] = 0; + cs[2] = lower_32_bits(addr); + cs[3] = i; + cs[4] = MI_NOOP; + cs[5] = MI_BATCH_BUFFER_START; + } + } + + out = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(out)) { + err = PTR_ERR(out); + goto out_put_batch; + } + i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED); + + vma = i915_vma_instance(out, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put_batch; + } + + err = i915_vma_pin(vma, 0, 0, + PIN_USER | + PIN_OFFSET_FIXED | + (vm->total - PAGE_SIZE)); + if (err) + goto out_put_out; + GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE); + + result = i915_gem_object_pin_map(out, I915_MAP_WB); + if (IS_ERR(result)) { + err = PTR_ERR(result); + goto out_put_out; + } + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + IGT_TIMEOUT(end_time); + unsigned long pass = 0; + + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + while (!__igt_timeout(end_time, NULL)) { + struct i915_request *rq; + u64 offset; + + offset = igt_random_offset(&prng, + 0, vm->total - PAGE_SIZE, + chunk_size, PAGE_SIZE); + + err = vm->allocate_va_range(vm, offset, chunk_size); + if (err) + goto end; + + memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); + + vma = i915_vma_instance(bbe, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto end; + } + + err = vma->ops->set_pages(vma); + if (err) + goto end; + + /* Prime the TLB with the dummy pages */ + for (i = 0; i < count; i++) { + vma->node.start = offset + i * PAGE_SIZE; + vm->insert_entries(vm, vma, I915_CACHE_NONE, 0); + + rq = submit_batch(ce, vma->node.start); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto end; + } + i915_request_put(rq); + } + + vma->ops->clear_pages(vma); + + err = context_sync(ce); + if (err) { + pr_err("%s: dummy setup timed out\n", + ce->engine->name); + goto end; + } + + vma = i915_vma_instance(act, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto end; + } + + err = vma->ops->set_pages(vma); + if (err) + goto end; + + /* Replace the TLB with target batches */ + for (i = 0; i < count; i++) { + struct i915_request *rq; + u32 *cs = batch + i * 64 
/ sizeof(*cs); + u64 addr; + + vma->node.start = offset + i * PAGE_SIZE; + vm->insert_entries(vm, vma, I915_CACHE_NONE, 0); + + addr = vma->node.start + i * 64; + cs[4] = MI_NOOP; + cs[6] = lower_32_bits(addr); + cs[7] = upper_32_bits(addr); + wmb(); + + rq = submit_batch(ce, addr); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto end; + } + + /* Wait until the context chain has started */ + if (i == 0) { + while (READ_ONCE(result[i]) && + !i915_request_completed(rq)) + cond_resched(); + } else { + end_spin(batch, i - 1); + } + + i915_request_put(rq); + } + end_spin(batch, count - 1); + + vma->ops->clear_pages(vma); + + err = context_sync(ce); + if (err) { + pr_err("%s: writes timed out\n", + ce->engine->name); + goto end; + } + + for (i = 0; i < count; i++) { + if (result[i] != i) { + pr_err("%s: Write lost on pass %lu, at offset %llx, index %d, found %x, expected %x\n", + ce->engine->name, pass, + offset, i, result[i], i); + err = -EINVAL; + goto end; + } + } + + vm->clear_range(vm, offset, chunk_size); + pass++; + } + } +end: + if (igt_flush_test(i915)) + err = -EIO; + i915_gem_context_unlock_engines(ctx); + i915_gem_object_unpin_map(out); +out_put_out: + i915_gem_object_put(out); +out_put_batch: + i915_gem_object_unpin_map(act); +out_put_act: + i915_gem_object_put(act); +out_put_bbe: + i915_gem_object_put(bbe); +out_vm: + i915_vm_put(vm); +out_unlock: + fput(file); + return err; +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -1722,6 +2042,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ggtt_pot), SUBTEST(igt_ggtt_fill), SUBTEST(igt_ggtt_page), + SUBTEST(igt_cs_tlb), }; GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total)); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index d5dc4427d664..34138c7bdd15 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -1,5 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* List each unit test as selftest(name, function) + +#ifndef selftest +#define selftest(x, y) +#endif + +/* + * List each unit test as selftest(name, function) * * The name is used as both an enum and expanded as subtest__name to create * a module parameter. It must be unique and legal for a C identifier. 
@@ -12,7 +18,13 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) selftest(workarounds, intel_workarounds_live_selftests) -selftest(timelines, i915_timeline_live_selftests) +selftest(gt_engines, intel_engine_live_selftests) +selftest(gt_timelines, intel_timeline_live_selftests) +selftest(gt_contexts, intel_context_live_selftests) +selftest(gt_lrc, intel_lrc_live_selftests) +selftest(gt_mocs, intel_mocs_live_selftests) +selftest(gt_pm, intel_gt_pm_live_selftests) +selftest(gt_heartbeat, intel_heartbeat_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) @@ -24,10 +36,13 @@ selftest(gtt, i915_gem_gtt_live_selftests) selftest(gem, i915_gem_live_selftests) selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) -selftest(contexts, i915_gem_context_live_selftests) +selftest(gem_contexts, i915_gem_context_live_selftests) selftest(blt, i915_gem_object_blt_live_selftests) selftest(client, i915_gem_client_blt_live_selftests) selftest(reset, intel_reset_live_selftests) +selftest(memory_region, intel_memory_region_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests) -selftest(guc, intel_guc_live_selftest) +selftest(perf, i915_perf_live_selftests) +/* Here be dragons: keep last to run last! */ +selftest(late_gt_pm, intel_gt_pm_late_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 510eb176bb2c..5b39bab4da1d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -1,5 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* List each unit test as selftest(name, function) + +#ifndef selftest +#define selftest(x, y) +#endif + +/* + * List each unit test as selftest(name, function) * * The name is used as both an enum and expanded as subtest__name to create * a module parameter. It must be unique and legal for a C identifier. 
@@ -15,7 +21,7 @@ selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) -selftest(timelines, i915_timeline_mock_selftests) +selftest(timelines, intel_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(phys, i915_gem_phys_mock_selftests) @@ -25,3 +31,5 @@ selftest(evict, i915_gem_evict_mock_selftests) selftest(gtt, i915_gem_gtt_mock_selftests) selftest(hugepages, i915_gem_huge_page_mock_selftests) selftest(contexts, i915_gem_context_mock_selftests) +selftest(buddy, i915_buddy_mock_selftests) +selftest(memory_region, intel_memory_region_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c new file mode 100644 index 000000000000..d1a1568c47ba --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -0,0 +1,217 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/kref.h> + +#include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" + +#include "i915_selftest.h" + +#include "igt_flush_test.h" +#include "lib_sw_fence.h" + +static struct i915_perf_stream * +test_stream(struct i915_perf *perf) +{ + struct drm_i915_perf_open_param param = {}; + struct perf_open_properties props = { + .engine = intel_engine_lookup_user(perf->i915, + I915_ENGINE_CLASS_RENDER, + 0), + .sample_flags = SAMPLE_OA_REPORT, + .oa_format = IS_GEN(perf->i915, 12) ? + I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8, + .metrics_set = 1, + }; + struct i915_perf_stream *stream; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) + return NULL; + + stream->perf = perf; + + mutex_lock(&perf->lock); + if (i915_oa_stream_init(stream, ¶m, &props)) { + kfree(stream); + stream = NULL; + } + mutex_unlock(&perf->lock); + + return stream; +} + +static void stream_destroy(struct i915_perf_stream *stream) +{ + struct i915_perf *perf = stream->perf; + + mutex_lock(&perf->lock); + i915_perf_destroy_locked(stream); + mutex_unlock(&perf->lock); +} + +static int live_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_perf_stream *stream; + + /* Quick check we can create a perf stream */ + + stream = test_stream(&i915->perf); + if (!stream) + return -EINVAL; + + stream_destroy(stream); + return 0; +} + +static int write_timestamp(struct i915_request *rq, int slot) +{ + u32 *cs; + int len; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + len = 5; + if (INTEL_GEN(rq->i915) >= 8) + len++; + + *cs++ = GFX_OP_PIPE_CONTROL(len); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_WRITE_TIMESTAMP; + *cs++ = slot * sizeof(u32); + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + + intel_ring_advance(rq, cs); + + return 0; +} + +static ktime_t poll_status(struct i915_request *rq, int slot) +{ + while (!intel_read_status_page(rq->engine, slot) && + !i915_request_completed(rq)) + cpu_relax(); + + return ktime_get(); +} + +static int live_noa_delay(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_perf_stream *stream; + struct i915_request *rq; + ktime_t t0, t1; + u64 expected; + u32 delay; + int err; + int i; + + /* Check that the GPU delays matches expectations */ + + stream = test_stream(&i915->perf); + if (!stream) + return -ENOMEM; + + expected = atomic64_read(&stream->perf->noa_programming_delay); 
+ + if (stream->engine->class != RENDER_CLASS) { + err = -ENODEV; + goto out; + } + + for (i = 0; i < 4; i++) + intel_write_status_page(stream->engine, 0x100 + i, 0); + + rq = intel_engine_create_kernel_request(stream->engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + if (rq->engine->emit_init_breadcrumb && + i915_request_timeline(rq)->has_initial_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) { + i915_request_add(rq); + goto out; + } + } + + err = write_timestamp(rq, 0x100); + if (err) { + i915_request_add(rq); + goto out; + } + + err = rq->engine->emit_bb_start(rq, + i915_ggtt_offset(stream->noa_wait), 0, + I915_DISPATCH_SECURE); + if (err) { + i915_request_add(rq); + goto out; + } + + err = write_timestamp(rq, 0x102); + if (err) { + i915_request_add(rq); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + + preempt_disable(); + t0 = poll_status(rq, 0x100); + t1 = poll_status(rq, 0x102); + preempt_enable(); + + pr_info("CPU delay: %lluns, expected %lluns\n", + ktime_sub(t1, t0), expected); + + delay = intel_read_status_page(stream->engine, 0x102); + delay -= intel_read_status_page(stream->engine, 0x100); + delay = div_u64(mul_u32_u32(delay, 1000 * 1000), + RUNTIME_INFO(i915)->cs_timestamp_frequency_khz); + pr_info("GPU delay: %uns, expected %lluns\n", + delay, expected); + + if (4 * delay < 3 * expected || 2 * delay > 3 * expected) { + pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n", + delay / 1000, + div_u64(3 * expected, 4000), + div_u64(3 * expected, 2000)); + err = -EINVAL; + } + + i915_request_put(rq); +out: + stream_destroy(stream); + return err; +} + +int i915_perf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_sanitycheck), + SUBTEST(live_noa_delay), + }; + struct i915_perf *perf = &i915->perf; + + if (!perf->metrics_kobj || !perf->ops.enable_metric_set) + return 0; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h new file mode 100644 index 000000000000..5a577a1332f5 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef selftest +#define selftest(x, y) +#endif + +/* + * List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(void). It may be conditionally + * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). 
+ * + * Tests are executed in order by igt/i915_selftest + */ +selftest(engine_cs, intel_engine_cs_perf_selftests) +selftest(blt, i915_gem_object_blt_perf_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 716a3f19f030..abdfadcf626b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -29,6 +29,7 @@ #include <linux/types.h> #include "i915_random.h" +#include "i915_utils.h" u64 i915_prandom_u64_state(struct rnd_state *rnd) { @@ -87,3 +88,22 @@ unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) i915_random_reorder(order, count, state); return order; } + +u64 igt_random_offset(struct rnd_state *state, + u64 start, u64 end, + u64 len, u64 align) +{ + u64 range, addr; + + BUG_ON(range_overflows(start, len, end)); + BUG_ON(round_up(start, align) > round_down(end - len, align)); + + range = round_down(end - len, align) - round_up(start, align); + if (range) { + addr = i915_prandom_u64_state(state); + div64_u64_rem(addr, range, &addr); + start += addr; + } + + return round_up(start, align); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 8e1ff9c105b6..05364eca20f7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -25,6 +25,7 @@ #ifndef __I915_SELFTESTS_RANDOM_H__ #define __I915_SELFTESTS_RANDOM_H__ +#include <linux/math64.h> #include <linux/random.h> #include "../i915_selftest.h" @@ -57,4 +58,8 @@ void i915_random_reorder(unsigned int *order, void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, struct rnd_state *state); +u64 igt_random_offset(struct rnd_state *state, + u64 start, u64 end, + u64 len, u64 align); + #endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 298bb7116c51..f89d9c42f1fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -27,35 +27,44 @@ #include "gem/i915_gem_pm.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" + #include "i915_random.h" #include "i915_selftest.h" #include "igt_live_test.h" +#include "igt_spinner.h" #include "lib_sw_fence.h" #include "mock_drm.h" #include "mock_gem_device.h" +static unsigned int num_uabi_engines(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + unsigned int count; + + count = 0; + for_each_uabi_engine(engine, i915) + count++; + + return count; +} + static int igt_add_request(void *arg) { struct drm_i915_private *i915 = arg; struct i915_request *request; - int err = -ENOMEM; /* Basic preliminary test to create a request and let it loose! 
*/ - mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS0], - i915->kernel_context, - HZ / 10); + request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10); if (!request) - goto out_unlock; + return -ENOMEM; i915_request_add(request); - err = 0; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } static int igt_wait_request(void *arg) @@ -67,64 +76,63 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ - mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS0], i915->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_unlock; - } + request = mock_request(i915->engine[RCS0]->kernel_context, T); + if (!request) + return -ENOMEM; + + i915_request_get(request); if (i915_request_wait(request, 0, 0) != -ETIME) { pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); - goto out_unlock; + goto out_request; } if (i915_request_wait(request, 0, T) != -ETIME) { pr_err("request wait succeeded (expected timeout before submit!)\n"); - goto out_unlock; + goto out_request; } if (i915_request_completed(request)) { pr_err("request completed before submit!!\n"); - goto out_unlock; + goto out_request; } i915_request_add(request); if (i915_request_wait(request, 0, 0) != -ETIME) { pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); - goto out_unlock; + goto out_request; } if (i915_request_completed(request)) { pr_err("request completed immediately!\n"); - goto out_unlock; + goto out_request; } if (i915_request_wait(request, 0, T / 2) != -ETIME) { pr_err("request wait succeeded (expected timeout!)\n"); - goto out_unlock; + goto out_request; } if (i915_request_wait(request, 0, T) == -ETIME) { pr_err("request wait timed out!\n"); - goto out_unlock; + goto out_request; } if (!i915_request_completed(request)) { pr_err("request not complete after waiting!\n"); - goto out_unlock; + goto out_request; } if (i915_request_wait(request, 0, T) == -ETIME) { pr_err("request wait timed out when already complete!\n"); - goto out_unlock; + goto out_request; } err = 0; -out_unlock: +out_request: + i915_request_put(request); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -137,52 +145,45 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ - mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS0], i915->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_locked; - } + request = mock_request(i915->engine[RCS0]->kernel_context, T); + if (!request) + return -ENOMEM; if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { pr_err("fence wait success before submit (expected timeout)!\n"); - goto out_locked; + goto out; } i915_request_add(request); - mutex_unlock(&i915->drm.struct_mutex); if (dma_fence_is_signaled(&request->fence)) { pr_err("fence signaled immediately!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { pr_err("fence wait success after submit (expected timeout)!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { pr_err("fence wait timed out (expected success)!\n"); - goto out_device; + goto out; } if (!dma_fence_is_signaled(&request->fence)) { pr_err("fence unsignaled after waiting!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { pr_err("fence 
wait timed out when complete (expected success)!\n"); - goto out_device; + goto out; } err = 0; -out_device: - mutex_lock(&i915->drm.struct_mutex); -out_locked: +out: mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -191,11 +192,15 @@ static int igt_request_rewind(void *arg) struct drm_i915_private *i915 = arg; struct i915_request *request, *vip; struct i915_gem_context *ctx[2]; + struct intel_context *ce; int err = -EINVAL; - mutex_lock(&i915->drm.struct_mutex); ctx[0] = mock_context(i915, "A"); - request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ); + + ce = i915_gem_context_get_engine(ctx[0], RCS0); + GEM_BUG_ON(IS_ERR(ce)); + request = mock_request(ce, 2 * HZ); + intel_context_put(ce); if (!request) { err = -ENOMEM; goto err_context_0; @@ -205,7 +210,11 @@ static int igt_request_rewind(void *arg) i915_request_add(request); ctx[1] = mock_context(i915, "B"); - vip = mock_request(i915->engine[RCS0], ctx[1], 0); + + ce = i915_gem_context_get_engine(ctx[1], RCS0); + GEM_BUG_ON(IS_ERR(ce)); + vip = mock_request(ce, 0); + intel_context_put(ce); if (!vip) { err = -ENOMEM; goto err_context_1; @@ -223,7 +232,6 @@ static int igt_request_rewind(void *arg) request->engine->submit_request(request); rcu_read_unlock(); - mutex_unlock(&i915->drm.struct_mutex); if (i915_request_wait(vip, 0, HZ) == -ETIME) { pr_err("timed out waiting for high priority request\n"); @@ -238,14 +246,12 @@ static int igt_request_rewind(void *arg) err = 0; err: i915_request_put(vip); - mutex_lock(&i915->drm.struct_mutex); err_context_1: mock_context_close(ctx[1]); i915_request_put(request); err_context_0: mock_context_close(ctx[0]); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -254,28 +260,24 @@ struct smoketest { struct i915_gem_context **contexts; atomic_long_t num_waits, num_fences; int ncontexts, max_batch; - struct i915_request *(*request_alloc)(struct i915_gem_context *, - struct intel_engine_cs *); + struct i915_request *(*request_alloc)(struct intel_context *ce); }; static struct i915_request * -__mock_request_alloc(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +__mock_request_alloc(struct intel_context *ce) { - return mock_request(engine, ctx, 0); + return mock_request(ce, 0); } static struct i915_request * -__live_request_alloc(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +__live_request_alloc(struct intel_context *ce) { - return igt_request_alloc(ctx, engine); + return intel_context_create_request(ce); } static int __igt_breadcrumbs_smoketest(void *arg) { struct smoketest *t = arg; - struct mutex * const BKL = &t->engine->i915->drm.struct_mutex; const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; const unsigned int total = 4 * t->ncontexts + 1; unsigned int num_waits = 0, num_fences = 0; @@ -293,7 +295,7 @@ static int __igt_breadcrumbs_smoketest(void *arg) * that the fences were marked as signaled. 
*/ - requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL); + requests = kcalloc(total, sizeof(*requests), GFP_KERNEL); if (!requests) return -ENOMEM; @@ -328,12 +330,13 @@ static int __igt_breadcrumbs_smoketest(void *arg) struct i915_gem_context *ctx = t->contexts[order[n] % t->ncontexts]; struct i915_request *rq; + struct intel_context *ce; - mutex_lock(BKL); - - rq = t->request_alloc(ctx, t->engine); + ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); + rq = t->request_alloc(ce); + intel_context_put(ce); if (IS_ERR(rq)) { - mutex_unlock(BKL); err = PTR_ERR(rq); count = n; break; @@ -346,8 +349,6 @@ static int __igt_breadcrumbs_smoketest(void *arg) requests[n] = i915_request_get(rq); i915_request_add(rq); - mutex_unlock(BKL); - if (err >= 0) err = i915_sw_fence_await_dma_fence(wait, &rq->fence, @@ -366,14 +367,16 @@ static int __igt_breadcrumbs_smoketest(void *arg) if (!wait_event_timeout(wait->wait, i915_sw_fence_done(wait), - HZ / 2)) { + 5 * HZ)) { struct i915_request *rq = requests[count - 1]; - pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n", - count, + pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n", + atomic_read(&wait->pending), count, rq->fence.context, rq->fence.seqno, t->engine->name); - i915_gem_set_wedged(t->engine->i915); + GEM_TRACE_DUMP(); + + intel_gt_set_wedged(t->engine->gt); GEM_BUG_ON(!i915_request_completed(rq)); i915_sw_fence_wait(wait); err = -EIO; @@ -433,18 +436,16 @@ static int mock_breadcrumbs_smoketest(void *arg) * See __igt_breadcrumbs_smoketest(); */ - threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); + threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL); if (!threads) return -ENOMEM; - t.contexts = - kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); + t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); if (!t.contexts) { ret = -ENOMEM; goto out_threads; } - mutex_lock(&t.engine->i915->drm.struct_mutex); for (n = 0; n < t.ncontexts; n++) { t.contexts[n] = mock_context(t.engine->i915, "mock"); if (!t.contexts[n]) { @@ -452,7 +453,6 @@ static int mock_breadcrumbs_smoketest(void *arg) goto out_contexts; } } - mutex_unlock(&t.engine->i915->drm.struct_mutex); for (n = 0; n < ncpus; n++) { threads[n] = kthread_run(__igt_breadcrumbs_smoketest, @@ -466,6 +466,7 @@ static int mock_breadcrumbs_smoketest(void *arg) get_task_struct(threads[n]); } + yield(); /* start all threads before we begin */ msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); for (n = 0; n < ncpus; n++) { @@ -482,18 +483,15 @@ static int mock_breadcrumbs_smoketest(void *arg) atomic_long_read(&t.num_fences), ncpus); - mutex_lock(&t.engine->i915->drm.struct_mutex); out_contexts: for (n = 0; n < t.ncontexts; n++) { if (!t.contexts[n]) break; mock_context_close(t.contexts[n]); } - mutex_unlock(&t.engine->i915->drm.struct_mutex); kfree(t.contexts); out_threads: kfree(threads); - return ret; } @@ -526,40 +524,38 @@ static int live_nop_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; - unsigned int id; int err = -ENODEV; - /* Submit various sized batches of empty requests, to each engine + /* + * Submit various sized batches of empty requests, to each engine * (individually), and wait for the batch to complete. We can check * the overhead of submitting requests to the hardware. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - for_each_engine(engine, i915, id) { - struct i915_request *request = NULL; + for_each_uabi_engine(engine, i915) { unsigned long n, prime; IGT_TIMEOUT(end_time); ktime_t times[2] = {}; err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + return err; + intel_engine_pm_get(engine); for_each_prime_number_from(prime, 1, 8192) { + struct i915_request *request = NULL; + times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { + i915_request_put(request); request = i915_request_create(engine->kernel_context); - if (IS_ERR(request)) { - err = PTR_ERR(request); - goto out_unlock; - } + if (IS_ERR(request)) + return PTR_ERR(request); - /* This space is left intentionally blank. + /* + * This space is left intentionally blank. * * We do not actually want to perform any * action with this request, we just want @@ -572,9 +568,11 @@ static int live_nop_request(void *arg) * for latency. */ + i915_request_get(request); i915_request_add(request); } i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(request); times[1] = ktime_sub(ktime_get_raw(), times[1]); if (prime == 1) @@ -583,10 +581,11 @@ static int live_nop_request(void *arg) if (__igt_timeout(end_time, NULL)) break; } + intel_engine_pm_put(engine); err = igt_live_test_end(&t); if (err) - goto out_unlock; + return err; pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -594,9 +593,6 @@ static int live_nop_request(void *arg) prime, div64_u64(ktime_to_ns(times[1]), prime)); } -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -622,7 +618,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); - i915_gem_chipset_flush(i915); + intel_gt_chipset_flush(&i915->gt); vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { @@ -634,8 +630,15 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) if (err) goto err; + /* Force the wait wait now to avoid including it in the benchmark */ + err = i915_vma_sync(vma); + if (err) + goto err_pin; + return vma; +err_pin: + i915_vma_unpin(vma); err: i915_gem_object_put(obj); return ERR_PTR(err); @@ -659,6 +662,7 @@ empty_request(struct intel_engine_cs *engine, if (err) goto out_request; + i915_request_get(request); out_request: i915_request_add(request); return err ? ERR_PTR(err) : request; @@ -668,27 +672,21 @@ static int live_empty_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; struct i915_vma *batch; - unsigned int id; int err = 0; - /* Submit various sized batches of empty requests, to each engine + /* + * Submit various sized batches of empty requests, to each engine * (individually), and wait for the batch to complete. We can check * the overhead of submitting requests to the hardware. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - batch = empty_batch(i915); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_unlock; - } + if (IS_ERR(batch)) + return PTR_ERR(batch); - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { IGT_TIMEOUT(end_time); struct i915_request *request; unsigned long n, prime; @@ -698,10 +696,13 @@ static int live_empty_request(void *arg) if (err) goto out_batch; + intel_engine_pm_get(engine); + /* Warmup / preload */ request = empty_request(engine, batch); if (IS_ERR(request)) { err = PTR_ERR(request); + intel_engine_pm_put(engine); goto out_batch; } i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); @@ -710,9 +711,11 @@ static int live_empty_request(void *arg) times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { + i915_request_put(request); request = empty_request(engine, batch); if (IS_ERR(request)) { err = PTR_ERR(request); + intel_engine_pm_put(engine); goto out_batch; } } @@ -725,6 +728,8 @@ static int live_empty_request(void *arg) if (__igt_timeout(end_time, NULL)) break; } + i915_request_put(request); + intel_engine_pm_put(engine); err = igt_live_test_end(&t); if (err) @@ -739,16 +744,11 @@ static int live_empty_request(void *arg) out_batch: i915_vma_unpin(batch); i915_vma_put(batch); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } static struct i915_vma *recursive_batch(struct drm_i915_private *i915) { - struct i915_gem_context *ctx = i915->kernel_context; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct drm_i915_gem_object *obj; const int gen = INTEL_GEN(i915); struct i915_vma *vma; @@ -759,7 +759,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, i915->gt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -791,7 +791,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); - i915_gem_chipset_flush(i915); + intel_gt_chipset_flush(&i915->gt); return vma; @@ -809,7 +809,7 @@ static int recursive_batch_resolve(struct i915_vma *batch) return PTR_ERR(cmd); *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(batch->vm->i915); + intel_gt_chipset_flush(batch->vm->gt); i915_gem_object_unpin_map(batch->obj); @@ -819,65 +819,73 @@ static int recursive_batch_resolve(struct i915_vma *batch) static int live_all_engines(void *arg) { struct drm_i915_private *i915 = arg; + const unsigned int nengines = num_uabi_engines(i915); struct intel_engine_cs *engine; - struct i915_request *request[I915_NUM_ENGINES]; - intel_wakeref_t wakeref; + struct i915_request **request; struct igt_live_test t; struct i915_vma *batch; - unsigned int id; + unsigned int idx; int err; - /* Check we can submit requests to all engines simultaneously. We + /* + * Check we can submit requests to all engines simultaneously. We * send a recursive batch to each engine - checking that we don't * block doing so, and that they don't complete too soon. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); + if (!request) + return -ENOMEM; err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_free; batch = recursive_batch(i915); if (IS_ERR(batch)) { err = PTR_ERR(batch); pr_err("%s: Unable to create batch, err=%d\n", __func__, err); - goto out_unlock; + goto out_free; } - for_each_engine(engine, i915, id) { - request[id] = i915_request_create(engine->kernel_context); - if (IS_ERR(request[id])) { - err = PTR_ERR(request[id]); + idx = 0; + for_each_uabi_engine(engine, i915) { + request[idx] = intel_engine_create_kernel_request(engine); + if (IS_ERR(request[idx])) { + err = PTR_ERR(request[idx]); pr_err("%s: Request allocation failed with err=%d\n", __func__, err); goto out_request; } - err = engine->emit_bb_start(request[id], + err = engine->emit_bb_start(request[idx], batch->node.start, batch->node.size, 0); GEM_BUG_ON(err); - request[id]->batch = batch; + request[idx]->batch = batch; i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, request[id], 0); + err = i915_request_await_object(request[idx], batch->obj, 0); + if (err == 0) + err = i915_vma_move_to_active(batch, request[idx], 0); i915_vma_unlock(batch); GEM_BUG_ON(err); - i915_request_get(request[id]); - i915_request_add(request[id]); + i915_request_get(request[idx]); + i915_request_add(request[idx]); + idx++; } - for_each_engine(engine, i915, id) { - if (i915_request_completed(request[id])) { + idx = 0; + for_each_uabi_engine(engine, i915) { + if (i915_request_completed(request[idx])) { pr_err("%s(%s): request completed too early!\n", __func__, engine->name); err = -EINVAL; goto out_request; } + idx++; } err = recursive_batch_resolve(batch); @@ -886,10 +894,11 @@ static int live_all_engines(void *arg) goto out_request; } - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { long timeout; - timeout = i915_request_wait(request[id], 0, + timeout = i915_request_wait(request[idx], 0, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { err = timeout; @@ -898,50 +907,56 @@ static int live_all_engines(void *arg) goto out_request; } - GEM_BUG_ON(!i915_request_completed(request[id])); - i915_request_put(request[id]); - request[id] = NULL; + GEM_BUG_ON(!i915_request_completed(request[idx])); + i915_request_put(request[idx]); + request[idx] = NULL; + idx++; } err = igt_live_test_end(&t); out_request: - for_each_engine(engine, i915, id) - if (request[id]) - i915_request_put(request[id]); + idx = 0; + for_each_uabi_engine(engine, i915) { + if (request[idx]) + i915_request_put(request[idx]); + idx++; + } i915_vma_unpin(batch); i915_vma_put(batch); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); +out_free: + kfree(request); return err; } static int live_sequential_engines(void *arg) { struct drm_i915_private *i915 = arg; - struct i915_request *request[I915_NUM_ENGINES] = {}; + const unsigned int nengines = num_uabi_engines(i915); + struct i915_request **request; struct i915_request *prev = NULL; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; - unsigned int id; + unsigned int idx; int err; - /* Check we can submit requests to all engines sequentially, such + /* + * Check we can submit requests to all engines sequentially, such * that each successive request waits for the earlier ones. 
This * tests that we don't execute requests out of order, even though * they are running on independent engines. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); + if (!request) + return -ENOMEM; err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_free; - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { struct i915_vma *batch; batch = recursive_batch(i915); @@ -949,64 +964,69 @@ static int live_sequential_engines(void *arg) err = PTR_ERR(batch); pr_err("%s: Unable to create batch for %s, err=%d\n", __func__, engine->name, err); - goto out_unlock; + goto out_free; } - request[id] = i915_request_create(engine->kernel_context); - if (IS_ERR(request[id])) { - err = PTR_ERR(request[id]); + request[idx] = intel_engine_create_kernel_request(engine); + if (IS_ERR(request[idx])) { + err = PTR_ERR(request[idx]); pr_err("%s: Request allocation failed for %s with err=%d\n", __func__, engine->name, err); goto out_request; } if (prev) { - err = i915_request_await_dma_fence(request[id], + err = i915_request_await_dma_fence(request[idx], &prev->fence); if (err) { - i915_request_add(request[id]); + i915_request_add(request[idx]); pr_err("%s: Request await failed for %s with err=%d\n", __func__, engine->name, err); goto out_request; } } - err = engine->emit_bb_start(request[id], + err = engine->emit_bb_start(request[idx], batch->node.start, batch->node.size, 0); GEM_BUG_ON(err); - request[id]->batch = batch; + request[idx]->batch = batch; i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, request[id], 0); + err = i915_request_await_object(request[idx], + batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, request[idx], 0); i915_vma_unlock(batch); GEM_BUG_ON(err); - i915_request_get(request[id]); - i915_request_add(request[id]); + i915_request_get(request[idx]); + i915_request_add(request[idx]); - prev = request[id]; + prev = request[idx]; + idx++; } - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { long timeout; - if (i915_request_completed(request[id])) { + if (i915_request_completed(request[idx])) { pr_err("%s(%s): request completed too early!\n", __func__, engine->name); err = -EINVAL; goto out_request; } - err = recursive_batch_resolve(request[id]->batch); + err = recursive_batch_resolve(request[idx]->batch); if (err) { pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); goto out_request; } - timeout = i915_request_wait(request[id], 0, + timeout = i915_request_wait(request[idx], 0, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { err = timeout; @@ -1015,33 +1035,244 @@ static int live_sequential_engines(void *arg) goto out_request; } - GEM_BUG_ON(!i915_request_completed(request[id])); + GEM_BUG_ON(!i915_request_completed(request[idx])); + idx++; } err = igt_live_test_end(&t); out_request: - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { u32 *cmd; - if (!request[id]) + if (!request[idx]) break; - cmd = i915_gem_object_pin_map(request[id]->batch->obj, + cmd = i915_gem_object_pin_map(request[idx]->batch->obj, I915_MAP_WC); if (!IS_ERR(cmd)) { *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); + intel_gt_chipset_flush(engine->gt); - i915_gem_object_unpin_map(request[id]->batch->obj); + i915_gem_object_unpin_map(request[idx]->batch->obj); } - i915_vma_put(request[id]->batch); - i915_request_put(request[id]); 
+ i915_vma_put(request[idx]->batch); + i915_request_put(request[idx]); + idx++; } -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); +out_free: + kfree(request); + return err; +} + +static int __live_parallel_engine1(void *arg) +{ + struct intel_engine_cs *engine = arg; + IGT_TIMEOUT(end_time); + unsigned long count; + int err = 0; + + count = 0; + intel_engine_pm_get(engine); + do { + struct i915_request *rq; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + + err = 0; + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + break; + + count++; + } while (!__igt_timeout(end_time, NULL)); + intel_engine_pm_put(engine); + + pr_info("%s: %lu request + sync\n", engine->name, count); + return err; +} + +static int __live_parallel_engineN(void *arg) +{ + struct intel_engine_cs *engine = arg; + IGT_TIMEOUT(end_time); + unsigned long count; + int err = 0; + + count = 0; + intel_engine_pm_get(engine); + do { + struct i915_request *rq; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + count++; + } while (!__igt_timeout(end_time, NULL)); + intel_engine_pm_put(engine); + + pr_info("%s: %lu requests\n", engine->name, count); + return err; +} + +static bool wake_all(struct drm_i915_private *i915) +{ + if (atomic_dec_and_test(&i915->selftest.counter)) { + wake_up_var(&i915->selftest.counter); + return true; + } + + return false; +} + +static int wait_for_all(struct drm_i915_private *i915) +{ + if (wake_all(i915)) + return 0; + + if (wait_var_event_timeout(&i915->selftest.counter, + !atomic_read(&i915->selftest.counter), + i915_selftest.timeout_jiffies)) + return 0; + + return -ETIME; +} + +static int __live_parallel_spin(void *arg) +{ + struct intel_engine_cs *engine = arg; + struct igt_spinner spin; + struct i915_request *rq; + int err = 0; + + /* + * Create a spinner running for eternity on each engine. If a second + * spinner is incorrectly placed on the same engine, it will not be + * able to start in time. + */ + + if (igt_spinner_init(&spin, engine->gt)) { + wake_all(engine->i915); + return -ENOMEM; + } + + intel_engine_pm_get(engine); + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); /* no preemption */ + intel_engine_pm_put(engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + if (err == -ENODEV) + err = 0; + wake_all(engine->i915); + goto out_spin; + } + + i915_request_get(rq); + i915_request_add(rq); + if (igt_wait_for_spinner(&spin, rq)) { + /* Occupy this engine for the whole test */ + err = wait_for_all(engine->i915); + } else { + pr_err("Failed to start spinner on %s\n", engine->name); + err = -EINVAL; + } + igt_spinner_end(&spin); + + if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + +out_spin: + igt_spinner_fini(&spin); + return err; +} + +static int live_parallel_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + __live_parallel_engine1, + __live_parallel_engineN, + __live_parallel_spin, + NULL, + }; + const unsigned int nengines = num_uabi_engines(i915); + struct intel_engine_cs *engine; + int (* const *fn)(void *arg); + struct task_struct **tsk; + int err = 0; + + /* + * Check we can submit requests to all engines concurrently. 
This + * tests that we load up the system maximally. + */ + + tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL); + if (!tsk) + return -ENOMEM; + + for (fn = func; !err && *fn; fn++) { + char name[KSYM_NAME_LEN]; + struct igt_live_test t; + unsigned int idx; + + snprintf(name, sizeof(name), "%pS", fn); + err = igt_live_test_begin(&t, i915, __func__, name); + if (err) + break; + + atomic_set(&i915->selftest.counter, nengines); + + idx = 0; + for_each_uabi_engine(engine, i915) { + tsk[idx] = kthread_run(*fn, engine, + "igt/parallel:%s", + engine->name); + if (IS_ERR(tsk[idx])) { + err = PTR_ERR(tsk[idx]); + break; + } + get_task_struct(tsk[idx++]); + } + + yield(); /* start all threads before we kthread_stop() */ + + idx = 0; + for_each_uabi_engine(engine, i915) { + int status; + + if (IS_ERR(tsk[idx])) + break; + + status = kthread_stop(tsk[idx]); + if (status && !err) + err = status; + + put_task_struct(tsk[idx++]); + } + + if (igt_live_test_end(&t)) + err = -EIO; + } + + kfree(tsk); return err; } @@ -1085,16 +1316,16 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) static int live_breadcrumbs_smoketest(void *arg) { struct drm_i915_private *i915 = arg; - struct smoketest t[I915_NUM_ENGINES]; - unsigned int ncpus = num_online_cpus(); + const unsigned int nengines = num_uabi_engines(i915); + const unsigned int ncpus = num_online_cpus(); unsigned long num_waits, num_fences; struct intel_engine_cs *engine; struct task_struct **threads; struct igt_live_test live; - enum intel_engine_id id; intel_wakeref_t wakeref; - struct drm_file *file; - unsigned int n; + struct smoketest *smoke; + unsigned int n, idx; + struct file *file; int ret = 0; /* @@ -1113,29 +1344,31 @@ static int live_breadcrumbs_smoketest(void *arg) goto out_rpm; } - threads = kcalloc(ncpus * I915_NUM_ENGINES, - sizeof(*threads), - GFP_KERNEL); - if (!threads) { + smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL); + if (!smoke) { ret = -ENOMEM; goto out_file; } - memset(&t[0], 0, sizeof(t[0])); - t[0].request_alloc = __live_request_alloc; - t[0].ncontexts = 64; - t[0].contexts = kmalloc_array(t[0].ncontexts, - sizeof(*t[0].contexts), - GFP_KERNEL); - if (!t[0].contexts) { + threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL); + if (!threads) { + ret = -ENOMEM; + goto out_smoke; + } + + smoke[0].request_alloc = __live_request_alloc; + smoke[0].ncontexts = 64; + smoke[0].contexts = kcalloc(smoke[0].ncontexts, + sizeof(*smoke[0].contexts), + GFP_KERNEL); + if (!smoke[0].contexts) { ret = -ENOMEM; goto out_threads; } - mutex_lock(&i915->drm.struct_mutex); - for (n = 0; n < t[0].ncontexts; n++) { - t[0].contexts[n] = live_context(i915, file); - if (!t[0].contexts[n]) { + for (n = 0; n < smoke[0].ncontexts; n++) { + smoke[0].contexts[n] = live_context(i915, file); + if (!smoke[0].contexts[n]) { ret = -ENOMEM; goto out_contexts; } @@ -1145,45 +1378,48 @@ static int live_breadcrumbs_smoketest(void *arg) if (ret) goto out_contexts; - for_each_engine(engine, i915, id) { - t[id] = t[0]; - t[id].engine = engine; - t[id].max_batch = max_batches(t[0].contexts[0], engine); - if (t[id].max_batch < 0) { - ret = t[id].max_batch; - mutex_unlock(&i915->drm.struct_mutex); + idx = 0; + for_each_uabi_engine(engine, i915) { + smoke[idx] = smoke[0]; + smoke[idx].engine = engine; + smoke[idx].max_batch = + max_batches(smoke[0].contexts[0], engine); + if (smoke[idx].max_batch < 0) { + ret = smoke[idx].max_batch; goto out_flush; } /* One ring interleaved between requests from all cpus */ - t[id].max_batch /= 
num_online_cpus() + 1; + smoke[idx].max_batch /= num_online_cpus() + 1; pr_debug("Limiting batches to %d requests on %s\n", - t[id].max_batch, engine->name); + smoke[idx].max_batch, engine->name); for (n = 0; n < ncpus; n++) { struct task_struct *tsk; tsk = kthread_run(__igt_breadcrumbs_smoketest, - &t[id], "igt/%d.%d", id, n); + &smoke[idx], "igt/%d.%d", idx, n); if (IS_ERR(tsk)) { ret = PTR_ERR(tsk); - mutex_unlock(&i915->drm.struct_mutex); goto out_flush; } get_task_struct(tsk); - threads[id * ncpus + n] = tsk; + threads[idx * ncpus + n] = tsk; } + + idx++; } - mutex_unlock(&i915->drm.struct_mutex); + yield(); /* start all threads before we begin */ msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); out_flush: + idx = 0; num_waits = 0; num_fences = 0; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { for (n = 0; n < ncpus; n++) { - struct task_struct *tsk = threads[id * ncpus + n]; + struct task_struct *tsk = threads[idx * ncpus + n]; int err; if (!tsk) @@ -1196,21 +1432,22 @@ out_flush: put_task_struct(tsk); } - num_waits += atomic_long_read(&t[id].num_waits); - num_fences += atomic_long_read(&t[id].num_fences); + num_waits += atomic_long_read(&smoke[idx].num_waits); + num_fences += atomic_long_read(&smoke[idx].num_fences); + idx++; } pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); - mutex_lock(&i915->drm.struct_mutex); ret = igt_live_test_end(&live) ?: ret; out_contexts: - mutex_unlock(&i915->drm.struct_mutex); - kfree(t[0].contexts); + kfree(smoke[0].contexts); out_threads: kfree(threads); +out_smoke: + kfree(smoke); out_file: - mock_file_free(i915, file); + fput(file); out_rpm: intel_runtime_pm_put(&i915->runtime_pm, wakeref); @@ -1223,11 +1460,12 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_nop_request), SUBTEST(live_all_engines), SUBTEST(live_sequential_engines), + SUBTEST(live_parallel_engines), SUBTEST(live_empty_request), SUBTEST(live_breadcrumbs_smoketest), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index b18eaefef798..d3bf9eefb682 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -23,11 +23,14 @@ #include <linux/random.h> -#include "../i915_drv.h" -#include "../i915_selftest.h" +#include "gt/intel_gt_pm.h" +#include "i915_drv.h" +#include "i915_selftest.h" + +#include "igt_flush_test.h" struct i915_selftest i915_selftest __read_mostly = { - .timeout_ms = 1000, + .timeout_ms = 500, }; int i915_mock_sanitycheck(void) @@ -54,6 +57,12 @@ enum { #undef selftest }; +enum { +#define selftest(name, func) perf_##name, +#include "i915_perf_selftests.h" +#undef selftest +}; + struct selftest { bool enabled; const char *name; @@ -75,6 +84,12 @@ static struct selftest live_selftests[] = { }; #undef selftest +#define selftest(n, f) [perf_##n] = { .name = #n, { .live = f } }, +static struct selftest perf_selftests[] = { +#include "i915_perf_selftests.h" +}; +#undef selftest + /* Embed the line number into the parameter name so that we can order tests */ #define selftest(n, func) selftest_0(n, func, param(n)) #define param(n) __PASTE(igt__, __PASTE(__LINE__, __mock_##n)) @@ -90,6 +105,13 @@ module_param_named(id, live_selftests[live_##n].enabled, bool, 0400); #include 
"i915_live_selftests.h" #undef selftest_0 #undef param + +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __perf_##n)) +#define selftest_0(n, func, id) \ +module_param_named(id, perf_selftests[perf_##n].enabled, bool, 0400); +#include "i915_perf_selftests.h" +#undef selftest_0 +#undef param #undef selftest static void set_default_test_all(struct selftest *st, unsigned int count) @@ -183,7 +205,7 @@ int i915_live_selftests(struct pci_dev *pdev) if (!i915_selftest.live) return 0; - err = run_selftests(live, to_i915(pci_get_drvdata(pdev))); + err = run_selftests(live, pdev_to_i915(pdev)); if (err) { i915_selftest.live = err; return err; @@ -197,6 +219,27 @@ int i915_live_selftests(struct pci_dev *pdev) return 0; } +int i915_perf_selftests(struct pci_dev *pdev) +{ + int err; + + if (!i915_selftest.perf) + return 0; + + err = run_selftests(perf, pdev_to_i915(pdev)); + if (err) { + i915_selftest.perf = err; + return err; + } + + if (i915_selftest.perf < 0) { + i915_selftest.perf = -ENOTTY; + return 1; + } + + return 0; +} + static bool apply_subtest_filter(const char *caller, const char *name) { char *filter, *sep, *tok; @@ -240,7 +283,65 @@ static bool apply_subtest_filter(const char *caller, const char *name) return result; } +int __i915_nop_setup(void *data) +{ + return 0; +} + +int __i915_nop_teardown(int err, void *data) +{ + return err; +} + +int __i915_live_setup(void *data) +{ + struct drm_i915_private *i915 = data; + + /* The selftests expect an idle system */ + if (intel_gt_pm_wait_for_idle(&i915->gt)) + return -EIO; + + return intel_gt_terminally_wedged(&i915->gt); +} + +int __i915_live_teardown(int err, void *data) +{ + struct drm_i915_private *i915 = data; + + if (igt_flush_test(i915)) + err = -EIO; + + i915_gem_drain_freed_objects(i915); + + return err; +} + +int __intel_gt_live_setup(void *data) +{ + struct intel_gt *gt = data; + + /* The selftests expect an idle system */ + if (intel_gt_pm_wait_for_idle(gt)) + return -EIO; + + return intel_gt_terminally_wedged(gt); +} + +int __intel_gt_live_teardown(int err, void *data) +{ + struct intel_gt *gt = data; + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_gem_drain_freed_objects(gt->i915); + + return err; +} + int __i915_subtests(const char *caller, + int (*setup)(void *data), + int (*teardown)(int err, void *data), const struct i915_subtest *st, unsigned int count, void *data) @@ -255,10 +356,17 @@ int __i915_subtests(const char *caller, if (!apply_subtest_filter(caller, st->name)) continue; + err = setup(data); + if (err) { + pr_err(DRIVER_NAME "/%s: setup failed for %s\n", + caller, st->name); + return err; + } + pr_info(DRIVER_NAME ": Running %s/%s\n", caller, st->name); GEM_TRACE("Running %s/%s\n", caller, st->name); - err = st->func(data); + err = teardown(st->func(data), data); if (err && err != -EINTR) { pr_err(DRIVER_NAME "/%s: %s failed with error %d\n", caller, st->name, err); @@ -297,3 +405,6 @@ MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardw module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400); MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); + +module_param_named_unsafe(perf_selftests, i915_selftest.perf, int, 0400); +MODULE_PARM_DESC(perf_selftests, "Run performance orientated selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); diff --git 
a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index fbc79b14823a..58b5f40a07dd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -24,6 +24,7 @@ #include <linux/prime_numbers.h> +#include "gem/i915_gem_context.h" #include "gem/selftests/mock_context.h" #include "i915_scatterlist.h" @@ -38,7 +39,7 @@ static bool assert_vma(struct i915_vma *vma, { bool ok = true; - if (vma->vm != ctx->vm) { + if (vma->vm != rcu_access_pointer(ctx->vm)) { pr_err("VMA created with wrong VM\n"); ok = false; } @@ -113,11 +114,13 @@ static int create_vmas(struct drm_i915_private *i915, list_for_each_entry(obj, objects, st_link) { for (pinned = 0; pinned <= 1; pinned++) { list_for_each_entry(ctx, contexts, link) { - struct i915_address_space *vm = ctx->vm; + struct i915_address_space *vm; struct i915_vma *vma; int err; + vm = i915_gem_context_get_vm_rcu(ctx); vma = checked_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -170,7 +173,7 @@ static int igt_vma_create(void *arg) } nc = 0; - for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) { + for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) { for (; nc < num_ctx; nc++) { ctx = mock_context(i915, "mock"); if (!ctx) @@ -193,6 +196,8 @@ static int igt_vma_create(void *arg) list_del_init(&ctx->link); mock_context_close(ctx); } + + cond_resched(); } end: @@ -341,6 +346,8 @@ static int igt_vma_pin1(void *arg) goto out; } } + + cond_resched(); } err = 0; @@ -597,6 +604,8 @@ static int igt_vma_rotate_remap(void *arg) } i915_vma_unpin(vma); + + cond_resched(); } } } @@ -617,7 +626,7 @@ static bool assert_partial(struct drm_i915_gem_object *obj, struct sgt_iter sgt; dma_addr_t dma; - for_each_sgt_dma(dma, sgt, vma->pages) { + for_each_sgt_daddr(dma, sgt, vma->pages) { dma_addr_t src; if (!size) { @@ -752,6 +761,8 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); nvma++; + + cond_resched(); } } @@ -823,13 +834,10 @@ int i915_vma_mock_selftests(void) } mock_init_ggtt(i915, ggtt); - mutex_lock(&i915->drm.struct_mutex); err = i915_subtests(tests, ggtt); - mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); + mock_device_flush(i915); i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(ggtt); kfree(ggtt); out_put: @@ -871,8 +879,6 @@ static int igt_vma_remapped_gtt(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); for (t = types; *t; t++) { @@ -961,12 +967,13 @@ static int igt_vma_remapped_gtt(void *arg) } } i915_vma_unpin_iomap(vma); + + cond_resched(); } } out: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_put(obj); return err; diff --git a/drivers/gpu/drm/i915/selftests/igt_atomic.c b/drivers/gpu/drm/i915/selftests/igt_atomic.c new file mode 100644 index 000000000000..fb506b699095 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_atomic.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2018 Intel Corporation + */ + +#include <linux/preempt.h> +#include <linux/bottom_half.h> +#include <linux/irqflags.h> + +#include "igt_atomic.h" + +static void __preempt_begin(void) +{ + preempt_disable(); +} + +static void __preempt_end(void) +{ + preempt_enable(); +} + +static void __softirq_begin(void) +{ + local_bh_disable(); +} + +static void __softirq_end(void) +{ + local_bh_enable(); +} + +static void __hardirq_begin(void) +{ + local_irq_disable(); +} 
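/*
 * Editor's sketch, not part of the diff above: the NULL-terminated
 * igt_atomic_phases[] table built in this new igt_atomic.c is normally
 * walked by a caller that brackets the operation under test with each
 * phase's begin/end hooks. Only igt_atomic_phases[] and
 * struct igt_atomic_section come from the patch; the callback signature
 * and the example_run_in_atomic_phases() name are hypothetical.
 */
static int __maybe_unused
example_run_in_atomic_phases(int (*op)(void *data), void *data)
{
	const struct igt_atomic_section *p;
	int err = 0;

	for (p = igt_atomic_phases; p->name; p++) {
		p->critical_section_begin();
		err = op(data); /* the callback must not sleep in here */
		p->critical_section_end();
		if (err) {
			pr_err("operation failed under %s section\n", p->name);
			break;
		}
	}

	return err;
}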
+ +static void __hardirq_end(void) +{ + local_irq_enable(); +} + +const struct igt_atomic_section igt_atomic_phases[] = { + { "preempt", __preempt_begin, __preempt_end }, + { "softirq", __softirq_begin, __softirq_end }, + { "hardirq", __hardirq_begin, __hardirq_end }, + { } +}; diff --git a/drivers/gpu/drm/i915/selftests/igt_atomic.h b/drivers/gpu/drm/i915/selftests/igt_atomic.h index 93ec89f487ec..1991798abf4b 100644 --- a/drivers/gpu/drm/i915/selftests/igt_atomic.h +++ b/drivers/gpu/drm/i915/selftests/igt_atomic.h @@ -6,51 +6,12 @@ #ifndef IGT_ATOMIC_H #define IGT_ATOMIC_H -#include <linux/preempt.h> -#include <linux/bottom_half.h> -#include <linux/irqflags.h> - -static void __preempt_begin(void) -{ - preempt_disable(); -} - -static void __preempt_end(void) -{ - preempt_enable(); -} - -static void __softirq_begin(void) -{ - local_bh_disable(); -} - -static void __softirq_end(void) -{ - local_bh_enable(); -} - -static void __hardirq_begin(void) -{ - local_irq_disable(); -} - -static void __hardirq_end(void) -{ - local_irq_enable(); -} - struct igt_atomic_section { const char *name; void (*critical_section_begin)(void); void (*critical_section_end)(void); }; -static const struct igt_atomic_section igt_atomic_phases[] = { - { "preempt", __preempt_begin, __preempt_end }, - { "softirq", __softirq_begin, __softirq_end }, - { "hardirq", __hardirq_begin, __hardirq_end }, - { } -}; +extern const struct igt_atomic_section igt_atomic_phases[]; #endif /* IGT_ATOMIC_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 5bfd1b2626a2..7b0939e3f007 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -4,38 +4,32 @@ * Copyright © 2018 Intel Corporation */ -#include "gem/i915_gem_context.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_selftest.h" #include "igt_flush_test.h" -int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) +int igt_flush_test(struct drm_i915_private *i915) { - int ret = i915_terminally_wedged(i915) ? -EIO : 0; - int repeat = !!(flags & I915_WAIT_LOCKED); + struct intel_gt *gt = &i915->gt; + int ret = intel_gt_is_wedged(gt) ? -EIO : 0; cond_resched(); - do { - if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) { - pr_err("%pS timed out, cancelling all further testing.\n", - __builtin_return_address(0)); + if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) { + pr_err("%pS timed out, cancelling all further testing.\n", + __builtin_return_address(0)); - GEM_TRACE("%pS timed out.\n", - __builtin_return_address(0)); - GEM_TRACE_DUMP(); + GEM_TRACE("%pS timed out.\n", + __builtin_return_address(0)); + GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); - repeat = 0; - ret = -EIO; - } - - /* Ensure we also flush after wedging. 
*/ - if (flags & I915_WAIT_LOCKED) - i915_retire_requests(i915); - } while (repeat--); + intel_gt_set_wedged(gt); + ret = -EIO; + } return ret; } diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h index 63e009927c43..7541fa74e641 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h @@ -9,6 +9,6 @@ struct drm_i915_private; -int igt_flush_test(struct drm_i915_private *i915, unsigned int flags); +int igt_flush_test(struct drm_i915_private *i915); #endif /* IGT_FLUSH_TEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 3e902761cd16..c130010a7033 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -4,7 +4,8 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_drv.h" +#include "i915_drv.h" +#include "gt/intel_gt_requests.h" #include "../i915_selftest.h" #include "igt_flush_test.h" @@ -15,20 +16,16 @@ int igt_live_test_begin(struct igt_live_test *t, const char *func, const char *name) { + struct intel_gt *gt = &i915->gt; struct intel_engine_cs *engine; enum intel_engine_id id; int err; - lockdep_assert_held(&i915->drm.struct_mutex); - t->i915 = i915; t->func = func; t->name = name; - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); + err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", func, name, err); @@ -37,7 +34,7 @@ int igt_live_test_begin(struct igt_live_test *t, t->reset_global = i915_reset_count(&i915->gpu_error); - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) t->reset_engine[id] = i915_reset_engine_count(&i915->gpu_error, engine); @@ -50,9 +47,7 @@ int igt_live_test_end(struct igt_live_test *t) struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) return -EIO; if (t->reset_global != i915_reset_count(&i915->gpu_error)) { @@ -62,7 +57,7 @@ int igt_live_test_end(struct igt_live_test *t) return -EIO; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, &i915->gt, id) { if (t->reset_engine[id] == i915_reset_engine_count(&i915->gpu_error, engine)) continue; diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h index c0e9f99d50de..36ed42736c52 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h @@ -7,7 +7,7 @@ #ifndef IGT_LIVE_TEST_H #define IGT_LIVE_TEST_H -#include "../i915_gem.h" +#include "gt/intel_engine.h" /* for I915_NUM_ENGINES */ struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/selftests/igt_mmap.c b/drivers/gpu/drm/i915/selftests/igt_mmap.c new file mode 100644 index 000000000000..583a4ff8b8c9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_mmap.c @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <drm/drm_file.h> + +#include "i915_drv.h" +#include "igt_mmap.h" + +unsigned long igt_mmap_node(struct drm_i915_private *i915, + struct drm_vma_offset_node *node, + unsigned long addr, + unsigned long prot, + unsigned long flags) +{ + struct file *file; + int err; + + /* Pretend to open("/dev/dri/card0") */ + file = mock_drm_getfile(i915->drm.primary, 
O_RDWR); + if (IS_ERR(file)) + return PTR_ERR(file); + + err = drm_vma_node_allow(node, file->private_data); + if (err) { + addr = err; + goto out_file; + } + + addr = vm_mmap(file, addr, drm_vma_node_size(node) << PAGE_SHIFT, + prot, flags, drm_vma_node_offset_addr(node)); + + drm_vma_node_revoke(node, file->private_data); +out_file: + fput(file); + return addr; +} diff --git a/drivers/gpu/drm/i915/selftests/igt_mmap.h b/drivers/gpu/drm/i915/selftests/igt_mmap.h new file mode 100644 index 000000000000..6e716cb59d7e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_mmap.h @@ -0,0 +1,19 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef IGT_MMAP_H +#define IGT_MMAP_H + +struct drm_i915_private; +struct drm_vma_offset_node; + +unsigned long igt_mmap_node(struct drm_i915_private *i915, + struct drm_vma_offset_node *node, + unsigned long addr, + unsigned long prot, + unsigned long flags); + +#endif /* IGT_MMAP_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c index 587df6fd4ffe..9f8590b868a9 100644 --- a/drivers/gpu/drm/i915/selftests/igt_reset.c +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c @@ -7,47 +7,45 @@ #include "igt_reset.h" #include "gt/intel_engine.h" +#include "gt/intel_gt.h" #include "../i915_drv.h" -void igt_global_reset_lock(struct drm_i915_private *i915) +void igt_global_reset_lock(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - pr_debug("%s: current gpu_error=%08lx\n", - __func__, i915->gpu_error.flags); + pr_debug("%s: current gpu_error=%08lx\n", __func__, gt->reset.flags); - while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) - wait_event(i915->gpu_error.reset_queue, - !test_bit(I915_RESET_BACKOFF, - &i915->gpu_error.flags)); + while (test_and_set_bit(I915_RESET_BACKOFF, &gt->reset.flags)) + wait_event(gt->reset.queue, + !test_bit(I915_RESET_BACKOFF, &gt->reset.flags)); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { while (test_and_set_bit(I915_RESET_ENGINE + id, - &i915->gpu_error.flags)) - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_ENGINE + id, + &gt->reset.flags)) + wait_on_bit(&gt->reset.flags, I915_RESET_ENGINE + id, TASK_UNINTERRUPTIBLE); } } -void igt_global_reset_unlock(struct drm_i915_private *i915) +void igt_global_reset_unlock(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + for_each_engine(engine, gt, id) + clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); - clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); - wake_up_all(&i915->gpu_error.reset_queue); + clear_bit(I915_RESET_BACKOFF, &gt->reset.flags); + wake_up_all(&gt->reset.queue); } -bool igt_force_reset(struct drm_i915_private *i915) +bool igt_force_reset(struct intel_gt *gt) { - i915_gem_set_wedged(i915); - i915_reset(i915, 0, NULL); + intel_gt_set_wedged(gt); + intel_gt_reset(gt, 0, NULL); - return !i915_reset_failed(i915); + return !intel_gt_is_wedged(gt); } diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.h b/drivers/gpu/drm/i915/selftests/igt_reset.h index 363bd853e50f..851873b67ab3 100644 --- a/drivers/gpu/drm/i915/selftests/igt_reset.h +++ b/drivers/gpu/drm/i915/selftests/igt_reset.h @@ -7,10 +7,12 @@ #ifndef __I915_SELFTESTS_IGT_RESET_H__ #define __I915_SELFTESTS_IGT_RESET_H__ -#include "../i915_drv.h" +#include <linux/types.h> -void igt_global_reset_lock(struct drm_i915_private 
*i915); -void igt_global_reset_unlock(struct drm_i915_private *i915); -bool igt_force_reset(struct drm_i915_private *i915); +struct intel_gt; + +void igt_global_reset_lock(struct intel_gt *gt); +void igt_global_reset_unlock(struct intel_gt *gt); +bool igt_force_reset(struct intel_gt *gt); #endif diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 1e59b543cf27..e8a58fe49c39 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -3,29 +3,28 @@ * * Copyright © 2018 Intel Corporation */ +#include "gt/intel_gt.h" #include "gem/selftests/igt_gem_utils.h" #include "igt_spinner.h" -int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915) +int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt) { unsigned int mode; void *vaddr; int err; - GEM_BUG_ON(INTEL_GEN(i915) < 8); - memset(spin, 0, sizeof(*spin)); - spin->i915 = i915; + spin->gt = gt; - spin->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + spin->hws = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(spin->hws)) { err = PTR_ERR(spin->hws); goto err; } - spin->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + spin->obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(spin->obj)) { err = PTR_ERR(spin->obj); goto err_hws; @@ -39,7 +38,7 @@ int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915) } spin->seqno = memset(vaddr, 0xff, PAGE_SIZE); - mode = i915_coherent_map_type(i915); + mode = i915_coherent_map_type(gt->i915); vaddr = i915_gem_object_pin_map(spin->obj, mode); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); @@ -77,7 +76,10 @@ static int move_to_active(struct i915_vma *vma, int err; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, flags); + err = i915_request_await_object(rq, vma->obj, + flags & EXEC_OBJECT_WRITE); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, flags); i915_vma_unlock(vma); return err; @@ -85,20 +87,26 @@ static int move_to_active(struct i915_vma *vma, struct i915_request * igt_spinner_create_request(struct igt_spinner *spin, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, + struct intel_context *ce, u32 arbitration_command) { + struct intel_engine_cs *engine = ce->engine; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; + unsigned int flags; u32 *batch; int err; - vma = i915_vma_instance(spin->obj, ctx->vm, NULL); + GEM_BUG_ON(spin->gt != ce->vm->gt); + + if (!intel_engine_can_store_dword(ce->engine)) + return ERR_PTR(-ENODEV); + + vma = i915_vma_instance(spin->obj, ce->vm, NULL); if (IS_ERR(vma)) return ERR_CAST(vma); - hws = i915_vma_instance(spin->hws, ctx->vm, NULL); + hws = i915_vma_instance(spin->hws, ce->vm, NULL); if (IS_ERR(hws)) return ERR_CAST(hws); @@ -110,7 +118,7 @@ igt_spinner_create_request(struct igt_spinner *spin, if (err) goto unpin_vma; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto unpin_hws; @@ -126,28 +134,52 @@ igt_spinner_create_request(struct igt_spinner *spin, batch = spin->batch; - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(hws_address(hws, rq)); - *batch++ = upper_32_bits(hws_address(hws, rq)); + if (INTEL_GEN(rq->i915) >= 8) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = upper_32_bits(hws_address(hws, rq)); + } else if (INTEL_GEN(rq->i915) >= 6) { + *batch++ = 
MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = hws_address(hws, rq); + } else if (INTEL_GEN(rq->i915) >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *batch++ = 0; + *batch++ = hws_address(hws, rq); + } else { + *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *batch++ = hws_address(hws, rq); + } *batch++ = rq->fence.seqno; *batch++ = arbitration_command; - *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + if (INTEL_GEN(rq->i915) >= 8) + *batch++ = MI_BATCH_BUFFER_START | BIT(8) | 1; + else if (IS_HASWELL(rq->i915)) + *batch++ = MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW; + else if (INTEL_GEN(rq->i915) >= 6) + *batch++ = MI_BATCH_BUFFER_START; + else + *batch++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; *batch++ = lower_32_bits(vma->node.start); *batch++ = upper_32_bits(vma->node.start); + *batch++ = MI_BATCH_BUFFER_END; /* not reached */ - i915_gem_chipset_flush(spin->i915); + intel_gt_chipset_flush(engine->gt); if (engine->emit_init_breadcrumb && - rq->timeline->has_initial_breadcrumb) { + i915_request_timeline(rq)->has_initial_breadcrumb) { err = engine->emit_init_breadcrumb(rq); if (err) goto cancel_rq; } - err = engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0); + flags = 0; + if (INTEL_GEN(rq->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + err = engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); cancel_rq: if (err) { @@ -172,7 +204,7 @@ hws_seqno(const struct igt_spinner *spin, const struct i915_request *rq) void igt_spinner_end(struct igt_spinner *spin) { *spin->batch = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(spin->i915); + intel_gt_chipset_flush(spin->gt); } void igt_spinner_fini(struct igt_spinner *spin) diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h index 34a88ac9b47a..ec62c9ef320b 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.h +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h @@ -14,21 +14,22 @@ #include "i915_request.h" #include "i915_selftest.h" +struct intel_gt; + struct igt_spinner { - struct drm_i915_private *i915; + struct intel_gt *gt; struct drm_i915_gem_object *hws; struct drm_i915_gem_object *obj; u32 *batch; void *seqno; }; -int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915); +int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt); void igt_spinner_fini(struct igt_spinner *spin); struct i915_request * igt_spinner_create_request(struct igt_spinner *spin, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, + struct intel_context *ce, u32 arbitration_command); void igt_spinner_end(struct igt_spinner *spin); diff --git a/drivers/gpu/drm/i915/selftests/igt_wedge_me.h b/drivers/gpu/drm/i915/selftests/igt_wedge_me.h deleted file mode 100644 index 08e5ff11bbd9..000000000000 --- a/drivers/gpu/drm/i915/selftests/igt_wedge_me.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation - */ - -#ifndef IGT_WEDGE_ME_H -#define IGT_WEDGE_ME_H - -#include <linux/workqueue.h> - -#include "../i915_gem.h" - -struct drm_i915_private; - -struct igt_wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const char *name; -}; - -static void __igt_wedge_me(struct work_struct *work) -{ - struct igt_wedge_me *w = container_of(work, typeof(*w), work.work); - - pr_err("%s timed out, cancelling test.\n", w->name); - - GEM_TRACE("%s timed out.\n", w->name); - GEM_TRACE_DUMP(); - - i915_gem_set_wedged(w->i915); -} - -static void __igt_init_wedge(struct igt_wedge_me 
*w, - struct drm_i915_private *i915, - long timeout, - const char *name) -{ - w->i915 = i915; - w->name = name; - - INIT_DELAYED_WORK_ONSTACK(&w->work, __igt_wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __igt_fini_wedge(struct igt_wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define igt_wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__igt_init_wedge((W), (DEV), (TIMEOUT), __func__); \ - (W)->i915; \ - __igt_fini_wedge((W))) - -#endif /* IGT_WEDGE_ME_H */ diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c deleted file mode 100644 index 6ca8584cd64c..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#include "i915_selftest.h" -#include "gem/i915_gem_pm.h" - -/* max doorbell number + negative test for each client type */ -#define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM) - -static struct intel_guc_client *clients[ATTEMPTS]; - -static bool available_dbs(struct intel_guc *guc, u32 priority) -{ - unsigned long offset; - unsigned long end; - u16 id; - - /* first half is used for normal priority, second half for high */ - offset = 0; - end = GUC_NUM_DOORBELLS / 2; - if (priority <= GUC_CLIENT_PRIORITY_HIGH) { - offset = end; - end += offset; - } - - id = find_next_zero_bit(guc->doorbell_bitmap, end, offset); - if (id < end) - return true; - - return false; -} - -static int check_all_doorbells(struct intel_guc *guc) -{ - u16 db_id; - - pr_info_once("Max number of doorbells: %d", GUC_NUM_DOORBELLS); - for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) { - if (!doorbell_ok(guc, db_id)) { - pr_err("doorbell %d, not ok\n", db_id); - return -EIO; - } - } - - return 0; -} - -static int ring_doorbell_nop(struct intel_guc_client *client) -{ - struct guc_process_desc *desc = __get_process_desc(client); - int err; - - client->use_nop_wqi = true; - - spin_lock_irq(&client->wq_lock); - - guc_wq_item_append(client, 0, 0, 0, 0); - guc_ring_doorbell(client); - - spin_unlock_irq(&client->wq_lock); - - client->use_nop_wqi = false; - - /* if there are no issues GuC will update the WQ head and keep the - * WQ in active status - */ - err = wait_for(READ_ONCE(desc->head) == READ_ONCE(desc->tail), 10); - if (err) { - pr_err("doorbell %u ring failed!\n", client->doorbell_id); - return -EIO; - } - - if (desc->wq_status != WQ_STATUS_ACTIVE) { - pr_err("doorbell %u ring put WQ in bad state (%u)!\n", - client->doorbell_id, desc->wq_status); - return -EIO; - } - - return 0; -} - -/* - * Basic client sanity check, handy to validate create_clients. - */ -static int validate_client(struct intel_guc_client *client, - int client_priority, - bool is_preempt_client) -{ - struct drm_i915_private *dev_priv = guc_to_i915(client->guc); - struct i915_gem_context *ctx_owner = is_preempt_client ? - dev_priv->preempt_context : dev_priv->kernel_context; - - if (client->owner != ctx_owner || - client->engines != INTEL_INFO(dev_priv)->engine_mask || - client->priority != client_priority || - client->doorbell_id == GUC_DOORBELL_INVALID) - return -EINVAL; - else - return 0; -} - -static bool client_doorbell_in_sync(struct intel_guc_client *client) -{ - return !client || doorbell_ok(client->guc, client->doorbell_id); -} - -/* - * Check that we're able to synchronize guc_clients with their doorbells - * - * We're creating clients and reserving doorbells once, at module load. During - * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to - * GuC being reset. In other words - GuC clients are still around, but the - * status of their doorbells may be incorrect. This is the reason behind - * validating that the doorbells status expected by the driver matches what the - * GuC/HW have. 
- */ -static int igt_guc_clients(void *args) -{ - struct drm_i915_private *dev_priv = args; - intel_wakeref_t wakeref; - struct intel_guc *guc; - int err = 0; - - GEM_BUG_ON(!HAS_GUC(dev_priv)); - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - - guc = &dev_priv->guc; - if (!guc) { - pr_err("No guc object!\n"); - err = -EINVAL; - goto unlock; - } - - err = check_all_doorbells(guc); - if (err) - goto unlock; - - /* - * Get rid of clients created during driver load because the test will - * recreate them. - */ - guc_clients_disable(guc); - guc_clients_destroy(guc); - if (guc->execbuf_client || guc->preempt_client) { - pr_err("guc_clients_destroy lied!\n"); - err = -EINVAL; - goto unlock; - } - - err = guc_clients_create(guc); - if (err) { - pr_err("Failed to create clients\n"); - goto unlock; - } - GEM_BUG_ON(!guc->execbuf_client); - - err = validate_client(guc->execbuf_client, - GUC_CLIENT_PRIORITY_KMD_NORMAL, false); - if (err) { - pr_err("execbug client validation failed\n"); - goto out; - } - - if (guc->preempt_client) { - err = validate_client(guc->preempt_client, - GUC_CLIENT_PRIORITY_KMD_HIGH, true); - if (err) { - pr_err("preempt client validation failed\n"); - goto out; - } - } - - /* each client should now have reserved a doorbell */ - if (!has_doorbell(guc->execbuf_client) || - (guc->preempt_client && !has_doorbell(guc->preempt_client))) { - pr_err("guc_clients_create didn't reserve doorbells\n"); - err = -EINVAL; - goto out; - } - - /* Now enable the clients */ - guc_clients_enable(guc); - - /* each client should now have received a doorbell */ - if (!client_doorbell_in_sync(guc->execbuf_client) || - !client_doorbell_in_sync(guc->preempt_client)) { - pr_err("failed to initialize the doorbells\n"); - err = -EINVAL; - goto out; - } - - /* - * Basic test - an attempt to reallocate a valid doorbell to the - * client it is currently assigned should not cause a failure. - */ - err = create_doorbell(guc->execbuf_client); - -out: - /* - * Leave clean state for other test, plus the driver always destroy the - * clients during unload. - */ - guc_clients_disable(guc); - guc_clients_destroy(guc); - guc_clients_create(guc); - guc_clients_enable(guc); -unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); - return err; -} - -/* - * Create as many clients as number of doorbells. Note that there's already - * client(s)/doorbell(s) created during driver load, but this test creates - * its own and do not interact with the existing ones. 
- */ -static int igt_guc_doorbells(void *arg) -{ - struct drm_i915_private *dev_priv = arg; - intel_wakeref_t wakeref; - struct intel_guc *guc; - int i, err = 0; - u16 db_id; - - GEM_BUG_ON(!HAS_GUC(dev_priv)); - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - - guc = &dev_priv->guc; - if (!guc) { - pr_err("No guc object!\n"); - err = -EINVAL; - goto unlock; - } - - err = check_all_doorbells(guc); - if (err) - goto unlock; - - for (i = 0; i < ATTEMPTS; i++) { - clients[i] = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->engine_mask, - i % GUC_CLIENT_PRIORITY_NUM, - dev_priv->kernel_context); - - if (!clients[i]) { - pr_err("[%d] No guc client\n", i); - err = -EINVAL; - goto out; - } - - if (IS_ERR(clients[i])) { - if (PTR_ERR(clients[i]) != -ENOSPC) { - pr_err("[%d] unexpected error\n", i); - err = PTR_ERR(clients[i]); - goto out; - } - - if (available_dbs(guc, i % GUC_CLIENT_PRIORITY_NUM)) { - pr_err("[%d] non-db related alloc fail\n", i); - err = -EINVAL; - goto out; - } - - /* expected, ran out of dbs for this client type */ - continue; - } - - /* - * The check below is only valid because we keep a doorbell - * assigned during the whole life of the client. - */ - if (clients[i]->stage_id >= GUC_NUM_DOORBELLS) { - pr_err("[%d] more clients than doorbells (%d >= %d)\n", - i, clients[i]->stage_id, GUC_NUM_DOORBELLS); - err = -EINVAL; - goto out; - } - - err = validate_client(clients[i], - i % GUC_CLIENT_PRIORITY_NUM, false); - if (err) { - pr_err("[%d] client_alloc sanity check failed!\n", i); - err = -EINVAL; - goto out; - } - - db_id = clients[i]->doorbell_id; - - err = __guc_client_enable(clients[i]); - if (err) { - pr_err("[%d] Failed to create a doorbell\n", i); - goto out; - } - - /* doorbell id shouldn't change, we are holding the mutex */ - if (db_id != clients[i]->doorbell_id) { - pr_err("[%d] doorbell id changed (%d != %d)\n", - i, db_id, clients[i]->doorbell_id); - err = -EINVAL; - goto out; - } - - err = check_all_doorbells(guc); - if (err) - goto out; - - err = ring_doorbell_nop(clients[i]); - if (err) - goto out; - } - -out: - for (i = 0; i < ATTEMPTS; i++) - if (!IS_ERR_OR_NULL(clients[i])) { - __guc_client_disable(clients[i]); - guc_client_free(clients[i]); - } -unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); - return err; -} - -int intel_guc_live_selftest(struct drm_i915_private *dev_priv) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_guc_clients), - SUBTEST(igt_guc_doorbells), - }; - - if (!USES_GUC_SUBMISSION(dev_priv)) - return 0; - - return i915_subtests(tests, dev_priv); -} diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c new file mode 100644 index 000000000000..3ef3620e0da5 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -0,0 +1,621 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" + +#include "mock_drm.h" +#include "mock_gem_device.h" +#include "mock_region.h" + +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_lmem.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_object_blt.h" +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" +#include "selftests/igt_flush_test.h" +#include "selftests/i915_random.h" + +static void 
close_objects(struct intel_memory_region *mem, + struct list_head *objects) +{ + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + if (i915_gem_object_has_pinned_pages(obj)) + i915_gem_object_unpin_pages(obj); + /* No polluting the memory region between tests */ + __i915_gem_object_put_pages(obj); + list_del(&obj->st_link); + i915_gem_object_put(obj); + } + + cond_resched(); + + i915_gem_drain_freed_objects(i915); +} + +static int igt_mock_fill(void *arg) +{ + struct intel_memory_region *mem = arg; + resource_size_t total = resource_size(&mem->region); + resource_size_t page_size; + resource_size_t rem; + unsigned long max_pages; + unsigned long page_num; + LIST_HEAD(objects); + int err = 0; + + page_size = mem->mm.chunk_size; + max_pages = div64_u64(total, page_size); + rem = total; + + for_each_prime_number_from(page_num, 1, max_pages) { + resource_size_t size = page_num * page_size; + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_create_region(mem, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + break; + } + + list_add(&obj->st_link, &objects); + rem -= size; + } + + if (err == -ENOMEM) + err = 0; + if (err == -ENXIO) { + if (page_num * page_size <= rem) { + pr_err("%s failed, space still left in region\n", + __func__); + err = -EINVAL; + } else { + err = 0; + } + } + + close_objects(mem, &objects); + + return err; +} + +static struct drm_i915_gem_object * +igt_object_create(struct intel_memory_region *mem, + struct list_head *objects, + u64 size, + unsigned int flags) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mem, size, flags); + if (IS_ERR(obj)) + return obj; + + err = i915_gem_object_pin_pages(obj); + if (err) + goto put; + + list_add(&obj->st_link, objects); + return obj; + +put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static void igt_object_release(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj); + list_del(&obj->st_link); + i915_gem_object_put(obj); +} + +static int igt_mock_contiguous(void *arg) +{ + struct intel_memory_region *mem = arg; + struct drm_i915_gem_object *obj; + unsigned long n_objects; + LIST_HEAD(objects); + LIST_HEAD(holes); + I915_RND_STATE(prng); + resource_size_t total; + resource_size_t min; + u64 target; + int err = 0; + + total = resource_size(&mem->region); + + /* Min size */ + obj = igt_object_create(mem, &objects, mem->mm.chunk_size, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->mm.pages->nents != 1) { + pr_err("%s min object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* Max size */ + obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->mm.pages->nents != 1) { + pr_err("%s max object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* Internal fragmentation should not bleed into the object size */ + target = i915_prandom_u64_state(&prng); + div64_u64_rem(target, total, &target); + target = round_up(target, PAGE_SIZE); + target = max_t(u64, PAGE_SIZE, target); + + obj = igt_object_create(mem, &objects, target, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return 
PTR_ERR(obj); + + if (obj->base.size != target) { + pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__, + obj->base.size, target); + err = -EINVAL; + goto err_close_objects; + } + + if (obj->mm.pages->nents != 1) { + pr_err("%s object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* + * Try to fragment the address space, such that half of it is free, but + * the max contiguous block size is SZ_64K. + */ + + target = SZ_64K; + n_objects = div64_u64(total, target); + + while (n_objects--) { + struct list_head *list; + + if (n_objects % 2) + list = &holes; + else + list = &objects; + + obj = igt_object_create(mem, list, target, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; + } + } + + close_objects(mem, &holes); + + min = target; + target = total >> 1; + + /* Make sure we can still allocate all the fragmented space */ + obj = igt_object_create(mem, &objects, target, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; + } + + igt_object_release(obj); + + /* + * Even though we have enough free space, we don't have a big enough + * contiguous block. Make sure that holds true. + */ + + do { + bool should_fail = target > min; + + obj = igt_object_create(mem, &objects, target, + I915_BO_ALLOC_CONTIGUOUS); + if (should_fail != IS_ERR(obj)) { + pr_err("%s target allocation(%llx) mismatch\n", + __func__, target); + err = -EINVAL; + goto err_close_objects; + } + + target >>= 1; + } while (target >= mem->mm.chunk_size); + +err_close_objects: + list_splice_tail(&holes, &objects); + close_objects(mem, &objects); + return err; +} + +static int igt_gpu_write_dw(struct intel_context *ce, + struct i915_vma *vma, + u32 dword, + u32 value) +{ + return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32), + vma->size >> PAGE_SHIFT, value); +} + +static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned long n = obj->base.size >> PAGE_SHIFT; + u32 *ptr; + int err; + + err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + ptr += dword; + while (n--) { + if (*ptr != val) { + pr_err("base[%u]=%08x, val=%08x\n", + dword, *ptr, val); + err = -EINVAL; + break; + } + + ptr += PAGE_SIZE / sizeof(*ptr); + } + + i915_gem_object_unpin_map(obj); + return err; +} + +static int igt_gpu_write(struct i915_gem_context *ctx, + struct drm_i915_gem_object *obj) +{ + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct intel_context *ce; + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + unsigned int count; + struct i915_vma *vma; + int *order; + int i, n; + int err = 0; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + + n = 0; + count = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + vm = ce->vm; + n++; + } + i915_gem_context_unlock_engines(ctx); + if (!n) + return 0; + + order = i915_random_order(count * count, &prng); + if (!order) + return -ENOMEM; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_free; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_free; + + i = 0; + engines = i915_gem_context_lock_engines(ctx); + do { + u32 rng = prandom_u32_state(&prng); + u32 dword = offset_in_page(rng) 
/ 4; + + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; + + err = igt_gpu_write_dw(ce, vma, dword, rng); + if (err) + break; + + err = igt_cpu_check(obj, dword, rng); + if (err) + break; + } while (!__igt_timeout(end_time, NULL)); + i915_gem_context_unlock_engines(ctx); + +out_free: + kfree(order); + + if (err == -ENOMEM) + err = 0; + + return err; +} + +static int igt_lmem_create(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err = 0; + + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_lmem_write_gpu(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + struct file *file; + I915_RND_STATE(prng); + u32 sz; + int err; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); + + obj = i915_gem_object_create_lmem(i915, sz, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_file; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + err = igt_gpu_write(ctx, obj); + if (err) + pr_err("igt_gpu_write failed(%d)\n", err); + + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); +out_file: + fput(file); + return err; +} + +static struct intel_engine_cs * +random_engine_class(struct drm_i915_private *i915, + unsigned int class, + struct rnd_state *prng) +{ + struct intel_engine_cs *engine; + unsigned int count; + + count = 0; + for (engine = intel_engine_lookup_user(i915, class, 0); + engine && engine->uabi_class == class; + engine = rb_entry_safe(rb_next(&engine->uabi_node), + typeof(*engine), uabi_node)) + count++; + + count = i915_prandom_u32_max_state(count, prng); + return intel_engine_lookup_user(i915, class, count); +} + +static int igt_lmem_write_cpu(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + u32 bytes[] = { + 0, /* rng placeholder */ + sizeof(u32), + sizeof(u64), + 64, /* cl */ + PAGE_SIZE, + PAGE_SIZE - sizeof(u32), + PAGE_SIZE - sizeof(u64), + PAGE_SIZE - 64, + }; + struct intel_engine_cs *engine; + u32 *vaddr; + u32 sz; + u32 i; + int *order; + int count; + int err; + + engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng); + if (!engine) + return 0; + + pr_info("%s: using %s\n", __func__, engine->name); + + sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); + sz = max_t(u32, 2 * PAGE_SIZE, sz); + + obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + + /* Put the pages into a known state -- from the gpu for added fun */ + intel_engine_pm_get(engine); + err = i915_gem_object_fill_blt(obj, engine->kernel_context, 0xdeadbeaf); + intel_engine_pm_put(engine); + if (err) + goto out_unpin; + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_wc_domain(obj, true); + i915_gem_object_unlock(obj); + if (err) + goto 
out_unpin; + + count = ARRAY_SIZE(bytes); + order = i915_random_order(count * count, &prng); + if (!order) { + err = -ENOMEM; + goto out_unpin; + } + + /* We want to throw in a random width/align */ + bytes[0] = igt_random_offset(&prng, 0, PAGE_SIZE, sizeof(u32), + sizeof(u32)); + + i = 0; + do { + u32 offset; + u32 align; + u32 dword; + u32 size; + u32 val; + + size = bytes[order[i] % count]; + i = (i + 1) % (count * count); + + align = bytes[order[i] % count]; + i = (i + 1) % (count * count); + + align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align)); + + offset = igt_random_offset(&prng, 0, obj->base.size, + size, align); + + val = prandom_u32_state(&prng); + memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf, + size / sizeof(u32)); + + /* + * Sample random dw -- don't waste precious time reading every + * single dw. + */ + dword = igt_random_offset(&prng, offset, + offset + size, + sizeof(u32), sizeof(u32)); + dword /= sizeof(u32); + if (vaddr[dword] != (val ^ 0xdeadbeaf)) { + pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n", + __func__, dword, vaddr[dword], val ^ 0xdeadbeaf, + size, align, offset); + err = -EINVAL; + break; + } + } while (!__igt_timeout(end_time, NULL)); + +out_unpin: + i915_gem_object_unpin_map(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +int intel_memory_region_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_fill), + SUBTEST(igt_mock_contiguous), + }; + struct intel_memory_region *mem; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("failed to create memory region\n"); + err = PTR_ERR(mem); + goto out_unref; + } + + err = i915_subtests(tests, mem); + + intel_memory_region_put(mem); +out_unref: + drm_dev_put(&i915->drm); + return err; +} + +int intel_memory_region_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_lmem_create), + SUBTEST(igt_lmem_write_cpu), + SUBTEST(igt_lmem_write_gpu), + }; + + if (!HAS_LMEM(i915)) { + pr_info("device lacks LMEM support, skipping\n"); + return 0; + } + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_live_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 86815c6072a1..0e4e6be0101d 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -67,6 +67,7 @@ static int intel_shadow_table_check(void) } reg_lists[] = { { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, + { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, }; const i915_reg_t *reg; unsigned int i, j; @@ -101,6 +102,7 @@ int intel_uncore_mock_selftests(void) { __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false }, { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, { __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true }, + { __gen12_fw_ranges, ARRAY_SIZE(__gen12_fw_ranges), true }, }; int err, i; @@ -138,19 +140,19 @@ static int live_forcewake_ops(void *arg) } }; const struct reg *r; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_uncore_forcewake_domain *domain; - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id 
id; intel_wakeref_t wakeref; unsigned int tmp; int err = 0; - GEM_BUG_ON(i915->gt.awake); + GEM_BUG_ON(gt->awake); /* vlv/chv with their pcu behave differently wrt reads */ - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) { pr_debug("PCU fakes forcewake badly; skipping\n"); return 0; } @@ -168,15 +170,15 @@ static int live_forcewake_ops(void *arg) /* We have to pick carefully to get the exact behaviour we need */ for (r = registers; r->name; r++) - if (r->platforms & INTEL_INFO(i915)->gen_mask) + if (r->platforms & INTEL_INFO(gt->i915)->gen_mask) break; if (!r->name) { pr_debug("Forcewaked register not known for %s; skipping\n", - intel_platform_name(INTEL_INFO(i915)->platform)); + intel_platform_name(INTEL_INFO(gt->i915)->platform)); return 0; } - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + wakeref = intel_runtime_pm_get(uncore->rpm); for_each_fw_domain(domain, uncore, tmp) { smp_store_mb(domain->active, false); @@ -186,7 +188,7 @@ static int live_forcewake_ops(void *arg) intel_uncore_fw_release_timer(&domain->timer); } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { i915_reg_t mmio = _MMIO(engine->mmio_base + r->offset); u32 __iomem *reg = uncore->regs + engine->mmio_base + r->offset; enum forcewake_domains fw_domains; @@ -247,22 +249,22 @@ static int live_forcewake_ops(void *arg) } out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + intel_runtime_pm_put(uncore->rpm, wakeref); return err; } static int live_forcewake_domains(void *arg) { #define FW_RANGE 0x40000 - struct drm_i915_private *dev_priv = arg; - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt = arg; + struct intel_uncore *uncore = gt->uncore; unsigned long *valid; u32 offset; int err; - if (!HAS_FPGA_DBG_UNCLAIMED(dev_priv) && - !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv)) + if (!HAS_FPGA_DBG_UNCLAIMED(gt->i915) && + !IS_VALLEYVIEW(gt->i915) && + !IS_CHERRYVIEW(gt->i915)) return 0; /* @@ -281,7 +283,7 @@ static int live_forcewake_domains(void *arg) for (offset = 0; offset < FW_RANGE; offset += 4) { i915_reg_t reg = { offset }; - (void)I915_READ_FW(reg); + intel_uncore_posting_read_fw(uncore, reg); if (!check_for_unclaimed_mmio(uncore)) set_bit(offset, valid); } @@ -298,7 +300,7 @@ static int live_forcewake_domains(void *arg) check_for_unclaimed_mmio(uncore); - (void)I915_READ(reg); + intel_uncore_posting_read_fw(uncore, reg); if (check_for_unclaimed_mmio(uncore)) { pr_err("Unclaimed mmio read to register 0x%04x\n", offset); @@ -310,21 +312,23 @@ static int live_forcewake_domains(void *arg) return err; } +static int live_fw_table(void *arg) +{ + struct intel_gt *gt = arg; + + /* Confirm the table we load is still valid */ + return intel_fw_table_check(gt->uncore->fw_domains_table, + gt->uncore->fw_domains_table_entries, + INTEL_GEN(gt->i915) >= 9); +} + int intel_uncore_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(live_fw_table), SUBTEST(live_forcewake_ops), SUBTEST(live_forcewake_domains), }; - int err; - - /* Confirm the table we load is still valid */ - err = intel_fw_table_check(i915->uncore.fw_domains_table, - i915->uncore.fw_domains_table_entries, - INTEL_GEN(i915) >= 9); - if (err) - return err; - - return i915_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index b976c12817c5..080b90b63d16 
100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -40,6 +40,7 @@ void __onstack_fence_init(struct i915_sw_fence *fence, __init_waitqueue_head(&fence->wait, name, key); atomic_set(&fence->pending, 1); + fence->error = 0; fence->flags = (unsigned long)nop_fence_notify; } diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.c b/drivers/gpu/drm/i915/selftests/mock_drm.c deleted file mode 100644 index 09c704153456..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_drm.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "mock_drm.h" - -struct drm_file *mock_file(struct drm_i915_private *i915) -{ - struct file *filp; - struct inode *inode; - struct drm_file *file; - int err; - - inode = kzalloc(sizeof(*inode), GFP_KERNEL); - if (!inode) { - err = -ENOMEM; - goto err; - } - - inode->i_rdev = i915->drm.primary->index; - - filp = kzalloc(sizeof(*filp), GFP_KERNEL); - if (!filp) { - err = -ENOMEM; - goto err_inode; - } - - err = drm_open(inode, filp); - if (err) - goto err_filp; - - file = filp->private_data; - memset(&file->filp, POISON_INUSE, sizeof(file->filp)); - file->authenticated = true; - - kfree(filp); - kfree(inode); - return file; - -err_filp: - kfree(filp); -err_inode: - kfree(inode); -err: - return ERR_PTR(err); -} - -void mock_file_free(struct drm_i915_private *i915, struct drm_file *file) -{ - struct file filp = { .private_data = file }; - - drm_release(NULL, &filp); -} diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.h b/drivers/gpu/drm/i915/selftests/mock_drm.h index b39beee9f8f6..9916b6f95526 100644 --- a/drivers/gpu/drm/i915/selftests/mock_drm.h +++ b/drivers/gpu/drm/i915/selftests/mock_drm.h @@ -25,7 +25,21 @@ #ifndef __MOCK_DRM_H #define __MOCK_DRM_H -struct drm_file *mock_file(struct drm_i915_private *i915); -void mock_file_free(struct drm_i915_private *i915, struct drm_file *file); +#include <drm/drm_file.h> + +#include "i915_drv.h" + +struct drm_file; +struct file; + +static inline struct file *mock_file(struct drm_i915_private *i915) +{ + return mock_drm_getfile(i915->drm.primary, O_RDWR); +} + +static inline struct drm_file *to_drm_file(struct file *f) +{ + return f->private_data; +} #endif /* !__MOCK_DRM_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 64bc51400ae7..3b8986983afc 
100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -25,60 +25,49 @@ #include <linux/pm_domain.h> #include <linux/pm_runtime.h> +#include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/mock_engine.h" +#include "intel_memory_region.h" #include "mock_request.h" #include "mock_gem_device.h" #include "mock_gtt.h" #include "mock_uncore.h" +#include "mock_region.h" #include "gem/selftests/mock_context.h" #include "gem/selftests/mock_gem_object.h" void mock_device_flush(struct drm_i915_private *i915) { + struct intel_gt *gt = &i915->gt; struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - do { - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) mock_engine_flush(engine); - } while (i915_retire_requests(i915)); + } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); } static void mock_device_release(struct drm_device *dev) { struct drm_i915_private *i915 = to_i915(dev); - struct intel_engine_cs *engine; - enum intel_engine_id id; - mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - - flush_work(&i915->gem.idle_work); - i915_gem_drain_workqueue(i915); - - mutex_lock(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) - mock_engine_free(engine); - i915_gem_contexts_fini(i915); - mutex_unlock(&i915->drm.struct_mutex); + intel_gt_driver_remove(&i915->gt); - i915_timelines_fini(i915); + i915_gem_driver_release__contexts(i915); - drain_workqueue(i915->wq); + i915_gem_drain_workqueue(i915); i915_gem_drain_freed_objects(i915); - mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(&i915->ggtt); - mutex_unlock(&i915->drm.struct_mutex); - destroy_workqueue(i915->wq); - i915_gemfs_fini(i915); + intel_gt_driver_late_release(&i915->gt); + intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); @@ -102,14 +91,6 @@ static void release_dev(struct device *dev) kfree(pdev); } -static void mock_retire_work_handler(struct work_struct *work) -{ -} - -static void mock_idle_work_handler(struct work_struct *work) -{ -} - static int pm_domain_resume(struct device *dev) { return pm_generic_runtime_resume(dev); @@ -177,15 +158,15 @@ struct drm_i915_private *mock_gem_device(void) I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_2M; - mock_uncore_init(&i915->uncore); + mkwrite_device_info(i915)->memory_regions = REGION_SMEM; + intel_memory_regions_hw_probe(i915); + + mock_uncore_init(&i915->uncore, i915); + i915_gem_init__mm(i915); - intel_gt_pm_init(i915); + intel_gt_init_early(&i915->gt, i915); atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ - - init_waitqueue_head(&i915->gpu_error.wait_queue); - init_waitqueue_head(&i915->gpu_error.reset_queue); - init_srcu_struct(&i915->gpu_error.reset_backoff_srcu); - mutex_init(&i915->gpu_error.wedge_mutex); + i915->gt.awake = -ENODEV; i915->wq = alloc_ordered_workqueue("mock", 0); if (!i915->wq) @@ -193,20 +174,8 @@ struct drm_i915_private *mock_gem_device(void) mock_init_contexts(i915); - INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler); - INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler); - - i915->gt.awake = true; - - i915_timelines_init(i915); - - INIT_LIST_HEAD(&i915->gt.active_rings); - INIT_LIST_HEAD(&i915->gt.closed_vma); - spin_lock_init(&i915->gt.closed_lock); - - mutex_lock(&i915->drm.struct_mutex); - mock_init_ggtt(i915, &i915->ggtt); + i915->gt.vm = i915_vm_get(&i915->ggtt.vm); 
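/*
 * Editor's sketch, not part of the diff above: the usual shape of a mock
 * selftest entry point built around mock_gem_device(), mirroring
 * intel_memory_region_mock_selftests() earlier in this diff. The
 * igt_example_subtest() and example_mock_selftests() names are placeholders,
 * not real subtests in the patch.
 */
static int igt_example_subtest(void *arg)
{
	return 0; /* placeholder body */
}

static int __maybe_unused example_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_example_subtest),
	};
	struct drm_i915_private *i915;
	int err;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);
	return err;
}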
mkwrite_device_info(i915)->engine_mask = BIT(0); @@ -214,28 +183,21 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->engine[RCS0]) goto err_unlock; - i915->kernel_context = mock_context(i915, NULL); - if (!i915->kernel_context) - goto err_engine; - if (mock_engine_init(i915->engine[RCS0])) goto err_context; - mutex_unlock(&i915->drm.struct_mutex); - - WARN_ON(i915_gemfs_init(i915)); + __clear_bit(I915_WEDGED, &i915->gt.reset.flags); + intel_engines_driver_register(i915); return i915; err_context: - i915_gem_contexts_fini(i915); -err_engine: - mock_engine_free(i915->engine[RCS0]); + intel_gt_driver_remove(&i915->gt); err_unlock: - mutex_unlock(&i915->drm.struct_mutex); - i915_timelines_fini(i915); destroy_workqueue(i915->wq); err_drv: + intel_gt_driver_late_release(&i915->gt); + intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); drm_dev_fini(&i915->drm); put_device: diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index f625c307a406..edc5e3dda8ca 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -43,7 +43,7 @@ static int mock_bind_ppgtt(struct i915_vma *vma, u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); - vma->flags |= I915_VMA_LOCAL_BIND; + set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma)); return 0; } @@ -55,6 +55,11 @@ static void mock_cleanup(struct i915_address_space *vm) { } +static void mock_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ +} + struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) { struct i915_ppgtt *ppgtt; @@ -63,13 +68,14 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) if (!ppgtt) return NULL; + ppgtt->vm.gt = &i915->gt; ppgtt->vm.i915 = i915; ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); ppgtt->vm.file = ERR_PTR(-ENODEV); i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); - ppgtt->vm.clear_range = nop_clear_range; + ppgtt->vm.clear_range = mock_clear_range; ppgtt->vm.insert_page = mock_insert_page; ppgtt->vm.insert_entries = mock_insert_entries; ppgtt->vm.cleanup = mock_cleanup; @@ -86,7 +92,7 @@ static int mock_bind_ggtt(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); return 0; } @@ -98,6 +104,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) { memset(ggtt, 0, sizeof(*ggtt)); + ggtt->vm.gt = &i915->gt; ggtt->vm.i915 = i915; ggtt->vm.is_ggtt = true; @@ -105,7 +112,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) ggtt->mappable_end = resource_size(&ggtt->gmadr); ggtt->vm.total = 4096 * PAGE_SIZE; - ggtt->vm.clear_range = nop_clear_range; + ggtt->vm.clear_range = mock_clear_range; ggtt->vm.insert_page = mock_insert_page; ggtt->vm.insert_entries = mock_insert_entries; ggtt->vm.cleanup = mock_cleanup; @@ -116,6 +123,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) ggtt->vm.vma_ops.clear_pages = clear_pages; i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); + i915->gt.ggtt = ggtt; } void mock_fini_ggtt(struct i915_ggtt *ggtt) diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h index 3387393286de..e3f224f43beb 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.h +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h @@ -25,6 +25,9 @@ #ifndef 
__MOCK_GTT_H #define __MOCK_GTT_H +struct drm_i915_private; +struct i915_ggtt; + void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt); void mock_fini_ggtt(struct i915_ggtt *ggtt); diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c new file mode 100644 index 000000000000..b2ad41c27e67 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "gem/i915_gem_region.h" +#include "intel_memory_region.h" + +#include "mock_region.h" + +static const struct drm_i915_gem_object_ops mock_region_obj_ops = { + .get_pages = i915_gem_object_get_pages_buddy, + .put_pages = i915_gem_object_put_pages_buddy, + .release = i915_gem_object_release_memory_region, +}; + +static struct drm_i915_gem_object * +mock_object_create(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + static struct lock_class_key lock_class; + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj; + + if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &mock_region_obj_ops, &lock_class); + + obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + + i915_gem_object_init_memory_region(obj, mem, flags); + + return obj; +} + +static const struct intel_memory_region_ops mock_region_ops = { + .init = intel_memory_region_init_buddy, + .release = intel_memory_region_release_buddy, + .create_object = mock_object_create, +}; + +struct intel_memory_region * +mock_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start) +{ + return intel_memory_region_create(i915, start, size, min_page_size, + io_start, &mock_region_ops); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_region.h b/drivers/gpu/drm/i915/selftests/mock_region.h new file mode 100644 index 000000000000..329bf74dfaca --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_region.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __MOCK_REGION_H +#define __MOCK_REGION_H + +#include <linux/types.h> + +struct drm_i915_private; +struct intel_memory_region; + +struct intel_memory_region * +mock_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start); + +#endif /* !__MOCK_REGION_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 9390fc09984b..09f747228dff 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -28,14 +28,12 @@ #include "mock_request.h" struct i915_request * -mock_request(struct intel_engine_cs *engine, - struct i915_gem_context *context, - unsigned long delay) +mock_request(struct intel_context *ce, unsigned long delay) { struct i915_request *request; /* NB the i915->requests slab cache is enlarged to fit mock_request */ - request = igt_request_alloc(context, engine); + request = intel_context_create_request(ce); if (IS_ERR(request)) return NULL; diff --git 
a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h index 4acf0211df20..8907b60c290d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.h +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -30,9 +30,7 @@ #include "../i915_request.h" struct i915_request * -mock_request(struct intel_engine_cs *engine, - struct i915_gem_context *context, - unsigned long delay); +mock_request(struct intel_context *ce, unsigned long delay); bool mock_cancel_request(struct i915_request *request); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h deleted file mode 100644 index b6deaa61110d..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2017-2018 Intel Corporation - */ - -#ifndef __MOCK_TIMELINE__ -#define __MOCK_TIMELINE__ - -struct i915_timeline; - -void mock_timeline_init(struct i915_timeline *timeline, u64 context); -void mock_timeline_fini(struct i915_timeline *timeline); - -#endif /* !__MOCK_TIMELINE__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c index ff8999c63a12..ca57e4008701 100644 --- a/drivers/gpu/drm/i915/selftests/mock_uncore.c +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c @@ -39,8 +39,11 @@ __nop_read(16) __nop_read(32) __nop_read(64) -void mock_uncore_init(struct intel_uncore *uncore) +void mock_uncore_init(struct intel_uncore *uncore, + struct drm_i915_private *i915) { - ASSIGN_WRITE_MMIO_VFUNCS(uncore, nop); - ASSIGN_READ_MMIO_VFUNCS(uncore, nop); + intel_uncore_init_early(uncore, i915); + + ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, nop); + ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, nop); } diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h index dacb36b5ffcd..7acf1ef4d488 100644 --- a/drivers/gpu/drm/i915/selftests/mock_uncore.h +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h @@ -25,6 +25,10 @@ #ifndef __MOCK_UNCORE_H #define __MOCK_UNCORE_H -void mock_uncore_init(struct intel_uncore *uncore); +struct drm_i915_private; +struct intel_uncore; + +void mock_uncore_init(struct intel_uncore *uncore, + struct drm_i915_private *i915); #endif /* !__MOCK_UNCORE_H */ |
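
Note on usage (illustrative only, not part of the diff above): with mock_drm.c removed, mock_file() becomes a thin inline wrapper around the DRM core's mock_drm_getfile() and hands back a struct file * instead of a struct drm_file *. Callers are therefore expected to recover the drm_file via to_drm_file() and drop their reference with fput() in place of the deleted mock_file_free(). The sketch below shows that pattern under the usual mock_gem_device()/drm_dev_put() lifetime handling; example_mock_client() is a made-up name for illustration.

/*
 * Illustrative sketch only -- not part of the patch above.
 * Assumes the conventional mock_gem_device() setup/teardown.
 */
#include <linux/err.h>
#include <linux/file.h>

#include "mock_drm.h"
#include "mock_gem_device.h"

static int example_mock_client(void)
{
	struct drm_i915_private *i915;
	struct drm_file *client;
	struct file *file;
	int err = 0;

	i915 = mock_gem_device();	/* fake device, no hardware touched */
	if (!i915)
		return -ENOMEM;

	file = mock_file(i915);		/* anonymous struct file opened on the mock device */
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto out_device;
	}

	client = to_drm_file(file);	/* per-client drm_file lives in file->private_data */
	(void)client;			/* exercise per-client paths here */

	fput(file);			/* replaces the removed mock_file_free() */
out_device:
	drm_dev_put(&i915->drm);	/* final reference releases the mock device */
	return err;
}

The intent of the rework is that a mock client is torn down the same way a real one is: dropping the last file reference runs the normal DRM release path, so no driver-specific free helper is needed.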