summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h183
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c325
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c452
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c251
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c249
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c454
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c147
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c244
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c141
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c348
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c159
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c710
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_df.h65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c252
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c219
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c347
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c215
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c76
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c211
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c154
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c442
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h101
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c211
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c1206
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c883
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h117
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c895
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h351
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c525
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h92
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c438
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c152
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c395
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h123
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c188
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c498
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c206
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/arct_reg_init.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.c103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_i2c.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c182
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_virtual.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c476
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c605
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c98
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c2608
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c978
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c87
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c408
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c92
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c830
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c586
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c827
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c641
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c271
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c1602
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c97
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c384
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c305
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c273
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c108
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c465
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c534
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c1005
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c127
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c732
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c472
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_0.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.c373
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.h52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c700
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c855
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c1799
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c76
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c74
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.h1
204 files changed, 25669 insertions, 8772 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index f6e5c0282fc1..9375e7f12420 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: MIT
config DRM_AMDGPU_SI
bool "Enable amdgpu support for SI parts"
depends on DRM_AMDGPU
@@ -27,7 +27,9 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- depends on HMM_MIRROR
+ depends on MMU
+ select HMM_MIRROR
+ select MMU_NOTIFIER
help
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
isn't already selected to enabled full userptr support.
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 56e084367b93..c2bbcdd9c875 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -53,8 +53,9 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
- amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
- amdgpu_vm_sdma.o amdgpu_discovery.o
+ amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
+ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
+ amdgpu_umc.o smu_v11_0_i2c.o
amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
@@ -66,7 +67,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
- vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o
+ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \
+ arct_reg_init.o navi12_reg_init.o mxgpu_nv.o
# add DF block
amdgpu-y += \
@@ -77,9 +79,13 @@ amdgpu-y += \
amdgpu-y += \
gmc_v7_0.o \
gmc_v8_0.o \
- gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \
+ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o
+# add UMC block
+amdgpu-y += \
+ umc_v6_1.o umc_v6_0.o
+
# add IH block
amdgpu-y += \
amdgpu_irq.o \
@@ -95,7 +101,8 @@ amdgpu-y += \
amdgpu_psp.o \
psp_v3_1.o \
psp_v10_0.o \
- psp_v11_0.o
+ psp_v11_0.o \
+ psp_v12_0.o
# add SMC block
amdgpu-y += \
@@ -113,6 +120,7 @@ amdgpu-y += \
amdgpu_rlc.o \
gfx_v8_0.o \
gfx_v9_0.o \
+ gfx_v9_4.o \
gfx_v10_0.o
# add async DMA block
@@ -140,14 +148,20 @@ amdgpu-y += \
vce_v3_0.o \
vce_v4_0.o
-# add VCN block
+# add VCN and JPEG block
amdgpu-y += \
amdgpu_vcn.o \
vcn_v1_0.o \
- vcn_v2_0.o
+ vcn_v2_0.o \
+ vcn_v2_5.o \
+ amdgpu_jpeg.o \
+ jpeg_v1_0.o \
+ jpeg_v2_0.o \
+ jpeg_v2_5.o
# add ATHUB block
amdgpu-y += \
+ athub_v1_0.o \
athub_v2_0.o
# add amdkfd interfaces
@@ -162,6 +176,7 @@ amdgpu-y += \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o \
amdgpu_amdkfd_gfx_v9.o \
+ amdgpu_amdkfd_arcturus.o \
amdgpu_amdkfd_gfx_v10.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8199d201b43a..da3bcff61b97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -69,10 +69,12 @@
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
#include "amdgpu_vcn.h"
+#include "amdgpu_jpeg.h"
#include "amdgpu_mn.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gfx.h"
#include "amdgpu_sdma.h"
+#include "amdgpu_nbio.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
#include "amdgpu_csa.h"
@@ -86,6 +88,9 @@
#include "amdgpu_smu.h"
#include "amdgpu_discovery.h"
#include "amdgpu_mes.h"
+#include "amdgpu_umc.h"
+#include "amdgpu_mmhub.h"
+#include "amdgpu_df.h"
#define MAX_GPU_INSTANCE 16
@@ -104,6 +109,8 @@ struct amdgpu_mgpu_info
uint32_t num_apu;
};
+#define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH 256
+
/*
* Modules parameters.
*/
@@ -120,6 +127,7 @@ extern int amdgpu_disp_priority;
extern int amdgpu_hw_i2c;
extern int amdgpu_pcie_gen2;
extern int amdgpu_msi;
+extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
extern int amdgpu_dpm;
extern int amdgpu_fw_load_type;
extern int amdgpu_aspm;
@@ -133,6 +141,7 @@ extern int amdgpu_vm_fragment_size;
extern int amdgpu_vm_fault_stop;
extern int amdgpu_vm_debug;
extern int amdgpu_vm_update_mode;
+extern int amdgpu_exp_hw_support;
extern int amdgpu_dc;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
@@ -144,11 +153,7 @@ extern uint amdgpu_sdma_phase_quantum;
extern char *amdgpu_disable_cu;
extern char *amdgpu_virtual_display;
extern uint amdgpu_pp_feature_mask;
-extern int amdgpu_ngg;
-extern int amdgpu_prim_buf_per_se;
-extern int amdgpu_pos_buf_per_se;
-extern int amdgpu_cntl_sb_buf_per_se;
-extern int amdgpu_param_buf_per_se;
+extern uint amdgpu_force_long_training;
extern int amdgpu_job_hang_limit;
extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
@@ -165,6 +170,12 @@ extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
extern int amdgpu_noretry;
+extern int amdgpu_force_asic_type;
+#ifdef CONFIG_HSA_AMD
+extern int sched_policy;
+#else
+static const int sched_policy = KFD_SCHED_POLICY_HWS;
+#endif
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@@ -281,6 +292,9 @@ struct amdgpu_ip_block_version {
const struct amd_ip_funcs *funcs;
};
+#define HW_REV(_Major, _Minor, _Rev) \
+ ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev)))
+
struct amdgpu_ip_block {
struct amdgpu_ip_block_status status;
const struct amdgpu_ip_block_version *version;
@@ -423,7 +437,6 @@ struct amdgpu_fpriv {
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
-int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
@@ -475,7 +488,6 @@ struct amdgpu_cs_parser {
uint64_t bytes_moved_vis_threshold;
uint64_t bytes_moved;
uint64_t bytes_moved_vis;
- struct amdgpu_bo_list_entry *evictable;
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
@@ -532,6 +544,14 @@ struct amdgpu_allowed_register_entry {
bool grbm_indexed;
};
+enum amd_reset_method {
+ AMD_RESET_METHOD_LEGACY = 0,
+ AMD_RESET_METHOD_MODE0,
+ AMD_RESET_METHOD_MODE1,
+ AMD_RESET_METHOD_MODE2,
+ AMD_RESET_METHOD_BACO
+};
+
/*
* ASIC specific functions.
*/
@@ -543,6 +563,7 @@ struct amdgpu_asic_funcs {
u32 sh_num, u32 reg_offset, u32 *value);
void (*set_vga_state)(struct amdgpu_device *adev, bool state);
int (*reset)(struct amdgpu_device *adev);
+ enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);
/* get the reference clock */
u32 (*get_xclk)(struct amdgpu_device *adev);
/* MM block clocks */
@@ -569,6 +590,8 @@ struct amdgpu_asic_funcs {
bool (*need_reset_on_init)(struct amdgpu_device *adev);
/* PCIe replay counter */
uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
+ /* device supports BACO */
+ bool (*supports_baco)(struct amdgpu_device *adev);
};
/*
@@ -613,6 +636,10 @@ struct amdgpu_fw_vram_usage {
u64 size;
struct amdgpu_bo *reserved_bo;
void *va;
+
+ /* GDDR6 training support flag.
+ */
+ bool mem_train_support;
};
/*
@@ -627,91 +654,29 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
+typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
+typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
+
typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
-
-/*
- * amdgpu nbio functions
- *
- */
-struct nbio_hdp_flush_reg {
- u32 ref_and_mask_cp0;
- u32 ref_and_mask_cp1;
- u32 ref_and_mask_cp2;
- u32 ref_and_mask_cp3;
- u32 ref_and_mask_cp4;
- u32 ref_and_mask_cp5;
- u32 ref_and_mask_cp6;
- u32 ref_and_mask_cp7;
- u32 ref_and_mask_cp8;
- u32 ref_and_mask_cp9;
- u32 ref_and_mask_sdma0;
- u32 ref_and_mask_sdma1;
-};
-
struct amdgpu_mmio_remap {
u32 reg_offset;
resource_size_t bus_addr;
};
-struct amdgpu_nbio_funcs {
- const struct nbio_hdp_flush_reg *hdp_flush_reg;
- u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
- u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
- u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
- u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
- u32 (*get_rev_id)(struct amdgpu_device *adev);
- void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
- void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
- u32 (*get_memsize)(struct amdgpu_device *adev);
- void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
- bool use_doorbell, int doorbell_index, int doorbell_size);
- void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
- int doorbell_index);
- void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
- bool enable);
- void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
- bool enable);
- void (*ih_doorbell_range)(struct amdgpu_device *adev,
- bool use_doorbell, int doorbell_index);
- void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
- bool enable);
- void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
- bool enable);
- void (*get_clockgating_state)(struct amdgpu_device *adev,
- u32 *flags);
- void (*ih_control)(struct amdgpu_device *adev);
- void (*init_registers)(struct amdgpu_device *adev);
- void (*detect_hw_virt)(struct amdgpu_device *adev);
- void (*remap_hdp_registers)(struct amdgpu_device *adev);
-};
-
-struct amdgpu_df_funcs {
- void (*sw_init)(struct amdgpu_device *adev);
- void (*enable_broadcast_mode)(struct amdgpu_device *adev,
- bool enable);
- u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
- u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
- void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
- bool enable);
- void (*get_clockgating_state)(struct amdgpu_device *adev,
- u32 *flags);
- void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
- bool enable);
- int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
- int is_enable);
- int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
- int is_disable);
- void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
- uint64_t *count);
-};
/* Define the HW IP blocks will be used in driver , add more if necessary */
enum amd_hw_ip_block_type {
GC_HWIP = 1,
HDP_HWIP,
SDMA0_HWIP,
SDMA1_HWIP,
+ SDMA2_HWIP,
+ SDMA3_HWIP,
+ SDMA4_HWIP,
+ SDMA5_HWIP,
+ SDMA6_HWIP,
+ SDMA7_HWIP,
MMHUB_HWIP,
ATHUB_HWIP,
NBIO_HWIP,
@@ -719,6 +684,7 @@ enum amd_hw_ip_block_type {
MP1_HWIP,
UVD_HWIP,
VCN_HWIP = UVD_HWIP,
+ JPEG_HWIP = VCN_HWIP,
VCE_HWIP,
DF_HWIP,
DCE_HWIP,
@@ -728,10 +694,12 @@ enum amd_hw_ip_block_type {
NBIF_HWIP,
THM_HWIP,
CLK_HWIP,
+ UMC_HWIP,
+ RSMU_HWIP,
MAX_HWIP
};
-#define HWIP_MAX_INSTANCE 6
+#define HWIP_MAX_INSTANCE 8
struct amd_powerplay {
void *pp_handle;
@@ -758,7 +726,6 @@ struct amdgpu_device {
int usec_timeout;
const struct amdgpu_asic_funcs *asic_funcs;
bool shutdown;
- bool need_dma32;
bool need_swiotlb;
bool accel_working;
struct notifier_block acpi_nb;
@@ -783,6 +750,7 @@ struct amdgpu_device {
uint8_t *bios;
uint32_t bios_size;
struct amdgpu_bo *stolen_vga_memory;
+ struct amdgpu_bo *discovery_memory;
uint32_t bios_scratch_reg_offset;
uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH];
@@ -803,6 +771,8 @@ struct amdgpu_device {
amdgpu_wreg_t pcie_wreg;
amdgpu_rreg_t pciep_rreg;
amdgpu_wreg_t pciep_wreg;
+ amdgpu_rreg64_t pcie_rreg64;
+ amdgpu_wreg64_t pcie_wreg64;
/* protects concurrent UVD register access */
spinlock_t uvd_ctx_idx_lock;
amdgpu_rreg_t uvd_ctx_rreg;
@@ -836,6 +806,7 @@ struct amdgpu_device {
dma_addr_t dummy_page_addr;
struct amdgpu_vm_manager vm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
+ unsigned num_vmhubs;
/* memory management */
struct amdgpu_mman mman;
@@ -888,6 +859,12 @@ struct amdgpu_device {
u32 cg_flags;
u32 pg_flags;
+ /* nbio */
+ struct amdgpu_nbio nbio;
+
+ /* mmhub */
+ struct amdgpu_mmhub mmhub;
+
/* gfx */
struct amdgpu_gfx gfx;
@@ -903,6 +880,9 @@ struct amdgpu_device {
/* vcn */
struct amdgpu_vcn vcn;
+ /* jpeg */
+ struct amdgpu_jpeg jpeg;
+
/* firmwares */
struct amdgpu_firmware firmware;
@@ -915,6 +895,9 @@ struct amdgpu_device {
/* KFD */
struct amdgpu_kfd_dev kfd;
+ /* UMC */
+ struct amdgpu_umc umc;
+
/* display related functionality */
struct amdgpu_display_manager dm;
@@ -925,6 +908,9 @@ struct amdgpu_device {
bool enable_mes;
struct amdgpu_mes mes;
+ /* df */
+ struct amdgpu_df df;
+
struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
int num_ip_blocks;
struct mutex mn_lock;
@@ -938,9 +924,6 @@ struct amdgpu_device {
/* soc15 register offset based on ip, instance and segment */
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
- const struct amdgpu_nbio_funcs *nbio_funcs;
- const struct amdgpu_df_funcs *df_funcs;
-
/* delayed work_func for deferring clockgating during resume */
struct delayed_work delayed_init_work;
@@ -965,14 +948,15 @@ struct amdgpu_device {
/* record last mm index being written through WREG32*/
unsigned long last_mm_index;
bool in_gpu_reset;
+ enum pp_mp1_state mp1_state;
struct mutex lock_reset;
struct amdgpu_doorbell_index doorbell_index;
+ struct mutex notifier_lock;
+
int asic_reset_res;
struct work_struct xgmi_reset_work;
- bool in_baco_reset;
-
long gfx_timeout;
long sdma_timeout;
long video_timeout;
@@ -980,6 +964,14 @@ struct amdgpu_device {
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+ /* device pstate */
+ int pstate;
+ /* enable runtime pm on the device */
+ bool runpm;
+
+ bool pm_sysfs_en;
+ bool ucode_sysfs_en;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -994,6 +986,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
void amdgpu_device_fini(struct amdgpu_device *adev);
int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+ uint32_t *buf, size_t size, bool write);
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
uint32_t acc_flags);
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
@@ -1015,10 +1009,14 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define AMDGPU_REGS_IDX (1<<0)
#define AMDGPU_REGS_NO_KIQ (1<<1)
+#define AMDGPU_REGS_KIQ (1<<2)
#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
+#define RREG32_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_KIQ)
+#define WREG32_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_KIQ)
+
#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1033,6 +1031,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
+#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
+#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
@@ -1093,6 +1093,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
*/
#define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
#define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
+#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
#define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
#define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
#define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
@@ -1110,6 +1111,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
+#define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev))
+
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@@ -1125,9 +1129,12 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 *registers,
const u32 array_size);
-bool amdgpu_device_is_px(struct drm_device *dev);
+bool amdgpu_device_supports_boco(struct drm_device *dev);
+bool amdgpu_device_supports_baco(struct drm_device *dev);
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
+int amdgpu_device_baco_enter(struct drm_device *dev);
+int amdgpu_device_baco_exit(struct drm_device *dev);
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
@@ -1165,8 +1172,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
-int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
-int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index eba42c752bca..12247a32f9ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -189,7 +189,7 @@ static int acp_hw_init(void *handle)
u32 val = 0;
u32 count = 0;
struct device *dev;
- struct i2s_platform_data *i2s_pdata;
+ struct i2s_platform_data *i2s_pdata = NULL;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -231,20 +231,21 @@ static int acp_hw_init(void *handle)
adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
GFP_KERNEL);
- if (adev->acp.acp_cell == NULL)
- return -ENOMEM;
+ if (adev->acp.acp_cell == NULL) {
+ r = -ENOMEM;
+ goto failure;
+ }
adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
if (adev->acp.acp_res == NULL) {
- kfree(adev->acp.acp_cell);
- return -ENOMEM;
+ r = -ENOMEM;
+ goto failure;
}
i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
if (i2s_pdata == NULL) {
- kfree(adev->acp.acp_res);
- kfree(adev->acp.acp_cell);
- return -ENOMEM;
+ r = -ENOMEM;
+ goto failure;
}
switch (adev->asic_type) {
@@ -341,14 +342,14 @@ static int acp_hw_init(void *handle)
r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
ACP_DEVS);
if (r)
- return r;
+ goto failure;
for (i = 0; i < ACP_DEVS ; i++) {
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
if (r) {
dev_err(dev, "Failed to add dev to genpd\n");
- return r;
+ goto failure;
}
}
@@ -367,7 +368,8 @@ static int acp_hw_init(void *handle)
break;
if (--count == 0) {
dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
- return -ETIMEDOUT;
+ r = -ETIMEDOUT;
+ goto failure;
}
udelay(100);
}
@@ -384,7 +386,8 @@ static int acp_hw_init(void *handle)
break;
if (--count == 0) {
dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
- return -ETIMEDOUT;
+ r = -ETIMEDOUT;
+ goto failure;
}
udelay(100);
}
@@ -393,6 +396,13 @@ static int acp_hw_init(void *handle)
val &= ~ACP_SOFT_RESET__SoftResetAud_MASK;
cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
return 0;
+
+failure:
+ kfree(i2s_pdata);
+ kfree(adev->acp.acp_res);
+ kfree(adev->acp.acp_cell);
+ kfree(adev->acp.acp_genpd);
+ return r;
}
/**
@@ -517,7 +527,7 @@ static int acp_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = state == AMD_PG_STATE_GATE ? true : false;
+ bool enable = (state == AMD_PG_STATE_GATE);
if (adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->set_powergating_by_smu)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 9fa4f25a3745..8609287620ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -63,40 +63,10 @@ void amdgpu_amdkfd_fini(void)
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
- const struct kfd2kgd_calls *kfd2kgd;
-
- switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_CIK
- case CHIP_KAVERI:
- case CHIP_HAWAII:
- kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
- break;
-#endif
- case CHIP_CARRIZO:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
- break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
- break;
- case CHIP_NAVI10:
- kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();
- break;
- default:
- dev_info(adev->dev, "kfd not supported on this ASIC\n");
- return;
- }
+ bool vf = amdgpu_sriov_vf(adev);
adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
- adev->pdev, kfd2kgd);
+ adev->pdev, adev->asic_type, vf);
if (adev->kfd.dev)
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
@@ -160,14 +130,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->gfx.mec.queue_bitmap,
KGD_MAX_QUEUES);
- /* remove the KIQ bit as well */
- if (adev->gfx.kiq.ring.sched.ready)
- clear_bit(amdgpu_gfx_mec_queue_to_bit(adev,
- adev->gfx.kiq.ring.me - 1,
- adev->gfx.kiq.ring.pipe,
- adev->gfx.kiq.ring.queue),
- gpu_resources.queue_bitmap);
-
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
* nbits is not compile time constant
*/
@@ -197,7 +159,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->doorbell_index.last_non_cp;
}
- kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
+ kgd2kfd_device_init(adev->kfd.dev, adev->ddev, &gpu_resources);
}
}
@@ -651,11 +613,9 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- if (adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->switch_power_profile)
- amdgpu_dpm_switch_power_profile(adev,
- PP_SMC_POWER_PROFILE_COMPUTE,
- !idle);
+ amdgpu_dpm_switch_power_profile(adev,
+ PP_SMC_POWER_PROFILE_COMPUTE,
+ !idle);
}
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
@@ -668,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ if (adev->family == AMDGPU_FAMILY_AI) {
+ int i;
+
+ for (i = 0; i < adev->num_vmhubs; i++)
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+ } else {
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+ }
+
+ return 0;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ uint32_t flush_type = 0;
+ bool all_hub = false;
+
+ if (adev->gmc.xgmi.num_physical_nodes &&
+ adev->asic_type == CHIP_VEGA20)
+ flush_type = 2;
+
+ if (adev->family == AMDGPU_FAMILY_AI)
+ all_hub = true;
+
+ return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+}
+
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@@ -700,33 +692,14 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
return 0;
}
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
-{
- return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
-{
- return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
-{
- return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void)
-{
- return NULL;
-}
-
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
- const struct kfd2kgd_calls *f2g)
+ unsigned int asic_type, bool vf)
{
return NULL;
}
bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources)
{
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b6076d19e442..47b0f2957d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -57,7 +57,7 @@ struct kgd_mem {
unsigned int mapped_to_gpu_memory;
uint64_t va;
- uint32_t mapping_flags;
+ uint32_t alloc_flags;
atomic_t invalid;
struct amdkfd_process_info *process_info;
@@ -136,11 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@@ -178,10 +175,17 @@ uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
+/* Read user wptr from a specified user address space with page fault
+ * disabled. The memory must be pinned and mapped to the hardware when
+ * this is called in hqd_load functions, so it should never fault in
+ * the first place. This resolves a circular lock dependency involving
+ * four locks, including the DQM lock and mmap_sem.
+ */
#define read_user_wptr(mmptr, wptr, dst) \
({ \
bool valid = false; \
if ((mmptr) && (wptr)) { \
+ pagefault_disable(); \
if ((mmptr) == current->mm) { \
valid = !get_user((dst), (wptr)); \
} else if (current->mm == NULL) { \
@@ -189,6 +193,7 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
valid = !get_user((dst), (wptr)); \
unuse_mm(mmptr); \
} \
+ pagefault_enable(); \
} \
valid; \
})
@@ -239,8 +244,9 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
int kgd2kfd_init(void);
void kgd2kfd_exit(void);
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
- const struct kfd2kgd_calls *f2g);
+ unsigned int asic_type, bool vf);
bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);
void kgd2kfd_suspend(struct kfd_dev *kfd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
new file mode 100644
index 000000000000..4bcc175a149d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/mmu_context.h>
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "sdma0/sdma0_4_2_2_offset.h"
+#include "sdma0/sdma0_4_2_2_sh_mask.h"
+#include "sdma1/sdma1_4_2_2_offset.h"
+#include "sdma1/sdma1_4_2_2_sh_mask.h"
+#include "sdma2/sdma2_4_2_2_offset.h"
+#include "sdma2/sdma2_4_2_2_sh_mask.h"
+#include "sdma3/sdma3_4_2_2_offset.h"
+#include "sdma3/sdma3_4_2_2_sh_mask.h"
+#include "sdma4/sdma4_4_2_2_offset.h"
+#include "sdma4/sdma4_4_2_2_sh_mask.h"
+#include "sdma5/sdma5_4_2_2_offset.h"
+#include "sdma5/sdma5_4_2_2_sh_mask.h"
+#include "sdma6/sdma6_4_2_2_offset.h"
+#include "sdma6/sdma6_4_2_2_sh_mask.h"
+#include "sdma7/sdma7_4_2_2_offset.h"
+#include "sdma7/sdma7_4_2_2_sh_mask.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v9_4.h"
+
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+ return (struct amdgpu_device *)kgd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ default:
+ dev_warn(adev->dev,
+ "Invalid sdma engine id (%d), using engine id 0\n",
+ engine_id);
+ /* fall through */
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
+ break;
+ case 2:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
+ mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+ break;
+ case 3:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
+ mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
+ break;
+ case 4:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
+ mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
+ break;
+ case 5:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
+ mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
+ break;
+ case 6:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
+ mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
+ break;
+ case 7:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
+ mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
+ break;
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
+
+ gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+const struct kfd2kgd_calls arcturus_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_gfx_v9_address_watch_disable,
+ .address_watch_execute = kgd_gfx_v9_address_watch_execute,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .get_tile_config = kgd_gfx_v9_get_tile_config,
+ .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
+ .get_hive_id = amdgpu_amdkfd_get_hive_id,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 0723f800e815..a7b17c8deb00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -19,19 +19,9 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#undef pr_fmt
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
-#include <linux/module.h>
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
-#include <linux/firmware.h>
#include <linux/mmu_context.h>
-#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
-#include "amdgpu_ucode.h"
-#include "soc15_hw_ip.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "navi10_enum.h"
@@ -43,6 +33,7 @@
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"
+#include "gfxhub_v2_0.h"
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -51,63 +42,6 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
};
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t sh_mem_config,
- uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases);
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid);
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
- uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
- uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
- uint32_t engine_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
- enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int utimeout);
-#if 0
-static uint32_t get_watch_base_addr(struct amdgpu_device *adev);
-#endif
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
-
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
*/
@@ -140,37 +74,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
return 0;
}
-static const struct kfd2kgd_calls kfd2kgd = {
- .program_sh_mem_settings = kgd_program_sh_mem_settings,
- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_interrupts = kgd_init_interrupts,
- .hqd_load = kgd_hqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
- .hqd_dump = kgd_hqd_dump,
- .hqd_sdma_dump = kgd_hqd_sdma_dump,
- .hqd_is_occupied = kgd_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
- .hqd_destroy = kgd_hqd_destroy,
- .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
- .wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
- .get_atc_vmid_pasid_mapping_pasid =
- get_atc_vmid_pasid_mapping_pasid,
- .get_atc_vmid_pasid_mapping_valid =
- get_atc_vmid_pasid_mapping_valid,
- .invalidate_tlbs = invalidate_tlbs,
- .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
- .set_vm_context_page_table_base = set_vm_context_page_table_base,
- .get_tile_config = amdgpu_amdkfd_get_tile_config,
-};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions()
-{
- return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
return (struct amdgpu_device *)kgd;
@@ -204,13 +107,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
lock_srbm(kgd, mec, pipe, queue_id, 0);
}
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
- unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
- queue_id) & 31;
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
- return ((uint32_t)1) << bit;
+ return 1ull << bit;
}
static void release_queue(struct kgd_dev *kgd)
@@ -251,11 +154,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
ATC_VMID0_PASID_MAPPING__VALID_MASK;
pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);
- /*
- * need to do this twice, once for gfx and once for mmhub
- * for ATC add 16 to VMID for mmhub, for IH different registers.
- * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
- */
pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
@@ -307,11 +205,11 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
return 0;
}
-static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
{
- uint32_t base[2] = {
+ uint32_t sdma_engine_reg_base[2] = {
SOC15_REG_OFFSET(SDMA0, 0,
mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
/* On gfx10, mmSDMA1_xxx registers are defined NOT based
@@ -323,12 +221,12 @@ static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
SOC15_REG_OFFSET(SDMA1, 0,
mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
};
- uint32_t retval;
- retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
- mmSDMA0_RLC0_RB_CNTL);
+ uint32_t retval = sdma_engine_reg_base[engine_id]
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
- pr_debug("sdma base address: 0x%x\n", retval);
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
return retval;
}
@@ -370,21 +268,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
acquire_queue(kgd, pipe_id, queue_id);
- /* HIQ is set during driver init period with vmid set to 0*/
- if (m->cp_hqd_vmid == 0) {
- uint32_t value, mec, pipe;
-
- mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
- pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
- pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
- mec, pipe, queue_id);
- value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
- value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
- ((mec << 5) | (pipe << 3) | queue_id | 0x80));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
- }
-
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -434,9 +317,10 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
lower_32_bits((uint64_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uint64_t)wptr));
- pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
- get_queue_mask(adev, pipe_id, queue_id));
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
@@ -452,6 +336,59 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
return 0;
}
+static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct v10_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ release_queue(kgd);
+
+ return r;
+}
+
static int kgd_hqd_dump(struct kgd_dev *kgd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
@@ -489,72 +426,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
- uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+ uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- pr_debug("sdma load base addr %x for engine %d, queue %d\n", sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id);
- sdmax_gfx_context_cntl = m->sdma_engine_id ?
- SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
- SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
- data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- data = RREG32(sdmax_gfx_context_cntl);
- data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(sdmax_gfx_context_cntl, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
m->sdmax_rlcx_doorbell_offset);
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
if (read_user_wptr(mm, wptr64, data64)) {
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
lower_32_bits(data64));
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
upper_32_bits(data64));
} else {
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
@@ -564,28 +496,26 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
uint32_t (**dump)[2], uint32_t *n_regs)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- pr_debug("sdma dump engine id %d queue_id %d\n", engine_id, queue_id);
- pr_debug("sdma base addr %x\n", sdma_base_addr);
-
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -619,14 +549,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
@@ -747,157 +677,52 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
- m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
m->sdmax_rlcx_rb_rptr_hi =
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
return 0;
}
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint8_t vmid, uint16_t *p_pasid)
{
- uint32_t reg;
+ uint32_t value;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid)
-{
- uint32_t reg;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
- + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
-}
-
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- uint32_t req = (1 << vmid) |
- (0 << GCVM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT) |/* legacy */
- GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PTES_MASK |
- GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE0_MASK |
- GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE1_MASK |
- GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE2_MASK |
- GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L1_PTES_MASK;
-
- mutex_lock(&adev->srbm_mutex);
-
- /* Use light weight invalidation.
- *
- * TODO 1: agree on the right set of invalidation registers for
- * KFD use. Use the last one for now. Invalidate only GCHUB as
- * SDMA is now moved to GCHUB
- *
- * TODO 2: support range-based invalidation, requires kfg2kgd
- * interface change
- */
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32),
- 0xffffffff);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32),
- 0x0000001f);
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ), req);
-
- while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK)) &
- (1 << vmid)))
- cpu_relax();
-
- mutex_unlock(&adev->srbm_mutex);
-}
-
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-{
- signed long r;
- uint32_t seq;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
- spin_lock(&adev->gfx.kiq.ring_lock);
- amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
- amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
- amdgpu_ring_write(ring,
- PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
- PACKET3_INVALIDATE_TLBS_PASID(pasid));
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
-
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- DRM_ERROR("wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
-}
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
- if (amdgpu_emu_mode == 0 && ring->sched.ready)
- return invalidate_tlbs_with_kiq(adev, pasid);
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
- if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
- if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
- == pasid) {
- write_vmid_invalidate_request(kgd, vmid);
- break;
- }
- }
- }
-
- return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid %d\n", vmid);
- return 0;
- }
-
- write_vmid_invalidate_request(kgd, vmid);
- return 0;
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -950,7 +775,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint64_t base = page_table_base | AMDGPU_PTE_VALID;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
@@ -958,18 +782,30 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
return;
}
- /* TODO: take advantage of per-process address space size. For
- * now, all processes share the same address space size, like
- * on GFX8 and older.
- */
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
- lower_32_bits(adev->vm_manager.max_pfn - 1));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
- upper_32_bits(adev->vm_manager.max_pfn - 1));
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+ /* SDMA is on gfxhub as well for Navi1* series */
+ gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}
+
+const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hiq_mqd_load = kgd_hiq_mqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_info =
+ get_atc_vmid_pasid_mapping_info,
+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .get_hive_id = amdgpu_amdkfd_get_hive_id,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 5f459bf5f622..8f052e98a3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -20,8 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include "amdgpu.h"
@@ -86,65 +84,6 @@ union TCP_WATCH_CNTL_BITS {
float f32All;
};
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
- uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
-
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid);
-
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
- uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
- uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
- uint32_t engine_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
- enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int utimeout);
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
-
-static void set_scratch_backing_va(struct kgd_dev *kgd,
- uint64_t va, uint32_t vmid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
-static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
-
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
*/
@@ -170,37 +109,6 @@ static int get_tile_config(struct kgd_dev *kgd,
return 0;
}
-static const struct kfd2kgd_calls kfd2kgd = {
- .program_sh_mem_settings = kgd_program_sh_mem_settings,
- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_interrupts = kgd_init_interrupts,
- .hqd_load = kgd_hqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
- .hqd_dump = kgd_hqd_dump,
- .hqd_sdma_dump = kgd_hqd_sdma_dump,
- .hqd_is_occupied = kgd_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
- .hqd_destroy = kgd_hqd_destroy,
- .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
- .wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
- .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
- .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
- .set_scratch_backing_va = set_scratch_backing_va,
- .get_tile_config = get_tile_config,
- .set_vm_context_page_table_base = set_vm_context_page_table_base,
- .invalidate_tlbs = invalidate_tlbs,
- .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
- .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
-};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
-{
- return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
return (struct amdgpu_device *)kgd;
@@ -303,14 +211,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
return 0;
}
-static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m)
{
uint32_t retval;
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
- pr_debug("sdma base address: 0x%x\n", retval);
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
+ m->sdma_engine_id, m->sdma_queue_id, retval);
return retval;
}
@@ -413,60 +322,52 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
unsigned long end_jiffies;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t data;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
- data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- if (m->sdma_engine_id) {
- data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
- data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
- } else {
- data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
- data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
- }
data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdma_rlc_rb_rptr);
if (read_user_wptr(mm, wptr, data))
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
else
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
m->sdma_rlc_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
m->sdma_rlc_virtual_addr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
m->sdma_rlc_rb_base_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdma_rlc_rb_rptr_addr_lo);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdma_rlc_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
@@ -524,13 +425,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
- sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
@@ -645,32 +546,34 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
- m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdma_rlc_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
return 0;
}
@@ -758,24 +661,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint8_t vmid, uint16_t *p_pasid)
{
- uint32_t reg;
+ uint32_t value;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid)
-{
- uint32_t reg;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
- reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
static void set_scratch_backing_va(struct kgd_dev *kgd,
@@ -801,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
lower_32_bits(page_table_base));
}
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
- unsigned int tmp;
-
- if (adev->in_gpu_reset)
- return -EIO;
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
-
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
- }
-
- return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid\n");
- return 0;
- }
-
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- return 0;
-}
-
/**
* read_vmid_from_vmfault_reg - read vmid from register
*
@@ -855,3 +711,26 @@ static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}
+
+const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = get_tile_config,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6d2f61449606..19a10db93d68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -20,9 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <linux/module.h>
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include "amdgpu.h"
@@ -44,62 +41,6 @@ enum hqd_dequeue_request_type {
RESET_WAVES
};
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t sh_mem_config,
- uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases);
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid);
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
- uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
- uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
- uint32_t engine_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
- enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int utimeout);
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
-static void set_scratch_backing_va(struct kgd_dev *kgd,
- uint64_t va, uint32_t vmid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
-
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
*/
@@ -125,38 +66,6 @@ static int get_tile_config(struct kgd_dev *kgd,
return 0;
}
-static const struct kfd2kgd_calls kfd2kgd = {
- .program_sh_mem_settings = kgd_program_sh_mem_settings,
- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_interrupts = kgd_init_interrupts,
- .hqd_load = kgd_hqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
- .hqd_dump = kgd_hqd_dump,
- .hqd_sdma_dump = kgd_hqd_sdma_dump,
- .hqd_is_occupied = kgd_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
- .hqd_destroy = kgd_hqd_destroy,
- .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
- .wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
- .get_atc_vmid_pasid_mapping_pasid =
- get_atc_vmid_pasid_mapping_pasid,
- .get_atc_vmid_pasid_mapping_valid =
- get_atc_vmid_pasid_mapping_valid,
- .set_scratch_backing_va = set_scratch_backing_va,
- .get_tile_config = get_tile_config,
- .set_vm_context_page_table_base = set_vm_context_page_table_base,
- .invalidate_tlbs = invalidate_tlbs,
- .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
-};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
-{
- return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
return (struct amdgpu_device *)kgd;
@@ -260,13 +169,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
return 0;
}
-static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
+static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m)
{
uint32_t retval;
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
- pr_debug("sdma base address: 0x%x\n", retval);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
+ m->sdma_engine_id, m->sdma_queue_id, retval);
return retval;
}
@@ -398,59 +309,51 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
unsigned long end_jiffies;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t data;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
- data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- if (m->sdma_engine_id) {
- data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
- data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
- } else {
- data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
- data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
- }
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
if (read_user_wptr(mm, wptr, data))
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
else
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
m->sdmax_rlcx_virtual_addr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
@@ -517,13 +420,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
- sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
@@ -641,54 +544,48 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
- m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
return 0;
}
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint8_t vmid, uint16_t *p_pasid)
{
- uint32_t reg;
+ uint32_t value;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid)
-{
- uint32_t reg;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
- reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -760,41 +657,25 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
lower_32_bits(page_table_base));
}
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
- unsigned int tmp;
-
- if (adev->in_gpu_reset)
- return -EIO;
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
-
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
- }
-
- return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid %d\n", vmid);
- return -EINVAL;
- }
-
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- return 0;
-}
+const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_info =
+ get_atc_vmid_pasid_mapping_info,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = get_tile_config,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 85395f2d83a6..8562afe5b761 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -19,17 +19,10 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
-#include <linux/module.h>
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
-#include "soc15_hw_ip.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
@@ -49,75 +42,17 @@
#include "gfxhub_v1_0.h"
-#define V9_PIPE_PER_MEC (4)
-#define V9_QUEUES_PER_PIPE_MEC (8)
-
enum hqd_dequeue_request_type {
NO_ACTION = 0,
DRAIN_PIPE,
RESET_WAVES
};
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t sh_mem_config,
- uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases);
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid);
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
- uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
- uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
- uint32_t engine_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
- enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int utimeout);
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base);
-static void set_scratch_backing_va(struct kgd_dev *kgd,
- uint64_t va, uint32_t vmid);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
*/
-static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
struct tile_config *config)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@@ -135,39 +70,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
return 0;
}
-static const struct kfd2kgd_calls kfd2kgd = {
- .program_sh_mem_settings = kgd_program_sh_mem_settings,
- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_interrupts = kgd_init_interrupts,
- .hqd_load = kgd_hqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
- .hqd_dump = kgd_hqd_dump,
- .hqd_sdma_dump = kgd_hqd_sdma_dump,
- .hqd_is_occupied = kgd_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
- .hqd_destroy = kgd_hqd_destroy,
- .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
- .wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
- .get_atc_vmid_pasid_mapping_pasid =
- get_atc_vmid_pasid_mapping_pasid,
- .get_atc_vmid_pasid_mapping_valid =
- get_atc_vmid_pasid_mapping_valid,
- .set_scratch_backing_va = set_scratch_backing_va,
- .get_tile_config = amdgpu_amdkfd_get_tile_config,
- .set_vm_context_page_table_base = set_vm_context_page_table_base,
- .invalidate_tlbs = invalidate_tlbs,
- .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
- .get_hive_id = amdgpu_amdkfd_get_hive_id,
-};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
-{
- return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
return (struct amdgpu_device *)kgd;
@@ -201,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
lock_srbm(kgd, mec, pipe, queue_id, 0);
}
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
- unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
- queue_id) & 31;
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
- return ((uint32_t)1) << bit;
+ return 1ull << bit;
}
static void release_queue(struct kgd_dev *kgd)
@@ -215,7 +117,7 @@ static void release_queue(struct kgd_dev *kgd)
unlock_srbm(kgd);
}
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
@@ -232,7 +134,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
unlock_srbm(kgd);
}
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -293,7 +195,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
* but still works
*/
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
@@ -313,22 +215,21 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
return 0;
}
-static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
{
- uint32_t base[2] = {
+ uint32_t sdma_engine_reg_base[2] = {
SOC15_REG_OFFSET(SDMA0, 0,
mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
SOC15_REG_OFFSET(SDMA1, 0,
mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
};
- uint32_t retval;
-
- retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
- mmSDMA0_RLC0_RB_CNTL);
+ uint32_t retval = sdma_engine_reg_base[engine_id]
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
- pr_debug("sdma base address: 0x%x\n", retval);
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
return retval;
}
@@ -343,7 +244,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v9_sdma_mqd *)mqd;
}
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm)
@@ -357,21 +258,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
acquire_queue(kgd, pipe_id, queue_id);
- /* HIQ is set during driver init period with vmid set to 0*/
- if (m->cp_hqd_vmid == 0) {
- uint32_t value, mec, pipe;
-
- mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
- pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
- pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
- mec, pipe, queue_id);
- value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
- value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
- ((mec << 5) | (pipe << 3) | queue_id | 0x80));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
- }
-
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -422,7 +308,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
- get_queue_mask(adev, pipe_id, queue_id));
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
@@ -438,7 +324,60 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
return 0;
}
-static int kgd_hqd_dump(struct kgd_dev *kgd,
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct v9_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ release_queue(kgd);
+
+ return r;
+}
+
+int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
@@ -475,71 +414,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
- uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+ uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- sdmax_gfx_context_cntl = m->sdma_engine_id ?
- SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
- SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
- data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- data = RREG32(sdmax_gfx_context_cntl);
- data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(sdmax_gfx_context_cntl, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
m->sdmax_rlcx_doorbell_offset);
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
if (read_user_wptr(mm, wptr64, data64)) {
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
lower_32_bits(data64));
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
upper_32_bits(data64));
} else {
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
@@ -549,7 +484,8 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
uint32_t (**dump)[2], uint32_t *n_regs)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
@@ -559,15 +495,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return -ENOMEM;
for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -575,7 +511,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -601,14 +537,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
@@ -616,7 +552,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
@@ -671,155 +607,60 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
- m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
m->sdmax_rlcx_rb_rptr_hi =
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
return 0;
}
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid)
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint8_t vmid, uint16_t *p_pasid)
{
- uint32_t reg;
+ uint32_t value;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid)
-{
- uint32_t reg;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
- reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
- + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
- uint32_t flush_type)
+int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
{
- signed long r;
- uint32_t seq;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
- spin_lock(&adev->gfx.kiq.ring_lock);
- amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
- amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
- amdgpu_ring_write(ring,
- PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
- PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
- PACKET3_INVALIDATE_TLBS_PASID(pasid) |
- PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
-
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- DRM_ERROR("wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
-}
-
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- uint32_t flush_type = 0;
-
- if (adev->in_gpu_reset)
- return -EIO;
- if (adev->gmc.xgmi.num_physical_nodes &&
- adev->asic_type == CHIP_VEGA20)
- flush_type = 2;
-
- if (ring->sched.ready)
- return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
- if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
- if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
- == pasid) {
- amdgpu_gmc_flush_gpu_tlb(adev, vmid,
- flush_type);
- break;
- }
- }
- }
-
return 0;
}
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid %d\n", vmid);
- return 0;
- }
-
- /* Use legacy mode tlb invalidation.
- *
- * Currently on Raven the code below is broken for anything but
- * legacy mode due to a MMHUB power gating problem. A workaround
- * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
- * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
- * bit.
- *
- * TODO 1: agree on the right set of invalidation registers for
- * KFD use. Use the last one for now. Invalidate both GC and
- * MMHUB.
- *
- * TODO 2: support range-based invalidation, requires kfg2kgd
- * interface change
- */
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
- return 0;
-}
-
-static int kgd_address_watch_disable(struct kgd_dev *kgd)
-{
- return 0;
-}
-
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
+int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
unsigned int watch_point_id,
uint32_t cntl_val,
uint32_t addr_hi,
@@ -828,7 +669,7 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd,
return 0;
}
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
+int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
uint32_t gfx_index_val,
uint32_t sq_cmd)
{
@@ -853,24 +694,15 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
return 0;
}
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
unsigned int watch_point_id,
unsigned int reg_offset)
{
return 0;
}
-static void set_scratch_backing_va(struct kgd_dev *kgd,
- uint64_t va, uint32_t vmid)
-{
- /* No longer needed on GFXv9. The scratch base address is
- * passed to the shader by the CP. It's the user mode driver's
- * responsibility.
- */
-}
-
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
+static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
+ uint32_t vmid, uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -880,11 +712,31 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
return;
}
- /* TODO: take advantage of per-process address space size. For
- * now, all processes share the same address space size, like
- * on GFX8 and older.
- */
mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}
+
+const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_gfx_v9_address_watch_disable,
+ .address_watch_execute = kgd_gfx_v9_address_watch_execute,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .get_tile_config = kgd_gfx_v9_get_tile_config,
+ .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+ .get_hive_id = amdgpu_amdkfd_get_hive_id,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
new file mode 100644
index 000000000000..63d3e6683dfe
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases);
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid);
+int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm);
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off);
+int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id);
+int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id);
+int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd);
+int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo);
+int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd);
+uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset);
+
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint8_t vmid, uint16_t *p_pasid);
+int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
+ struct tile_config *config);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6a5c96e519b1..fa8ac9d19a7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -19,9 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
#include <linux/dma-buf.h>
#include <linux/list.h>
#include <linux/pagemap.h>
@@ -33,11 +30,6 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_dma_buf.h"
-/* Special VM and GART address alignment needed for VI pre-Fiji due to
- * a HW bug.
- */
-#define VI_BO_SIZE_ALIGN (0x8000)
-
/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
@@ -93,7 +85,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
}
/* Set memory usage limits. Current, limits are
- * System (TTM + userptr) memory - 3/4th System RAM
+ * System (TTM + userptr) memory - 15/16th System RAM
* TTM memory - 3/8th System RAM
*/
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
@@ -106,18 +98,31 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
- kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
+/* Estimate page table size needed to represent a given memory size
+ *
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
+ * for 2MB pages for TLB efficiency. However, small allocations and
+ * fragmented system memory still need some 4KB pages. We choose a
+ * compromise that should work in most cases without reserving too
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
+ */
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+
static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain, bool sg)
{
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
- uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
int ret = 0;
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
@@ -218,14 +223,14 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence *ef)
{
- struct reservation_object *resv = bo->tbo.resv;
- struct reservation_object_list *old, *new;
+ struct dma_resv *resv = bo->tbo.base.resv;
+ struct dma_resv_list *old, *new;
unsigned int i, j, k;
if (!ef)
return -EINVAL;
- old = reservation_object_get_list(resv);
+ old = dma_resv_get_list(resv);
if (!old)
return 0;
@@ -241,7 +246,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct dma_fence *f;
f = rcu_dereference_protected(old->shared[i],
- reservation_object_held(resv));
+ dma_resv_held(resv));
if (f->context == ef->base.context)
RCU_INIT_POINTER(new->shared[--j], f);
@@ -263,7 +268,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct dma_fence *f;
f = rcu_dereference_protected(new->shared[i],
- reservation_object_held(resv));
+ dma_resv_held(resv));
dma_fence_put(f);
}
kfree_rcu(old, rcu);
@@ -349,11 +354,44 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
int ret;
- ret = amdgpu_vm_update_directories(adev, vm);
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
if (ret)
return ret;
- return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
+ return amdgpu_sync_fence(sync, vm->last_update, false);
+}
+
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+{
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
+ uint32_t mapping_flags;
+
+ mapping_flags = AMDGPU_VM_PAGE_READABLE;
+ if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+ if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
+ if (bo_adev == adev)
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ } else {
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ }
+ break;
+ default:
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ }
+
+ return amdgpu_gem_va_map_flags(adev, mapping_flags);
}
/* add_bo_to_vm - Add a BO to a VM
@@ -404,8 +442,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
}
bo_va_entry->va = va;
- bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
- mem->mapping_flags);
+ bo_va_entry->pte_flags = get_pte_flags(adev, mem);
bo_va_entry->kgd_dev = (void *)adev;
list_add(&bo_va_entry->bo_list, list_bo_va);
@@ -481,8 +518,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
*
* Returns 0 for success, negative errno for errors.
*/
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
- uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
@@ -586,7 +622,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
- false, &ctx->duplicates, true);
+ false, &ctx->duplicates);
if (!ret)
ctx->reserved = true;
else {
@@ -659,7 +695,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
}
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
- false, &ctx->duplicates, true);
+ false, &ctx->duplicates);
if (!ret)
ctx->reserved = true;
else
@@ -714,7 +750,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
- amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+ amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
return 0;
}
@@ -733,7 +769,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
return ret;
}
- return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -812,7 +848,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
struct amdgpu_bo *pd = peer_vm->root.base.bo;
ret = amdgpu_sync_resv(NULL,
- sync, pd->tbo.resv,
+ sync, pd->tbo.base.resv,
AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
return ret;
@@ -887,7 +923,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
- ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
+ ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);
if (ret)
goto reserve_shared_fail;
amdgpu_bo_fence(vm->root.base.bo,
@@ -1079,10 +1115,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
uint64_t user_addr = 0;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
- int byte_align;
u32 domain, alloc_domain;
u64 alloc_flags;
- uint32_t mapping_flags;
int ret;
/*
@@ -1090,7 +1124,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
*/
if (flags & ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
- alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
@@ -1103,7 +1137,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
alloc_flags = 0;
if (!offset || !*offset)
return -EINVAL;
- user_addr = *offset;
+ user_addr = untagged_addr(*offset);
} else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
ALLOC_MEM_FLAGS_MMIO_REMAP)) {
domain = AMDGPU_GEM_DOMAIN_GTT;
@@ -1135,25 +1169,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if ((*mem)->aql_queue)
size = size >> 1;
- /* Workaround for TLB bug on older VI chips */
- byte_align = (adev->family == AMDGPU_FAMILY_VI &&
- adev->asic_type != CHIP_FIJI &&
- adev->asic_type != CHIP_POLARIS10 &&
- adev->asic_type != CHIP_POLARIS11 &&
- adev->asic_type != CHIP_POLARIS12 &&
- adev->asic_type != CHIP_VEGAM) ?
- VI_BO_SIZE_ALIGN : 1;
-
- mapping_flags = AMDGPU_VM_PAGE_READABLE;
- if (flags & ALLOC_MEM_FLAGS_WRITABLE)
- mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
- if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
- mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
- if (flags & ALLOC_MEM_FLAGS_COHERENT)
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
- else
- mapping_flags |= AMDGPU_VM_MTYPE_NC;
- (*mem)->mapping_flags = mapping_flags;
+ (*mem)->alloc_flags = flags;
amdgpu_sync_create(&(*mem)->sync);
@@ -1168,7 +1184,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
memset(&bp, 0, sizeof(bp));
bp.size = size;
- bp.byte_align = byte_align;
+ bp.byte_align = 1;
bp.domain = alloc_domain;
bp.flags = alloc_flags;
bp.type = bo_type;
@@ -1195,7 +1211,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- ret = init_user_pages(*mem, current->mm, user_addr);
+ ret = init_user_pages(*mem, user_addr);
if (ret)
goto allocate_init_user_pages_failed;
}
@@ -1626,9 +1642,10 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
INIT_LIST_HEAD(&(*mem)->bo_va_list);
mutex_init(&(*mem)->lock);
- (*mem)->mapping_flags =
- AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
- AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
+ (*mem)->alloc_flags =
+ ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+ ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT) |
+ ALLOC_MEM_FLAGS_WRITABLE | ALLOC_MEM_FLAGS_EXECUTABLE;
(*mem)->bo = amdgpu_bo_ref(bo);
(*mem)->va = va;
@@ -1657,10 +1674,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
struct mm_struct *mm)
{
struct amdkfd_process_info *process_info = mem->process_info;
- int invalid, evicted_bos;
+ int evicted_bos;
int r = 0;
- invalid = atomic_inc_return(&mem->invalid);
+ atomic_inc(&mem->invalid);
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
if (evicted_bos == 1) {
/* First eviction, stop the queues */
@@ -1739,6 +1756,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
return ret;
}
+ /*
+ * FIXME: Cannot ignore the return code, must hold
+ * notifier_lock
+ */
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
/* Mark the BO as valid unless it was invalidated
@@ -1797,8 +1818,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
/* Reserve all BOs and page tables for validation */
- ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates,
- true);
+ ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
WARN(!list_empty(&duplicates), "Duplicates should be empty");
if (ret)
goto out_free;
@@ -1996,7 +2016,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
- false, &duplicate_save, true);
+ false, &duplicate_save);
if (ret) {
pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
goto ttm_reserve_fail;
@@ -2028,7 +2048,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
goto validate_map_fail;
}
- ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
+ ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
@@ -2109,6 +2129,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
return -ENOMEM;
mutex_init(&(*mem)->lock);
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
(*mem)->bo = amdgpu_bo_ref(gws_bo);
(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
(*mem)->process_info = process_info;
@@ -2133,7 +2154,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
* Add process eviction fence to bo so they can
* evict each other.
*/
- ret = reservation_object_reserve_shared(gws_bo->tbo.resv, 1);
+ ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
if (ret)
goto reserve_shared_fail;
amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 1c9d40f97a9b..fdd52d86a4d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -338,17 +338,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
path_size += le16_to_cpu(path->usSize);
if (device_support & le16_to_cpu(path->usDeviceTag)) {
- uint8_t con_obj_id, con_obj_num, con_obj_type;
-
- con_obj_id =
+ uint8_t con_obj_id =
(le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK)
>> OBJECT_ID_SHIFT;
- con_obj_num =
- (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK)
- >> ENUM_ID_SHIFT;
- con_obj_type =
- (le16_to_cpu(path->usConnObjectId) &
- OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
/* Skip TV/CV support */
if ((le16_to_cpu(path->usDeviceTag) ==
@@ -373,15 +365,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
router.ddc_valid = false;
router.cd_valid = false;
for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) {
- uint8_t grph_obj_id, grph_obj_num, grph_obj_type;
-
- grph_obj_id =
- (le16_to_cpu(path->usGraphicObjIds[j]) &
- OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
- grph_obj_num =
- (le16_to_cpu(path->usGraphicObjIds[j]) &
- ENUM_ID_MASK) >> ENUM_ID_SHIFT;
- grph_obj_type =
+ uint8_t grph_obj_type =
(le16_to_cpu(path->usGraphicObjIds[j]) &
OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
@@ -2038,6 +2022,11 @@ int amdgpu_atombios_init(struct amdgpu_device *adev)
if (adev->is_atom_fw) {
amdgpu_atomfirmware_scratch_regs_init(adev);
amdgpu_atomfirmware_allocate_fb_scratch(adev);
+ ret = amdgpu_atomfirmware_get_mem_train_info(adev);
+ if (ret) {
+ DRM_ERROR("Failed to get mem train fb location.\n");
+ return ret;
+ }
} else {
amdgpu_atombios_scratch_regs_init(adev);
amdgpu_atombios_allocate_fb_scratch(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index daf687428cdb..58f9d8c3a17a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -27,6 +27,7 @@
#include "amdgpu_atomfirmware.h"
#include "atom.h"
#include "atombios.h"
+#include "soc15_hw_ip.h"
bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
{
@@ -120,65 +121,14 @@ union vram_info {
struct atom_vram_info_header_v2_3 v23;
struct atom_vram_info_header_v2_4 v24;
};
-/*
- * Return vram width from integrated system info table, if available,
- * or 0 if not.
- */
-int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
-{
- struct amdgpu_mode_info *mode_info = &adev->mode_info;
- int index;
- u16 data_offset, size;
- union igp_info *igp_info;
- union vram_info *vram_info;
- u32 mem_channel_number;
- u32 mem_channel_width;
- u8 frev, crev;
- if (adev->flags & AMD_IS_APU)
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- integratedsysteminfo);
- else
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- vram_info);
-
- /* get any igp specific overrides */
- if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
- &frev, &crev, &data_offset)) {
- if (adev->flags & AMD_IS_APU) {
- igp_info = (union igp_info *)
- (mode_info->atom_context->bios + data_offset);
- switch (crev) {
- case 11:
- mem_channel_number = igp_info->v11.umachannelnumber;
- /* channel width is 64 */
- return mem_channel_number * 64;
- default:
- return 0;
- }
- } else {
- vram_info = (union vram_info *)
- (mode_info->atom_context->bios + data_offset);
- switch (crev) {
- case 3:
- mem_channel_number = vram_info->v23.vram_module[0].channel_num;
- mem_channel_width = vram_info->v23.vram_module[0].channel_width;
- return mem_channel_number * (1 << mem_channel_width);
- case 4:
- mem_channel_number = vram_info->v24.vram_module[0].channel_num;
- mem_channel_width = vram_info->v24.vram_module[0].channel_width;
- return mem_channel_number * (1 << mem_channel_width);
- default:
- return 0;
- }
- }
- }
-
- return 0;
-}
+union vram_module {
+ struct atom_vram_module_v9 v9;
+ struct atom_vram_module_v10 v10;
+};
-static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
- int atom_mem_type)
+static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
+ int atom_mem_type)
{
int vram_type;
@@ -219,19 +169,25 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
return vram_type;
}
-/*
- * Return vram type from either integrated system info table
- * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not
- */
-int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
+
+
+int
+amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type,
+ int *vram_vendor)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
- int index;
+ int index, i = 0;
u16 data_offset, size;
union igp_info *igp_info;
union vram_info *vram_info;
+ union vram_module *vram_module;
u8 frev, crev;
u8 mem_type;
+ u8 mem_vendor;
+ u32 mem_channel_number;
+ u32 mem_channel_width;
+ u32 module_id;
if (adev->flags & AMD_IS_APU)
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
@@ -239,6 +195,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
else
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
vram_info);
+
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size,
&frev, &crev, &data_offset)) {
@@ -247,25 +204,67 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
(mode_info->atom_context->bios + data_offset);
switch (crev) {
case 11:
+ mem_channel_number = igp_info->v11.umachannelnumber;
+ /* channel width is 64 */
+ if (vram_width)
+ *vram_width = mem_channel_number * 64;
mem_type = igp_info->v11.memorytype;
- return convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
default:
- return 0;
+ return -EINVAL;
}
} else {
vram_info = (union vram_info *)
(mode_info->atom_context->bios + data_offset);
+ module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
switch (crev) {
case 3:
- mem_type = vram_info->v23.vram_module[0].memory_type;
- return convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (module_id > vram_info->v23.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v23.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
case 4:
- mem_type = vram_info->v24.vram_module[0].memory_type;
- return convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (module_id > vram_info->v24.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v24.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v10.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v10.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v10.channel_num;
+ mem_channel_width = vram_module->v10.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
default:
- return 0;
+ return -EINVAL;
}
}
+
}
return 0;
@@ -464,3 +463,108 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
}
return -EINVAL;
}
+
+/*
+ * Check if VBIOS supports GDDR6 training data save/restore
+ */
+static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev)
+{
+ uint16_t data_offset;
+ int index;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL,
+ NULL, NULL, &data_offset)) {
+ struct atom_firmware_info_v3_1 *firmware_info =
+ (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios +
+ data_offset);
+
+ DRM_DEBUG("atom firmware capability:0x%08x.\n",
+ le32_to_cpu(firmware_info->firmware_capability));
+
+ if (le32_to_cpu(firmware_info->firmware_capability) &
+ ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING)
+ return true;
+ }
+
+ return false;
+}
+
+static int gddr6_mem_train_support(struct amdgpu_device *adev)
+{
+ int ret;
+ uint32_t major, minor, revision, hw_v;
+
+ if (gddr6_mem_train_vbios_support(adev)) {
+ amdgpu_discovery_get_ip_version(adev, MP0_HWID, &major, &minor, &revision);
+ hw_v = HW_REV(major, minor, revision);
+ /*
+ * treat 0 revision as a special case since register for MP0 and MMHUB is missing
+ * for some Navi10 A0, preventing driver from discovering the hwip information since
+ * none of the functions will be initialized, it should not cause any problems
+ */
+ switch (hw_v) {
+ case HW_REV(11, 0, 0):
+ case HW_REV(11, 0, 5):
+ ret = 1;
+ break;
+ default:
+ DRM_ERROR("memory training vbios supports but psp hw(%08x)"
+ " doesn't support!\n", hw_v);
+ ret = -1;
+ break;
+ }
+ } else {
+ ret = 0;
+ hw_v = -1;
+ }
+
+
+ DRM_DEBUG("mp0 hw_v %08x, ret:%d.\n", hw_v, ret);
+ return ret;
+}
+
+int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ int index;
+ uint8_t frev, crev;
+ uint16_t data_offset, size;
+ int ret;
+
+ adev->fw_vram_usage.mem_train_support = false;
+
+ if (adev->asic_type != CHIP_NAVI10 &&
+ adev->asic_type != CHIP_NAVI14)
+ return 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = gddr6_mem_train_support(adev);
+ if (ret == -1)
+ return -EINVAL;
+ else if (ret == 0)
+ return 0;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ vram_usagebyfirmware);
+ ret = amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev,
+ &data_offset);
+ if (ret == 0) {
+ DRM_ERROR("parse data header failed.\n");
+ return -EINVAL;
+ }
+
+ DRM_DEBUG("atom firmware common table header size:0x%04x, frev:0x%02x,"
+ " crev:0x%02x, data_offset:0x%04x.\n", size, frev, crev, data_offset);
+ /* only support 2.1+ */
+ if (((uint16_t)frev << 8 | crev) < 0x0201) {
+ DRM_ERROR("frev:0x%02x, crev:0x%02x < 2.1 !\n", frev, crev);
+ return -EINVAL;
+ }
+
+ adev->fw_vram_usage.mem_train_support = true;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 5ec6f92f353c..434fe2fa0089 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -29,8 +29,9 @@
bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev);
void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
-int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
-int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type, int *vram_vendor);
+int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 9b384a94d2f3..3e35a8f2c5e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -574,6 +574,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x699f, 0x1028, 0x0814, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0, 0, 0, 0, 0 },
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 649e68c4479b..d1495e1c9289 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
{
unsigned long start_jiffies;
unsigned long end_jiffies;
- struct dma_fence *fence = NULL;
+ struct dma_fence *fence;
int i, r;
start_jiffies = jiffies;
@@ -44,16 +44,14 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
if (r)
goto exit_do_move;
- dma_fence_put(fence);
}
end_jiffies = jiffies;
r = jiffies_to_msecs(end_jiffies - start_jiffies);
exit_do_move:
- if (fence)
- dma_fence_put(fence);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 7bcf86c61999..85b0515c0fdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -140,7 +140,12 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return 0;
error_free:
- while (i--) {
+ for (i = 0; i < last_entry; ++i) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
+
+ amdgpu_bo_unref(&bo);
+ }
+ for (i = first_userptr; i < num_entries; ++i) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
amdgpu_bo_unref(&bo);
@@ -270,7 +275,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
r = amdgpu_bo_create_list_entry_array(&args->in, &info);
if (r)
- goto error_free;
+ return r;
switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE:
@@ -283,8 +288,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
mutex_unlock(&fpriv->bo_list_lock);
if (r < 0) {
- amdgpu_bo_list_put(list);
- return r;
+ goto error_put_list;
}
handle = r;
@@ -306,9 +310,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&fpriv->bo_list_lock);
if (IS_ERR(old)) {
- amdgpu_bo_list_put(list);
r = PTR_ERR(old);
- goto error_free;
+ goto error_put_list;
}
amdgpu_bo_list_put(old);
@@ -325,8 +328,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
return 0;
+error_put_list:
+ amdgpu_bo_list_put(list);
+
error_free:
- if (info)
- kvfree(info);
+ kvfree(info);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 73b2ede773d3..a62cbc8199de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -217,11 +217,10 @@ amdgpu_connector_update_scratch_regs(struct drm_connector *connector,
struct drm_encoder *encoder;
const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
bool connected;
- int i;
best_encoder = connector_funcs->best_encoder(connector);
- drm_connector_for_each_possible_encoder(connector, encoder, i) {
+ drm_connector_for_each_possible_encoder(connector, encoder) {
if ((encoder == best_encoder) && (status == connector_status_connected))
connected = true;
else
@@ -236,9 +235,8 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
int encoder_type)
{
struct drm_encoder *encoder;
- int i;
- drm_connector_for_each_possible_encoder(connector, encoder, i) {
+ drm_connector_for_each_possible_encoder(connector, encoder) {
if (encoder->encoder_type == encoder_type)
return encoder;
}
@@ -347,10 +345,9 @@ static struct drm_encoder *
amdgpu_connector_best_single_encoder(struct drm_connector *connector)
{
struct drm_encoder *encoder;
- int i;
/* pick the first one */
- drm_connector_for_each_possible_encoder(connector, encoder, i)
+ drm_connector_for_each_possible_encoder(connector, encoder)
return encoder;
return NULL;
@@ -1022,8 +1019,12 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
*/
if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) {
struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *list_amdgpu_connector;
- list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) {
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(list_connector,
+ &iter) {
if (connector == list_connector)
continue;
list_amdgpu_connector = to_amdgpu_connector(list_connector);
@@ -1040,6 +1041,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
}
}
}
+ drm_connector_list_iter_end(&iter);
}
}
}
@@ -1065,9 +1067,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
/* find analog encoder */
if (amdgpu_connector->dac_load_detect) {
struct drm_encoder *encoder;
- int i;
- drm_connector_for_each_possible_encoder(connector, encoder, i) {
+ drm_connector_for_each_possible_encoder(connector, encoder) {
if (encoder->encoder_type != DRM_MODE_ENCODER_DAC &&
encoder->encoder_type != DRM_MODE_ENCODER_TVDAC)
continue;
@@ -1117,9 +1118,8 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct drm_encoder *encoder;
- int i;
- drm_connector_for_each_possible_encoder(connector, encoder, i) {
+ drm_connector_for_each_possible_encoder(connector, encoder) {
if (amdgpu_connector->use_digital == true) {
if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)
return encoder;
@@ -1134,7 +1134,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
/* then check use digitial */
/* pick the first one */
- drm_connector_for_each_possible_encoder(connector, encoder, i)
+ drm_connector_for_each_possible_encoder(connector, encoder)
return encoder;
return NULL;
@@ -1271,9 +1271,8 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn
{
struct drm_encoder *encoder;
struct amdgpu_encoder *amdgpu_encoder;
- int i;
- drm_connector_for_each_possible_encoder(connector, encoder, i) {
+ drm_connector_for_each_possible_encoder(connector, encoder) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
switch (amdgpu_encoder->encoder_id) {
@@ -1292,10 +1291,9 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector)
{
struct drm_encoder *encoder;
struct amdgpu_encoder *amdgpu_encoder;
- int i;
bool found = false;
- drm_connector_for_each_possible_encoder(connector, encoder, i) {
+ drm_connector_for_each_possible_encoder(connector, encoder) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2)
found = true;
@@ -1501,10 +1499,12 @@ amdgpu_connector_add(struct amdgpu_device *adev,
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector;
struct amdgpu_connector_atom_dig *amdgpu_dig_connector;
struct drm_encoder *encoder;
struct amdgpu_encoder *amdgpu_encoder;
+ struct i2c_adapter *ddc = NULL;
uint32_t subpixel_order = SubPixelNone;
bool shared_ddc = false;
bool is_dp_bridge = false;
@@ -1514,10 +1514,12 @@ amdgpu_connector_add(struct amdgpu_device *adev,
return;
/* see if we already added it */
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->connector_id == connector_id) {
amdgpu_connector->devices |= supported_device;
+ drm_connector_list_iter_end(&iter);
return;
}
if (amdgpu_connector->ddc_bus && i2c_bus->valid) {
@@ -1532,6 +1534,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
}
}
}
+ drm_connector_list_iter_end(&iter);
/* check if it's a dp bridge */
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
@@ -1574,17 +1577,21 @@ amdgpu_connector_add(struct amdgpu_device *adev,
amdgpu_connector->con_priv = amdgpu_dig_connector;
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
- if (amdgpu_connector->ddc_bus)
+ if (amdgpu_connector->ddc_bus) {
has_aux = true;
- else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ } else {
DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ }
}
switch (connector_type) {
case DRM_MODE_CONNECTOR_VGA:
case DRM_MODE_CONNECTOR_DVIA:
default:
- drm_connector_init(dev, &amdgpu_connector->base,
- &amdgpu_connector_dp_funcs, connector_type);
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dp_funcs,
+ connector_type,
+ ddc);
drm_connector_helper_add(&amdgpu_connector->base,
&amdgpu_connector_dp_helper_funcs);
connector->interlace_allowed = true;
@@ -1602,8 +1609,10 @@ amdgpu_connector_add(struct amdgpu_device *adev,
case DRM_MODE_CONNECTOR_HDMIA:
case DRM_MODE_CONNECTOR_HDMIB:
case DRM_MODE_CONNECTOR_DisplayPort:
- drm_connector_init(dev, &amdgpu_connector->base,
- &amdgpu_connector_dp_funcs, connector_type);
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dp_funcs,
+ connector_type,
+ ddc);
drm_connector_helper_add(&amdgpu_connector->base,
&amdgpu_connector_dp_helper_funcs);
drm_object_attach_property(&amdgpu_connector->base.base,
@@ -1644,8 +1653,10 @@ amdgpu_connector_add(struct amdgpu_device *adev,
break;
case DRM_MODE_CONNECTOR_LVDS:
case DRM_MODE_CONNECTOR_eDP:
- drm_connector_init(dev, &amdgpu_connector->base,
- &amdgpu_connector_edp_funcs, connector_type);
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_edp_funcs,
+ connector_type,
+ ddc);
drm_connector_helper_add(&amdgpu_connector->base,
&amdgpu_connector_dp_helper_funcs);
drm_object_attach_property(&amdgpu_connector->base.base,
@@ -1659,13 +1670,18 @@ amdgpu_connector_add(struct amdgpu_device *adev,
} else {
switch (connector_type) {
case DRM_MODE_CONNECTOR_VGA:
- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type);
- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
if (!amdgpu_connector->ddc_bus)
DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
}
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_vga_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);
amdgpu_connector->dac_load_detect = true;
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.load_detect_property,
@@ -1679,13 +1695,18 @@ amdgpu_connector_add(struct amdgpu_device *adev,
connector->doublescan_allowed = true;
break;
case DRM_MODE_CONNECTOR_DVIA:
- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type);
- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
if (!amdgpu_connector->ddc_bus)
DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
}
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_vga_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);
amdgpu_connector->dac_load_detect = true;
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.load_detect_property,
@@ -1704,13 +1725,18 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (!amdgpu_dig_connector)
goto failed;
amdgpu_connector->con_priv = amdgpu_dig_connector;
- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type);
- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
if (!amdgpu_connector->ddc_bus)
DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
}
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dvi_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);
subpixel_order = SubPixelHorizontalRGB;
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.coherent_mode_property,
@@ -1754,13 +1780,18 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (!amdgpu_dig_connector)
goto failed;
amdgpu_connector->con_priv = amdgpu_dig_connector;
- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type);
- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
if (!amdgpu_connector->ddc_bus)
DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
}
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dvi_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.coherent_mode_property,
1);
@@ -1796,15 +1827,20 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (!amdgpu_dig_connector)
goto failed;
amdgpu_connector->con_priv = amdgpu_dig_connector;
- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dp_funcs, connector_type);
- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
- if (amdgpu_connector->ddc_bus)
+ if (amdgpu_connector->ddc_bus) {
has_aux = true;
- else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ } else {
DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ }
}
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);
subpixel_order = SubPixelHorizontalRGB;
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.coherent_mode_property,
@@ -1838,15 +1874,20 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (!amdgpu_dig_connector)
goto failed;
amdgpu_connector->con_priv = amdgpu_dig_connector;
- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_edp_funcs, connector_type);
- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
- if (amdgpu_connector->ddc_bus)
+ if (amdgpu_connector->ddc_bus) {
has_aux = true;
- else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ } else {
DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ }
}
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_edp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);
drm_object_attach_property(&amdgpu_connector->base.base,
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_FULLSCREEN);
@@ -1859,13 +1900,18 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (!amdgpu_dig_connector)
goto failed;
amdgpu_connector->con_priv = amdgpu_dig_connector;
- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_lvds_funcs, connector_type);
- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs);
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
if (!amdgpu_connector->ddc_bus)
DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
}
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_lvds_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs);
drm_object_attach_property(&amdgpu_connector->base.base,
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_FULLSCREEN);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4e4094f842e7..a52a084158b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -35,6 +35,7 @@
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
+#include "amdgpu_ras.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data,
@@ -402,7 +403,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
- .resv = bo->tbo.resv,
+ .resv = bo->tbo.base.resv,
.flags = 0
};
uint32_t domain;
@@ -449,75 +450,12 @@ retry:
return r;
}
-/* Last resort, try to evict something from the current working set */
-static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
- struct amdgpu_bo *validated)
-{
- uint32_t domain = validated->allowed_domains;
- struct ttm_operation_ctx ctx = { true, false };
- int r;
-
- if (!p->evictable)
- return false;
-
- for (;&p->evictable->tv.head != &p->validated;
- p->evictable = list_prev_entry(p->evictable, tv.head)) {
-
- struct amdgpu_bo_list_entry *candidate = p->evictable;
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- bool update_bytes_moved_vis;
- uint32_t other;
-
- /* If we reached our current BO we can forget it */
- if (bo == validated)
- break;
-
- /* We can't move pinned BOs here */
- if (bo->pin_count)
- continue;
-
- other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-
- /* Check if this BO is in one of the domains we need space for */
- if (!(other & domain))
- continue;
-
- /* Check if we can move this BO somewhere else */
- other = bo->allowed_domains & ~domain;
- if (!other)
- continue;
-
- /* Good we can try to move this BO somewhere else */
- update_bytes_moved_vis =
- !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- amdgpu_bo_in_cpu_visible_vram(bo);
- amdgpu_bo_placement_from_domain(bo, other);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- p->bytes_moved += ctx.bytes_moved;
- if (update_bytes_moved_vis)
- p->bytes_moved_vis += ctx.bytes_moved;
-
- if (unlikely(r))
- break;
-
- p->evictable = list_prev_entry(p->evictable, tv.head);
- list_move(&candidate->tv.head, &p->validated);
-
- return true;
- }
-
- return false;
-}
-
static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
{
struct amdgpu_cs_parser *p = param;
int r;
- do {
- r = amdgpu_cs_bo_validate(p, bo);
- } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+ r = amdgpu_cs_bo_validate(p, bo);
if (r)
return r;
@@ -536,7 +474,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
list_for_each_entry(lobj, validated, tv.head) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
- bool binding_userptr = false;
struct mm_struct *usermm;
usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
@@ -553,20 +490,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
lobj->user_pages);
- binding_userptr = true;
}
- if (p->evictable == lobj)
- p->evictable = NULL;
-
r = amdgpu_cs_validate(p, bo);
if (r)
return r;
- if (binding_userptr) {
- kvfree(lobj->user_pages);
- lobj->user_pages = NULL;
- }
+ kvfree(lobj->user_pages);
+ lobj->user_pages = NULL;
}
return 0;
}
@@ -607,8 +538,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->tv.num_shared = 2;
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
- if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -650,7 +579,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
- &duplicates, false);
+ &duplicates);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
@@ -661,9 +590,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
&p->bytes_moved_vis_threshold);
p->bytes_moved = 0;
p->bytes_moved_vis = 0;
- p->evictable = list_last_entry(&p->validated,
- struct amdgpu_bo_list_entry,
- tv.head);
r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
amdgpu_cs_validate, p);
@@ -730,7 +656,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
list_for_each_entry(e, &p->validated, tv.head) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- struct reservation_object *resv = bo->tbo.resv;
+ struct dma_resv *resv = bo->tbo.base.resv;
r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
amdgpu_bo_explicit_sync(bo));
@@ -869,29 +795,23 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(adev, &p->job->sync,
- fpriv->prt_va->last_pt_update, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
if (r)
return r;
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
- struct dma_fence *f;
-
bo_va = fpriv->csa_va;
BUG_ON(!bo_va);
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
- f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
if (r)
return r;
}
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct dma_fence *f;
-
/* ignore duplicates */
bo = ttm_to_amdgpu_bo(e->tv.bo);
if (!bo)
@@ -905,8 +825,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -915,11 +834,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_vm_update_directories(adev, vm);
+ r = amdgpu_vm_update_pdes(adev, vm, false);
if (r)
return r;
- r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
if (r)
return r;
@@ -990,6 +909,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (parser->entity && parser->entity != entity)
return -EINVAL;
+ /* Return if there is no run queue associated with this entity.
+ * Possibly because of disabled HW IP*/
+ if (entity->rq == NULL)
+ return -EINVAL;
+
parser->entity = entity;
ring = to_amdgpu_ring(entity->rq->sched);
@@ -1061,7 +985,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
- r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+ r = amdgpu_sync_fence(&p->job->sync, fence, true);
dma_fence_put(fence);
if (r)
return r;
@@ -1083,7 +1007,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
return r;
}
- r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+ r = amdgpu_sync_fence(&p->job->sync, fence, true);
dma_fence_put(fence);
return r;
@@ -1143,6 +1067,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_sem);
+ if (p->post_deps)
+ return -EINVAL;
+
p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
GFP_KERNEL);
p->num_post_deps = 0;
@@ -1166,8 +1093,7 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk
- *chunk)
+ struct amdgpu_cs_chunk *chunk)
{
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
unsigned num_deps;
@@ -1177,6 +1103,9 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+ if (p->post_deps)
+ return -EINVAL;
+
p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
GFP_KERNEL);
p->num_post_deps = 0;
@@ -1286,11 +1215,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
- /* No memory allocation is allowed while holding the mn lock.
- * p->mn is hold until amdgpu_cs_submit is finished and fence is added
- * to BOs.
+ /* No memory allocation is allowed while holding the notifier lock.
+ * The lock is held until amdgpu_cs_submit is finished and fence is
+ * added to BOs.
*/
- amdgpu_mn_lock(p->mn);
+ mutex_lock(&p->adev->notifier_lock);
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
@@ -1305,7 +1234,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
goto error_abort;
}
- job->owner = p->filp;
p->fence = dma_fence_get(&job->base.s_fence->finished);
amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
@@ -1333,13 +1261,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
- amdgpu_mn_unlock(p->mn);
+ mutex_unlock(&p->adev->notifier_lock);
return 0;
error_abort:
drm_sched_job_cleanup(&job->base);
- amdgpu_mn_unlock(p->mn);
+ mutex_unlock(&p->adev->notifier_lock);
error_unlock:
amdgpu_job_free(job);
@@ -1354,6 +1282,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
bool reserved_buffers = false;
int i, r;
+ if (amdgpu_ras_intr_triggered())
+ return -EHWPOISON;
+
if (!adev->accel_working)
return -EBUSY;
@@ -1727,7 +1658,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
*map = mapping;
/* Double check that the BO is reserved by this CS */
- if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
+ if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
return -EINVAL;
if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 35a8d3c96fc9..08047bc4d588 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -80,7 +80,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
list_add(&csa_tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
- r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, false);
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index f539a2a92774..94a6c42f29ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_JPEG] = 1,
};
-static int amdgput_ctx_total_num_entities(void)
-{
- unsigned i, num_entities = 0;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
- num_entities += amdgpu_ctx_num_entities[i];
-
- return num_entities;
-}
-
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
+ if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+ return -EINVAL;
+
/* NORMAL and below are accessible by everyone */
if (priority <= DRM_SCHED_PRIORITY_NORMAL)
return 0;
@@ -68,47 +61,94 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
+{
+ struct amdgpu_device *adev = ctx->adev;
+ struct amdgpu_ctx_entity *entity;
+ struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+ unsigned num_scheds = 0;
+ enum drm_sched_priority priority;
+ int r;
+
+ entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+ GFP_KERNEL);
+ if (!entity)
+ return -ENOMEM;
+
+ entity->sequence = 1;
+ priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ switch (hw_ip) {
+ case AMDGPU_HW_IP_GFX:
+ sched = &adev->gfx.gfx_ring[0].sched;
+ scheds = &sched;
+ num_scheds = 1;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ scheds = adev->gfx.compute_sched;
+ num_scheds = adev->gfx.num_compute_sched;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ scheds = adev->sdma.sdma_sched;
+ num_scheds = adev->sdma.num_sdma_sched;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ sched = &adev->uvd.inst[0].ring.sched;
+ scheds = &sched;
+ num_scheds = 1;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ sched = &adev->vce.ring[0].sched;
+ scheds = &sched;
+ num_scheds = 1;
+ break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ sched = &adev->uvd.inst[0].ring_enc[0].sched;
+ scheds = &sched;
+ num_scheds = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ scheds = adev->vcn.vcn_dec_sched;
+ num_scheds = adev->vcn.num_vcn_dec_sched;
+ break;
+ case AMDGPU_HW_IP_VCN_ENC:
+ scheds = adev->vcn.vcn_enc_sched;
+ num_scheds = adev->vcn.num_vcn_enc_sched;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ scheds = adev->jpeg.jpeg_sched;
+ num_scheds = adev->jpeg.num_jpeg_sched;
+ break;
+ }
+
+ r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+ &ctx->guilty);
+ if (r)
+ goto error_free_entity;
+
+ ctx->entities[hw_ip][ring] = entity;
+ return 0;
+
+error_free_entity:
+ kfree(entity);
+
+ return r;
+}
+
static int amdgpu_ctx_init(struct amdgpu_device *adev,
enum drm_sched_priority priority,
struct drm_file *filp,
struct amdgpu_ctx *ctx)
{
- unsigned num_entities = amdgput_ctx_total_num_entities();
- unsigned i, j;
int r;
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
- return -EINVAL;
-
r = amdgpu_ctx_priority_permit(filp, priority);
if (r)
return r;
memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
-
- ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
- sizeof(struct dma_fence*), GFP_KERNEL);
- if (!ctx->fences)
- return -ENOMEM;
-
- ctx->entities[0] = kcalloc(num_entities,
- sizeof(struct amdgpu_ctx_entity),
- GFP_KERNEL);
- if (!ctx->entities[0]) {
- r = -ENOMEM;
- goto error_free_fences;
- }
-
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
- entity->sequence = 1;
- entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
- }
- for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
- ctx->entities[i] = ctx->entities[i - 1] +
- amdgpu_ctx_num_entities[i - 1];
+ ctx->adev = adev;
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
@@ -120,104 +160,49 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
ctx->init_priority = priority;
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
- struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
- unsigned num_rings;
- unsigned num_rqs = 0;
-
- switch (i) {
- case AMDGPU_HW_IP_GFX:
- rings[0] = &adev->gfx.gfx_ring[0];
- num_rings = 1;
- break;
- case AMDGPU_HW_IP_COMPUTE:
- for (j = 0; j < adev->gfx.num_compute_rings; ++j)
- rings[j] = &adev->gfx.compute_ring[j];
- num_rings = adev->gfx.num_compute_rings;
- break;
- case AMDGPU_HW_IP_DMA:
- for (j = 0; j < adev->sdma.num_instances; ++j)
- rings[j] = &adev->sdma.instance[j].ring;
- num_rings = adev->sdma.num_instances;
- break;
- case AMDGPU_HW_IP_UVD:
- rings[0] = &adev->uvd.inst[0].ring;
- num_rings = 1;
- break;
- case AMDGPU_HW_IP_VCE:
- rings[0] = &adev->vce.ring[0];
- num_rings = 1;
- break;
- case AMDGPU_HW_IP_UVD_ENC:
- rings[0] = &adev->uvd.inst[0].ring_enc[0];
- num_rings = 1;
- break;
- case AMDGPU_HW_IP_VCN_DEC:
- rings[0] = &adev->vcn.ring_dec;
- num_rings = 1;
- break;
- case AMDGPU_HW_IP_VCN_ENC:
- rings[0] = &adev->vcn.ring_enc[0];
- num_rings = 1;
- break;
- case AMDGPU_HW_IP_VCN_JPEG:
- rings[0] = &adev->vcn.ring_jpeg;
- num_rings = 1;
- break;
- }
+ return 0;
- for (j = 0; j < num_rings; ++j) {
- if (!rings[j]->adev)
- continue;
+}
- rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
- }
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- rqs, num_rqs, &ctx->guilty);
- if (r)
- goto error_cleanup_entities;
- }
+ int i;
- return 0;
+ if (!entity)
+ return;
-error_cleanup_entities:
- for (i = 0; i < num_entities; ++i)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
- kfree(ctx->entities[0]);
+ for (i = 0; i < amdgpu_sched_jobs; ++i)
+ dma_fence_put(entity->fences[i]);
-error_free_fences:
- kfree(ctx->fences);
- ctx->fences = NULL;
- return r;
+ kfree(entity);
}
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
- unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
if (!adev)
return;
- for (i = 0; i < num_entities; ++i)
- for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(ctx->entities[0][i].fences[j]);
- kfree(ctx->fences);
- kfree(ctx->entities[0]);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+ amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+ ctx->entities[i][j] = NULL;
+ }
+ }
mutex_destroy(&ctx->lock);
-
kfree(ctx);
}
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity)
{
+ int r;
+
if (hw_ip >= AMDGPU_HW_IP_NUM) {
DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
return -EINVAL;
@@ -234,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return -EINVAL;
}
- *entity = &ctx->entities[hw_ip][ring].entity;
+ if (ctx->entities[hw_ip][ring] == NULL) {
+ r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+ if (r)
+ return r;
+ }
+
+ *entity = &ctx->entities[hw_ip][ring]->entity;
return 0;
}
@@ -274,17 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
- unsigned num_entities;
- u32 i;
+ u32 i, j;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
- num_entities = 0;
- for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
- num_entities += amdgpu_ctx_num_entities[i];
-
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+ drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+ }
+ }
amdgpu_ctx_fini(ref);
}
@@ -344,7 +335,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
{
struct amdgpu_ctx *ctx;
struct amdgpu_ctx_mgr *mgr;
- uint32_t ras_counter;
+ unsigned long ras_counter;
if (!fpriv)
return -EINVAL;
@@ -514,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
- unsigned num_entities = amdgput_ctx_total_num_entities();
enum drm_sched_priority ctx_prio;
- unsigned i;
+ unsigned i, j;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+ if (!ctx->entities[i][j])
+ continue;
- drm_sched_entity_set_priority(entity, ctx_prio);
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_set_priority(entity, ctx_prio);
+ }
}
}
@@ -534,21 +529,24 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity)
{
struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
- unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
- struct dma_fence *other = centity->fences[idx];
+ struct dma_fence *other;
+ unsigned idx;
+ long r;
- if (other) {
- signed long r;
- r = dma_fence_wait(other, true);
- if (r < 0) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+ spin_lock(&ctx->ring_lock);
+ idx = centity->sequence & (amdgpu_sched_jobs - 1);
+ other = dma_fence_get(centity->fences[idx]);
+ spin_unlock(&ctx->ring_lock);
- return r;
- }
- }
+ if (!other)
+ return 0;
- return 0;
+ r = dma_fence_wait(other, true);
+ if (r < 0 && r != -ERESTARTSYS)
+ DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+
+ dma_fence_put(other);
+ return r;
}
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
@@ -559,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
- unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
- entity = &ctx->entities[0][i].entity;
- timeout = drm_sched_entity_flush(entity, timeout);
+ entity = &ctx->entities[i][j]->entity;
+ timeout = drm_sched_entity_flush(entity, timeout);
+ }
}
}
mutex_unlock(&mgr->lock);
@@ -581,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
- unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
@@ -594,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
}
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_fini(&ctx->entities[0][i].entity);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_fini(entity);
+ }
+ }
}
}
@@ -617,3 +627,45 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->lock);
}
+
+void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched;
+ adev->gfx.num_gfx_sched++;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
+ adev->gfx.num_compute_sched++;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched;
+ adev->sdma.num_sdma_sched++;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
+ &adev->vcn.inst[i].ring_dec.sched;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+ adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
+ &adev->vcn.inst[i].ring_enc[j].sched;
+ }
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+ adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
+ &adev->jpeg.inst[i].ring_dec.sched;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 5f1b54c9bcdb..de490f183af2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -29,10 +29,12 @@ struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
+#define AMDGPU_MAX_ENTITY_NUM 4
+
struct amdgpu_ctx_entity {
uint64_t sequence;
- struct dma_fence **fences;
struct drm_sched_entity entity;
+ struct dma_fence *fences[];
};
struct amdgpu_ctx {
@@ -42,15 +44,14 @@ struct amdgpu_ctx {
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
- struct dma_fence **fences;
- struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
+ struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
struct mutex lock;
atomic_t guilty;
- uint32_t ras_counter_ce;
- uint32_t ras_counter_ue;
+ unsigned long ras_counter_ce;
+ unsigned long ras_counter_ue;
};
struct amdgpu_ctx_mgr {
@@ -87,4 +88,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
+
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 5652cc72ed3a..f24ed9a1a3e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -26,6 +26,7 @@
#include <linux/kthread.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
#include <drm/drm_debugfs.h>
@@ -129,7 +130,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
sh_bank = 0xFFFFFFFF;
if (instance_bank == 0x3FF)
instance_bank = 0xFFFFFFFF;
- use_bank = 1;
+ use_bank = true;
} else if (*pos & (1ULL << 61)) {
me = (*pos & GENMASK_ULL(33, 24)) >> 24;
@@ -137,17 +138,24 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
vmid = (*pos & GENMASK_ULL(58, 54)) >> 54;
- use_ring = 1;
+ use_ring = true;
} else {
- use_bank = use_ring = 0;
+ use_bank = use_ring = false;
}
*pos &= (1UL << 22) - 1;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
- (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
+ (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
+ }
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se_bank,
sh_bank, instance_bank);
@@ -193,6 +201,9 @@ end:
if (pm_pg_lock)
mutex_unlock(&adev->pm.mutex);
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -237,13 +248,20 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
value = RREG32_PCIE(*pos >> 2);
r = put_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
result += 4;
buf += 4;
@@ -251,6 +269,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -276,12 +297,19 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
WREG32_PCIE(*pos >> 2, value);
@@ -291,6 +319,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -316,13 +347,20 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
value = RREG32_DIDT(*pos >> 2);
r = put_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
result += 4;
buf += 4;
@@ -330,6 +368,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -355,12 +396,19 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
WREG32_DIDT(*pos >> 2, value);
@@ -370,6 +418,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -395,13 +446,20 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
value = RREG32_SMC(*pos);
r = put_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
result += 4;
buf += 4;
@@ -409,6 +467,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -434,12 +495,19 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
WREG32_SMC(*pos, value);
@@ -449,6 +517,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -572,7 +643,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
idx = *pos >> 2;
valuesize = sizeof(values);
+
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -633,6 +713,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -644,6 +728,9 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
mutex_unlock(&adev->grbm_idx_mutex);
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (!x)
return -EINVAL;
@@ -711,6 +798,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
if (!data)
return -ENOMEM;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -726,6 +817,9 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
mutex_unlock(&adev->grbm_idx_mutex);
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
while (size) {
uint32_t value;
@@ -859,6 +953,13 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private;
int r = 0, i;
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ return r;
+
+ /* Avoid accidently unparking the sched thread during GPU reset */
+ mutex_lock(&adev->lock_reset);
+
/* hold on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -884,6 +985,11 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
kthread_unpark(ring->sched.thread);
}
+ mutex_unlock(&adev->lock_reset);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
return 0;
}
@@ -902,8 +1008,17 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ return r;
seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
return 0;
}
@@ -912,8 +1027,17 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ return r;
seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
return 0;
}
@@ -1036,6 +1160,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
if (!fences)
return -ENOMEM;
+ /* Avoid accidently unparking the sched thread during GPU reset */
+ mutex_lock(&adev->lock_reset);
+
/* stop the scheduler */
kthread_park(ring->sched.thread);
@@ -1075,10 +1202,11 @@ failure:
/* restart the scheduler */
kthread_unpark(ring->sched.thread);
+ mutex_unlock(&adev->lock_reset);
+
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
- if (fences)
- kfree(fences);
+ kfree(fences);
return 0;
}
@@ -1090,8 +1218,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
adev->debugfs_preempt =
debugfs_create_file("amdgpu_preempt_ib", 0600,
- adev->ddev->primary->debugfs_root,
- (void *)adev, &fops_ib_preempt);
+ adev->ddev->primary->debugfs_root, adev,
+ &fops_ib_preempt);
if (!(adev->debugfs_preempt)) {
DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
return -EIO;
@@ -1103,8 +1231,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev)
{
- if (adev->debugfs_preempt)
- debugfs_remove(adev->debugfs_preempt);
+ debugfs_remove(adev->debugfs_preempt);
}
#else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5a7f893cf724..39cd545976b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -65,16 +65,23 @@
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
+#include <linux/suspend.h>
+#include <drm/task_barrier.h>
+
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
-static const char *amdgpu_asic_name[] = {
+const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
"VERDE",
@@ -98,7 +105,11 @@ static const char *amdgpu_asic_name[] = {
"VEGA12",
"VEGA20",
"RAVEN",
+ "ARCTURUS",
+ "RENOIR",
"NAVI10",
+ "NAVI14",
+ "NAVI12",
"LAST",
};
@@ -127,14 +138,14 @@ static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
- * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control
+ * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
*
* @dev: drm_device pointer
*
* Returns true if the device is a dGPU with HG/PX power control,
* otherwise return false.
*/
-bool amdgpu_device_is_px(struct drm_device *dev)
+bool amdgpu_device_supports_boco(struct drm_device *dev)
{
struct amdgpu_device *adev = dev->dev_private;
@@ -143,6 +154,51 @@ bool amdgpu_device_is_px(struct drm_device *dev)
return false;
}
+/**
+ * amdgpu_device_supports_baco - Does the device support BACO
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device supporte BACO,
+ * otherwise return false.
+ */
+bool amdgpu_device_supports_baco(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+
+ return amdgpu_asic_supports_baco(adev);
+}
+
+/**
+ * VRAM access helper functions.
+ *
+ * amdgpu_device_vram_access - read/write a buffer in vram
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, sizeof(@buf) must > @size
+ * @write: true - write to vram, otherwise - read from vram
+ */
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+ uint32_t *buf, size_t size, bool write)
+{
+ uint64_t last;
+ unsigned long flags;
+
+ last = size - 4;
+ for (last += pos; pos <= last; pos += 4) {
+ spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+ WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
+ if (write)
+ WREG32_NO_KIQ(mmMM_DATA, *buf++);
+ else
+ *buf++ = RREG32_NO_KIQ(mmMM_DATA);
+ spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+ }
+}
+
/*
* MMIO register access helper functions.
*/
@@ -160,8 +216,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
{
uint32_t ret;
- if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
- return amdgpu_virt_kiq_rreg(adev, reg);
+ if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
+ return amdgpu_kiq_rreg(adev, reg);
if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
@@ -238,8 +294,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
adev->last_mm_index = v;
}
- if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
- return amdgpu_virt_kiq_wreg(adev, reg, v);
+ if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
+ return amdgpu_kiq_wreg(adev, reg, v);
if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
@@ -413,6 +469,40 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
}
/**
+ * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
+ *
+ * @adev: amdgpu device pointer
+ * @reg: offset of register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ * Returns the value in the register.
+ */
+static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
+{
+ DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
+ BUG();
+ return 0;
+}
+
+/**
+ * amdgpu_invalid_wreg64 - dummy reg write function
+ *
+ * @adev: amdgpu device pointer
+ * @reg: offset of register
+ * @v: value to write to the register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ */
+static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
+{
+ DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
+ reg, v);
+ BUG();
+}
+
+/**
* amdgpu_block_invalid_rreg - dummy reg read function
*
* @adev: amdgpu device pointer
@@ -895,7 +985,7 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
struct sysinfo si;
- bool is_os_64 = (sizeof(void *) == 8) ? true : false;
+ bool is_os_64 = (sizeof(void *) == 8);
uint64_t total_memory;
uint64_t dram_size_seven_GB = 0x1B8000000;
uint64_t dram_size_three_GB = 0xB8000000;
@@ -942,8 +1032,6 @@ def_value:
*/
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
- int ret = 0;
-
if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
@@ -981,15 +1069,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_device_check_block_size(adev);
- ret = amdgpu_device_get_job_timeout_settings(adev);
- if (ret) {
- dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
- return ret;
- }
-
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
- return ret;
+ return 0;
}
/**
@@ -1004,8 +1086,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
struct drm_device *dev = pci_get_drvdata(pdev);
+ int r;
- if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
+ if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
return;
if (state == VGA_SWITCHEROO_ON) {
@@ -1013,7 +1096,12 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
/* don't suspend or resume card normally */
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
- amdgpu_device_resume(dev, true, true);
+ pci_set_power_state(dev->pdev, PCI_D0);
+ pci_restore_state(dev->pdev);
+ r = pci_enable_device(dev->pdev);
+ if (r)
+ DRM_WARN("pci_enable_device failed (%d)\n", r);
+ amdgpu_device_resume(dev, true);
dev->switch_power_state = DRM_SWITCH_POWER_ON;
drm_kms_helper_poll_enable(dev);
@@ -1021,7 +1109,11 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
pr_info("amdgpu: switched off\n");
drm_kms_helper_poll_disable(dev);
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
- amdgpu_device_suspend(dev, true, true);
+ amdgpu_device_suspend(dev, true);
+ pci_save_state(dev->pdev);
+ /* Shut down the device */
+ pci_disable_device(dev->pdev);
+ pci_set_power_state(dev->pdev, PCI_D3cold);
dev->switch_power_state = DRM_SWITCH_POWER_OFF;
}
}
@@ -1384,9 +1476,21 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
else
chip_name = "raven";
break;
+ case CHIP_ARCTURUS:
+ chip_name = "arcturus";
+ break;
+ case CHIP_RENOIR:
+ chip_name = "renoir";
+ break;
case CHIP_NAVI10:
chip_name = "navi10";
break;
+ case CHIP_NAVI14:
+ chip_name = "navi14";
+ break;
+ case CHIP_NAVI12:
+ chip_name = "navi12";
+ break;
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
@@ -1415,6 +1519,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
+ goto parse_soc_bounding_box;
+
adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
@@ -1442,14 +1549,18 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
adev->gfx.config.num_packer_per_sc =
le32_to_cpu(gpu_info_fw->num_packer_per_sc);
}
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
+
+parse_soc_bounding_box:
+ /*
+ * soc bounding box info is not integrated in disocovery table,
+ * we always need to parse it from gpu info firmware.
+ */
if (hdr->version_minor == 2) {
const struct gpu_info_firmware_v1_2 *gpu_info_fw =
(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
}
-#endif
break;
}
default:
@@ -1529,7 +1640,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
- if (adev->asic_type == CHIP_RAVEN)
+ case CHIP_ARCTURUS:
+ case CHIP_RENOIR:
+ if (adev->asic_type == CHIP_RAVEN ||
+ adev->asic_type == CHIP_RENOIR)
adev->family = AMDGPU_FAMILY_RV;
else
adev->family = AMDGPU_FAMILY_AI;
@@ -1539,6 +1653,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return r;
break;
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
adev->family = AMDGPU_FAMILY_NV;
r = nv_set_ip_blocks(adev);
@@ -1554,19 +1670,19 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
+ if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
+ amdgpu_discovery_get_gfx_info(adev);
+
amdgpu_amdkfd_device_probe(adev);
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return -EAGAIN;
-
- /* query the reg access mode at the very beginning */
- amdgpu_virt_init_reg_access_mode(adev);
}
adev->pm.pp_feature = amdgpu_pp_feature_mask;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1665,29 +1781,36 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
if (adev->asic_type >= CHIP_VEGA10) {
for (i = 0; i < adev->num_ip_blocks; i++) {
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
- if (adev->in_gpu_reset || adev->in_suspend) {
- if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
- break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+
+ /* no need to do the fw loading again if already done*/
+ if (adev->ip_blocks[i].status.hw == true)
+ break;
+
+ if (adev->in_gpu_reset || adev->in_suspend) {
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+ if (r) {
+ DRM_ERROR("resume of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
- } else {
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
- if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
+ return r;
+ }
+ } else {
+ r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (r) {
+ DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
}
- adev->ip_blocks[i].status.hw = true;
}
+
+ adev->ip_blocks[i].status.hw = true;
+ break;
}
}
- r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
+
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
+ r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
return r;
}
@@ -1754,6 +1877,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
}
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_init_data_exchange(adev);
+
r = amdgpu_ib_pool_init(adev);
if (r) {
dev_err(adev->dev, "IB initialization failed (%d).\n", r);
@@ -1777,16 +1903,26 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
goto init_failed;
+ /*
+ * retired pages will be loaded from eeprom and reserved here,
+ * it should be called after amdgpu_device_ip_hw_init_phase2 since
+ * for some ASICs the RAS EEPROM code relies on SMU fully functioning
+ * for I2C communication which only true at this point.
+ * recovery_init may fail, but it can free all resources allocated by
+ * itself and its failure should not stop amdgpu init process.
+ *
+ * Note: theoretically, this should be called before all vram allocations
+ * to protect retired page from abusing
+ */
+ amdgpu_ras_recovery_init(adev);
+
if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev);
init_failed:
- if (amdgpu_sriov_vf(adev)) {
- if (!r)
- amdgpu_virt_init_data_exchange(adev);
+ if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, true);
- }
return r;
}
@@ -1825,6 +1961,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
* amdgpu_device_set_cg_state - set clockgating for amdgpu device
*
* @adev: amdgpu_device pointer
+ * @state: clockgating state (gate or ungate)
*
* The list of all the hardware IPs that make up the asic is walked and the
* set_clockgating_state callbacks are run.
@@ -1849,6 +1986,7 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1879,6 +2017,7 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */
r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
@@ -1944,6 +2083,7 @@ out:
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1969,8 +2109,39 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
- /* set to low pstate by default */
- amdgpu_xgmi_set_pstate(adev, 0);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
return 0;
}
@@ -2128,7 +2299,9 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
if (r) {
DRM_ERROR("suspend of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
+ return r;
}
+ adev->ip_blocks[i].status.hw = false;
}
}
@@ -2156,6 +2329,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
/* displays are handled in phase1 */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
continue;
+ /* PSP lost connection when err_event_athub occurs */
+ if (amdgpu_ras_intr_triggered() &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
+ adev->ip_blocks[i].status.hw = false;
+ continue;
+ }
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
@@ -2163,6 +2342,18 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
DRM_ERROR("suspend of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
}
+ adev->ip_blocks[i].status.hw = false;
+ /* handle putting the SMC in the appropriate state */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
+ if (r) {
+ DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
+ adev->mp1_state, r);
+ return r;
+ }
+ }
+
+ adev->ip_blocks[i].status.hw = false;
}
return 0;
@@ -2215,6 +2406,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
for (j = 0; j < adev->num_ip_blocks; j++) {
block = &adev->ip_blocks[j];
+ block->status.hw = false;
if (block->version->type != ip_order[i] ||
!block->status.valid)
continue;
@@ -2223,6 +2415,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
if (r)
return r;
+ block->status.hw = true;
}
}
@@ -2239,7 +2432,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_GFX,
AMD_IP_BLOCK_TYPE_SDMA,
AMD_IP_BLOCK_TYPE_UVD,
- AMD_IP_BLOCK_TYPE_VCE
+ AMD_IP_BLOCK_TYPE_VCE,
+ AMD_IP_BLOCK_TYPE_VCN
};
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
@@ -2250,13 +2444,19 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
block = &adev->ip_blocks[j];
if (block->version->type != ip_order[i] ||
- !block->status.valid)
+ !block->status.valid ||
+ block->status.hw)
continue;
- r = block->version->funcs->hw_init(adev);
+ if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
+ r = block->version->funcs->resume(adev);
+ else
+ r = block->version->funcs->hw_init(adev);
+
DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
if (r)
return r;
+ block->status.hw = true;
}
}
@@ -2280,17 +2480,19 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
int i, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
DRM_ERROR("resume of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
+ adev->ip_blocks[i].status.hw = true;
}
}
@@ -2315,7 +2517,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
int i, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
@@ -2328,6 +2530,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
+ adev->ip_blocks[i].status.hw = true;
}
return 0;
@@ -2421,15 +2624,19 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
case CHIP_RAVEN:
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
+ case CHIP_RENOIR:
#endif
return amdgpu_dc != 0;
#endif
default:
+ if (amdgpu_dc > 0)
+ DRM_INFO("Display Core has been requested via kernel parameter "
+ "but isn't supported by ASIC, ignoring\n");
return false;
}
}
@@ -2454,13 +2661,110 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
{
struct amdgpu_device *adev =
container_of(__work, struct amdgpu_device, xgmi_reset_work);
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+
+ /* It's a bug to not have a hive within this function */
+ if (WARN_ON(!hive))
+ return;
+
+ /*
+ * Use task barrier to synchronize all xgmi reset works across the
+ * hive. task_barrier_enter and task_barrier_exit will block
+ * until all the threads running the xgmi reset works reach
+ * those points. task_barrier_full will do both blocks.
+ */
+ if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+
+ task_barrier_enter(&hive->tb);
+ adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
+
+ if (adev->asic_reset_res)
+ goto fail;
- adev->asic_reset_res = amdgpu_asic_reset(adev);
+ task_barrier_exit(&hive->tb);
+ adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
+
+ if (adev->asic_reset_res)
+ goto fail;
+ } else {
+
+ task_barrier_full(&hive->tb);
+ adev->asic_reset_res = amdgpu_asic_reset(adev);
+ }
+
+fail:
if (adev->asic_reset_res)
DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
adev->asic_reset_res, adev->ddev->unique);
}
+static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
+{
+ char *input = amdgpu_lockup_timeout;
+ char *timeout_setting = NULL;
+ int index = 0;
+ long timeout;
+ int ret = 0;
+
+ /*
+ * By default timeout for non compute jobs is 10000.
+ * And there is no timeout enforced on compute jobs.
+ * In SR-IOV or passthrough mode, timeout for compute
+ * jobs are 10000 by default.
+ */
+ adev->gfx_timeout = msecs_to_jiffies(10000);
+ adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
+ adev->compute_timeout = adev->gfx_timeout;
+ else
+ adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+
+ if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ while ((timeout_setting = strsep(&input, ",")) &&
+ strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ ret = kstrtol(timeout_setting, 0, &timeout);
+ if (ret)
+ return ret;
+
+ if (timeout == 0) {
+ index++;
+ continue;
+ } else if (timeout < 0) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ } else {
+ timeout = msecs_to_jiffies(timeout);
+ }
+
+ switch (index++) {
+ case 0:
+ adev->gfx_timeout = timeout;
+ break;
+ case 1:
+ adev->compute_timeout = timeout;
+ break;
+ case 2:
+ adev->sdma_timeout = timeout;
+ break;
+ case 3:
+ adev->video_timeout = timeout;
+ break;
+ default:
+ break;
+ }
+ }
+ /*
+ * There is only one value specified and
+ * it should apply to all non-compute jobs.
+ */
+ if (index == 1) {
+ adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
+ adev->compute_timeout = adev->gfx_timeout;
+ }
+ }
+
+ return ret;
+}
/**
* amdgpu_device_init - initialize the driver
@@ -2480,7 +2784,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
uint32_t flags)
{
int r, i;
- bool runtime = false;
+ bool boco = false;
u32 max_MBps;
adev->shutdown = false;
@@ -2488,7 +2792,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->ddev = ddev;
adev->pdev = pdev;
adev->flags = flags;
- adev->asic_type = flags & AMD_ASIC_MASK;
+
+ if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
+ adev->asic_type = amdgpu_force_asic_type;
+ else
+ adev->asic_type = flags & AMD_ASIC_MASK;
+
adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
if (amdgpu_emu_mode == 1)
adev->usec_timeout *= 2;
@@ -2498,7 +2807,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
- adev->vm_manager.vm_pte_num_rqs = 0;
+ adev->vm_manager.vm_pte_num_scheds = 0;
adev->gmc.gmc_funcs = NULL;
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@@ -2509,6 +2818,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->pcie_wreg = &amdgpu_invalid_wreg;
adev->pciep_rreg = &amdgpu_invalid_rreg;
adev->pciep_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
+ adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
adev->didt_rreg = &amdgpu_invalid_rreg;
@@ -2536,8 +2847,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
mutex_init(&adev->lock_reset);
- mutex_init(&adev->virt.dpm_mutex);
mutex_init(&adev->psp.mutex);
+ mutex_init(&adev->notifier_lock);
r = amdgpu_device_check_arguments(adev);
if (r)
@@ -2629,6 +2940,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
+ r = amdgpu_device_get_job_timeout_settings(adev);
+ if (r) {
+ dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+ return r;
+ }
+
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
@@ -2637,12 +2954,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* ignore it */
vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
- if (amdgpu_device_is_px(ddev))
- runtime = true;
- if (!pci_is_thunderbolt_attached(adev->pdev))
+ if (amdgpu_device_supports_boco(ddev))
+ boco = true;
+ if (amdgpu_has_atpx() &&
+ (amdgpu_is_atpx_hybrid() ||
+ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ !pci_is_thunderbolt_attached(adev->pdev))
vga_switcheroo_register_client(adev->pdev,
- &amdgpu_switcheroo_ops, runtime);
- if (runtime)
+ &amdgpu_switcheroo_ops, boco);
+ if (boco)
vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
if (amdgpu_emu_mode == 1) {
@@ -2729,11 +3049,17 @@ fence_driver_init:
}
dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
- if (amdgpu_virt_request_full_gpu(adev, false))
- amdgpu_virt_release_full_gpu(adev, false);
goto failed;
}
+ DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se,
+ adev->gfx.config.max_cu_per_sh,
+ adev->gfx.cu_info.number);
+
+ amdgpu_ctx_init_sched(adev);
+
adev->accel_working = true;
amdgpu_vm_check_compute_bug(adev);
@@ -2748,16 +3074,19 @@ fence_driver_init:
amdgpu_fbdev_init(adev);
- if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
- amdgpu_pm_virt_sysfs_init(adev);
-
r = amdgpu_pm_sysfs_init(adev);
- if (r)
+ if (r) {
+ adev->pm_sysfs_en = false;
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
+ } else
+ adev->pm_sysfs_en = true;
r = amdgpu_ucode_sysfs_init(adev);
- if (r)
+ if (r) {
+ adev->ucode_sysfs_en = false;
DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+ } else
+ adev->ucode_sysfs_en = true;
r = amdgpu_debugfs_gem_init(adev);
if (r)
@@ -2788,6 +3117,13 @@ fence_driver_init:
DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
}
+ /*
+ * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
+ * Otherwise the mgpu fan boost feature will be skipped due to the
+ * gpu instance is counted less.
+ */
+ amdgpu_register_gpu_instance(adev);
+
/* enable clockgating, etc. after ib tests, etc. since some blocks require
* explicit gating rather than handling it automatically.
*/
@@ -2819,7 +3155,7 @@ fence_driver_init:
failed:
amdgpu_vf_error_trans_all(adev);
- if (runtime)
+ if (boco)
vga_switcheroo_fini_domain_pm_ops(adev->dev);
return r;
@@ -2838,7 +3174,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
int r;
DRM_INFO("amdgpu: finishing device.\n");
+ flush_delayed_work(&adev->delayed_init_work);
adev->shutdown = true;
+
/* disable all interrupts */
amdgpu_irq_disable_all(adev);
if (adev->mode_info.mode_config_initialized){
@@ -2848,7 +3186,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
drm_atomic_helper_shutdown(adev->ddev);
}
amdgpu_fence_driver_fini(adev);
- amdgpu_pm_sysfs_fini(adev);
+ if (adev->pm_sysfs_en)
+ amdgpu_pm_sysfs_fini(adev);
amdgpu_fbdev_fini(adev);
r = amdgpu_device_ip_fini(adev);
if (adev->firmware.gpu_info_fw) {
@@ -2856,7 +3195,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->firmware.gpu_info_fw = NULL;
}
adev->accel_working = false;
- cancel_delayed_work_sync(&adev->delayed_init_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
@@ -2866,9 +3204,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
kfree(adev->bios);
adev->bios = NULL;
- if (!pci_is_thunderbolt_attached(adev->pdev))
+ if (amdgpu_has_atpx() &&
+ (amdgpu_is_atpx_hybrid() ||
+ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ !pci_is_thunderbolt_attached(adev->pdev))
vga_switcheroo_unregister_client(adev->pdev);
- if (adev->flags & AMD_IS_PX)
+ if (amdgpu_device_supports_boco(adev->ddev))
vga_switcheroo_fini_domain_pm_ops(adev->dev);
vga_client_register(adev->pdev, NULL, NULL, NULL);
if (adev->rio_mem)
@@ -2877,12 +3218,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
iounmap(adev->rmmio);
adev->rmmio = NULL;
amdgpu_device_doorbell_fini(adev);
- if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
- amdgpu_pm_virt_sysfs_fini(adev);
amdgpu_debugfs_regs_cleanup(adev);
device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
- amdgpu_ucode_sysfs_fini(adev);
+ if (adev->ucode_sysfs_en)
+ amdgpu_ucode_sysfs_fini(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
amdgpu_debugfs_preempt_cleanup(adev);
@@ -2905,11 +3245,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
* Returns 0 for success or an error on failure.
* Called at driver suspend.
*/
-int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
struct amdgpu_device *adev;
struct drm_crtc *crtc;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
int r;
if (dev == NULL || dev->dev_private == NULL) {
@@ -2932,9 +3273,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */
drm_modeset_lock_all(dev);
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
- }
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ drm_helper_connector_dpms(connector,
+ DRM_MODE_DPMS_OFF);
+ drm_connector_list_iter_end(&iter);
drm_modeset_unlock_all(dev);
/* unpin the front buffers and cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -2985,17 +3328,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
*/
amdgpu_bo_evict_vram(adev);
- pci_save_state(dev->pdev);
- if (suspend) {
- /* Shut down the device */
- pci_disable_device(dev->pdev);
- pci_set_power_state(dev->pdev, PCI_D3hot);
- } else {
- r = amdgpu_asic_reset(adev);
- if (r)
- DRM_ERROR("amdgpu asic reset failed\n");
- }
-
return 0;
}
@@ -3010,9 +3342,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
* Returns 0 for success or an error on failure.
* Called at driver resume.
*/
-int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_device *adev = dev->dev_private;
struct drm_crtc *crtc;
int r = 0;
@@ -3020,14 +3353,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
- if (resume) {
- pci_set_power_state(dev->pdev, PCI_D0);
- pci_restore_state(dev->pdev);
- r = pci_enable_device(dev->pdev);
- if (r)
- return r;
- }
-
/* post card */
if (amdgpu_device_need_post(adev)) {
r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -3083,9 +3408,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
/* turn on display hw */
drm_modeset_lock_all(dev);
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
- }
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ drm_helper_connector_dpms(connector,
+ DRM_MODE_DPMS_ON);
+ drm_connector_list_iter_end(&iter);
+
drm_modeset_unlock_all(dev);
}
amdgpu_fbdev_set_suspend(adev, 0);
@@ -3362,13 +3691,12 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
- amdgpu_amdkfd_pre_reset(adev);
-
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
goto error;
+ amdgpu_virt_init_data_exchange(adev);
/* we need recover gart prior to run SMC/CP/SDMA resume */
amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
@@ -3386,10 +3714,9 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
amdgpu_amdkfd_post_reset(adev);
error:
- amdgpu_virt_init_data_exchange(adev);
amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
- atomic_inc(&adev->vram_lost_counter);
+ amdgpu_inc_vram_lost(adev);
r = amdgpu_device_recover_vram(adev);
}
@@ -3431,6 +3758,12 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
case CHIP_VEGA20:
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_RAVEN:
+ case CHIP_ARCTURUS:
+ case CHIP_RENOIR:
+ case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
break;
default:
goto disabled;
@@ -3507,7 +3840,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/* For XGMI run all resets in parallel to speed up the process */
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
+ if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
r = -EALREADY;
} else
r = amdgpu_asic_reset(tmp_adev);
@@ -3519,7 +3852,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
}
}
- /* For XGMI wait for all PSP resets to complete before proceed */
+ /* For XGMI wait for all resets to complete before proceed */
if (!r) {
list_for_each_entry(tmp_adev, device_list_handle,
gmc.xgmi.head) {
@@ -3530,14 +3863,11 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
break;
}
}
-
- list_for_each_entry(tmp_adev, device_list_handle,
- gmc.xgmi.head) {
- amdgpu_ras_reserve_bad_pages(tmp_adev);
- }
}
}
+ if (!r && amdgpu_ras_intr_triggered())
+ amdgpu_ras_intr_cleared();
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (need_full_reset) {
@@ -3554,7 +3884,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
if (vram_lost) {
DRM_INFO("VRAM is lost due to GPU reset!\n");
- atomic_inc(&tmp_adev->vram_lost_counter);
+ amdgpu_inc_vram_lost(tmp_adev);
}
r = amdgpu_gtt_mgr_recover(
@@ -3626,25 +3956,30 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
mutex_lock(&adev->lock_reset);
atomic_inc(&adev->gpu_reset_counter);
- adev->in_gpu_reset = 1;
- /* Block kfd: SRIOV would do it separately */
- if (!amdgpu_sriov_vf(adev))
- amdgpu_amdkfd_pre_reset(adev);
+ adev->in_gpu_reset = true;
+ switch (amdgpu_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_MODE1:
+ adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
+ break;
+ case AMD_RESET_METHOD_MODE2:
+ adev->mp1_state = PP_MP1_STATE_RESET;
+ break;
+ default:
+ adev->mp1_state = PP_MP1_STATE_NONE;
+ break;
+ }
return true;
}
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
- /*unlock kfd: SRIOV would do it separately */
- if (!amdgpu_sriov_vf(adev))
- amdgpu_amdkfd_post_reset(adev);
amdgpu_vf_error_trans_all(adev);
- adev->in_gpu_reset = 0;
+ adev->mp1_state = PP_MP1_STATE_NONE;
+ adev->in_gpu_reset = false;
mutex_unlock(&adev->lock_reset);
}
-
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
@@ -3664,11 +3999,28 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive = NULL;
struct amdgpu_device *tmp_adev = NULL;
int i, r = 0;
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
+ bool use_baco =
+ (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
+ true : false;
+
+ /*
+ * Flush RAM to disk so that after reboot
+ * the user can read log and see why the system rebooted.
+ */
+ if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
+
+ DRM_WARN("Emergency reboot.");
+
+ ksys_sync_helper();
+ emergency_restart();
+ }
need_full_reset = job_signaled = false;
INIT_LIST_HEAD(&device_list);
- dev_info(adev->dev, "GPU reset begin!\n");
+ dev_info(adev->dev, "GPU %s begin!\n",
+ (in_ras_intr && !use_baco) ? "jobs stop":"reset");
cancel_delayed_work_sync(&adev->delayed_init_work);
@@ -3684,20 +4036,27 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (hive && !mutex_trylock(&hive->reset_lock)) {
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
- job->base.id, hive->hive_id);
+ job ? job->base.id : -1, hive->hive_id);
return 0;
}
/* Start with adev pre asic reset first for soft reset check.*/
if (!amdgpu_device_lock_adev(adev, !hive)) {
DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
- job->base.id);
+ job ? job->base.id : -1);
return 0;
}
+ /* Block kfd: SRIOV would do it separately */
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_amdkfd_pre_reset(adev);
+
/* Build list of devices to reset */
if (adev->gmc.xgmi.num_physical_nodes > 1) {
if (!hive) {
+ /*unlock kfd: SRIOV would do it separately */
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_amdkfd_post_reset(adev);
amdgpu_device_unlock_adev(adev);
return -ENODEV;
}
@@ -3713,17 +4072,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
device_list_handle = &device_list;
}
- /*
- * Mark these ASICs to be reseted as untracked first
- * And add them back after reset completed
- */
- list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
- amdgpu_unregister_gpu_instance(tmp_adev);
-
/* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+ if (tmp_adev != adev) {
+ amdgpu_device_lock_adev(tmp_adev, false);
+ if (!amdgpu_sriov_vf(tmp_adev))
+ amdgpu_amdkfd_pre_reset(tmp_adev);
+ }
+
+ /*
+ * Mark these ASICs to be reseted as untracked first
+ * And add them back after reset completed
+ */
+ amdgpu_unregister_gpu_instance(tmp_adev);
+
/* disable ras on ALL IPs */
- if (amdgpu_device_ip_need_full_reset(tmp_adev))
+ if (!(in_ras_intr && !use_baco) &&
+ amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -3732,11 +4097,17 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (!ring || !ring->sched.thread)
continue;
- drm_sched_stop(&ring->sched, &job->base);
+ drm_sched_stop(&ring->sched, job ? &job->base : NULL);
+
+ if (in_ras_intr && !use_baco)
+ amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
}
}
+ if (in_ras_intr && !use_baco)
+ goto skip_sched_resume;
+
/*
* Must check guilty signal here since after this point all old
* HW fences are force signaled.
@@ -3747,9 +4118,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
dma_fence_is_signaled(job->base.s_fence->parent))
job_signaled = true;
- if (!amdgpu_device_ip_need_full_reset(adev))
- device_list_handle = &device_list;
-
if (job_signaled) {
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
@@ -3757,9 +4125,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/* Guilty job will be freed after this*/
- r = amdgpu_device_pre_asic_reset(adev,
- job,
- &need_full_reset);
+ r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
if (r) {
/*TODO Should we stop ?*/
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
@@ -3773,7 +4139,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
if (tmp_adev == adev)
continue;
- amdgpu_device_lock_adev(tmp_adev, false);
r = amdgpu_device_pre_asic_reset(tmp_adev,
NULL,
&need_full_reset);
@@ -3801,6 +4166,7 @@ skip_hw_reset:
/* Post ASIC reset for all devs .*/
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
@@ -3822,12 +4188,18 @@ skip_hw_reset:
if (r) {
/* bad news, how to tell it to userspace ? */
- dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
+ dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
} else {
- dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
+ dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
}
+ }
+skip_sched_resume:
+ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+ /*unlock kfd: SRIOV would do it separately */
+ if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
+ amdgpu_amdkfd_post_reset(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
}
@@ -3975,3 +4347,35 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
+int amdgpu_device_baco_enter(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_device_supports_baco(adev->ddev))
+ return -ENOTSUPP;
+
+ if (ras && ras->supported)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
+ return amdgpu_dpm_baco_enter(adev);
+}
+
+int amdgpu_device_baco_exit(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (!amdgpu_device_supports_baco(adev->ddev))
+ return -ENOTSUPP;
+
+ ret = amdgpu_dpm_baco_exit(adev);
+ if (ret)
+ return ret;
+
+ if (ras && ras->supported)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
new file mode 100644
index 000000000000..057f6ea645d7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DF_H__
+#define __AMDGPU_DF_H__
+
+struct amdgpu_df_hash_status {
+ bool hash_64k;
+ bool hash_2m;
+ bool hash_1g;
+};
+
+struct amdgpu_df_funcs {
+ void (*sw_init)(struct amdgpu_device *adev);
+ void (*sw_fini)(struct amdgpu_device *adev);
+ void (*enable_broadcast_mode)(struct amdgpu_device *adev,
+ bool enable);
+ u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
+ u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
+ void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+ bool enable);
+ void (*get_clockgating_state)(struct amdgpu_device *adev,
+ u32 *flags);
+ void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
+ bool enable);
+ int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
+ int is_enable);
+ int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
+ int is_disable);
+ void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
+ uint64_t *count);
+ uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
+ void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
+ uint32_t ficadl_val, uint32_t ficadh_val);
+ uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev,
+ uint32_t df_inst);
+ uint32_t (*get_df_inst_id)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_df {
+ struct amdgpu_df_hash_status hash_status;
+ const struct amdgpu_df_funcs *funcs;
+};
+
+#endif /* __AMDGPU_DF_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 1481899f86c1..f95092741c38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -134,20 +134,10 @@ static int hw_id_map[MAX_HWIP] = {
static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
{
- uint32_t *p = (uint32_t *)binary;
uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
- uint64_t pos = vram_size - BINARY_MAX_SIZE;
- unsigned long flags;
-
- while (pos < vram_size) {
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
- WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
- *p++ = RREG32_NO_KIQ(mmMM_DATA);
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
- pos += 4;
- }
+ uint64_t pos = vram_size - DISCOVERY_TMR_SIZE;
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, DISCOVERY_TMR_SIZE, false);
return 0;
}
@@ -179,7 +169,7 @@ int amdgpu_discovery_init(struct amdgpu_device *adev)
uint16_t checksum;
int r;
- adev->discovery = kzalloc(BINARY_MAX_SIZE, GFP_KERNEL);
+ adev->discovery = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
if (!adev->discovery)
return -ENOMEM;
@@ -333,7 +323,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
}
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
- int *major, int *minor)
+ int *major, int *minor, int *revision)
{
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
@@ -369,6 +359,8 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
*major = ip->major;
if (minor)
*minor = ip->minor;
+ if (revision)
+ *revision = ip->revision;
return 0;
}
ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 85b8c4d4d576..ba78e15d9b05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -24,11 +24,13 @@
#ifndef __AMDGPU_DISCOVERY__
#define __AMDGPU_DISCOVERY__
+#define DISCOVERY_TMR_SIZE (64 << 10)
+
int amdgpu_discovery_init(struct amdgpu_device *adev);
void amdgpu_discovery_fini(struct amdgpu_device *adev);
int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
- int *major, int *minor);
+ int *major, int *minor, int *revision);
int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 535650967b1a..6d520a3eec40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -191,7 +191,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
}
if (!adev->enable_virtual_display) {
- r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
+ r = amdgpu_bo_pin(new_abo,
+ amdgpu_display_supported_domains(adev, new_abo->flags));
if (unlikely(r != 0)) {
DRM_ERROR("failed to pin new abo buffer before flip\n");
goto unreserve;
@@ -204,7 +205,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
goto unpin;
}
- r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,
+ r = dma_resv_get_fences_rcu(new_abo->tbo.base.resv, &work->excl,
&work->shared_count,
&work->shared);
if (unlikely(r != 0)) {
@@ -369,11 +370,13 @@ void amdgpu_display_print_display_setup(struct drm_device *dev)
struct amdgpu_connector *amdgpu_connector;
struct drm_encoder *encoder;
struct amdgpu_encoder *amdgpu_encoder;
+ struct drm_connector_list_iter iter;
uint32_t devices;
int i = 0;
+ drm_connector_list_iter_begin(dev, &iter);
DRM_INFO("AMDGPU Display Connectors\n");
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
DRM_INFO("Connector %d:\n", i);
DRM_INFO(" %s\n", connector->name);
@@ -437,6 +440,7 @@ void amdgpu_display_print_display_setup(struct drm_device *dev)
}
i++;
}
+ drm_connector_list_iter_end(&iter);
}
/**
@@ -495,15 +499,37 @@ static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
.create_handle = drm_gem_fb_create_handle,
};
-uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev)
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
+ uint64_t bo_flags)
{
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
#if defined(CONFIG_DRM_AMD_DC)
- if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN &&
- adev->flags & AMD_IS_APU &&
- amdgpu_device_asic_has_dc_support(adev->asic_type))
- domain |= AMDGPU_GEM_DOMAIN_GTT;
+ /*
+ * if amdgpu_bo_support_uswc returns false it means that USWC mappings
+ * is not supported for this board. But this mapping is required
+ * to avoid hang caused by placement of scanout BO in GTT on certain
+ * APUs. So force the BO placement to VRAM in case this architecture
+ * will not allow USWC mappings.
+ * Also, don't allow GTT domain if the BO doens't have USWC falg set.
+ */
+ if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
+ amdgpu_bo_support_uswc(bo_flags) &&
+ amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ domain |= AMDGPU_GEM_DOMAIN_GTT;
+ break;
+ case CHIP_RAVEN:
+ /* enable S/G on PCO and RV2 */
+ if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
+ domain |= AMDGPU_GEM_DOMAIN_GTT;
+ break;
+ default:
+ break;
+ }
+ }
#endif
return domain;
@@ -674,7 +700,6 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct amdgpu_encoder *amdgpu_encoder;
struct drm_connector *connector;
- struct amdgpu_connector *amdgpu_connector;
u32 src_v = 1, dst_v = 1;
u32 src_h = 1, dst_h = 1;
@@ -686,7 +711,6 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
continue;
amdgpu_encoder = to_amdgpu_encoder(encoder);
connector = amdgpu_get_connector_for_encoder(encoder);
- amdgpu_connector = to_amdgpu_connector(connector);
/* set scaling */
if (amdgpu_encoder->rmx_type == RMX_OFF)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 06b922fe0d42..3620b24785e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -38,7 +38,8 @@
int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
-uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
+ uint64_t bo_flags);
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 489041df1f45..a59cd47aa6c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -34,27 +34,12 @@
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_gem.h"
+#include "amdgpu_dma_buf.h"
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
/**
- * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
- * implementation
- * @obj: GEM buffer object (BO)
- *
- * Returns:
- * A scatter/gather table for the pinned pages of the BO's memory.
- */
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- int npages = bo->tbo.num_pages;
-
- return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
-}
-
-/**
* amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
* @obj: GEM BO
*
@@ -137,23 +122,23 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
}
static int
-__reservation_object_make_exclusive(struct reservation_object *obj)
+__dma_resv_make_exclusive(struct dma_resv *obj)
{
struct dma_fence **fences;
unsigned int count;
int r;
- if (!reservation_object_get_list(obj)) /* no shared fences to convert */
+ if (!dma_resv_get_list(obj)) /* no shared fences to convert */
return 0;
- r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
+ r = dma_resv_get_fences_rcu(obj, NULL, &count, &fences);
if (r)
return r;
if (count == 0) {
/* Now that was unexpected. */
} else if (count == 1) {
- reservation_object_add_excl_fence(obj, fences[0]);
+ dma_resv_add_excl_fence(obj, fences[0]);
dma_fence_put(fences[0]);
kfree(fences);
} else {
@@ -165,7 +150,7 @@ __reservation_object_make_exclusive(struct reservation_object *obj)
if (!array)
goto err_fences_put;
- reservation_object_add_excl_fence(obj, &array->base);
+ dma_resv_add_excl_fence(obj, &array->base);
dma_fence_put(&array->base);
}
@@ -179,106 +164,126 @@ err_fences_put:
}
/**
- * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
- * @dma_buf: Shared DMA buffer
- * @attach: DMA-buf attachment
+ * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
*
- * Makes sure that the shared DMA buffer can be accessed by the target device.
- * For now, simply pins it to the GTT domain, where it should be accessible by
- * all DMA devices.
+ * @dmabuf: DMA-buf where we attach to
+ * @attach: attachment to add
*
- * Returns:
- * 0 on success or a negative error code on failure.
+ * Add the attachment as user to the exported DMA-buf.
*/
-static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
- struct dma_buf_attachment *attach)
+static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
{
- struct drm_gem_object *obj = dma_buf->priv;
+ struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- long r;
+ int r;
- r = drm_gem_map_attach(dma_buf, attach);
- if (r)
- return r;
+ if (attach->dev->driver == adev->dev->driver)
+ return 0;
r = amdgpu_bo_reserve(bo, false);
if (unlikely(r != 0))
- goto error_detach;
-
-
- if (attach->dev->driver != adev->dev->driver) {
- /*
- * We only create shared fences for internal use, but importers
- * of the dmabuf rely on exclusive fences for implicitly
- * tracking write hazards. As any of the current fences may
- * correspond to a write, we need to convert all existing
- * fences on the reservation object into a single exclusive
- * fence.
- */
- r = __reservation_object_make_exclusive(bo->tbo.resv);
- if (r)
- goto error_unreserve;
- }
+ return r;
- /* pin buffer into GTT */
- r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ /*
+ * We only create shared fences for internal use, but importers
+ * of the dmabuf rely on exclusive fences for implicitly
+ * tracking write hazards. As any of the current fences may
+ * correspond to a write, we need to convert all existing
+ * fences on the reservation object into a single exclusive
+ * fence.
+ */
+ r = __dma_resv_make_exclusive(bo->tbo.base.resv);
if (r)
- goto error_unreserve;
-
- if (attach->dev->driver != adev->dev->driver)
- bo->prime_shared_count++;
+ return r;
-error_unreserve:
+ bo->prime_shared_count++;
amdgpu_bo_unreserve(bo);
+ return 0;
+}
-error_detach:
- if (r)
- drm_gem_map_detach(dma_buf, attach);
- return r;
+/**
+ * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
+ *
+ * @dmabuf: DMA-buf where we remove the attachment from
+ * @attach: the attachment to remove
+ *
+ * Called when an attachment is removed from the DMA-buf.
+ */
+static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = dmabuf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
+ bo->prime_shared_count--;
}
/**
- * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation
* @attach: DMA-buf attachment
+ * @dir: DMA direction
*
- * This is called when a shared DMA buffer no longer needs to be accessible by
- * another device. For now, simply unpins the buffer from GTT.
+ * Makes sure that the shared DMA buffer can be accessed by the target device.
+ * For now, simply pins it to the GTT domain, where it should be accessible by
+ * all DMA devices.
+ *
+ * Returns:
+ * sg_table filled with the DMA addresses to use or ERR_PRT with negative error
+ * code.
*/
-static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
- struct dma_buf_attachment *attach)
+static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
+ enum dma_data_direction dir)
{
+ struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- int ret = 0;
+ struct sg_table *sgt;
+ long r;
- ret = amdgpu_bo_reserve(bo, true);
- if (unlikely(ret != 0))
- goto error;
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ return ERR_PTR(r);
- amdgpu_bo_unpin(bo);
- if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
- bo->prime_shared_count--;
- amdgpu_bo_unreserve(bo);
+ sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages);
+ if (IS_ERR(sgt))
+ return sgt;
-error:
- drm_gem_map_detach(dma_buf, attach);
+ if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+ DMA_ATTR_SKIP_CPU_SYNC))
+ goto error_free;
+
+ return sgt;
+
+error_free:
+ sg_free_table(sgt);
+ kfree(sgt);
+ return ERR_PTR(-ENOMEM);
}
/**
- * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
- * @obj: GEM BO
+ * amdgpu_dma_buf_unmap - &dma_buf_ops.unmap_dma_buf implementation
+ * @attach: DMA-buf attachment
+ * @sgt: sg_table to unmap
+ * @dir: DMA direction
*
- * Returns:
- * The BO's reservation object.
+ * This is called when a shared DMA buffer no longer needs to be accessible by
+ * another device. For now, simply unpins the buffer from GTT.
*/
-struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
+static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- return bo->tbo.resv;
+ dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
+ sg_free_table(sgt);
+ kfree(sgt);
+ amdgpu_bo_unpin(bo);
}
/**
@@ -299,7 +304,7 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { true, false };
- u32 domain = amdgpu_display_supported_domains(adev);
+ u32 domain = amdgpu_display_supported_domains(adev, bo->flags);
int ret;
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);
@@ -322,10 +327,11 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
}
const struct dma_buf_ops amdgpu_dmabuf_ops = {
- .attach = amdgpu_dma_buf_map_attach,
- .detach = amdgpu_dma_buf_map_detach,
- .map_dma_buf = drm_gem_map_dma_buf,
- .unmap_dma_buf = drm_gem_unmap_dma_buf,
+ .dynamic_mapping = true,
+ .attach = amdgpu_dma_buf_attach,
+ .detach = amdgpu_dma_buf_detach,
+ .map_dma_buf = amdgpu_dma_buf_map,
+ .unmap_dma_buf = amdgpu_dma_buf_unmap,
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
@@ -335,18 +341,15 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
/**
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
- * @dev: DRM device
* @gobj: GEM BO
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
*
- * The main work is done by the &drm_gem_prime_export helper, which in turn
- * uses &amdgpu_gem_prime_res_obj.
+ * The main work is done by the &drm_gem_prime_export helper.
*
* Returns:
* Shared DMA buffer representing the GEM BO from the given device.
*/
-struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
- struct drm_gem_object *gobj,
+struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
int flags)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
@@ -356,63 +359,56 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
return ERR_PTR(-EPERM);
- buf = drm_gem_prime_export(dev, gobj, flags);
- if (!IS_ERR(buf)) {
- buf->file->f_mapping = dev->anon_inode->i_mapping;
+ buf = drm_gem_prime_export(gobj, flags);
+ if (!IS_ERR(buf))
buf->ops = &amdgpu_dmabuf_ops;
- }
return buf;
}
/**
- * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
- * implementation
+ * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
+ *
* @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
+ * @dma_buf: DMA-buf
*
- * Imports shared DMA buffer memory exported by another device.
+ * Creates an empty SG BO for DMA-buf import.
*
* Returns:
* A new GEM BO of the given DRM device, representing the memory
* described by the given DMA-buf attachment and scatter/gather table.
*/
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg)
+static struct drm_gem_object *
+amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
{
- struct reservation_object *resv = attach->dmabuf->resv;
+ struct dma_resv *resv = dma_buf->resv;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
int ret;
memset(&bp, 0, sizeof(bp));
- bp.size = attach->dmabuf->size;
+ bp.size = dma_buf->size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_CPU;
bp.flags = 0;
bp.type = ttm_bo_type_sg;
bp.resv = resv;
- ww_mutex_lock(&resv->lock, NULL);
+ dma_resv_lock(resv, NULL);
ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret)
goto error;
- bo->tbo.sg = sg;
- bo->tbo.ttm->sg = sg;
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
- if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
bo->prime_shared_count = 1;
- ww_mutex_unlock(&resv->lock);
- return &bo->gem_base;
+ dma_resv_unlock(resv);
+ return &bo->tbo.base;
error:
- ww_mutex_unlock(&resv->lock);
+ dma_resv_unlock(resv);
return ERR_PTR(ret);
}
@@ -421,15 +417,15 @@ error:
* @dev: DRM device
* @dma_buf: Shared DMA buffer
*
- * The main work is done by the &drm_gem_prime_import helper, which in turn
- * uses &amdgpu_gem_prime_import_sg_table.
+ * Import a dma_buf into a the driver and potentially create a new GEM object.
*
* Returns:
* GEM BO representing the shared DMA buffer for the given device.
*/
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
- struct dma_buf *dma_buf)
+ struct dma_buf *dma_buf)
{
+ struct dma_buf_attachment *attach;
struct drm_gem_object *obj;
if (dma_buf->ops == &amdgpu_dmabuf_ops) {
@@ -444,5 +440,17 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
}
}
- return drm_gem_prime_import(dev, dma_buf);
+ obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
+ if (IS_ERR(obj))
+ return obj;
+
+ attach = dma_buf_dynamic_attach(dma_buf, dev->dev, true);
+ if (IS_ERR(attach)) {
+ drm_gem_object_put(obj);
+ return ERR_CAST(attach);
+ }
+
+ get_dma_buf(dma_buf);
+ obj->import_attach = attach;
+ return obj;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index c7056cbe8685..ec447a7b6b28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -25,17 +25,10 @@
#include <drm/drm_gem.h>
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg);
-struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
- struct drm_gem_object *gobj,
+struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
-struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 790263dcc064..3fa18003d4d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -130,13 +130,18 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
AMDGPU_VEGA20_DOORBELL_IH = 0x178,
/* MMSCH: 392~407
* overlap the doorbell assignment with VCN as they are mutually exclusive
- * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
+ * VCN engine's doorbell is 32 bit and two VCN ring share one QWORD
*/
- AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
+ AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* VNC0 */
AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189,
AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A,
AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B,
+ AMDGPU_VEGA20_DOORBELL64_VCN8_9 = 0x18C, /* VNC1 */
+ AMDGPU_VEGA20_DOORBELL64_VCNa_b = 0x18D,
+ AMDGPU_VEGA20_DOORBELL64_VCNc_d = 0x18E,
+ AMDGPU_VEGA20_DOORBELL64_VCNe_f = 0x18F,
+
AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188,
AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189,
AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index 61bd10310604..a2e8c3dfb4f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -911,7 +911,8 @@ int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low)
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK,
low ? &clk_freq : NULL,
- !low ? &clk_freq : NULL);
+ !low ? &clk_freq : NULL,
+ true);
if (ret)
return 0;
return clk_freq * 100;
@@ -928,7 +929,8 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low)
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK,
low ? &clk_freq : NULL,
- !low ? &clk_freq : NULL);
+ !low ? &clk_freq : NULL,
+ true);
if (ret)
return 0;
return clk_freq * 100;
@@ -944,20 +946,63 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
bool swsmu = is_support_sw_smu(adev);
switch (block_type) {
- case AMD_IP_BLOCK_TYPE_GFX:
case AMD_IP_BLOCK_TYPE_UVD:
- case AMD_IP_BLOCK_TYPE_VCN:
case AMD_IP_BLOCK_TYPE_VCE:
+ if (swsmu) {
+ ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
+ } else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu) {
+ /*
+ * TODO: need a better lock mechanism
+ *
+ * Here adev->pm.mutex lock protection is enforced on
+ * UVD and VCE cases only. Since for other cases, there
+ * may be already lock protection in amdgpu_pm.c.
+ * This is a quick fix for the deadlock issue below.
+ * NFO: task ocltst:2028 blocked for more than 120 seconds.
+ * Tainted: G OE 5.0.0-37-generic #40~18.04.1-Ubuntu
+ * echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ * cltst D 0 2028 2026 0x00000000
+ * all Trace:
+ * __schedule+0x2c0/0x870
+ * schedule+0x2c/0x70
+ * schedule_preempt_disabled+0xe/0x10
+ * __mutex_lock.isra.9+0x26d/0x4e0
+ * __mutex_lock_slowpath+0x13/0x20
+ * ? __mutex_lock_slowpath+0x13/0x20
+ * mutex_lock+0x2f/0x40
+ * amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu]
+ * gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu]
+ * gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu]
+ * amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu]
+ * pp_dpm_force_performance_level+0xe7/0x100 [amdgpu]
+ * amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu]
+ */
+ mutex_lock(&adev->pm.mutex);
+ ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
+ (adev)->powerplay.pp_handle, block_type, gate));
+ mutex_unlock(&adev->pm.mutex);
+ }
+ break;
+ case AMD_IP_BLOCK_TYPE_GFX:
+ case AMD_IP_BLOCK_TYPE_VCN:
+ case AMD_IP_BLOCK_TYPE_SDMA:
if (swsmu)
ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
- else
+ else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu)
ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
(adev)->powerplay.pp_handle, block_type, gate));
break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ if (swsmu)
+ ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
+ break;
case AMD_IP_BLOCK_TYPE_GMC:
case AMD_IP_BLOCK_TYPE_ACP:
- case AMD_IP_BLOCK_TYPE_SDMA:
- ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
+ if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu)
+ ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
(adev)->powerplay.pp_handle, block_type, gate));
break;
default:
@@ -966,3 +1011,163 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
return ret;
}
+
+int amdgpu_dpm_baco_enter(struct amdgpu_device *adev)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ void *pp_handle = adev->powerplay.pp_handle;
+ struct smu_context *smu = &adev->smu;
+ int ret = 0;
+
+ if (is_support_sw_smu(adev)) {
+ ret = smu_baco_enter(smu);
+ } else {
+ if (!pp_funcs || !pp_funcs->set_asic_baco_state)
+ return -ENOENT;
+
+ /* enter BACO state */
+ ret = pp_funcs->set_asic_baco_state(pp_handle, 1);
+ }
+
+ return ret;
+}
+
+int amdgpu_dpm_baco_exit(struct amdgpu_device *adev)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ void *pp_handle = adev->powerplay.pp_handle;
+ struct smu_context *smu = &adev->smu;
+ int ret = 0;
+
+ if (is_support_sw_smu(adev)) {
+ ret = smu_baco_exit(smu);
+ } else {
+ if (!pp_funcs || !pp_funcs->set_asic_baco_state)
+ return -ENOENT;
+
+ /* exit BACO state */
+ ret = pp_funcs->set_asic_baco_state(pp_handle, 0);
+ }
+
+ return ret;
+}
+
+int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
+ enum pp_mp1_state mp1_state)
+{
+ int ret = 0;
+
+ if (is_support_sw_smu(adev)) {
+ ret = smu_set_mp1_state(&adev->smu, mp1_state);
+ } else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_mp1_state) {
+ ret = adev->powerplay.pp_funcs->set_mp1_state(
+ adev->powerplay.pp_handle,
+ mp1_state);
+ }
+
+ return ret;
+}
+
+bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ void *pp_handle = adev->powerplay.pp_handle;
+ struct smu_context *smu = &adev->smu;
+ bool baco_cap;
+
+ if (is_support_sw_smu(adev)) {
+ return smu_baco_is_support(smu);
+ } else {
+ if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
+ return false;
+
+ if (pp_funcs->get_asic_baco_capability(pp_handle, &baco_cap))
+ return false;
+
+ return baco_cap ? true : false;
+ }
+}
+
+int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ void *pp_handle = adev->powerplay.pp_handle;
+ struct smu_context *smu = &adev->smu;
+
+ if (is_support_sw_smu(adev)) {
+ return smu_mode2_reset(smu);
+ } else {
+ if (!pp_funcs || !pp_funcs->asic_reset_mode_2)
+ return -ENOENT;
+
+ return pp_funcs->asic_reset_mode_2(pp_handle);
+ }
+}
+
+int amdgpu_dpm_baco_reset(struct amdgpu_device *adev)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ void *pp_handle = adev->powerplay.pp_handle;
+ struct smu_context *smu = &adev->smu;
+ int ret = 0;
+
+ dev_info(adev->dev, "GPU BACO reset\n");
+
+ if (is_support_sw_smu(adev)) {
+ ret = smu_baco_enter(smu);
+ if (ret)
+ return ret;
+
+ ret = smu_baco_exit(smu);
+ if (ret)
+ return ret;
+ } else {
+ if (!pp_funcs
+ || !pp_funcs->set_asic_baco_state)
+ return -ENOENT;
+
+ /* enter BACO state */
+ ret = pp_funcs->set_asic_baco_state(pp_handle, 1);
+ if (ret)
+ return ret;
+
+ /* exit BACO state */
+ ret = pp_funcs->set_asic_baco_state(pp_handle, 0);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
+ enum PP_SMC_POWER_PROFILE type,
+ bool en)
+{
+ int ret = 0;
+
+ if (is_support_sw_smu(adev))
+ ret = smu_switch_power_profile(&adev->smu, type, en);
+ else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->switch_power_profile)
+ ret = adev->powerplay.pp_funcs->switch_power_profile(
+ adev->powerplay.pp_handle, type, en);
+
+ return ret;
+}
+
+int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
+ uint32_t pstate)
+{
+ int ret = 0;
+
+ if (is_support_sw_smu_xgmi(adev))
+ ret = smu_set_xgmi_pstate(&adev->smu, pstate);
+ else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_xgmi_pstate)
+ ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
+ pstate);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 1c5c0fd76dbf..902ca6c00cca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -298,12 +298,6 @@ enum amdgpu_pcie_gen {
#define amdgpu_dpm_get_current_power_state(adev) \
((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle))
-#define amdgpu_smu_get_current_power_state(adev) \
- ((adev)->smu.ppt_funcs->get_current_power_state(&((adev)->smu)))
-
-#define amdgpu_smu_set_power_state(adev) \
- ((adev)->smu.ppt_funcs->set_power_state(&((adev)->smu)))
-
#define amdgpu_dpm_get_pp_num_states(adev, data) \
((adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data))
@@ -347,10 +341,6 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->reset_power_profile_state(\
(adev)->powerplay.pp_handle, request))
-#define amdgpu_dpm_switch_power_profile(adev, type, en) \
- ((adev)->powerplay.pp_funcs->switch_power_profile(\
- (adev)->powerplay.pp_handle, type, en))
-
#define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
(adev)->powerplay.pp_handle, msg_id))
@@ -523,4 +513,24 @@ extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low);
extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low);
+int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
+ uint32_t pstate);
+
+int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
+ enum PP_SMC_POWER_PROFILE type,
+ bool en);
+
+int amdgpu_dpm_baco_reset(struct amdgpu_device *adev);
+
+int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev);
+
+bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev);
+
+int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
+ enum pp_mp1_state mp1_state);
+
+int amdgpu_dpm_baco_exit(struct amdgpu_device *adev);
+
+int amdgpu_dpm_baco_enter(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 5376328d3fd0..94e2fd758e01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include <linux/vga_switcheroo.h>
#include <drm/drm_probe_helper.h>
+#include <linux/mmu_notifier.h>
#include "amdgpu.h"
#include "amdgpu_irq.h"
@@ -42,6 +43,8 @@
#include "amdgpu_amdkfd.h"
+#include "amdgpu_ras.h"
+
/*
* KMS wrapper.
* - 3.0.0 - initial driver
@@ -79,13 +82,14 @@
* - 3.31.0 - Add support for per-flip tiling attribute changes with DC
* - 3.32.0 - Add syncobj timeline support to AMDGPU_CS.
* - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.
+ * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches
+ * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
+ * - 3.36.0 - Allow reading more status registers on si/cik
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 33
+#define KMS_DRIVER_MINOR 36
#define KMS_DRIVER_PATCHLEVEL 0
-#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
-
int amdgpu_vram_limit = 0;
int amdgpu_vis_vram_limit = 0;
int amdgpu_gart_size = -1; /* auto */
@@ -98,7 +102,7 @@ int amdgpu_disp_priority = 0;
int amdgpu_hw_i2c = 0;
int amdgpu_pcie_gen2 = -1;
int amdgpu_msi = -1;
-char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
+char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
int amdgpu_dpm = -1;
int amdgpu_fw_load_type = -1;
int amdgpu_aspm = -1;
@@ -125,11 +129,7 @@ char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
/* OverDrive(bit 14) disabled by default*/
uint amdgpu_pp_feature_mask = 0xffffbfff;
-int amdgpu_ngg = 0;
-int amdgpu_prim_buf_per_se = 0;
-int amdgpu_pos_buf_per_se = 0;
-int amdgpu_cntl_sb_buf_per_se = 0;
-int amdgpu_param_buf_per_se = 0;
+uint amdgpu_force_long_training = 0;
int amdgpu_job_hang_limit = 0;
int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
@@ -143,12 +143,13 @@ int amdgpu_mcbp = 0;
int amdgpu_discovery = -1;
int amdgpu_mes = 0;
int amdgpu_noretry;
+int amdgpu_force_asic_type = -1;
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
};
int amdgpu_ras_enable = -1;
-uint amdgpu_ras_mask = 0xfffffffb;
+uint amdgpu_ras_mask = 0xffffffff;
/**
* DOC: vramlimit (int)
@@ -241,16 +242,21 @@ module_param_named(msi, amdgpu_msi, int, 0444);
*
* The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
* multiple values specified. 0 and negative values are invalidated. They will be adjusted
- * to default timeout.
- * - With one value specified, the setting will apply to all non-compute jobs.
- * - With multiple values specified, the first one will be for GFX. The second one is for Compute.
- * And the third and fourth ones are for SDMA and Video.
+ * to the default timeout.
+ *
+ * - With one value specified, the setting will apply to all non-compute jobs.
+ * - With multiple values specified, the first one will be for GFX.
+ * The second one is for Compute. The third and fourth ones are
+ * for SDMA and Video.
+ *
* By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
* jobs is 10000. And there is no timeout enforced on compute jobs.
*/
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and infinity timeout for compute jobs."
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
+ "for passthrough or sriov, 10000 for all jobs."
" 0: keep default value. negative: infinity timeout), "
- "format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
+ "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
+ "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
/**
@@ -389,6 +395,14 @@ MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
/**
+ * DOC: forcelongtraining (uint)
+ * Force long memory training in resume.
+ * The default is zero, indicates short training in resume.
+ */
+MODULE_PARM_DESC(forcelongtraining, "force memory long training");
+module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444);
+
+/**
* DOC: pcie_gen_cap (uint)
* Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
* The default is 0 (automatic for each asic).
@@ -446,42 +460,6 @@ MODULE_PARM_DESC(virtual_display,
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
/**
- * DOC: ngg (int)
- * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled).
- */
-MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))");
-module_param_named(ngg, amdgpu_ngg, int, 0444);
-
-/**
- * DOC: prim_buf_per_se (int)
- * Override the size of Primitive Buffer per Shader Engine in Byte. The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)");
-module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444);
-
-/**
- * DOC: pos_buf_per_se (int)
- * Override the size of Position Buffer per Shader Engine in Byte. The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)");
-module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444);
-
-/**
- * DOC: cntl_sb_buf_per_se (int)
- * Override the size of Control Sideband per Shader Engine in Byte. The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)");
-module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
-
-/**
- * DOC: param_buf_per_se (int)
- * Override the size of Off-Chip Parameter Cache per Shader Engine in Byte.
- * The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)");
-module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
-
-/**
* DOC: job_hang_limit (int)
* Set how much time allow a job hang and not drop it. The default is 0.
*/
@@ -613,6 +591,16 @@ MODULE_PARM_DESC(noretry,
"Disable retry faults (0 = retry enabled (default), 1 = retry disabled)");
module_param_named(noretry, amdgpu_noretry, int, 0644);
+/**
+ * DOC: force_asic_type (int)
+ * A non negative value used to specify the asic type for all supported GPUs.
+ */
+MODULE_PARM_DESC(force_asic_type,
+ "A non negative value used to specify the asic type for all supported GPUs");
+module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
+
+
+
#ifdef CONFIG_HSA_AMD
/**
* DOC: sched_policy (int)
@@ -996,6 +984,11 @@ static const struct pci_device_id pciidlist[] = {
/* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
+ /* Arcturus */
+ {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
/* Navi10 */
{0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
@@ -1004,6 +997,18 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ /* Navi14 */
+ {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+
+ /* Renoir */
+ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+
+ /* Navi12 */
+ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
{0, 0, 0}
};
@@ -1030,8 +1035,43 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
+#ifdef CONFIG_DRM_AMDGPU_SI
+ if (!amdgpu_si_support) {
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ dev_info(&pdev->dev,
+ "SI support provided by radeon.\n");
+ dev_info(&pdev->dev,
+ "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
+ );
+ return -ENODEV;
+ }
+ }
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ if (!amdgpu_cik_support) {
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_KAVERI:
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ dev_info(&pdev->dev,
+ "CIK support provided by radeon.\n");
+ dev_info(&pdev->dev,
+ "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
+ );
+ return -ENODEV;
+ }
+ }
+#endif
+
/* Get rid of things like offb */
- ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb");
+ ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb");
if (ret)
return ret;
@@ -1074,7 +1114,10 @@ amdgpu_pci_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
- DRM_ERROR("Device removal is currently not supported outside of fbcon\n");
+#ifdef MODULE
+ if (THIS_MODULE->state != MODULE_STATE_GOING)
+#endif
+ DRM_ERROR("Hotplug removal is not supported\n");
drm_dev_unplug(dev);
drm_dev_put(dev);
pci_disable_device(pdev);
@@ -1087,92 +1130,117 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = dev->dev_private;
+ if (amdgpu_ras_intr_triggered())
+ return;
+
/* if we are running in a VM, make sure the device
* torn down properly on reboot/shutdown.
* unfortunately we can't detect certain
* hypervisors so just do this all the time.
*/
+ adev->mp1_state = PP_MP1_STATE_UNLOAD;
amdgpu_device_ip_suspend(adev);
+ adev->mp1_state = PP_MP1_STATE_NONE;
}
static int amdgpu_pmops_suspend(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
- return amdgpu_device_suspend(drm_dev, true, true);
+ return amdgpu_device_suspend(drm_dev, true);
}
static int amdgpu_pmops_resume(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
/* GPU comes up enabled by the bios on resume */
- if (amdgpu_device_is_px(drm_dev)) {
+ if (amdgpu_device_supports_boco(drm_dev) ||
+ amdgpu_device_supports_baco(drm_dev)) {
pm_runtime_disable(dev);
pm_runtime_set_active(dev);
pm_runtime_enable(dev);
}
- return amdgpu_device_resume(drm_dev, true, true);
+ return amdgpu_device_resume(drm_dev, true);
}
static int amdgpu_pmops_freeze(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_dev->dev_private;
+ int r;
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
- return amdgpu_device_suspend(drm_dev, false, true);
+ r = amdgpu_device_suspend(drm_dev, true);
+ if (r)
+ return r;
+ return amdgpu_asic_reset(adev);
}
static int amdgpu_pmops_thaw(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
- return amdgpu_device_resume(drm_dev, false, true);
+ return amdgpu_device_resume(drm_dev, true);
}
static int amdgpu_pmops_poweroff(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
- return amdgpu_device_suspend(drm_dev, true, true);
+ return amdgpu_device_suspend(drm_dev, true);
}
static int amdgpu_pmops_restore(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
- return amdgpu_device_resume(drm_dev, false, true);
+ return amdgpu_device_resume(drm_dev, true);
}
static int amdgpu_pmops_runtime_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
- int ret;
+ struct amdgpu_device *adev = drm_dev->dev_private;
+ int ret, i;
- if (!amdgpu_device_is_px(drm_dev)) {
+ if (!adev->runpm) {
pm_runtime_forbid(dev);
return -EBUSY;
}
- drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ /* wait for all rings to drain before suspending */
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+ if (ring && ring->sched.ready) {
+ ret = amdgpu_fence_wait_empty(ring);
+ if (ret)
+ return -EBUSY;
+ }
+ }
+
+ if (amdgpu_device_supports_boco(drm_dev))
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
drm_kms_helper_poll_disable(drm_dev);
- ret = amdgpu_device_suspend(drm_dev, false, false);
- pci_save_state(pdev);
- pci_disable_device(pdev);
- pci_ignore_hotplug(pdev);
- if (amdgpu_is_atpx_hybrid())
- pci_set_power_state(pdev, PCI_D3cold);
- else if (!amdgpu_has_atpx_dgpu_power_cntl())
- pci_set_power_state(pdev, PCI_D3hot);
- drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
+ ret = amdgpu_device_suspend(drm_dev, false);
+ if (amdgpu_device_supports_boco(drm_dev)) {
+ /* Only need to handle PCI state in the driver for ATPX
+ * PCI core handles it for _PR3.
+ */
+ if (amdgpu_is_atpx_hybrid()) {
+ pci_ignore_hotplug(pdev);
+ } else {
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+ pci_ignore_hotplug(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
+ }
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
+ } else if (amdgpu_device_supports_baco(drm_dev)) {
+ amdgpu_device_baco_enter(drm_dev);
+ }
return 0;
}
@@ -1181,35 +1249,45 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_dev->dev_private;
int ret;
- if (!amdgpu_device_is_px(drm_dev))
+ if (!adev->runpm)
return -EINVAL;
- drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ if (amdgpu_device_supports_boco(drm_dev)) {
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
- if (amdgpu_is_atpx_hybrid() ||
- !amdgpu_has_atpx_dgpu_power_cntl())
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
- ret = pci_enable_device(pdev);
- if (ret)
- return ret;
- pci_set_master(pdev);
-
- ret = amdgpu_device_resume(drm_dev, false, false);
+ /* Only need to handle PCI state in the driver for ATPX
+ * PCI core handles it for _PR3.
+ */
+ if (amdgpu_is_atpx_hybrid()) {
+ pci_set_master(pdev);
+ } else {
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+ pci_set_master(pdev);
+ }
+ } else if (amdgpu_device_supports_baco(drm_dev)) {
+ amdgpu_device_baco_exit(drm_dev);
+ }
+ ret = amdgpu_device_resume(drm_dev, false);
drm_kms_helper_poll_enable(drm_dev);
- drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
+ if (amdgpu_device_supports_boco(drm_dev))
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
return 0;
}
static int amdgpu_pmops_runtime_idle(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_dev->dev_private;
struct drm_crtc *crtc;
- if (!amdgpu_device_is_px(drm_dev)) {
+ if (!adev->runpm) {
pm_runtime_forbid(dev);
return -EBUSY;
}
@@ -1299,66 +1377,6 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
return 0;
}
-int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
-{
- char *input = amdgpu_lockup_timeout;
- char *timeout_setting = NULL;
- int index = 0;
- long timeout;
- int ret = 0;
-
- /*
- * By default timeout for non compute jobs is 10000.
- * And there is no timeout enforced on compute jobs.
- */
- adev->gfx_timeout = msecs_to_jiffies(10000);
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
-
- if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
- while ((timeout_setting = strsep(&input, ",")) &&
- strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
- ret = kstrtol(timeout_setting, 0, &timeout);
- if (ret)
- return ret;
-
- if (timeout == 0) {
- index++;
- continue;
- } else if (timeout < 0) {
- timeout = MAX_SCHEDULE_TIMEOUT;
- } else {
- timeout = msecs_to_jiffies(timeout);
- }
-
- switch (index++) {
- case 0:
- adev->gfx_timeout = timeout;
- break;
- case 1:
- adev->compute_timeout = timeout;
- break;
- case 2:
- adev->sdma_timeout = timeout;
- break;
- case 3:
- adev->video_timeout = timeout;
- break;
- default:
- break;
- }
- }
- /*
- * There is only one value specified and
- * it should apply to all non-compute jobs.
- */
- if (index == 1)
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- }
-
- return ret;
-}
-
static bool
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
bool in_vblank_irq, int *vpos, int *hpos,
@@ -1373,7 +1391,8 @@ static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP | DRIVER_ATOMIC |
DRIVER_GEM |
- DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
+ DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
.load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms,
@@ -1397,9 +1416,6 @@ static struct drm_driver kms_driver = {
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_export = amdgpu_gem_prime_export,
.gem_prime_import = amdgpu_gem_prime_import,
- .gem_prime_res_obj = amdgpu_gem_prime_res_obj,
- .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
- .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
.gem_prime_vmap = amdgpu_gem_prime_vmap,
.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
.gem_prime_mmap = amdgpu_gem_prime_mmap,
@@ -1464,6 +1480,7 @@ static void __exit amdgpu_exit(void)
amdgpu_unregister_atpx_handler();
amdgpu_sync_fini();
amdgpu_fence_slab_fini();
+ mmu_notifier_synchronize();
}
module_init(amdgpu_init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
index 571a6dfb473e..61fcf247a638 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -37,12 +37,14 @@ amdgpu_link_encoder_connector(struct drm_device *dev)
{
struct amdgpu_device *adev = dev->dev_private;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector;
struct drm_encoder *encoder;
struct amdgpu_encoder *amdgpu_encoder;
+ drm_connector_list_iter_begin(dev, &iter);
/* walk the list and link encoders to connectors */
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -55,6 +57,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev)
}
}
}
+ drm_connector_list_iter_end(&iter);
}
void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
@@ -62,8 +65,10 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
struct drm_device *dev = encoder->dev;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices;
@@ -72,6 +77,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
amdgpu_connector->devices, encoder->encoder_type);
}
}
+ drm_connector_list_iter_end(&iter);
}
struct drm_connector *
@@ -79,15 +85,20 @@ amdgpu_get_connector_for_encoder(struct drm_encoder *encoder)
{
struct drm_device *dev = encoder->dev;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct drm_connector *connector;
+ struct drm_connector *connector, *found = NULL;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
- if (amdgpu_encoder->active_device & amdgpu_connector->devices)
- return connector;
+ if (amdgpu_encoder->active_device & amdgpu_connector->devices) {
+ found = connector;
+ break;
+ }
}
- return NULL;
+ drm_connector_list_iter_end(&iter);
+ return found;
}
struct drm_connector *
@@ -95,15 +106,20 @@ amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder)
{
struct drm_device *dev = encoder->dev;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct drm_connector *connector;
+ struct drm_connector *connector, *found = NULL;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
- if (amdgpu_encoder->devices & amdgpu_connector->devices)
- return connector;
+ if (amdgpu_encoder->devices & amdgpu_connector->devices) {
+ found = connector;
+ break;
+ }
}
- return NULL;
+ drm_connector_list_iter_end(&iter);
+ return found;
}
struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index eb3569b46c1e..2672dc64a310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -69,7 +69,7 @@ amdgpufb_release(struct fb_info *info, int user)
return 0;
}
-static struct fb_ops amdgpufb_ops = {
+static const struct fb_ops amdgpufb_ops = {
.owner = THIS_MODULE,
DRM_FB_HELPER_DEFAULT_OPS,
.fb_open = amdgpufb_open,
@@ -131,6 +131,10 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
int aligned_size, size;
int height = mode_cmd->height;
u32 cpp;
+ u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_VRAM_CLEARED |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC;
info = drm_get_format_info(adev->ddev, mode_cmd);
cpp = info->cpp[0];
@@ -138,15 +142,11 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
/* need to align pitch with crtc limits */
mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
fb_tiled);
- domain = amdgpu_display_supported_domains(adev);
-
+ domain = amdgpu_display_supported_domains(adev, flags);
height = ALIGN(mode_cmd->height, 8);
size = mode_cmd->pitches[0] * height;
aligned_size = ALIGN(size, PAGE_SIZE);
- ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_VRAM_CLEARED,
+ ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags,
ttm_bo_type_kernel, NULL, &gobj);
if (ret) {
pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
@@ -168,7 +168,6 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
dev_err(adev->dev, "FB failed to set tiling flags\n");
}
-
ret = amdgpu_bo_pin(abo, domain);
if (ret) {
amdgpu_bo_unreserve(abo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 23085b352cf2..3c01252b1e0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -34,6 +34,7 @@
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
+#include <linux/pm_runtime.h>
#include <drm/drm_debugfs.h>
@@ -154,7 +155,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
-
+ pm_runtime_get_noresume(adev->ddev->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
struct dma_fence *old;
@@ -234,6 +235,7 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
bool amdgpu_fence_process(struct amdgpu_ring *ring)
{
struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct amdgpu_device *adev = ring->adev;
uint32_t seq, last_seq;
int r;
@@ -274,6 +276,8 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
BUG();
dma_fence_put(fence);
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
} while (last_seq != seq);
return true;
@@ -462,18 +466,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
timeout = adev->gfx_timeout;
break;
case AMDGPU_RING_TYPE_COMPUTE:
- /*
- * For non-sriov case, no timeout enforce
- * on compute ring by default. Unless user
- * specifies a timeout for compute ring.
- *
- * For sriov case, always use the timeout
- * as gfx ring
- */
- if (!amdgpu_sriov_vf(ring->adev))
- timeout = adev->compute_timeout;
- else
- timeout = adev->gfx_timeout;
+ timeout = adev->compute_timeout;
break;
case AMDGPU_RING_TYPE_SDMA:
timeout = adev->sdma_timeout;
@@ -748,10 +741,18 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ return 0;
seq_printf(m, "gpu recover\n");
amdgpu_device_gpu_recover(adev, NULL);
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index d79ab1da9e07..e01e681d2a60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -71,7 +71,7 @@
*/
static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
{
- struct page *dummy_page = adev->mman.bdev.glob->dummy_read_page;
+ struct page *dummy_page = ttm_bo_glob.dummy_read_page;
if (adev->dummy_page_addr)
return 0;
@@ -251,7 +251,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
}
mb();
amdgpu_asic_flush_hdp(adev, NULL);
- amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+ for (i = 0; i < adev->num_vmhubs; i++)
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+
return 0;
}
@@ -300,6 +302,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
* @pages: number of pages to bind
* @pagelist: pages to bind
* @dma_addr: DMA addresses of pages
+ * @flags: page table entry flags
*
* Binds the requested pages to the gart page table
* (all asics).
@@ -310,9 +313,9 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
uint64_t flags)
{
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
- unsigned i,t,p;
+ unsigned t,p;
#endif
- int r;
+ int r, i;
if (!adev->gart.ready) {
WARN(1, "trying to bind memory to uninitialized GART !\n");
@@ -336,7 +339,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
mb();
amdgpu_asic_flush_hdp(adev, NULL);
- amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+ for (i = 0; i < adev->num_vmhubs; i++)
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 939f8305511b..4277125a79ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -50,7 +50,7 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
- struct reservation_object *resv,
+ struct dma_resv *resv,
struct drm_gem_object **obj)
{
struct amdgpu_bo *bo;
@@ -85,7 +85,7 @@ retry:
}
return r;
}
- *obj = &bo->gem_base;
+ *obj = &bo->tbo.base;
return 0;
}
@@ -134,7 +134,7 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
return -EPERM;
if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
- abo->tbo.resv != vm->root.base.bo->tbo.resv)
+ abo->tbo.base.resv != vm->root.base.bo->tbo.base.resv)
return -EPERM;
r = amdgpu_bo_reserve(abo, false);
@@ -175,7 +175,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
- r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, false);
+ r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
@@ -215,7 +215,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
union drm_amdgpu_gem_create *args = data;
uint64_t flags = args->in.domain_flags;
uint64_t size = args->in.bo_size;
- struct reservation_object *resv = NULL;
+ struct dma_resv *resv = NULL;
struct drm_gem_object *gobj;
uint32_t handle;
int r;
@@ -252,7 +252,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
if (r)
return r;
- resv = vm->root.base.bo->tbo.resv;
+ resv = vm->root.base.bo->tbo.base.resv;
}
r = amdgpu_gem_object_create(adev, size, args->in.alignment,
@@ -291,6 +291,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
uint32_t handle;
int r;
+ args->addr = untagged_addr(args->addr);
+
if (offset_in_page(args->addr | args->size))
return -EINVAL;
@@ -433,7 +435,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
}
robj = gem_to_amdgpu_bo(gobj);
- ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true,
+ ret = dma_resv_wait_timeout_rcu(robj->tbo.base.resv, true, true,
timeout);
/* ret == 0 means not signaled,
@@ -525,13 +527,41 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
goto error;
}
- r = amdgpu_vm_update_directories(adev, vm);
+ r = amdgpu_vm_update_pdes(adev, vm, false);
error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
}
+/**
+ * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: GEM UAPI flags
+ *
+ * Returns the GEM UAPI flags mapped into hardware for the ASIC.
+ */
+uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
+{
+ uint64_t pte_flag = 0;
+
+ if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ pte_flag |= AMDGPU_PTE_EXECUTABLE;
+ if (flags & AMDGPU_VM_PAGE_READABLE)
+ pte_flag |= AMDGPU_PTE_READABLE;
+ if (flags & AMDGPU_VM_PAGE_WRITEABLE)
+ pte_flag |= AMDGPU_PTE_WRITEABLE;
+ if (flags & AMDGPU_VM_PAGE_PRT)
+ pte_flag |= AMDGPU_PTE_PRT;
+
+ if (adev->gmc.gmc_funcs->map_mtype)
+ pte_flag |= amdgpu_gmc_map_mtype(adev,
+ flags & AMDGPU_VM_MTYPE_MASK);
+
+ return pte_flag;
+}
+
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
@@ -611,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
- r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, false);
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
if (r)
goto error_unref;
@@ -629,7 +659,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
- va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
+ va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
va_flags);
@@ -644,7 +674,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->map_size);
break;
case AMDGPU_VA_OP_REPLACE:
- va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
+ va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
va_flags);
@@ -689,7 +719,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_amdgpu_gem_create_in info;
void __user *out = u64_to_user_ptr(args->value);
- info.bo_size = robj->gem_base.size;
+ info.bo_size = robj->tbo.base.size;
info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
info.domains = robj->preferred_domains;
info.domain_flags = robj->flags;
@@ -747,7 +777,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct amdgpu_device *adev = dev->dev_private;
struct drm_gem_object *gobj;
uint32_t handle;
- u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC;
u32 domain;
int r;
@@ -764,7 +795,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
args->size = (u64)args->pitch * args->height;
args->size = ALIGN(args->size, PAGE_SIZE);
domain = amdgpu_bo_get_preferred_pin_domain(adev,
- amdgpu_display_supported_domains(adev));
+ amdgpu_display_supported_domains(adev, flags));
r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,
ttm_bo_type_device, NULL, &gobj);
if (r)
@@ -819,8 +850,8 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
if (pin_count)
seq_printf(m, " pin count %d", pin_count);
- dma_buf = READ_ONCE(bo->gem_base.dma_buf);
- attachment = READ_ONCE(bo->gem_base.import_attach);
+ dma_buf = READ_ONCE(bo->tbo.base.dma_buf);
+ attachment = READ_ONCE(bo->tbo.base.import_attach);
if (attachment)
seq_printf(m, " imported from %p", dma_buf);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index b8ba6e27c61f..e0f025dd1b14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -31,7 +31,7 @@
*/
#define AMDGPU_GEM_DOMAIN_MAX 0x3
-#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
+#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base)
void amdgpu_gem_object_free(struct drm_gem_object *obj);
int amdgpu_gem_object_open(struct drm_gem_object *obj,
@@ -47,7 +47,7 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev);
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
- struct reservation_object *resv,
+ struct dma_resv *resv,
struct drm_gem_object **obj);
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
@@ -67,6 +67,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
+uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags);
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 74066e1466f7..0f960b498792 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
+#include "amdgpu_ras.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
@@ -231,12 +232,10 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, pipe, me;
+ int i, queue, me;
for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
queue = i % adev->gfx.me.num_queue_per_pipe;
- pipe = (i / adev->gfx.me.num_queue_per_pipe)
- % adev->gfx.me.num_pipe_per_me;
me = (i / adev->gfx.me.num_queue_per_pipe)
/ adev->gfx.me.num_pipe_per_me;
@@ -297,7 +296,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
spin_lock_init(&kiq->ring_lock);
- r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs);
+ r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
if (r)
return r;
@@ -320,10 +319,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
return r;
}
-void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq)
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
- amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
+ amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
amdgpu_ring_fini(ring);
}
@@ -389,7 +387,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
}
- if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) {
+ if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
/* create MQD for each KGQ */
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
@@ -437,7 +435,7 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
struct amdgpu_ring *ring = NULL;
int i;
- if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) {
+ if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
kfree(adev->gfx.me.mqd_backup[i]);
@@ -456,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
}
ring = &adev->gfx.kiq.ring;
- if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring)
- kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]);
kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
@@ -547,12 +543,6 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
return;
- if (!is_support_sw_smu(adev) &&
- (!adev->powerplay.pp_funcs ||
- !adev->powerplay.pp_funcs->set_powergating_by_smu))
- return;
-
-
mutex_lock(&adev->gfx.gfx_off_mutex);
if (!enable)
@@ -569,3 +559,194 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
mutex_unlock(&adev->gfx.gfx_off_mutex);
}
+
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "gfx_err_count",
+ .debugfs_name = "gfx_err_inject",
+ };
+ struct ras_ih_if ih_info = {
+ .cb = amdgpu_gfx_process_ras_data_cb,
+ };
+
+ if (!adev->gfx.ras_if) {
+ adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->gfx.ras_if)
+ return -ENOMEM;
+ adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
+ adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gfx.ras_if->sub_block_index = 0;
+ strcpy(adev->gfx.ras_if->name, "gfx");
+ }
+ fs_info.head = ih_info.head = *adev->gfx.ras_if;
+
+ r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
+ &fs_info, &ih_info);
+ if (r)
+ goto free;
+
+ if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r)
+ goto late_fini;
+ } else {
+ /* free gfx ras_if if ras is not supported */
+ r = 0;
+ goto free;
+ }
+
+ return 0;
+late_fini:
+ amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info);
+free:
+ kfree(adev->gfx.ras_if);
+ adev->gfx.ras_if = NULL;
+ return r;
+}
+
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
+ adev->gfx.ras_if) {
+ struct ras_common_if *ras_if = adev->gfx.ras_if;
+ struct ras_ih_if ih_info = {
+ .head = *ras_if,
+ .cb = amdgpu_gfx_process_ras_data_cb,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
+
+int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ /* TODO ue will trigger an interrupt.
+ *
+ * When “Full RAS” is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood
+ */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ if (adev->gfx.funcs->query_ras_error_count)
+ adev->gfx.funcs->query_ras_error_count(adev, err_data);
+ amdgpu_ras_reset_gpu(adev);
+ }
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->gfx.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ DRM_ERROR("CP ECC ERROR IRQ\n");
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
+
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ BUG_ON(!ring->funcs->emit_rreg);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_rreg(ring, reg);
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+ goto failed_kiq_read;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_read;
+
+ return adev->wb.wb[kiq->reg_val_offs];
+
+failed_kiq_read:
+ pr_err("failed to read reg:%x\n", reg);
+ return ~0;
+}
+
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ BUG_ON(!ring->funcs->emit_wreg);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_wreg(ring, reg, v);
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+ goto failed_kiq_write;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_write;
+
+ return;
+
+failed_kiq_write:
+ pr_err("failed to write reg:%x\n", reg);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 1199b5828b90..ca17ffb01301 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
struct amdgpu_ring *ring,
u64 addr,
u64 seq);
+ void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub);
/* Packet sizes */
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
int query_status_size;
+ int invalidate_tlbs_size;
};
struct amdgpu_kiq {
@@ -90,6 +94,7 @@ struct amdgpu_kiq {
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
const struct kiq_pm4_funcs *pmf;
+ uint32_t reg_val_offs;
};
/*
@@ -165,6 +170,7 @@ struct amdgpu_gfx_config {
uint32_t num_sc_per_sh;
uint32_t num_packer_per_sc;
uint32_t pa_sc_tile_steering_override;
+ uint64_t tcc_disabled_mask;
};
struct amdgpu_cu_info {
@@ -196,28 +202,8 @@ struct amdgpu_gfx_funcs {
uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
u32 queue, u32 vmid);
-};
-
-struct amdgpu_ngg_buf {
- struct amdgpu_bo *bo;
- uint64_t gpu_addr;
- uint32_t size;
- uint32_t bo_size;
-};
-
-enum {
- NGG_PRIM = 0,
- NGG_POS,
- NGG_CNTL,
- NGG_PARAM,
- NGG_BUF_MAX
-};
-
-struct amdgpu_ngg {
- struct amdgpu_ngg_buf buf[NGG_BUF_MAX];
- uint32_t gds_reserve_addr;
- uint32_t gds_reserve_size;
- bool init;
+ int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
+ int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);
};
struct sq_work {
@@ -244,7 +230,7 @@ struct amdgpu_me {
uint32_t num_me;
uint32_t num_pipe_per_me;
uint32_t num_queue_per_pipe;
- void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1];
+ void *mqd_backup[AMDGPU_MAX_GFX_RINGS];
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
@@ -286,9 +272,14 @@ struct amdgpu_gfx {
uint32_t mec2_feature_version;
bool mec_fw_write_wait;
bool me_fw_write_wait;
+ bool cp_fw_write_wait;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
+ struct drm_gpu_scheduler *gfx_sched[AMDGPU_MAX_GFX_RINGS];
+ uint32_t num_gfx_sched;
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
+ struct drm_gpu_scheduler *compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
+ uint32_t num_compute_sched;
unsigned num_compute_rings;
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
@@ -308,9 +299,6 @@ struct amdgpu_gfx {
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
- /* NGG */
- struct amdgpu_ngg ngg;
-
/* gfx off */
bool gfx_off_state; /* true: enabled, false: disabled */
struct mutex gfx_off_mutex;
@@ -352,8 +340,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq);
-void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq);
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
@@ -381,5 +368,14 @@ void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
int pipe, int queue);
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
-
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 924d83e711ef..5884ab590486 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -27,6 +27,8 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
/**
* amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
@@ -220,6 +222,14 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
u64 size_af, size_bf;
+ if (amdgpu_sriov_vf(adev)) {
+ mc->agp_start = 0xffffffffffff;
+ mc->agp_end = 0x0;
+ mc->agp_size = 0;
+
+ return;
+ }
+
if (mc->fb_start > mc->gart_start) {
size_bf = (mc->fb_start & sixteen_gb_mask) -
ALIGN(mc->gart_end + 1, sixteen_gb);
@@ -297,3 +307,69 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
gmc->fault_hash[hash].idx = gmc->last_fault++;
return false;
}
+
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
+ r = adev->umc.funcs->ras_late_init(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
+ r = adev->mmhub.funcs->ras_late_init(adev);
+ if (r)
+ return r;
+ }
+
+ return amdgpu_xgmi_ras_late_init(adev);
+}
+
+void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
+{
+ amdgpu_umc_ras_fini(adev);
+ amdgpu_mmhub_ras_fini(adev);
+ amdgpu_xgmi_ras_fini(adev);
+}
+
+ /*
+ * The latest engine allocation on gfx9/10 is:
+ * Engine 2, 3: firmware
+ * Engine 0, 1, 4~16: amdgpu ring,
+ * subject to change when ring number changes
+ * Engine 17: Gart flushes
+ */
+#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
+#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
+
+int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
+ {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
+ GFXHUB_FREE_VM_INV_ENGS_BITMAP};
+ unsigned i;
+ unsigned vmhub, inv_eng;
+
+ for (i = 0; i < adev->num_rings; ++i) {
+ ring = adev->rings[i];
+ vmhub = ring->funcs->vmhub;
+
+ inv_eng = ffs(vm_inv_engs[vmhub]);
+ if (!inv_eng) {
+ dev_err(adev->dev, "no VM inv eng for ring %s\n",
+ ring->name);
+ return -EINVAL;
+ }
+
+ ring->vm_inv_eng = inv_eng - 1;
+ vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
+
+ dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
+ ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 071145ac67b5..d3c27a3c43f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -77,6 +77,7 @@ struct amdgpu_gmc_fault {
struct amdgpu_vmhub {
uint32_t ctx0_ptb_addr_lo32;
uint32_t ctx0_ptb_addr_hi32;
+ uint32_t vm_inv_eng0_sem;
uint32_t vm_inv_eng0_req;
uint32_t vm_inv_eng0_ack;
uint32_t vm_context0_cntl;
@@ -89,8 +90,11 @@ struct amdgpu_vmhub {
*/
struct amdgpu_gmc_funcs {
/* flush the vm tlb via mmio */
- void (*flush_gpu_tlb)(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type);
+ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type);
+ /* flush the vm tlb via pasid */
+ int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
@@ -99,12 +103,15 @@ struct amdgpu_gmc_funcs {
unsigned pasid);
/* enable/disable PRT support */
void (*set_prt)(struct amdgpu_device *adev, bool enable);
- /* set pte flags based per asic */
- uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
- uint32_t flags);
+ /* map mtype to hardware flags */
+ uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags);
/* get the pde for a given mc addr */
void (*get_vm_pde)(struct amdgpu_device *adev, int level,
u64 *dst, u64 *flags);
+ /* get the pte flags to use for a BO VA mapping */
+ void (*get_vm_pte)(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags);
};
struct amdgpu_xgmi {
@@ -120,21 +127,52 @@ struct amdgpu_xgmi {
/* gpu list in the same hive */
struct list_head head;
bool supported;
+ struct ras_common_if *ras_if;
};
struct amdgpu_gmc {
+ /* FB's physical address in MMIO space (for CPU to
+ * map FB). This is different compared to the agp/
+ * gart/vram_start/end field as the later is from
+ * GPU's view and aper_base is from CPU's view.
+ */
resource_size_t aper_size;
resource_size_t aper_base;
/* for some chips with <= 32MB we need to lie
* about vram size near mc fb location */
u64 mc_vram_size;
u64 visible_vram_size;
+ /* AGP aperture start and end in MC address space
+ * Driver find a hole in the MC address space
+ * to place AGP by setting MC_VM_AGP_BOT/TOP registers
+ * Under VMID0, logical address == MC address. AGP
+ * aperture maps to physical bus or IOVA addressed.
+ * AGP aperture is used to simulate FB in ZFB case.
+ * AGP aperture is also used for page table in system
+ * memory (mainly for APU).
+ *
+ */
u64 agp_size;
u64 agp_start;
u64 agp_end;
+ /* GART aperture start and end in MC address space
+ * Driver find a hole in the MC address space
+ * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR
+ * registers
+ * Under VMID0, logical address inside GART aperture will
+ * be translated through gpuvm gart page table to access
+ * paged system memory
+ */
u64 gart_size;
u64 gart_start;
u64 gart_end;
+ /* Frame buffer aperture of this GPU device. Different from
+ * fb_start (see below), this only covers the local GPU device.
+ * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios)
+ * and calculate vram_start of this local device by adding an
+ * offset inside the XGMI hive.
+ * Under VMID0, logical address == MC address
+ */
u64 vram_start;
u64 vram_end;
/* FB region , it's same as local vram region in single GPU, in XGMI
@@ -153,6 +191,7 @@ struct amdgpu_gmc {
uint32_t fw_version;
struct amdgpu_irq_src vm_fault;
uint32_t vram_type;
+ uint8_t vram_vendor;
uint32_t srbm_soft_reset;
bool prt_warning;
uint64_t stolen_size;
@@ -177,14 +216,17 @@ struct amdgpu_gmc {
struct amdgpu_xgmi xgmi;
struct amdgpu_irq_src ecc_irq;
- struct ras_common_if *ras_if;
};
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+ ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+ ((adev), (pasid), (type), (allhub)))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
+#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
-#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
+#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
/**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
@@ -229,5 +271,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid, uint64_t timestamp);
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
+int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7850084a05e3..60655834d649 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -143,7 +143,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* ring tests don't use a job */
if (job) {
vm = job->vm;
- fence_ctx = job->base.s_fence->scheduled.context;
+ fence_ctx = job->base.s_fence ?
+ job->base.s_fence->scheduled.context : 0;
} else {
vm = NULL;
fence_ctx = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 57b3d8a9bef3..3a67f6c046d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -104,7 +104,7 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence,
*
* Free the pasid only after all the fences in resv are signaled.
*/
-void amdgpu_pasid_free_delayed(struct reservation_object *resv,
+void amdgpu_pasid_free_delayed(struct dma_resv *resv,
unsigned int pasid)
{
struct dma_fence *fence, **fences;
@@ -112,7 +112,7 @@ void amdgpu_pasid_free_delayed(struct reservation_object *resv,
unsigned count;
int r;
- r = reservation_object_get_fences_rcu(resv, NULL, &count, &fences);
+ r = dma_resv_get_fences_rcu(resv, NULL, &count, &fences);
if (r)
goto fallback;
@@ -156,7 +156,7 @@ fallback:
/* Not enough memory for the delayed delete, as last resort
* block for all the fences to complete.
*/
- reservation_object_wait_timeout_rcu(resv, true, false,
+ dma_resv_wait_timeout_rcu(resv, true, false,
MAX_SCHEDULE_TIMEOUT);
amdgpu_pasid_free(pasid);
}
@@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
int r;
if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
- return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false);
+ return amdgpu_sync_fence(sync, ring->vmid_wait, false);
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
if (!fences)
@@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
return -ENOMEM;
}
- r = amdgpu_sync_fence(adev, sync, &array->base, false);
+ r = amdgpu_sync_fence(sync, &array->base, false);
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = &array->base;
return r;
@@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
!dma_fence_is_later(updates, (*id)->flushed_updates))
updates = NULL;
- if ((*id)->owner != vm->entity.fence_context ||
+ if ((*id)->owner != vm->direct.fence_context ||
job->vm_pd_addr != (*id)->pd_gpu_addr ||
updates || !(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
@@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
if (tmp) {
*id = NULL;
- r = amdgpu_sync_fence(adev, sync, tmp, false);
+ r = amdgpu_sync_fence(sync, tmp, false);
return r;
}
needs_flush = true;
@@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+ r = amdgpu_sync_fence(&(*id)->active, fence, false);
if (r)
return r;
@@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
struct dma_fence *flushed;
/* Check all the prerequisites to using this VMID */
- if ((*id)->owner != vm->entity.fence_context)
+ if ((*id)->owner != vm->direct.fence_context)
continue;
if ((*id)->pd_gpu_addr != job->vm_pd_addr)
@@ -368,13 +368,14 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
* are broken on Navi10 and Navi14.
*/
if (needs_flush && (adev->asic_type < CHIP_VEGA10 ||
- adev->asic_type == CHIP_NAVI10))
+ adev->asic_type == CHIP_NAVI10 ||
+ adev->asic_type == CHIP_NAVI14))
continue;
/* Good, we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+ r = amdgpu_sync_fence(&(*id)->active, fence, false);
if (r)
return r;
@@ -434,8 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
id = idle;
/* Remember this submission as user of the VMID */
- r = amdgpu_sync_fence(ring->adev, &id->active,
- fence, false);
+ r = amdgpu_sync_fence(&id->active, fence, false);
if (r)
goto error;
@@ -448,7 +448,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
}
id->pd_gpu_addr = job->vm_pd_addr;
- id->owner = vm->entity.fence_context;
+ id->owner = vm->direct.fence_context;
if (job->vm_needs_flush) {
dma_fence_put(id->last_flush);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 7625419f0fc2..8e58325bbca2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -72,7 +72,7 @@ struct amdgpu_vmid_mgr {
int amdgpu_pasid_alloc(unsigned int bits);
void amdgpu_pasid_free(unsigned int pasid);
-void amdgpu_pasid_free_delayed(struct reservation_object *resv,
+void amdgpu_pasid_free_delayed(struct dma_resv *resv,
unsigned int pasid);
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 6d8f05511aba..111a301ce878 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -66,7 +66,6 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
if (ih->ring == NULL)
return -ENOMEM;
- memset((void *)ih->ring, 0, ih->ring_size + 8);
ih->gpu_addr = dma_addr;
ih->wptr_addr = dma_addr + ih->ring_size;
ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 2a3f5ec298db..5ed4227f304b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -55,6 +55,7 @@
#include "amdgpu_connectors.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_ras.h"
#include <linux/pm_runtime.h>
@@ -87,10 +88,13 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
struct drm_device *dev = adev->ddev;
struct drm_mode_config *mode_config = &dev->mode_config;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
mutex_lock(&mode_config->mutex);
- list_for_each_entry(connector, &mode_config->connector_list, head)
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
amdgpu_connector_hotplug(connector);
+ drm_connector_list_iter_end(&iter);
mutex_unlock(&mode_config->mutex);
/* Just fire off a uevent and let userspace tell us what to do */
drm_helper_hpd_irq_event(dev);
@@ -153,6 +157,22 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
ret = amdgpu_ih_process(adev, &adev->irq.ih);
if (ret == IRQ_HANDLED)
pm_runtime_mark_last_busy(dev->dev);
+
+ /* For the hardware that cannot enable bif ring for both ras_controller_irq
+ * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
+ * register to check whether the interrupt is triggered or not, and properly
+ * ack the interrupt if it is there
+ */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->handle_ras_controller_intr_no_bifring)
+ adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev);
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring)
+ adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev);
+ }
+
return ret;
}
@@ -228,10 +248,19 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
adev->irq.msi_enabled = false;
if (amdgpu_msi_ok(adev)) {
- int ret = pci_enable_msi(adev->pdev);
- if (!ret) {
+ int nvec = pci_msix_vec_count(adev->pdev);
+ unsigned int flags;
+
+ if (nvec <= 0) {
+ flags = PCI_IRQ_MSI;
+ } else {
+ flags = PCI_IRQ_MSI | PCI_IRQ_MSIX;
+ }
+ /* we only need one vector */
+ nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
+ if (nvec > 0) {
adev->irq.msi_enabled = true;
- dev_dbg(adev->dev, "amdgpu: using MSI.\n");
+ dev_dbg(adev->dev, "amdgpu: using MSI/MSI-X.\n");
}
}
@@ -254,7 +283,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2);
adev->irq.installed = true;
- r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
+ /* Use vector 0 for MSI-X */
+ r = drm_irq_install(adev->ddev, pci_irq_vector(adev->pdev, 0));
if (r) {
adev->irq.installed = false;
if (!amdgpu_device_has_dc_support(adev))
@@ -284,7 +314,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
drm_irq_uninstall(adev->ddev);
adev->irq.installed = false;
if (adev->irq.msi_enabled)
- pci_disable_msi(adev->pdev);
+ pci_free_irq_vectors(adev->pdev);
if (!amdgpu_device_has_dc_support(adev))
flush_work(&adev->hotplug_work);
}
@@ -369,7 +399,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
* amdgpu_irq_dispatch - dispatch IRQ to IP blocks
*
* @adev: amdgpu device pointer
- * @entry: interrupt vector pointer
+ * @ih: interrupt ring instance
*
* Dispatches IRQ to IP blocks.
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 9d76e0923a5a..d42be880a236 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -153,7 +153,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
if (r)
return r;
- job->owner = owner;
*f = dma_fence_get(&job->base.s_fence->finished);
amdgpu_job_free_resources(job);
priority = job->base.s_priority;
@@ -193,8 +192,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
fence = amdgpu_sync_get_fence(&job->sync, &explicit);
if (fence && explicit) {
if (drm_sched_dependency_optimized(fence, s_entity)) {
- r = amdgpu_sync_fence(ring->adev, &job->sched_sync,
- fence, false);
+ r = amdgpu_sync_fence(&job->sched_sync, fence, false);
if (r)
DRM_ERROR("Error adding fence (%d)\n", r);
}
@@ -218,7 +216,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
struct dma_fence *fence = NULL, *finished;
struct amdgpu_job *job;
- int r;
+ int r = 0;
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
@@ -243,9 +241,49 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
job->fence = dma_fence_get(fence);
amdgpu_job_free_resources(job);
+
+ fence = r ? ERR_PTR(r) : fence;
return fence;
}
+#define to_drm_sched_job(sched_job) \
+ container_of((sched_job), struct drm_sched_job, queue_node)
+
+void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
+{
+ struct drm_sched_job *s_job;
+ struct drm_sched_entity *s_entity = NULL;
+ int i;
+
+ /* Signal all jobs not yet scheduled */
+ for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+ struct drm_sched_rq *rq = &sched->sched_rq[i];
+
+ if (!rq)
+ continue;
+
+ spin_lock(&rq->lock);
+ list_for_each_entry(s_entity, &rq->entities, list) {
+ while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
+ struct drm_sched_fence *s_fence = s_job->s_fence;
+
+ dma_fence_signal(&s_fence->scheduled);
+ dma_fence_set_error(&s_fence->finished, -EHWPOISON);
+ dma_fence_signal(&s_fence->finished);
+ }
+ }
+ spin_unlock(&rq->lock);
+ }
+
+ /* Signal all jobs already scheduled to HW */
+ list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+ struct drm_sched_fence *s_fence = s_job->s_fence;
+
+ dma_fence_set_error(&s_fence->finished, -EHWPOISON);
+ dma_fence_signal(&s_fence->finished);
+ }
+}
+
const struct drm_sched_backend_ops amdgpu_sched_ops = {
.dependency = amdgpu_job_dependency,
.run_job = amdgpu_job_run,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 51e62504c279..3f7b8433d179 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -49,7 +49,6 @@ struct amdgpu_job {
uint32_t preamble_status;
uint32_t preemption_status;
uint32_t num_ibs;
- void *owner;
bool vm_needs_flush;
uint64_t vm_pd_addr;
unsigned vmid;
@@ -76,4 +75,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
void *owner, struct dma_fence **f);
int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct dma_fence **fence);
+
+void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
new file mode 100644
index 000000000000..5727f00afc8e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
+
+int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
+{
+ INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler);
+
+ return 0;
+}
+
+int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec);
+ }
+
+ return 0;
+}
+
+int amdgpu_jpeg_suspend(struct amdgpu_device *adev)
+{
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ return 0;
+}
+
+int amdgpu_jpeg_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, jpeg.idle_work.work);
+ unsigned int fences = 0;
+ unsigned int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec);
+ }
+
+ if (fences == 0)
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+ AMD_PG_STATE_GATE);
+ else
+ schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
+}
+
+void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ if (set_clocks)
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+ AMD_PG_STATE_UNGATE);
+}
+
+void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring)
+{
+ schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
+}
+
+int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ const unsigned ib_size_dw = 16;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+
+ ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0);
+ ib->ptr[1] = 0xDEADBEEF;
+ for (i = 2; i < 16; i += 2) {
+ ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ib->ptr[i+1] = 0;
+ }
+ ib->length_dw = 16;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ struct dma_fence *fence = NULL;
+ long r = 0;
+
+ r = amdgpu_jpeg_dec_set_reg(ring, 1, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto error;
+ } else if (r < 0) {
+ goto error;
+ } else {
+ r = 0;
+ }
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ dma_fence_put(fence);
+error:
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
new file mode 100644
index 000000000000..bd9ef9cc86de
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_JPEG_H__
+#define __AMDGPU_JPEG_H__
+
+#define AMDGPU_MAX_JPEG_INSTANCES 2
+
+#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
+#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
+
+struct amdgpu_jpeg_reg{
+ unsigned jpeg_pitch;
+};
+
+struct amdgpu_jpeg_inst {
+ struct amdgpu_ring ring_dec;
+ struct amdgpu_irq_src irq;
+ struct amdgpu_jpeg_reg external;
+};
+
+struct amdgpu_jpeg {
+ uint8_t num_jpeg_inst;
+ struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
+ struct amdgpu_jpeg_reg internal;
+ struct drm_gpu_scheduler *jpeg_sched[AMDGPU_MAX_JPEG_INSTANCES];
+ uint32_t num_jpeg_sched;
+ unsigned harvest_config;
+ struct delayed_work idle_work;
+ enum amd_powergating_state cur_state;
+};
+
+int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev);
+int amdgpu_jpeg_suspend(struct amdgpu_device *adev);
+int amdgpu_jpeg_resume(struct amdgpu_device *adev);
+
+void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring);
+
+int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 0cf7e8606fd3..60591dbc2097 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -91,7 +91,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
- if (amdgpu_device_is_px(dev)) {
+ if (adev->runpm) {
pm_runtime_get_sync(dev->dev);
pm_runtime_forbid(dev->dev);
}
@@ -144,49 +144,13 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
struct amdgpu_device *adev;
int r, acpi_status;
-#ifdef CONFIG_DRM_AMDGPU_SI
- if (!amdgpu_si_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
- dev_info(dev->dev,
- "SI support provided by radeon.\n");
- dev_info(dev->dev,
- "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- if (!amdgpu_cik_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_KAVERI:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KABINI:
- case CHIP_MULLINS:
- dev_info(dev->dev,
- "CIK support provided by radeon.\n");
- dev_info(dev->dev,
- "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-
adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
if (adev == NULL) {
return -ENOMEM;
}
dev->dev_private = (void *)adev;
- if ((amdgpu_runtime_pm != 0) &&
- amdgpu_has_atpx() &&
+ if (amdgpu_has_atpx() &&
(amdgpu_is_atpx_hybrid() ||
amdgpu_has_atpx_dgpu_power_cntl()) &&
((flags & AMD_IS_APU) == 0) &&
@@ -205,6 +169,13 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
goto out;
}
+ if (amdgpu_device_supports_boco(dev) &&
+ (amdgpu_runtime_pm != 0)) /* enable runpm by default */
+ adev->runpm = true;
+ else if (amdgpu_device_supports_baco(dev) &&
+ (amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 */
+ adev->runpm = true;
+
/* Call ACPI methods: require modeset init
* but failure is not fatal
*/
@@ -215,7 +186,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
"Error during ACPI methods call\n");
}
- if (amdgpu_device_is_px(dev)) {
+ if (adev->runpm) {
dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP);
pm_runtime_use_autosuspend(dev->dev);
pm_runtime_set_autosuspend_delay(dev->dev, 5000);
@@ -225,11 +196,10 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
pm_runtime_put_autosuspend(dev->dev);
}
- amdgpu_register_gpu_instance(adev);
out:
if (r) {
/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
- if (adev->rmmio && amdgpu_device_is_px(dev))
+ if (adev->rmmio && adev->runpm)
pm_runtime_put_noidle(dev->dev);
amdgpu_driver_unload_kms(dev);
}
@@ -329,6 +299,10 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->dm.dmcu_fw_version;
fw_info->feature = 0;
break;
+ case AMDGPU_INFO_FW_DMCUB:
+ fw_info->ver = adev->dm.dmcub_fw_version;
+ fw_info->feature = 0;
+ break;
default:
return -EINVAL;
}
@@ -408,23 +382,40 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
- if (adev->vcn.ring_dec.sched.ready)
- ++num_rings;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+
+ if (adev->vcn.inst[i].ring_dec.sched.ready)
+ ++num_rings;
+ }
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
- for (i = 0; i < adev->vcn.num_enc_rings; i++)
- if (adev->vcn.ring_enc[i].sched.ready)
- ++num_rings;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->vcn.num_enc_rings; j++)
+ if (adev->vcn.inst[i].ring_enc[j].sched.ready)
+ ++num_rings;
+ }
ib_start_alignment = 64;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
- type = AMD_IP_BLOCK_TYPE_VCN;
- if (adev->vcn.ring_jpeg.sched.ready)
- ++num_rings;
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (adev->jpeg.inst[i].ring_dec.sched.ready)
+ ++num_rings;
+ }
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
@@ -538,9 +529,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
break;
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
- case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ break;
default:
return -EINVAL;
}
@@ -604,9 +598,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_amdgpu_info_vram_gtt vram_gtt;
vram_gtt.vram_size = adev->gmc.real_vram_size -
- atomic64_read(&adev->vram_pin_size);
- vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size -
- atomic64_read(&adev->visible_pin_size);
+ atomic64_read(&adev->vram_pin_size) -
+ AMDGPU_VM_RESERVED_VRAM;
+ vram_gtt.vram_cpu_accessible_size =
+ min(adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size),
+ vram_gtt.vram_size);
vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
vram_gtt.gtt_size *= PAGE_SIZE;
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
@@ -619,15 +616,18 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
memset(&mem, 0, sizeof(mem));
mem.vram.total_heap_size = adev->gmc.real_vram_size;
mem.vram.usable_heap_size = adev->gmc.real_vram_size -
- atomic64_read(&adev->vram_pin_size);
+ atomic64_read(&adev->vram_pin_size) -
+ AMDGPU_VM_RESERVED_VRAM;
mem.vram.heap_usage =
amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
adev->gmc.visible_vram_size;
- mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size -
- atomic64_read(&adev->visible_pin_size);
+ mem.cpu_accessible_vram.usable_heap_size =
+ min(adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size),
+ mem.vram.usable_heap_size);
mem.cpu_accessible_vram.heap_usage =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.cpu_accessible_vram.max_allocation =
@@ -662,20 +662,27 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
sh_num = 0xffffffff;
+ if (info->read_mmr_reg.count > 128)
+ return -EINVAL;
+
regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
if (!regs)
return -ENOMEM;
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
- for (i = 0; i < info->read_mmr_reg.count; i++)
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < info->read_mmr_reg.count; i++) {
if (amdgpu_asic_read_register(adev, se_num, sh_num,
info->read_mmr_reg.dword_offset + i,
&regs[i])) {
DRM_DEBUG_KMS("unallowed offset %#x\n",
info->read_mmr_reg.dword_offset + i);
kfree(regs);
+ amdgpu_gfx_off_ctrl(adev, true);
return -EFAULT;
}
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
n = copy_to_user(out, regs, min(size, alloc_size));
kfree(regs);
return n ? -EFAULT : 0;
@@ -696,10 +703,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (adev->pm.dpm_enabled) {
dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10;
dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10;
- } else if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) &&
- adev->virt.ops->get_pp_clk) {
- dev_info.max_engine_clock = amdgpu_virt_get_sclk(adev, false) * 10;
- dev_info.max_memory_clock = amdgpu_virt_get_mclk(adev, false) * 10;
} else {
dev_info.max_engine_clock = adev->clock.default_sclk * 10;
dev_info.max_memory_clock = adev->clock.default_mclk * 10;
@@ -746,17 +749,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.vce_harvest_config = adev->vce.harvest_config;
dev_info.gc_double_offchip_lds_buf =
adev->gfx.config.double_offchip_lds_buf;
-
- if (amdgpu_ngg) {
- dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr;
- dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size;
- dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr;
- dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size;
- dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr;
- dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size;
- dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr;
- dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size;
- }
dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size;
dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs;
dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
@@ -769,6 +761,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.pa_sc_tile_steering_override =
adev->gfx.config.pa_sc_tile_steering_override;
+ dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask;
+
return copy_to_user(out, &dev_info,
min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
}
@@ -983,6 +977,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
/* Ensure IB tests are run on ring */
flush_delayed_work(&adev->delayed_init_work);
+
+ if (amdgpu_ras_intr_triggered()) {
+ DRM_ERROR("RAS Intr triggered, device disabled!!");
+ return -EHWPOISON;
+ }
+
file_priv->driver_priv = NULL;
r = pm_runtime_get_sync(dev->dev);
@@ -1088,7 +1088,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
amdgpu_vm_fini(adev, &fpriv->vm);
if (pasid)
- amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
+ amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid);
amdgpu_bo_unref(&pd);
idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
@@ -1405,6 +1405,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+ /* DMCUB */
+ query_fw.fw_type = AMDGPU_INFO_FW_DMCUB;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "DMCUB feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
new file mode 100644
index 000000000000..676c48c02d77
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "mmhub_err_count",
+ .debugfs_name = "mmhub_err_inject",
+ };
+
+ if (!adev->mmhub.ras_if) {
+ adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->mmhub.ras_if)
+ return -ENOMEM;
+ adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB;
+ adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mmhub.ras_if->sub_block_index = 0;
+ strcpy(adev->mmhub.ras_if->name, "mmhub");
+ }
+ ih_info.head = fs_info.head = *adev->mmhub.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if,
+ &fs_info, &ih_info);
+ if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) {
+ kfree(adev->mmhub.ras_if);
+ adev->mmhub.ras_if = NULL;
+ }
+
+ return r;
+}
+
+void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
+ adev->mmhub.ras_if) {
+ struct ras_common_if *ras_if = adev->mmhub.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
new file mode 100644
index 000000000000..1cd78940cf82
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_MMHUB_H__
+#define __AMDGPU_MMHUB_H__
+
+struct amdgpu_mmhub_funcs {
+ void (*ras_init)(struct amdgpu_device *adev);
+ int (*ras_late_init)(struct amdgpu_device *adev);
+ void (*query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+};
+
+struct amdgpu_mmhub {
+ struct ras_common_if *ras_if;
+ const struct amdgpu_mmhub_funcs *funcs;
+};
+
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 3971c201f320..828b5167ff12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -51,437 +51,107 @@
#include "amdgpu_amdkfd.h"
/**
- * struct amdgpu_mn_node
+ * amdgpu_mn_invalidate_gfx - callback to notify about mm change
*
- * @it: interval node defining start-last of the affected address range
- * @bos: list of all BOs in the affected address range
- *
- * Manages all BOs which are affected of a certain range of address space.
- */
-struct amdgpu_mn_node {
- struct interval_tree_node it;
- struct list_head bos;
-};
-
-/**
- * amdgpu_mn_destroy - destroy the HMM mirror
- *
- * @work: previously sheduled work item
- *
- * Lazy destroys the notifier from a work item
- */
-static void amdgpu_mn_destroy(struct work_struct *work)
-{
- struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
- struct amdgpu_device *adev = amn->adev;
- struct amdgpu_mn_node *node, *next_node;
- struct amdgpu_bo *bo, *next_bo;
-
- mutex_lock(&adev->mn_lock);
- down_write(&amn->lock);
- hash_del(&amn->node);
- rbtree_postorder_for_each_entry_safe(node, next_node,
- &amn->objects.rb_root, it.rb) {
- list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
- }
- kfree(node);
- }
- up_write(&amn->lock);
- mutex_unlock(&adev->mn_lock);
-
- hmm_mirror_unregister(&amn->mirror);
- kfree(amn);
-}
-
-/**
- * amdgpu_hmm_mirror_release - callback to notify about mm destruction
- *
- * @mirror: the HMM mirror (mm) this callback is about
- *
- * Shedule a work item to lazy destroy HMM mirror.
- */
-static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-
- INIT_WORK(&amn->work, amdgpu_mn_destroy);
- schedule_work(&amn->work);
-}
-
-/**
- * amdgpu_mn_lock - take the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_lock(struct amdgpu_mn *mn)
-{
- if (mn)
- down_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_unlock - drop the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_unlock(struct amdgpu_mn *mn)
-{
- if (mn)
- up_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_read_lock - take the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
-{
- if (blockable)
- down_read(&amn->lock);
- else if (!down_read_trylock(&amn->lock))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * amdgpu_mn_read_unlock - drop the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
-{
- up_read(&amn->lock);
-}
-
-/**
- * amdgpu_mn_invalidate_node - unmap all BOs of a node
- *
- * @node: the node with the BOs to unmap
- * @start: start of address range affected
- * @end: end of address range affected
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
- unsigned long start,
- unsigned long end)
+static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct amdgpu_bo *bo;
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
- list_for_each_entry(bo, &node->bos, mn_list) {
-
- if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
- continue;
-
- r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
- true, false, MAX_SCHEDULE_TIMEOUT);
- if (r <= 0)
- DRM_ERROR("(%ld) failed to wait for user bo\n", r);
- }
-}
-
-/**
- * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * Block for operations on BOs to finish and mark pages as accessed and
- * potentially dirty.
- */
-static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
- const struct hmm_update *update)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
- unsigned long start = update->start;
- unsigned long end = update->end;
- bool blockable = update->blockable;
- struct interval_tree_node *it;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- /* notification is exclusive, but interval is inclusive */
- end -= 1;
+ mutex_lock(&adev->notifier_lock);
- /* TODO we should be able to split locking for interval tree and
- * amdgpu_mn_invalidate_node
- */
- if (amdgpu_mn_read_lock(amn, blockable))
- return -EAGAIN;
+ mmu_interval_set_seq(mni, cur_seq);
- it = interval_tree_iter_first(&amn->objects, start, end);
- while (it) {
- struct amdgpu_mn_node *node;
-
- if (!blockable) {
- amdgpu_mn_read_unlock(amn);
- return -EAGAIN;
- }
-
- node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, start, end);
-
- amdgpu_mn_invalidate_node(node, start, end);
- }
-
- amdgpu_mn_read_unlock(amn);
-
- return 0;
-}
-
-/**
- * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * We temporarily evict all BOs between start and end. This
- * necessitates evicting all user-mode queues of the process. The BOs
- * are restorted in amdgpu_mn_invalidate_range_end_hsa.
- */
-static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
- const struct hmm_update *update)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
- unsigned long start = update->start;
- unsigned long end = update->end;
- bool blockable = update->blockable;
- struct interval_tree_node *it;
-
- /* notification is exclusive, but interval is inclusive */
- end -= 1;
-
- if (amdgpu_mn_read_lock(amn, blockable))
- return -EAGAIN;
-
- it = interval_tree_iter_first(&amn->objects, start, end);
- while (it) {
- struct amdgpu_mn_node *node;
- struct amdgpu_bo *bo;
-
- if (!blockable) {
- amdgpu_mn_read_unlock(amn);
- return -EAGAIN;
- }
-
- node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, start, end);
-
- list_for_each_entry(bo, &node->bos, mn_list) {
- struct kgd_mem *mem = bo->kfd_bo;
-
- if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
- start, end))
- amdgpu_amdkfd_evict_userptr(mem, amn->mm);
- }
- }
-
- amdgpu_mn_read_unlock(amn);
-
- return 0;
+ r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
+ MAX_SCHEDULE_TIMEOUT);
+ mutex_unlock(&adev->notifier_lock);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+ return true;
}
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
-static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
- [AMDGPU_MN_TYPE_GFX] = {
- .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
- .release = amdgpu_hmm_mirror_release
- },
- [AMDGPU_MN_TYPE_HSA] = {
- .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
- .release = amdgpu_hmm_mirror_release
- },
+static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {
+ .invalidate = amdgpu_mn_invalidate_gfx,
};
/**
- * amdgpu_mn_get - create HMM mirror context
+ * amdgpu_mn_invalidate_hsa - callback to notify about mm change
*
- * @adev: amdgpu device pointer
- * @type: type of MMU notifier context
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
- * Creates a HMM mirror context for current->mm.
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process.
*/
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type)
+static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct mm_struct *mm = current->mm;
- struct amdgpu_mn *amn;
- unsigned long key = AMDGPU_MN_KEY(mm, type);
- int r;
-
- mutex_lock(&adev->mn_lock);
- if (down_write_killable(&mm->mmap_sem)) {
- mutex_unlock(&adev->mn_lock);
- return ERR_PTR(-EINTR);
- }
-
- hash_for_each_possible(adev->mn_hash, amn, node, key)
- if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
- goto release_locks;
-
- amn = kzalloc(sizeof(*amn), GFP_KERNEL);
- if (!amn) {
- amn = ERR_PTR(-ENOMEM);
- goto release_locks;
- }
-
- amn->adev = adev;
- amn->mm = mm;
- init_rwsem(&amn->lock);
- amn->type = type;
- amn->objects = RB_ROOT_CACHED;
-
- amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
- r = hmm_mirror_register(&amn->mirror, mm);
- if (r)
- goto free_amn;
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
+ if (!mmu_notifier_range_blockable(range))
+ return false;
-release_locks:
- up_write(&mm->mmap_sem);
- mutex_unlock(&adev->mn_lock);
+ mutex_lock(&adev->notifier_lock);
- return amn;
+ mmu_interval_set_seq(mni, cur_seq);
-free_amn:
- up_write(&mm->mmap_sem);
- mutex_unlock(&adev->mn_lock);
- kfree(amn);
+ amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
+ mutex_unlock(&adev->notifier_lock);
- return ERR_PTR(r);
+ return true;
}
+static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {
+ .invalidate = amdgpu_mn_invalidate_hsa,
+};
+
/**
* amdgpu_mn_register - register a BO for notifier updates
*
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an HMM mirror for the given BO at the specified address.
+ * Registers a mmu_notifier for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
- unsigned long end = addr + amdgpu_bo_size(bo) - 1;
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- enum amdgpu_mn_type type =
- bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
- struct amdgpu_mn *amn;
- struct amdgpu_mn_node *node = NULL, *new_node;
- struct list_head bos;
- struct interval_tree_node *it;
-
- amn = amdgpu_mn_get(adev, type);
- if (IS_ERR(amn))
- return PTR_ERR(amn);
-
- new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
- if (!new_node)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&bos);
-
- down_write(&amn->lock);
-
- while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
- kfree(node);
- node = container_of(it, struct amdgpu_mn_node, it);
- interval_tree_remove(&node->it, &amn->objects);
- addr = min(it->start, addr);
- end = max(it->last, end);
- list_splice(&node->bos, &bos);
- }
-
- if (!node)
- node = new_node;
- else
- kfree(new_node);
-
- bo->mn = amn;
-
- node->it.start = addr;
- node->it.last = end;
- INIT_LIST_HEAD(&node->bos);
- list_splice(&bos, &node->bos);
- list_add(&bo->mn_list, &node->bos);
-
- interval_tree_insert(&node->it, &amn->objects);
-
- up_write(&amn->lock);
-
- return 0;
+ if (bo->kfd_bo)
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm,
+ addr, amdgpu_bo_size(bo),
+ &amdgpu_mn_hsa_ops);
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ amdgpu_bo_size(bo),
+ &amdgpu_mn_gfx_ops);
}
/**
- * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
+ * amdgpu_mn_unregister - unregister a BO for notifier updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of HMM mirror updates from the buffer object.
+ * Remove any registration of mmu notifier updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_mn *amn;
- struct list_head *head;
-
- mutex_lock(&adev->mn_lock);
-
- amn = bo->mn;
- if (amn == NULL) {
- mutex_unlock(&adev->mn_lock);
+ if (!bo->notifier.mm)
return;
- }
-
- down_write(&amn->lock);
-
- /* save the next list entry for later */
- head = bo->mn_list.next;
-
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
-
- if (list_empty(head)) {
- struct amdgpu_mn_node *node;
-
- node = container_of(head, struct amdgpu_mn_node, bos);
- interval_tree_remove(&node->it, &amn->objects);
- kfree(node);
- }
-
- up_write(&amn->lock);
- mutex_unlock(&adev->mn_lock);
-}
-
-/* flags used by HMM internal, not related to CPU/GPU PTE flags */
-static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
- (1 << 0), /* HMM_PFN_VALID */
- (1 << 1), /* HMM_PFN_WRITE */
- 0 /* HMM_PFN_DEVICE_PRIVATE */
-};
-
-static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
- 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
- 0, /* HMM_PFN_NONE */
- 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
-};
-
-void amdgpu_hmm_init_range(struct hmm_range *range)
-{
- if (range) {
- range->flags = hmm_range_flags;
- range->values = hmm_range_values;
- range->pfn_shift = PAGE_SHIFT;
- INIT_LIST_HEAD(&range->list);
- }
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index b8ed68943625..a292238f75eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,63 +30,10 @@
#include <linux/workqueue.h>
#include <linux/interval_tree.h>
-enum amdgpu_mn_type {
- AMDGPU_MN_TYPE_GFX,
- AMDGPU_MN_TYPE_HSA,
-};
-
-/**
- * struct amdgpu_mn
- *
- * @adev: amdgpu device pointer
- * @mm: process address space
- * @type: type of MMU notifier
- * @work: destruction work item
- * @node: hash table node to find structure by adev and mn
- * @lock: rw semaphore protecting the notifier nodes
- * @objects: interval tree containing amdgpu_mn_nodes
- * @mirror: HMM mirror function support
- *
- * Data for each amdgpu device and process address space.
- */
-struct amdgpu_mn {
- /* constant after initialisation */
- struct amdgpu_device *adev;
- struct mm_struct *mm;
- enum amdgpu_mn_type type;
-
- /* only used on destruction */
- struct work_struct work;
-
- /* protected by adev->mn_lock */
- struct hlist_node node;
-
- /* objects protected by lock */
- struct rw_semaphore lock;
- struct rb_root_cached objects;
-
-#ifdef CONFIG_HMM_MIRROR
- /* HMM mirror */
- struct hmm_mirror mirror;
-#endif
-};
-
#if defined(CONFIG_HMM_MIRROR)
-void amdgpu_mn_lock(struct amdgpu_mn *mn);
-void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
-void amdgpu_hmm_init_range(struct hmm_range *range);
#else
-static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
-static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type)
-{
- return NULL;
-}
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
new file mode 100644
index 000000000000..7d5c3a9de9ea
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "pcie_bif_err_count",
+ .debugfs_name = "pcie_bif_err_inject",
+ };
+
+ if (!adev->nbio.ras_if) {
+ adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->nbio.ras_if)
+ return -ENOMEM;
+ adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF;
+ adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->nbio.ras_if->sub_block_index = 0;
+ strcpy(adev->nbio.ras_if->name, "pcie_bif");
+ }
+ ih_info.head = fs_info.head = *adev->nbio.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->nbio.ras_if,
+ &fs_info, &ih_info);
+ if (r)
+ goto free;
+
+ if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+ r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0);
+ if (r)
+ goto late_fini;
+ r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ if (r)
+ goto late_fini;
+ } else {
+ r = 0;
+ goto free;
+ }
+
+ return 0;
+late_fini:
+ amdgpu_ras_late_fini(adev, adev->nbio.ras_if, &ih_info);
+free:
+ kfree(adev->nbio.ras_if);
+ adev->nbio.ras_if = NULL;
+ return r;
+}
+
+void amdgpu_nbio_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) &&
+ adev->nbio.ras_if) {
+ struct ras_common_if *ras_if = adev->nbio.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
new file mode 100644
index 000000000000..919bd566ba3c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_NBIO_H__
+#define __AMDGPU_NBIO_H__
+
+/*
+ * amdgpu nbio functions
+ */
+struct nbio_hdp_flush_reg {
+ u32 ref_and_mask_cp0;
+ u32 ref_and_mask_cp1;
+ u32 ref_and_mask_cp2;
+ u32 ref_and_mask_cp3;
+ u32 ref_and_mask_cp4;
+ u32 ref_and_mask_cp5;
+ u32 ref_and_mask_cp6;
+ u32 ref_and_mask_cp7;
+ u32 ref_and_mask_cp8;
+ u32 ref_and_mask_cp9;
+ u32 ref_and_mask_sdma0;
+ u32 ref_and_mask_sdma1;
+ u32 ref_and_mask_sdma2;
+ u32 ref_and_mask_sdma3;
+ u32 ref_and_mask_sdma4;
+ u32 ref_and_mask_sdma5;
+ u32 ref_and_mask_sdma6;
+ u32 ref_and_mask_sdma7;
+};
+
+struct amdgpu_nbio_funcs {
+ const struct nbio_hdp_flush_reg *hdp_flush_reg;
+ u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
+ u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
+ u32 (*get_rev_id)(struct amdgpu_device *adev);
+ void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
+ void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+ u32 (*get_memsize)(struct amdgpu_device *adev);
+ void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size);
+ void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance);
+ void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
+ bool enable);
+ void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
+ bool enable);
+ void (*ih_doorbell_range)(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index);
+ void (*enable_doorbell_interrupt)(struct amdgpu_device *adev,
+ bool enable);
+ void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+ bool enable);
+ void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
+ bool enable);
+ void (*get_clockgating_state)(struct amdgpu_device *adev,
+ u32 *flags);
+ void (*ih_control)(struct amdgpu_device *adev);
+ void (*init_registers)(struct amdgpu_device *adev);
+ void (*detect_hw_virt)(struct amdgpu_device *adev);
+ void (*remap_hdp_registers)(struct amdgpu_device *adev);
+ void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
+ void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
+ int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
+ int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
+ void (*query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ int (*ras_late_init)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_nbio {
+ const struct nbio_hdp_flush_reg *hdp_flush_reg;
+ struct amdgpu_irq_src ras_controller_irq;
+ struct amdgpu_irq_src ras_err_event_athub_irq;
+ struct ras_common_if *ras_if;
+ const struct amdgpu_nbio_funcs *funcs;
+};
+
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_nbio_ras_fini(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index bea6f298dfdc..e3f16b49e970 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -80,14 +80,11 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
if (bo->pin_count > 0)
amdgpu_bo_subtract_pin_size(bo);
- if (bo->kfd_bo)
- amdgpu_amdkfd_unreserve_memory_limit(bo);
-
amdgpu_bo_kunmap(bo);
- if (bo->gem_base.import_attach)
- drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
- drm_gem_object_release(&bo->gem_base);
+ if (bo->tbo.base.import_attach)
+ drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
+ drm_gem_object_release(&bo->tbo.base);
/* in case amdgpu_device_recover_vram got NULL of bo->parent */
if (!list_empty(&bo->shadow_list)) {
mutex_lock(&adev->shadow_list_lock);
@@ -249,8 +246,9 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
bp.size = size;
bp.byte_align = align;
bp.domain = domain;
- bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.flags = cpu_addr ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED
+ : AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ bp.flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
@@ -345,6 +343,70 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
}
/**
+ * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
+ *
+ * @adev: amdgpu device object
+ * @offset: offset of the BO
+ * @size: size of the BO
+ * @domain: where to place it
+ * @bo_ptr: used to initialize BOs in structures
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Creates a kernel BO at a specific offset in the address space of the domain.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+ uint64_t offset, uint64_t size, uint32_t domain,
+ struct amdgpu_bo **bo_ptr, void **cpu_addr)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ unsigned int i;
+ int r;
+
+ offset &= PAGE_MASK;
+ size = ALIGN(size, PAGE_SIZE);
+
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr,
+ NULL, cpu_addr);
+ if (r)
+ return r;
+
+ /*
+ * Remove the original mem node and create a new one at the request
+ * position.
+ */
+ if (cpu_addr)
+ amdgpu_bo_kunmap(*bo_ptr);
+
+ ttm_bo_mem_put(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.mem);
+
+ for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
+ (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
+ (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
+ }
+ r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
+ &(*bo_ptr)->tbo.mem, &ctx);
+ if (r)
+ goto error;
+
+ if (cpu_addr) {
+ r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
+ if (r)
+ goto error;
+ }
+
+ amdgpu_bo_unreserve(*bo_ptr);
+ return 0;
+
+error:
+ amdgpu_bo_unreserve(*bo_ptr);
+ amdgpu_bo_unref(bo_ptr);
+ return r;
+}
+
+/**
* amdgpu_bo_free_kernel - free BO for kernel use
*
* @bo: amdgpu BO to free
@@ -413,15 +475,50 @@ fail:
return false;
}
+bool amdgpu_bo_support_uswc(u64 bo_flags)
+{
+
+#ifdef CONFIG_X86_32
+ /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
+ * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
+ */
+ return false;
+#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
+ /* Don't try to enable write-combining when it can't work, or things
+ * may be slow
+ * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
+ */
+
+#ifndef CONFIG_COMPILE_TEST
+#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
+ thanks to write-combining
+#endif
+
+ if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
+ DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
+ "better performance thanks to write-combining\n");
+ return false;
+#else
+ /* For architectures that don't support WC memory,
+ * mask out the WC flag from the BO
+ */
+ if (!drm_arch_can_wc_memory())
+ return false;
+
+ return true;
+#endif
+}
+
static int amdgpu_bo_do_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
struct ttm_operation_ctx ctx = {
.interruptible = (bp->type != ttm_bo_type_kernel),
- .no_wait_gpu = false,
+ .no_wait_gpu = bp->no_wait_gpu,
.resv = bp->resv,
- .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
+ .flags = bp->type != ttm_bo_type_kernel ?
+ TTM_OPT_FLAG_ALLOW_RES_EVICT : 0
};
struct amdgpu_bo *bo;
unsigned long page_align, size = bp->size;
@@ -454,7 +551,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
if (bo == NULL)
return -ENOMEM;
- drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
+ drm_gem_private_object_init(adev->ddev, &bo->tbo.base, size);
INIT_LIST_HEAD(&bo->shadow_list);
bo->vm_bo = NULL;
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
@@ -466,33 +563,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
bo->flags = bp->flags;
-#ifdef CONFIG_X86_32
- /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
- * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
- */
- bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
- /* Don't try to enable write-combining when it can't work, or things
- * may be slow
- * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
- */
-
-#ifndef CONFIG_COMPILE_TEST
-#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
- thanks to write-combining
-#endif
-
- if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
- DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
- "better performance thanks to write-combining\n");
- bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-#else
- /* For architectures that don't support WC memory,
- * mask out the WC flag from the BO
- */
- if (!drm_arch_can_wc_memory())
+ if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-#endif
bo->tbo.bdev = &adev->mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -521,7 +593,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
+ r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence);
if (unlikely(r))
goto fail_unreserve;
@@ -544,7 +616,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
fail_unreserve:
if (!bp->resv)
- ww_mutex_unlock(&bo->tbo.resv->lock);
+ dma_resv_unlock(bo->tbo.base.resv);
amdgpu_bo_unref(&bo);
return r;
}
@@ -565,7 +637,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW;
bp.type = ttm_bo_type_kernel;
- bp.resv = bo->tbo.resv;
+ bp.resv = bo->tbo.base.resv;
r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
if (!r) {
@@ -606,13 +678,13 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
if (!bp->resv)
- WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
+ WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv,
NULL));
r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
if (!bp->resv)
- reservation_object_unlock((*bo_ptr)->tbo.resv);
+ dma_resv_unlock((*bo_ptr)->tbo.base.resv);
if (r)
amdgpu_bo_unref(bo_ptr);
@@ -709,7 +781,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
return 0;
}
- r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
+ r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, false, false,
MAX_SCHEDULE_TIMEOUT);
if (r < 0)
return r;
@@ -1051,7 +1123,10 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
struct vm_area_struct *vma)
{
- return ttm_fbdev_mmap(vma, &bo->tbo);
+ if (vma->vm_pgoff != 0)
+ return -EACCES;
+
+ return ttm_bo_mmap_obj(vma, &bo->tbo);
}
/**
@@ -1087,7 +1162,7 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
*/
void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
- lockdep_assert_held(&bo->tbo.resv->lock.base);
+ dma_resv_assert_held(bo->tbo.base.resv);
if (tiling_flags)
*tiling_flags = bo->tiling_flags;
@@ -1212,6 +1287,42 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
}
/**
+ * amdgpu_bo_move_notify - notification about a BO being released
+ * @bo: pointer to a buffer object
+ *
+ * Wipes VRAM buffers whose contents should not be leaked before the
+ * memory is released.
+ */
+void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
+{
+ struct dma_fence *fence = NULL;
+ struct amdgpu_bo *abo;
+ int r;
+
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
+ return;
+
+ abo = ttm_to_amdgpu_bo(bo);
+
+ if (abo->kfd_bo)
+ amdgpu_amdkfd_unreserve_memory_limit(abo);
+
+ if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
+ !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
+ return;
+
+ dma_resv_lock(bo->base.resv, NULL);
+
+ r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
+ if (!WARN_ON(r)) {
+ amdgpu_bo_fence(abo, fence, false);
+ dma_fence_put(fence);
+ }
+
+ dma_resv_unlock(bo->base.resv);
+}
+
+/**
* amdgpu_bo_fault_reserve_notify - notification about a memory fault
* @bo: pointer to a buffer object
*
@@ -1283,12 +1394,12 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
bool shared)
{
- struct reservation_object *resv = bo->tbo.resv;
+ struct dma_resv *resv = bo->tbo.base.resv;
if (shared)
- reservation_object_add_shared_fence(resv, fence);
+ dma_resv_add_shared_fence(resv, fence);
else
- reservation_object_add_excl_fence(resv, fence);
+ dma_resv_add_excl_fence(resv, fence);
}
/**
@@ -1308,7 +1419,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)
int r;
amdgpu_sync_create(&sync);
- amdgpu_sync_resv(adev, &sync, bo->tbo.resv, owner, false);
+ amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, owner, false);
r = amdgpu_sync_wait(&sync, intr);
amdgpu_sync_free(&sync);
@@ -1328,7 +1439,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
- WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
+ WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) &&
!bo->pin_count && bo->tbo.type != ttm_bo_type_kernel);
WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index d60593cc436e..36dec51d1ef1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -30,6 +30,9 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
#define AMDGPU_BO_MAX_PLACEMENTS 3
@@ -41,7 +44,8 @@ struct amdgpu_bo_param {
u32 preferred_domain;
u64 flags;
enum ttm_bo_type type;
- struct reservation_object *resv;
+ bool no_wait_gpu;
+ struct dma_resv *resv;
};
/* bo virtual addresses in a vm */
@@ -94,17 +98,18 @@ struct amdgpu_bo {
/* per VM structure for page tables and with virtual addresses */
struct amdgpu_vm_bo_base *vm_bo;
/* Constant after initialization */
- struct drm_gem_object gem_base;
struct amdgpu_bo *parent;
struct amdgpu_bo *shadow;
struct ttm_bo_kmap_obj dma_buf_vmap;
struct amdgpu_mn *mn;
- union {
- struct list_head mn_list;
- struct list_head shadow_list;
- };
+
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_interval_notifier notifier;
+#endif
+
+ struct list_head shadow_list;
struct kgd_mem *kfd_bo;
};
@@ -192,7 +197,7 @@ static inline unsigned amdgpu_bo_gpu_page_alignment(struct amdgpu_bo *bo)
*/
static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
{
- return drm_vma_node_offset_addr(&bo->tbo.vma_node);
+ return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
}
/**
@@ -238,6 +243,9 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+ uint64_t offset, uint64_t size, uint32_t domain,
+ struct amdgpu_bo **bo_ptr, void **cpu_addr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
@@ -265,6 +273,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_mem_reg *new_mem);
+void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
bool shared);
@@ -308,5 +317,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m);
#endif
+bool amdgpu_bo_support_uswc(u64 bo_flags);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 2b546567853b..b03b1eb7ba04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -37,6 +37,7 @@
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <linux/nospec.h>
+#include <linux/pm_runtime.h>
#include "hwmgr.h"
#define WIDTH_4K 3840
@@ -158,10 +159,18 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
enum amd_pm_state_type pm;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
if (adev->smu.ppt_funcs->get_current_power_state)
- pm = amdgpu_smu_get_current_power_state(adev);
+ pm = smu_get_current_power_state(&adev->smu);
else
pm = adev->pm.dpm.user_state;
} else if (adev->powerplay.pp_funcs->get_current_power_state) {
@@ -170,6 +179,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
pm = adev->pm.dpm.user_state;
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return snprintf(buf, PAGE_SIZE, "%s\n",
(pm == POWER_STATE_TYPE_BATTERY) ? "battery" :
(pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance");
@@ -183,6 +195,10 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
enum amd_pm_state_type state;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
if (strncmp("battery", buf, strlen("battery")) == 0)
state = POWER_STATE_TYPE_BATTERY;
@@ -190,10 +206,12 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
state = POWER_STATE_TYPE_BALANCED;
else if (strncmp("performance", buf, strlen("performance")) == 0)
state = POWER_STATE_TYPE_PERFORMANCE;
- else {
- count = -EINVAL;
- goto fail;
- }
+ else
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
mutex_lock(&adev->pm.mutex);
@@ -206,12 +224,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
adev->pm.dpm.user_state = state;
mutex_unlock(&adev->pm.mutex);
- /* Can't set dpm state when the card is off */
- if (!(adev->flags & AMD_IS_PX) ||
- (ddev->switch_power_state == DRM_SWITCH_POWER_ON))
- amdgpu_pm_compute_clocks(adev);
+ amdgpu_pm_compute_clocks(adev);
}
-fail:
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return count;
}
@@ -282,13 +299,14 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
enum amd_dpm_forced_level level = 0xff;
+ int ret;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return 0;
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return snprintf(buf, PAGE_SIZE, "off\n");
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
level = smu_get_performance_level(&adev->smu);
@@ -297,6 +315,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
else
level = adev->pm.dpm.forced_level;
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return snprintf(buf, PAGE_SIZE, "%s\n",
(level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
(level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
@@ -320,18 +341,9 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
enum amd_dpm_forced_level current_level = 0xff;
int ret = 0;
- /* Can't force performance level when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return -EINVAL;
- if (!amdgpu_sriov_vf(adev)) {
- if (is_support_sw_smu(adev))
- current_level = smu_get_performance_level(&adev->smu);
- else if (adev->powerplay.pp_funcs->get_performance_level)
- current_level = amdgpu_dpm_get_performance_level(adev);
- }
-
if (strncmp("low", buf, strlen("low")) == 0) {
level = AMD_DPM_FORCED_LEVEL_LOW;
} else if (strncmp("high", buf, strlen("high")) == 0) {
@@ -351,24 +363,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
} else if (strncmp("profile_peak", buf, strlen("profile_peak")) == 0) {
level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
} else {
- count = -EINVAL;
- goto fail;
- }
-
- if (amdgpu_sriov_vf(adev)) {
- if (amdgim_is_hwperf(adev) &&
- adev->virt.ops->force_dpm_level) {
- mutex_lock(&adev->pm.mutex);
- adev->virt.ops->force_dpm_level(adev, level);
- mutex_unlock(&adev->pm.mutex);
- return count;
- } else {
- return -EINVAL;
- }
- }
+ return -EINVAL;
+ }
- if (current_level == level)
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
+ if (is_support_sw_smu(adev))
+ current_level = smu_get_performance_level(&adev->smu);
+ else if (adev->powerplay.pp_funcs->get_performance_level)
+ current_level = amdgpu_dpm_get_performance_level(adev);
+
+ if (current_level == level) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return count;
+ }
/* profile_exit setting is valid only when current mode is in profile mode */
if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD |
@@ -377,29 +388,40 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) &&
(level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) {
pr_err("Currently not in any profile mode!\n");
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
}
if (is_support_sw_smu(adev)) {
ret = smu_force_performance_level(&adev->smu, level);
- if (ret)
- count = -EINVAL;
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+ return -EINVAL;
+ }
} else if (adev->powerplay.pp_funcs->force_performance_level) {
mutex_lock(&adev->pm.mutex);
if (adev->pm.dpm.thermal_active) {
- count = -EINVAL;
mutex_unlock(&adev->pm.mutex);
- goto fail;
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+ return -EINVAL;
}
ret = amdgpu_dpm_force_performance_level(adev, level);
- if (ret)
- count = -EINVAL;
- else
+ if (ret) {
+ mutex_unlock(&adev->pm.mutex);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+ return -EINVAL;
+ } else {
adev->pm.dpm.forced_level = level;
+ }
mutex_unlock(&adev->pm.mutex);
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
-fail:
return count;
}
@@ -412,6 +434,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
struct pp_states_info data;
int i, buf_len, ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
ret = smu_get_power_num_states(&adev->smu, &data);
if (ret)
@@ -419,6 +445,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
} else if (adev->powerplay.pp_funcs->get_pp_num_states)
amdgpu_dpm_get_pp_num_states(adev, &data);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
for (i = 0; i < data.nums; i++)
buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i,
@@ -441,6 +470,13 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
enum amd_pm_state_type pm = 0;
int i = 0, ret = 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
pm = smu_get_current_power_state(smu);
ret = smu_get_power_num_states(smu, &data);
@@ -452,6 +488,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
amdgpu_dpm_get_pp_num_states(adev, &data);
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
for (i = 0; i < data.nums; i++) {
if (pm == data.states[i])
break;
@@ -470,6 +509,9 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
if (adev->pp_force_state_enabled)
return amdgpu_get_pp_cur_state(dev, attr, buf);
else
@@ -487,6 +529,9 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
unsigned long idx;
int ret;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
if (strlen(buf) == 1)
adev->pp_force_state_enabled = false;
else if (is_support_sw_smu(adev))
@@ -496,14 +541,18 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
struct pp_states_info data;
ret = kstrtoul(buf, 0, &idx);
- if (ret || idx >= ARRAY_SIZE(data.states)) {
- count = -EINVAL;
- goto fail;
- }
+ if (ret || idx >= ARRAY_SIZE(data.states))
+ return -EINVAL;
+
idx = array_index_nospec(idx, ARRAY_SIZE(data.states));
amdgpu_dpm_get_pp_num_states(adev, &data);
state = data.states[idx];
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
/* only set user selected power states */
if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
state != POWER_STATE_TYPE_DEFAULT) {
@@ -511,8 +560,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
AMD_PP_TASK_ENABLE_USER_STATE, &state);
adev->pp_force_state_enabled = true;
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
}
-fail:
+
return count;
}
@@ -534,17 +585,32 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
char *table = NULL;
- int size;
+ int size, ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
size = smu_sys_get_pp_table(&adev->smu, (void **)&table);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
if (size < 0)
return size;
- }
- else if (adev->powerplay.pp_funcs->get_pp_table)
+ } else if (adev->powerplay.pp_funcs->get_pp_table) {
size = amdgpu_dpm_get_pp_table(adev, &table);
- else
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+ if (size < 0)
+ return size;
+ } else {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return 0;
+ }
if (size >= PAGE_SIZE)
size = PAGE_SIZE - 1;
@@ -563,13 +629,26 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int ret = 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count);
- if (ret)
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
} else if (adev->powerplay.pp_funcs->set_pp_table)
amdgpu_dpm_set_pp_table(adev, buf, count);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return count;
}
@@ -655,6 +734,9 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
const char delimiter[3] = {' ', '\n', '\0'};
uint32_t type;
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
if (count > 127)
return -EINVAL;
@@ -690,18 +772,28 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
tmp_str++;
}
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
ret = smu_od_edit_dpm_table(&adev->smu, type,
parameter, parameter_size);
- if (ret)
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
+ }
} else {
if (adev->powerplay.pp_funcs->odn_edit_dpm_table) {
ret = amdgpu_dpm_odn_edit_dpm_table(adev, type,
parameter, parameter_size);
- if (ret)
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
+ }
}
if (type == PP_OD_COMMIT_DPM_TABLE) {
@@ -709,12 +801,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
amdgpu_dpm_dispatch_task(adev,
AMD_PP_TASK_READJUST_POWER_STATE,
NULL);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return count;
} else {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
}
}
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return count;
}
@@ -725,31 +823,40 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- uint32_t size = 0;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf);
size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size);
size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size);
size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size);
- return size;
} else if (adev->powerplay.pp_funcs->print_clock_levels) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
- return size;
} else {
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+ return size;
}
/**
- * DOC: ppfeatures
+ * DOC: pp_features
*
* The amdgpu driver provides a sysfs API for adjusting what powerplay
- * features to be enabled. The file ppfeatures is used for this. And
+ * features to be enabled. The file pp_features is used for this. And
* this is only available for Vega10 and later dGPUs.
*
* Reading back the file will show you the followings:
@@ -761,7 +868,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
* the corresponding bit from original ppfeature masks and input the
* new ppfeature masks.
*/
-static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
+static ssize_t amdgpu_set_pp_feature_status(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
@@ -771,43 +878,71 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
uint64_t featuremask;
int ret;
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
ret = kstrtou64(buf, 0, &featuremask);
if (ret)
return -EINVAL;
pr_debug("featuremask = 0x%llx\n", featuremask);
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
- ret = smu_set_ppfeature_status(&adev->smu, featuremask);
- if (ret)
+ ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
+ }
} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
- if (ret)
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
+ }
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return count;
}
-static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
+static ssize_t amdgpu_get_pp_feature_status(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
- if (is_support_sw_smu(adev)) {
- return smu_get_ppfeature_status(&adev->smu, buf);
- } else if (adev->powerplay.pp_funcs->get_ppfeature_status)
- return amdgpu_dpm_get_ppfeature_status(adev, buf);
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
- return snprintf(buf, PAGE_SIZE, "\n");
+ if (is_support_sw_smu(adev))
+ size = smu_sys_get_pp_feature_mask(&adev->smu, buf);
+ else if (adev->powerplay.pp_funcs->get_ppfeature_status)
+ size = amdgpu_dpm_get_ppfeature_status(adev, buf);
+ else
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
/**
- * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk
- * pp_dpm_pcie
+ * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk pp_dpm_pcie
*
* The amdgpu driver provides a sysfs API for adjusting what power levels
* are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk,
@@ -823,9 +958,15 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
*
* To manually adjust these states, first select manual using
* power_dpm_force_performance_level.
- * Secondly,Enter a new value for each level by inputing a string that
+ * Secondly, enter a new value for each level by inputing a string that
* contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie"
- * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6.
+ * E.g.,
+ *
+ * .. code-block:: bash
+ *
+ * echo "4 5 6" > pp_dpm_sclk
+ *
+ * will enable sclk levels 4, 5, and 6.
*
* NOTE: change to the dcefclk max dpm level is not supported now
*/
@@ -836,17 +977,27 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
- if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) &&
- adev->virt.ops->get_pp_clk)
- return adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf);
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
/*
@@ -895,18 +1046,25 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
int ret;
uint32_t mask = 0;
- if (amdgpu_sriov_vf(adev))
- return 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
if (ret)
return -EINVAL;
@@ -919,17 +1077,27 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
- if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) &&
- adev->virt.ops->get_pp_clk)
- return adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf);
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
@@ -939,21 +1107,28 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- int ret;
uint32_t mask = 0;
+ int ret;
- if (amdgpu_sriov_vf(adev))
- return 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
if (ret)
return -EINVAL;
@@ -966,13 +1141,27 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
@@ -985,14 +1174,26 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
int ret;
uint32_t mask = 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask);
+ else
+ ret = 0;
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
if (ret)
return -EINVAL;
@@ -1006,13 +1207,27 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
@@ -1025,14 +1240,26 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
int ret;
uint32_t mask = 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask);
+ else
+ ret = 0;
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
if (ret)
return -EINVAL;
@@ -1046,13 +1273,27 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
@@ -1065,14 +1306,26 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
int ret;
uint32_t mask = 0;
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask);
+ else
+ ret = 0;
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
if (ret)
return -EINVAL;
@@ -1086,13 +1339,27 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
@@ -1105,14 +1372,26 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
int ret;
uint32_t mask = 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+ else
+ ret = 0;
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
if (ret)
return -EINVAL;
@@ -1127,12 +1406,23 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint32_t value = 0;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
else if (adev->powerplay.pp_funcs->get_sclk_od)
value = amdgpu_dpm_get_sclk_od(adev);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
@@ -1146,12 +1436,17 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
int ret;
long int value;
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
ret = kstrtol(buf, 0, &value);
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
+ if (ret)
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
@@ -1167,7 +1462,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
}
}
-fail:
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return count;
}
@@ -1178,12 +1475,23 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint32_t value = 0;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
else if (adev->powerplay.pp_funcs->get_mclk_od)
value = amdgpu_dpm_get_mclk_od(adev);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
@@ -1197,12 +1505,17 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
int ret;
long int value;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
ret = kstrtol(buf, 0, &value);
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
+ if (ret)
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
@@ -1218,7 +1531,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
}
}
-fail:
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return count;
}
@@ -1248,13 +1563,27 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_get_power_profile_mode(&adev->smu, buf);
+ size = smu_get_power_profile_mode(&adev->smu, buf);
else if (adev->powerplay.pp_funcs->get_power_profile_mode)
- return amdgpu_dpm_get_power_profile_mode(adev, buf);
+ size = amdgpu_dpm_get_power_profile_mode(adev, buf);
+ else
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
- return snprintf(buf, PAGE_SIZE, "\n");
+ return size;
}
@@ -1279,7 +1608,10 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
tmp[1] = '\0';
ret = kstrtol(tmp, 0, &profile_mode);
if (ret)
- goto fail;
+ return -EINVAL;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
if (count < 2 || count > 127)
@@ -1291,23 +1623,30 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
while (tmp_str[0]) {
sub_str = strsep(&tmp_str, delimiter);
ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
+ if (ret)
+ return -EINVAL;
parameter_size++;
while (isspace(*tmp_str))
tmp_str++;
}
}
parameter[parameter_size] = profile_mode;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size);
+ ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
else if (adev->powerplay.pp_funcs->set_power_profile_mode)
ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
if (!ret)
return count;
-fail:
+
return -EINVAL;
}
@@ -1327,10 +1666,20 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int r, value, size = sizeof(value);
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ r = pm_runtime_get_sync(ddev->dev);
+ if (r < 0)
+ return r;
+
/* read the IP busy sensor */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
(void *)&value, &size);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
if (r)
return r;
@@ -1353,10 +1702,20 @@ static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int r, value, size = sizeof(value);
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ r = pm_runtime_get_sync(ddev->dev);
+ if (r < 0)
+ return r;
+
/* read the IP busy sensor */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
(void *)&value, &size);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
if (r)
return r;
@@ -1382,8 +1741,20 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint64_t count0, count1;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n",
count0, count1, pcie_get_mps(adev->pdev));
}
@@ -1405,6 +1776,9 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
if (adev->unique_id)
return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
@@ -1458,9 +1832,9 @@ static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
amdgpu_get_memory_busy_percent, NULL);
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
-static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
- amdgpu_get_ppfeature_status,
- amdgpu_set_ppfeature_status);
+static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR,
+ amdgpu_get_pp_feature_status,
+ amdgpu_set_pp_feature_status);
static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
@@ -1468,42 +1842,43 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
int channel = to_sensor_dev_attr(attr)->index;
int r, temp = 0, size = sizeof(temp);
- /* Can't get temperature when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
-
if (channel >= PP_TEMP_MAX)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
switch (channel) {
case PP_TEMP_JUNCTION:
/* get current junction temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
(void *)&temp, &size);
- if (r)
- return r;
break;
case PP_TEMP_EDGE:
/* get current edge temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
(void *)&temp, &size);
- if (r)
- return r;
break;
case PP_TEMP_MEM:
/* get current memory temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
(void *)&temp, &size);
- if (r)
- return r;
+ break;
+ default:
+ r = -EINVAL;
break;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (r)
+ return r;
+
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
@@ -1599,15 +1974,27 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
u32 pwm_mode = 0;
+ int ret;
+
+ ret = pm_runtime_get_sync(adev->ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
pwm_mode = smu_get_fan_control_mode(&adev->smu);
} else {
- if (!adev->powerplay.pp_funcs->get_fan_control_mode)
+ if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
+ }
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return sprintf(buf, "%i\n", pwm_mode);
}
@@ -1617,31 +2004,32 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
size_t count)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- int err;
+ int err, ret;
int value;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ err = kstrtoint(buf, 10, &value);
+ if (err)
+ return err;
- if (is_support_sw_smu(adev)) {
- err = kstrtoint(buf, 10, &value);
- if (err)
- return err;
+ ret = pm_runtime_get_sync(adev->ddev->dev);
+ if (ret < 0)
+ return ret;
+ if (is_support_sw_smu(adev)) {
smu_set_fan_control_mode(&adev->smu, value);
} else {
- if (!adev->powerplay.pp_funcs->set_fan_control_mode)
+ if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
-
- err = kstrtoint(buf, 10, &value);
- if (err)
- return err;
+ }
amdgpu_dpm_set_fan_control_mode(adev, value);
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return count;
}
@@ -1668,34 +2056,43 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
u32 value;
u32 pwm_mode;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
+
if (is_support_sw_smu(adev))
pwm_mode = smu_get_fan_control_mode(&adev->smu);
else
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
+
if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
pr_info("manual fan speed control should be enabled first\n");
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
}
err = kstrtou32(buf, 10, &value);
- if (err)
+ if (err) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
value = (value * 100) / 255;
- if (is_support_sw_smu(adev)) {
+ if (is_support_sw_smu(adev))
err = smu_set_fan_speed_percent(&adev->smu, value);
- if (err)
- return err;
- } else if (adev->powerplay.pp_funcs->set_fan_speed_percent) {
+ else if (adev->powerplay.pp_funcs->set_fan_speed_percent)
err = amdgpu_dpm_set_fan_speed_percent(adev, value);
- if (err)
- return err;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
return count;
}
@@ -1708,20 +2105,22 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
int err;
u32 speed = 0;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
- if (is_support_sw_smu(adev)) {
+ if (is_support_sw_smu(adev))
err = smu_get_fan_speed_percent(&adev->smu, &speed);
- if (err)
- return err;
- } else if (adev->powerplay.pp_funcs->get_fan_speed_percent) {
+ else if (adev->powerplay.pp_funcs->get_fan_speed_percent)
err = amdgpu_dpm_get_fan_speed_percent(adev, &speed);
- if (err)
- return err;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
speed = (speed * 255) / 100;
@@ -1736,20 +2135,22 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
int err;
u32 speed = 0;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
- if (is_support_sw_smu(adev)) {
+ if (is_support_sw_smu(adev))
err = smu_get_fan_speed_rpm(&adev->smu, &speed);
- if (err)
- return err;
- } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
+ else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed);
- if (err)
- return err;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
return sprintf(buf, "%i\n", speed);
}
@@ -1763,8 +2164,16 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
u32 size = sizeof(min_rpm);
int r;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
(void *)&min_rpm, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -1780,8 +2189,16 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
u32 size = sizeof(max_rpm);
int r;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
(void *)&max_rpm, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -1796,20 +2213,22 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
int err;
u32 rpm = 0;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
- if (is_support_sw_smu(adev)) {
+ if (is_support_sw_smu(adev))
err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
- if (err)
- return err;
- } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
+ else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm);
- if (err)
- return err;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
return sprintf(buf, "%i\n", rpm);
}
@@ -1823,32 +2242,40 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
u32 value;
u32 pwm_mode;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
+
if (is_support_sw_smu(adev))
pwm_mode = smu_get_fan_control_mode(&adev->smu);
else
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
- if (pwm_mode != AMD_FAN_CTRL_MANUAL)
+ if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -ENODATA;
-
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ }
err = kstrtou32(buf, 10, &value);
- if (err)
+ if (err) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
- if (is_support_sw_smu(adev)) {
+ if (is_support_sw_smu(adev))
err = smu_set_fan_speed_rpm(&adev->smu, value);
- if (err)
- return err;
- } else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) {
+ else if (adev->powerplay.pp_funcs->set_fan_speed_rpm)
err = amdgpu_dpm_set_fan_speed_rpm(adev, value);
- if (err)
- return err;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
return count;
}
@@ -1859,15 +2286,27 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
u32 pwm_mode = 0;
+ int ret;
+
+ ret = pm_runtime_get_sync(adev->ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
pwm_mode = smu_get_fan_control_mode(&adev->smu);
} else {
- if (!adev->powerplay.pp_funcs->get_fan_control_mode)
+ if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
+ }
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
}
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1);
}
@@ -1881,12 +2320,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
int value;
u32 pwm_mode;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
-
-
err = kstrtoint(buf, 10, &value);
if (err)
return err;
@@ -1898,14 +2331,24 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
else
return -EINVAL;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
+
if (is_support_sw_smu(adev)) {
smu_set_fan_control_mode(&adev->smu, pwm_mode);
} else {
- if (!adev->powerplay.pp_funcs->set_fan_control_mode)
+ if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
+ }
amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return count;
}
@@ -1914,18 +2357,20 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
u32 vddgfx;
int r, size = sizeof(vddgfx);
- /* Can't get voltage when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
(void *)&vddgfx, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -1944,7 +2389,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
u32 vddnb;
int r, size = sizeof(vddnb);
@@ -1952,14 +2396,17 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
if (!(adev->flags & AMD_IS_APU))
return -EINVAL;
- /* Can't get voltage when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
(void *)&vddnb, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -1978,19 +2425,21 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
u32 query = 0;
int r, size = sizeof(u32);
unsigned uw;
- /* Can't get power when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
(void *)&query, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -2013,16 +2462,27 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
uint32_t limit = 0;
+ ssize_t size;
+ int r;
+
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
if (is_support_sw_smu(adev)) {
- smu_get_power_limit(&adev->smu, &limit, true);
- return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+ smu_get_power_limit(&adev->smu, &limit, true, true);
+ size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
- return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+ size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else {
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
}
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
@@ -2031,16 +2491,27 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
uint32_t limit = 0;
+ ssize_t size;
+ int r;
+
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
if (is_support_sw_smu(adev)) {
- smu_get_power_limit(&adev->smu, &limit, false);
- return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+ smu_get_power_limit(&adev->smu, &limit, false, true);
+ size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
- return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+ size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else {
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
}
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ return size;
}
@@ -2053,20 +2524,32 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
int err;
u32 value;
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
err = kstrtou32(buf, 10, &value);
if (err)
return err;
value = value / 1000000; /* convert to Watt */
- if (is_support_sw_smu(adev)) {
- adev->smu.funcs->set_power_limit(&adev->smu, value);
- } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) {
+
+
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
+
+ if (is_support_sw_smu(adev))
+ err = smu_set_power_limit(&adev->smu, value);
+ else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit)
err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value);
- if (err)
- return err;
- } else {
- return -EINVAL;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
return count;
}
@@ -2076,18 +2559,20 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
uint32_t sclk;
int r, size = sizeof(sclk);
- /* Can't get voltage when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
(void *)&sclk, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -2106,18 +2591,20 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
uint32_t mclk;
int r, size = sizeof(mclk);
- /* Can't get voltage when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
(void *)&mclk, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -2199,9 +2686,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
* - fan1_input: fan speed in RPM
*
- * - fan[1-*]_target: Desired fan speed Unit: revolution/min (RPM)
+ * - fan[1-\*]_target: Desired fan speed Unit: revolution/min (RPM)
*
- * - fan[1-*]_enable: Enable or disable the sensors.1: Enable 0: Disable
+ * - fan[1-\*]_enable: Enable or disable the sensors.1: Enable 0: Disable
*
* hwmon interfaces for GPU clocks:
*
@@ -2297,6 +2784,23 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
struct amdgpu_device *adev = dev_get_drvdata(dev);
umode_t effective_mode = attr->mode;
+ /* under multi-vf mode, the hwmon attributes are all not supported */
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ /* there is no fan under pp one vf mode */
+ if (amdgpu_sriov_is_pp_one_vf(adev) &&
+ (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
+ return 0;
+
/* Skip fan attributes if fan is not present */
if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
@@ -2352,7 +2856,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
effective_mode &= ~S_IWUSR;
}
- if ((adev->flags & AMD_IS_APU) &&
+ if (((adev->flags & AMD_IS_APU) ||
+ adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */
+ adev->family == AMDGPU_FAMILY_KV) && /* not implemented yet */
(attr == &sensor_dev_attr_power1_average.dev_attr.attr ||
attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr||
@@ -2376,6 +2882,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
return 0;
}
+ if ((adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */
+ adev->family == AMDGPU_FAMILY_KV) && /* not implemented yet */
+ (attr == &sensor_dev_attr_in0_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_in0_label.dev_attr.attr))
+ return 0;
+
/* only APUs have vddnb */
if (!(adev->flags & AMD_IS_APU) &&
(attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
@@ -2656,17 +3168,12 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
- if (is_support_sw_smu(adev)) {
- ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_UVD, enable);
- if (ret)
- DRM_ERROR("[SW SMU]: dpm enable uvd failed, state = %s, ret = %d. \n",
- enable ? "true" : "false", ret);
- } else if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
- /* enable/disable UVD */
- mutex_lock(&adev->pm.mutex);
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
- mutex_unlock(&adev->pm.mutex);
- }
+
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
+ if (ret)
+ DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
+ enable ? "enable" : "disable", ret);
+
/* enable/disable Low Memory PState for UVD (4k videos) */
if (adev->asic_type == CHIP_STONEY &&
adev->uvd.decode_image_width >= WIDTH_4K) {
@@ -2683,17 +3190,11 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
- if (is_support_sw_smu(adev)) {
- ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_VCE, enable);
- if (ret)
- DRM_ERROR("[SW SMU]: dpm enable vce failed, state = %s, ret = %d. \n",
- enable ? "true" : "false", ret);
- } else if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
- /* enable/disable VCE */
- mutex_lock(&adev->pm.mutex);
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
- mutex_unlock(&adev->pm.mutex);
- }
+
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
+ if (ret)
+ DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
+ enable ? "enable" : "disable", ret);
}
void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
@@ -2708,42 +3209,14 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
}
-int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev)
+void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
- if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)))
- return ret;
-
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_sclk\n");
- return ret;
- }
-
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_mclk\n");
- return ret;
- }
-
- ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
- if (ret) {
- DRM_ERROR("failed to create device file for dpm state\n");
- return ret;
- }
-
- return ret;
-}
-
-void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev)
-{
- if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)))
- return;
-
- device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
- device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
- device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable);
+ if (ret)
+ DRM_ERROR("Dpm %s jpeg failed, ret = %d. \n",
+ enable ? "enable" : "disable", ret);
}
int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version)
@@ -2820,6 +3293,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file pp_dpm_sclk\n");
return ret;
}
+
+ /* Arcturus does not support standalone mclk/socclk/fclk level setting */
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ dev_attr_pp_dpm_mclk.attr.mode &= ~S_IWUGO;
+ dev_attr_pp_dpm_mclk.store = NULL;
+
+ dev_attr_pp_dpm_socclk.attr.mode &= ~S_IWUGO;
+ dev_attr_pp_dpm_socclk.store = NULL;
+
+ dev_attr_pp_dpm_fclk.attr.mode &= ~S_IWUGO;
+ dev_attr_pp_dpm_fclk.store = NULL;
+ }
+
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_mclk\n");
@@ -2831,10 +3317,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file pp_dpm_socclk\n");
return ret;
}
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_dcefclk\n");
- return ret;
+ if (adev->asic_type != CHIP_ARCTURUS) {
+ ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_dpm_dcefclk\n");
+ return ret;
+ }
}
}
if (adev->asic_type >= CHIP_VEGA20) {
@@ -2844,10 +3332,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
}
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_pcie\n");
- return ret;
+ if (adev->asic_type != CHIP_ARCTURUS) {
+ ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_dpm_pcie\n");
+ return ret;
+ }
}
ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od);
if (ret) {
@@ -2917,10 +3407,10 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
if ((adev->asic_type >= CHIP_VEGA10) &&
!(adev->flags & AMD_IS_APU)) {
ret = device_create_file(adev->dev,
- &dev_attr_ppfeatures);
+ &dev_attr_pp_features);
if (ret) {
DRM_ERROR("failed to create device file "
- "ppfeatures\n");
+ "pp_features\n");
return ret;
}
}
@@ -2951,9 +3441,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
if (adev->asic_type >= CHIP_VEGA10) {
device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk);
- device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
+ if (adev->asic_type != CHIP_ARCTURUS)
+ device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
}
- device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
+ if (adev->asic_type != CHIP_ARCTURUS)
+ device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
if (adev->asic_type >= CHIP_VEGA20)
device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk);
device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
@@ -2974,7 +3466,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_unique_id);
if ((adev->asic_type >= CHIP_VEGA10) &&
!(adev->flags & AMD_IS_APU))
- device_remove_file(adev->dev, &dev_attr_ppfeatures);
+ device_remove_file(adev->dev, &dev_attr_pp_features);
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@@ -2997,7 +3489,8 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
struct smu_dpm_context *smu_dpm = &adev->smu.smu_dpm;
smu_handle_task(&adev->smu,
smu_dpm->dpm_level,
- AMD_PP_TASK_DISPLAY_CONFIG_CHANGE);
+ AMD_PP_TASK_DISPLAY_CONFIG_CHANGE,
+ true);
} else {
if (adev->powerplay.pp_funcs->dispatch_tasks) {
if (!amdgpu_device_has_dc_support(adev)) {
@@ -3133,8 +3626,12 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct drm_device *ddev = adev->ddev;
u32 flags = 0;
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ return r;
amdgpu_device_ip_get_clockgating_state(adev, &flags);
seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);
@@ -3143,23 +3640,28 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
if (!adev->pm.dpm_enabled) {
seq_printf(m, "dpm not enabled\n");
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
return 0;
}
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
- seq_printf(m, "PX asic powered off\n");
- } else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
+
+ if (!is_support_sw_smu(adev) &&
+ adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
mutex_lock(&adev->pm.mutex);
if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level)
adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m);
else
seq_printf(m, "Debugfs support not implemented for this asic\n");
mutex_unlock(&adev->pm.mutex);
+ r = 0;
} else {
- return amdgpu_debugfs_pm_info_pp(m, adev);
+ r = amdgpu_debugfs_pm_info_pp(m, adev);
}
- return 0;
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return r;
}
static const struct drm_info_list amdgpu_pm_info_list[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
index ef31448ee8d8..3da1da277805 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
@@ -41,5 +41,6 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev);
void amdgpu_dpm_thermal_work_handler(struct work_struct *work);
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable);
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable);
+void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
index 0e6dba9f60f0..1311d6aec5d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -52,7 +52,7 @@ static int amdgpu_perf_event_init(struct perf_event *event)
return -ENOENT;
/* update the hw_perf_event struct with config data */
- hwc->conf = event->attr.config;
+ hwc->config = event->attr.config;
return 0;
}
@@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
if (!(flags & PERF_EF_RELOAD))
- pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1);
+ pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 1);
- pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0);
+ pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 0);
break;
default:
break;
@@ -101,13 +101,13 @@ static void amdgpu_perf_read(struct perf_event *event)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf,
+ pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->config,
&count);
break;
default:
count = 0;
break;
- };
+ }
} while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev);
local64_add(count - prev, &event->count);
@@ -126,11 +126,11 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0);
+ pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 0);
break;
default:
break;
- };
+ }
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
@@ -156,11 +156,12 @@ static int amdgpu_perf_add(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1);
+ retval = pe->adev->df.funcs->pmc_start(pe->adev,
+ hwc->config, 1);
break;
default:
return 0;
- };
+ }
if (retval)
return retval;
@@ -184,11 +185,11 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1);
+ pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 1);
break;
default:
break;
- };
+ }
perf_event_update_userpage(event);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index c027e5e7713e..3a1570dafe34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -32,6 +32,9 @@
#include "psp_v3_1.h"
#include "psp_v10_0.h"
#include "psp_v11_0.h"
+#include "psp_v12_0.h"
+
+#include "amdgpu_ras.h"
static void psp_set_funcs(struct amdgpu_device *adev);
@@ -53,13 +56,19 @@ static int psp_early_init(void *handle)
psp->autoload_supported = false;
break;
case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
psp_v11_0_set_psp_funcs(psp);
psp->autoload_supported = false;
break;
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
psp_v11_0_set_psp_funcs(psp);
psp->autoload_supported = true;
break;
+ case CHIP_RENOIR:
+ psp_v12_0_set_psp_funcs(psp);
+ break;
default:
return -EINVAL;
}
@@ -81,6 +90,17 @@ static int psp_sw_init(void *handle)
return ret;
}
+ ret = psp_mem_training_init(psp);
+ if (ret) {
+ DRM_ERROR("Failed to initialize memory training!\n");
+ return ret;
+ }
+ ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
+ if (ret) {
+ DRM_ERROR("Failed to process memory training!\n");
+ return ret;
+ }
+
return 0;
}
@@ -88,6 +108,7 @@ static int psp_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ psp_mem_training_fini(&adev->psp);
release_firmware(adev->psp.sos_fw);
adev->psp.sos_fw = NULL;
release_firmware(adev->psp.asd_fw);
@@ -137,18 +158,26 @@ psp_cmd_submit_buf(struct psp_context *psp,
memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
index = atomic_inc_return(&psp->fence_value);
- ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr,
- fence_mc_addr, index);
+ ret = psp_ring_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index);
if (ret) {
atomic_dec(&psp->fence_value);
mutex_unlock(&psp->mutex);
return ret;
}
+ amdgpu_asic_invalidate_hdp(psp->adev, NULL);
while (*((unsigned int *)psp->fence_buf) != index) {
if (--timeout == 0)
break;
+ /*
+ * Shouldn't wait for timeout when err_event_athub occurs,
+ * because gpu reset thread triggered and lock resource should
+ * be released for psp resume sequence.
+ */
+ if (amdgpu_ras_intr_triggered())
+ break;
msleep(1);
+ amdgpu_asic_invalidate_hdp(psp->adev, NULL);
}
/* In some cases, psp response status is not 0 even there is no
@@ -162,8 +191,9 @@ psp_cmd_submit_buf(struct psp_context *psp,
if (ucode)
DRM_WARN("failed to load ucode id (%d) ",
ucode->ucode_id);
- DRM_WARN("psp command failed and response status is (%d)\n",
- psp->cmd_buf_mem->resp.status);
+ DRM_WARN("psp command (0x%X) failed and response status is (0x%X)\n",
+ psp->cmd_buf_mem->cmd_id,
+ psp->cmd_buf_mem->resp.status);
if (!timeout) {
mutex_unlock(&psp->mutex);
return -EINVAL;
@@ -233,6 +263,8 @@ static int psp_tmr_init(struct psp_context *psp)
{
int ret;
int tmr_size;
+ void *tmr_buf;
+ void **pptr;
/*
* According to HW engineer, they prefer the TMR address be "naturally
@@ -245,7 +277,8 @@ static int psp_tmr_init(struct psp_context *psp)
/* For ASICs support RLC autoload, psp will parse the toc
* and calculate the total size of TMR needed */
- if (psp->toc_start_addr &&
+ if (!amdgpu_sriov_vf(psp->adev) &&
+ psp->toc_start_addr &&
psp->toc_bin_size &&
psp->fw_pri_buf) {
ret = psp_load_toc(psp, &tmr_size);
@@ -255,9 +288,10 @@ static int psp_tmr_init(struct psp_context *psp)
}
}
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
- &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
+ &psp->tmr_bo, &psp->tmr_mc_addr, pptr);
return ret;
}
@@ -278,47 +312,23 @@ static int psp_tmr_load(struct psp_context *psp)
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
- if (ret)
- goto failed;
kfree(cmd);
- return 0;
-
-failed:
- kfree(cmd);
return ret;
}
-static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint64_t asd_mc, uint64_t asd_mc_shared,
- uint32_t size, uint32_t shared_size)
+static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t asd_mc, uint32_t size)
{
cmd->cmd_id = GFX_CMD_ID_LOAD_ASD;
cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(asd_mc);
cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(asd_mc);
cmd->cmd.cmd_load_ta.app_len = size;
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(asd_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(asd_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
-}
-
-static int psp_asd_init(struct psp_context *psp)
-{
- int ret;
-
- /*
- * Allocate 16k memory aligned to 4k from Frame Buffer (local
- * physical) for shared ASD <-> Driver
- */
- ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &psp->asd_shared_bo,
- &psp->asd_shared_mc_addr,
- &psp->asd_shared_buf);
-
- return ret;
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = 0;
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = 0;
+ cmd->cmd.cmd_load_ta.cmd_buf_len = 0;
}
static int psp_asd_load(struct psp_context *psp)
@@ -340,11 +350,49 @@ static int psp_asd_load(struct psp_context *psp)
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size);
- psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->asd_shared_mc_addr,
- psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
+ psp_prep_asd_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
+ psp->asd_ucode_size);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
+ if (!ret) {
+ psp->asd_context.asd_initialized = true;
+ psp->asd_context.session_id = cmd->resp.session_id;
+ }
+
+ kfree(cmd);
+
+ return ret;
+}
+
+static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t session_id)
+{
+ cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
+ cmd->cmd.cmd_unload_ta.session_id = session_id;
+}
+
+static int psp_asd_unload(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->asd_context.asd_initialized)
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ psp_prep_ta_unload_cmd_buf(cmd, psp->asd_context.session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+ if (!ret)
+ psp->asd_context.asd_initialized = false;
kfree(cmd);
@@ -379,18 +427,20 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
return ret;
}
-static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
- uint32_t xgmi_ta_size, uint32_t shared_size)
+static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t ta_bin_mc,
+ uint32_t ta_bin_size,
+ uint64_t ta_shared_mc,
+ uint32_t ta_shared_size)
{
- cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
- cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc);
- cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc);
- cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size;
-
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
+ cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
+ cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
+ cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc);
+ cmd->cmd.cmd_load_ta.app_len = ta_bin_size;
+
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ta_shared_mc);
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ta_shared_mc);
+ cmd->cmd.cmd_load_ta.cmd_buf_len = ta_shared_size;
}
static int psp_xgmi_init_shared_buf(struct psp_context *psp)
@@ -410,6 +460,36 @@ static int psp_xgmi_init_shared_buf(struct psp_context *psp)
return ret;
}
+static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t ta_cmd_id,
+ uint32_t session_id)
+{
+ cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
+ cmd->cmd.cmd_invoke_cmd.session_id = session_id;
+ cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
+}
+
+int psp_ta_invoke(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ uint32_t session_id)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ kfree(cmd);
+
+ return ret;
+}
+
static int psp_xgmi_load(struct psp_context *psp)
{
int ret;
@@ -418,8 +498,6 @@ static int psp_xgmi_load(struct psp_context *psp)
/*
* TODO: bypass the loading in sriov for now
*/
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
@@ -428,9 +506,11 @@ static int psp_xgmi_load(struct psp_context *psp)
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size);
- psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
- psp->xgmi_context.xgmi_shared_mc_addr,
- psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE);
+ psp_prep_ta_load_cmd_buf(cmd,
+ psp->fw_pri_mc_addr,
+ psp->ta_xgmi_ucode_size,
+ psp->xgmi_context.xgmi_shared_mc_addr,
+ PSP_XGMI_SHARED_MEM_SIZE);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -445,29 +525,25 @@ static int psp_xgmi_load(struct psp_context *psp)
return ret;
}
-static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint32_t xgmi_session_id)
-{
- cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
- cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id;
-}
-
static int psp_xgmi_unload(struct psp_context *psp)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* XGMI TA unload currently is not supported on Arcturus */
+ if (adev->asic_type == CHIP_ARCTURUS)
+ return 0;
/*
* TODO: bypass the unloading in sriov for now
*/
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
- psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
+ psp_prep_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -477,40 +553,9 @@ static int psp_xgmi_unload(struct psp_context *psp)
return ret;
}
-static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint32_t ta_cmd_id,
- uint32_t xgmi_session_id)
-{
- cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
- cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id;
- cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
- /* Note: cmd_invoke_cmd.buf is not used for now */
-}
-
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- /*
- * TODO: bypass the loading in sriov for now
- */
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id,
- psp->xgmi_context.session_id);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd,
- psp->fence_buf_mc_addr);
-
- kfree(cmd);
-
- return ret;
+ return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.session_id);
}
static int psp_xgmi_terminate(struct psp_context *psp)
@@ -539,7 +584,9 @@ static int psp_xgmi_initialize(struct psp_context *psp)
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
- if (!psp->adev->psp.ta_fw)
+ if (!psp->adev->psp.ta_fw ||
+ !psp->adev->psp.ta_xgmi_ucode_size ||
+ !psp->adev->psp.ta_xgmi_start_addr)
return -ENOENT;
if (!psp->xgmi_context.initialized) {
@@ -564,20 +611,6 @@ static int psp_xgmi_initialize(struct psp_context *psp)
}
// ras begin
-static void psp_prep_ras_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint64_t ras_ta_mc, uint64_t ras_mc_shared,
- uint32_t ras_ta_size, uint32_t shared_size)
-{
- cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
- cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ras_ta_mc);
- cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ras_ta_mc);
- cmd->cmd.cmd_load_ta.app_len = ras_ta_size;
-
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ras_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ras_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
-}
-
static int psp_ras_init_shared_buf(struct psp_context *psp)
{
int ret;
@@ -613,15 +646,17 @@ static int psp_ras_load(struct psp_context *psp)
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size);
- psp_prep_ras_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
- psp->ras.ras_shared_mc_addr,
- psp->ta_ras_ucode_size, PSP_RAS_SHARED_MEM_SIZE);
+ psp_prep_ta_load_cmd_buf(cmd,
+ psp->fw_pri_mc_addr,
+ psp->ta_ras_ucode_size,
+ psp->ras.ras_shared_mc_addr,
+ PSP_RAS_SHARED_MEM_SIZE);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
if (!ret) {
- psp->ras.ras_initialized = 1;
+ psp->ras.ras_initialized = true;
psp->ras.session_id = cmd->resp.session_id;
}
@@ -630,13 +665,6 @@ static int psp_ras_load(struct psp_context *psp)
return ret;
}
-static void psp_prep_ras_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint32_t ras_session_id)
-{
- cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
- cmd->cmd.cmd_unload_ta.session_id = ras_session_id;
-}
-
static int psp_ras_unload(struct psp_context *psp)
{
int ret;
@@ -652,7 +680,7 @@ static int psp_ras_unload(struct psp_context *psp)
if (!cmd)
return -ENOMEM;
- psp_prep_ras_ta_unload_cmd_buf(cmd, psp->ras.session_id);
+ psp_prep_ta_unload_cmd_buf(cmd, psp->ras.session_id);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -662,40 +690,15 @@ static int psp_ras_unload(struct psp_context *psp)
return ret;
}
-static void psp_prep_ras_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint32_t ta_cmd_id,
- uint32_t ras_session_id)
-{
- cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
- cmd->cmd.cmd_invoke_cmd.session_id = ras_session_id;
- cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
- /* Note: cmd_invoke_cmd.buf is not used for now */
-}
-
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
/*
* TODO: bypass the loading in sriov for now
*/
if (amdgpu_sriov_vf(psp->adev))
return 0;
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_ras_ta_invoke_cmd_buf(cmd, ta_cmd_id,
- psp->ras.session_id);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd,
- psp->fence_buf_mc_addr);
-
- kfree(cmd);
-
- return ret;
+ return psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id);
}
int psp_ras_enable_features(struct psp_context *psp,
@@ -728,6 +731,12 @@ static int psp_ras_terminate(struct psp_context *psp)
{
int ret;
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
if (!psp->ras.ras_initialized)
return 0;
@@ -735,7 +744,7 @@ static int psp_ras_terminate(struct psp_context *psp)
if (ret)
return ret;
- psp->ras.ras_initialized = 0;
+ psp->ras.ras_initialized = false;
/* free ras shared memory */
amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo,
@@ -749,6 +758,18 @@ static int psp_ras_initialize(struct psp_context *psp)
{
int ret;
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->adev->psp.ta_ras_ucode_size ||
+ !psp->adev->psp.ta_ras_start_addr) {
+ dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n");
+ return 0;
+ }
+
if (!psp->ras.ras_initialized) {
ret = psp_ras_init_shared_buf(psp);
if (ret)
@@ -763,6 +784,274 @@ static int psp_ras_initialize(struct psp_context *psp)
}
// ras end
+// HDCP start
+static int psp_hdcp_init_shared_buf(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * Allocate 16k memory aligned to 4k from Frame Buffer (local
+ * physical) for hdcp ta <-> Driver
+ */
+ ret = amdgpu_bo_create_kernel(psp->adev, PSP_HDCP_SHARED_MEM_SIZE,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->hdcp_context.hdcp_shared_bo,
+ &psp->hdcp_context.hdcp_shared_mc_addr,
+ &psp->hdcp_context.hdcp_shared_buf);
+
+ return ret;
+}
+
+static int psp_hdcp_load(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, psp->ta_hdcp_start_addr,
+ psp->ta_hdcp_ucode_size);
+
+ psp_prep_ta_load_cmd_buf(cmd,
+ psp->fw_pri_mc_addr,
+ psp->ta_hdcp_ucode_size,
+ psp->hdcp_context.hdcp_shared_mc_addr,
+ PSP_HDCP_SHARED_MEM_SIZE);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ if (!ret) {
+ psp->hdcp_context.hdcp_initialized = true;
+ psp->hdcp_context.session_id = cmd->resp.session_id;
+ }
+
+ kfree(cmd);
+
+ return ret;
+}
+static int psp_hdcp_initialize(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->adev->psp.ta_hdcp_ucode_size ||
+ !psp->adev->psp.ta_hdcp_start_addr) {
+ dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n");
+ return 0;
+ }
+
+ if (!psp->hdcp_context.hdcp_initialized) {
+ ret = psp_hdcp_init_shared_buf(psp);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_hdcp_load(psp);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int psp_hdcp_unload(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /*
+ * TODO: bypass the unloading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ psp_prep_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ kfree(cmd);
+
+ return ret;
+}
+
+int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ return psp_ta_invoke(psp, ta_cmd_id, psp->hdcp_context.session_id);
+}
+
+static int psp_hdcp_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->hdcp_context.hdcp_initialized)
+ return 0;
+
+ ret = psp_hdcp_unload(psp);
+ if (ret)
+ return ret;
+
+ psp->hdcp_context.hdcp_initialized = false;
+
+ /* free hdcp shared memory */
+ amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo,
+ &psp->hdcp_context.hdcp_shared_mc_addr,
+ &psp->hdcp_context.hdcp_shared_buf);
+
+ return 0;
+}
+// HDCP end
+
+// DTM start
+static int psp_dtm_init_shared_buf(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * Allocate 16k memory aligned to 4k from Frame Buffer (local
+ * physical) for dtm ta <-> Driver
+ */
+ ret = amdgpu_bo_create_kernel(psp->adev, PSP_DTM_SHARED_MEM_SIZE,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->dtm_context.dtm_shared_bo,
+ &psp->dtm_context.dtm_shared_mc_addr,
+ &psp->dtm_context.dtm_shared_buf);
+
+ return ret;
+}
+
+static int psp_dtm_load(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size);
+
+ psp_prep_ta_load_cmd_buf(cmd,
+ psp->fw_pri_mc_addr,
+ psp->ta_dtm_ucode_size,
+ psp->dtm_context.dtm_shared_mc_addr,
+ PSP_DTM_SHARED_MEM_SIZE);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ if (!ret) {
+ psp->dtm_context.dtm_initialized = true;
+ psp->dtm_context.session_id = cmd->resp.session_id;
+ }
+
+ kfree(cmd);
+
+ return ret;
+}
+
+static int psp_dtm_initialize(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->adev->psp.ta_dtm_ucode_size ||
+ !psp->adev->psp.ta_dtm_start_addr) {
+ dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n");
+ return 0;
+ }
+
+ if (!psp->dtm_context.dtm_initialized) {
+ ret = psp_dtm_init_shared_buf(psp);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_dtm_load(psp);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ return psp_ta_invoke(psp, ta_cmd_id, psp->dtm_context.session_id);
+}
+
+static int psp_dtm_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->dtm_context.dtm_initialized)
+ return 0;
+
+ ret = psp_hdcp_unload(psp);
+ if (ret)
+ return ret;
+
+ psp->dtm_context.dtm_initialized = false;
+
+ /* free hdcp shared memory */
+ amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo,
+ &psp->dtm_context.dtm_shared_mc_addr,
+ &psp->dtm_context.dtm_shared_buf);
+
+ return 0;
+}
+// DTM end
+
static int psp_hw_start(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -809,36 +1098,6 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
- ret = psp_asd_init(psp);
- if (ret) {
- DRM_ERROR("PSP asd init failed!\n");
- return ret;
- }
-
- ret = psp_asd_load(psp);
- if (ret) {
- DRM_ERROR("PSP load asd failed!\n");
- return ret;
- }
-
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- ret = psp_xgmi_initialize(psp);
- /* Warning the XGMI seesion initialize failure
- * Instead of stop driver initialization
- */
- if (ret)
- dev_err(psp->adev->dev,
- "XGMI: Failed to initialize XGMI session\n");
- }
-
-
- if (psp->adev->psp.ta_fw) {
- ret = psp_ras_initialize(psp);
- if (ret)
- dev_err(psp->adev->dev,
- "RAS: Failed to initialize RAS\n");
- }
-
return 0;
}
@@ -852,6 +1111,24 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_SDMA1:
*type = GFX_FW_TYPE_SDMA1;
break;
+ case AMDGPU_UCODE_ID_SDMA2:
+ *type = GFX_FW_TYPE_SDMA2;
+ break;
+ case AMDGPU_UCODE_ID_SDMA3:
+ *type = GFX_FW_TYPE_SDMA3;
+ break;
+ case AMDGPU_UCODE_ID_SDMA4:
+ *type = GFX_FW_TYPE_SDMA4;
+ break;
+ case AMDGPU_UCODE_ID_SDMA5:
+ *type = GFX_FW_TYPE_SDMA5;
+ break;
+ case AMDGPU_UCODE_ID_SDMA6:
+ *type = GFX_FW_TYPE_SDMA6;
+ break;
+ case AMDGPU_UCODE_ID_SDMA7:
+ *type = GFX_FW_TYPE_SDMA7;
+ break;
case AMDGPU_UCODE_ID_CP_CE:
*type = GFX_FW_TYPE_CP_CE;
break;
@@ -900,6 +1177,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_VCN:
*type = GFX_FW_TYPE_VCN;
break;
+ case AMDGPU_UCODE_ID_VCN1:
+ *type = GFX_FW_TYPE_VCN1;
+ break;
case AMDGPU_UCODE_ID_DMCU_ERAM:
*type = GFX_FW_TYPE_DMCU_ERAM;
break;
@@ -912,6 +1192,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_VCN1_RAM:
*type = GFX_FW_TYPE_VCN1_RAM;
break;
+ case AMDGPU_UCODE_ID_DMCUB:
+ *type = GFX_FW_TYPE_DMUB;
+ break;
case AMDGPU_UCODE_ID_MAXIMUM:
default:
return -EINVAL;
@@ -920,6 +1203,54 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
return 0;
}
+static void psp_print_fw_hdr(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
+{
+ struct amdgpu_device *adev = psp->adev;
+ struct common_firmware_header *hdr;
+
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA0:
+ case AMDGPU_UCODE_ID_SDMA1:
+ case AMDGPU_UCODE_ID_SDMA2:
+ case AMDGPU_UCODE_ID_SDMA3:
+ case AMDGPU_UCODE_ID_SDMA4:
+ case AMDGPU_UCODE_ID_SDMA5:
+ case AMDGPU_UCODE_ID_SDMA6:
+ case AMDGPU_UCODE_ID_SDMA7:
+ hdr = (struct common_firmware_header *)
+ adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_PFP:
+ hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ hdr = (struct common_firmware_header *)adev->gfx.me_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_RLC_G:
+ hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_SMC:
+ hdr = (struct common_firmware_header *)adev->pm.fw->data;
+ amdgpu_ucode_print_smc_hdr(hdr);
+ break;
+ default:
+ break;
+ }
+}
+
static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd)
{
@@ -980,24 +1311,39 @@ out:
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
(psp_smu_reload_quirk(psp) || psp->autoload_supported))
continue;
+
if (amdgpu_sriov_vf(adev) &&
(ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G))
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SMC))
/*skip ucode loading in SRIOV VF */
continue;
+
if (psp->autoload_supported &&
(ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||
ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT))
/* skip mec JT when autoload is enabled */
continue;
+ psp_print_fw_hdr(psp, ucode);
+
ret = psp_execute_np_fw_load(psp, ucode);
if (ret)
return ret;
/* Start rlc autoload after psp recieved all the gfx firmware */
- if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
+ AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) {
ret = psp_rlc_autoload(psp);
if (ret) {
DRM_ERROR("Failed to start rlc autoload\n");
@@ -1028,16 +1374,13 @@ static int psp_load_fw(struct amdgpu_device *adev)
if (!psp->cmd)
return -ENOMEM;
- /* this fw pri bo is not used under SRIOV */
- if (!amdgpu_sriov_vf(psp->adev)) {
- ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- AMDGPU_GEM_DOMAIN_GTT,
- &psp->fw_pri_bo,
- &psp->fw_pri_mc_addr,
- &psp->fw_pri_buf);
- if (ret)
- goto failed;
- }
+ ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr,
+ &psp->fw_pri_buf);
+ if (ret)
+ goto failed;
ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
@@ -1071,6 +1414,39 @@ skip_memalloc:
if (ret)
goto failed;
+ ret = psp_asd_load(psp);
+ if (ret) {
+ DRM_ERROR("PSP load asd failed!\n");
+ return ret;
+ }
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ ret = psp_xgmi_initialize(psp);
+ /* Warning the XGMI seesion initialize failure
+ * Instead of stop driver initialization
+ */
+ if (ret)
+ dev_err(psp->adev->dev,
+ "XGMI: Failed to initialize XGMI session\n");
+ }
+
+ if (psp->adev->psp.ta_fw) {
+ ret = psp_ras_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAS: Failed to initialize RAS\n");
+
+ ret = psp_hdcp_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "HDCP: Failed to initialize HDCP\n");
+
+ ret = psp_dtm_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "DTM: Failed to initialize DTM\n");
+ }
+
return 0;
failed:
@@ -1115,23 +1491,29 @@ static int psp_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct psp_context *psp = &adev->psp;
+ void *tmr_buf;
+ void **pptr;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
psp->xgmi_context.initialized == 1)
psp_xgmi_terminate(psp);
- if (psp->adev->psp.ta_fw)
+ if (psp->adev->psp.ta_fw) {
psp_ras_terminate(psp);
+ psp_dtm_terminate(psp);
+ psp_hdcp_terminate(psp);
+ }
+
+ psp_asd_unload(psp);
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
- amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+ amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
amdgpu_bo_free_kernel(&psp->fw_pri_bo,
&psp->fw_pri_mc_addr, &psp->fw_pri_buf);
amdgpu_bo_free_kernel(&psp->fence_buf_bo,
&psp->fence_buf_mc_addr, &psp->fence_buf);
- amdgpu_bo_free_kernel(&psp->asd_shared_bo, &psp->asd_shared_mc_addr,
- &psp->asd_shared_buf);
amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
(void **)&psp->cmd_buf_mem);
@@ -1162,6 +1544,16 @@ static int psp_suspend(void *handle)
DRM_ERROR("Failed to terminate ras ta\n");
return ret;
}
+ ret = psp_hdcp_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate hdcp ta\n");
+ return ret;
+ }
+ ret = psp_dtm_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate dtm ta\n");
+ return ret;
+ }
}
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
@@ -1181,6 +1573,12 @@ static int psp_resume(void *handle)
DRM_INFO("PSP is resuming...\n");
+ ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME);
+ if (ret) {
+ DRM_ERROR("Failed to process memory training!\n");
+ return ret;
+ }
+
mutex_lock(&adev->firmware.mutex);
ret = psp_hw_start(psp);
@@ -1191,6 +1589,39 @@ static int psp_resume(void *handle)
if (ret)
goto failed;
+ ret = psp_asd_load(psp);
+ if (ret) {
+ DRM_ERROR("PSP load asd failed!\n");
+ goto failed;
+ }
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ ret = psp_xgmi_initialize(psp);
+ /* Warning the XGMI seesion initialize failure
+ * Instead of stop driver initialization
+ */
+ if (ret)
+ dev_err(psp->adev->dev,
+ "XGMI: Failed to initialize XGMI session\n");
+ }
+
+ if (psp->adev->psp.ta_fw) {
+ ret = psp_ras_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAS: Failed to initialize RAS\n");
+
+ ret = psp_hdcp_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "HDCP: Failed to initialize HDCP\n");
+
+ ret = psp_dtm_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "DTM: Failed to initialize DTM\n");
+ }
+
mutex_unlock(&adev->firmware.mutex);
return 0;
@@ -1220,9 +1651,6 @@ int psp_rlc_autoload_start(struct psp_context *psp)
int ret;
struct psp_gfx_cmd_resp *cmd;
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
-
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
@@ -1248,6 +1676,56 @@ int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
return psp_execute_np_fw_load(&adev->psp, &ucode);
}
+int psp_ring_cmd_submit(struct psp_context *psp,
+ uint64_t cmd_buf_mc_addr,
+ uint64_t fence_mc_addr,
+ int index)
+{
+ unsigned int psp_write_ptr_reg = 0;
+ struct psp_gfx_rb_frame *write_frame;
+ struct psp_ring *ring = &psp->km_ring;
+ struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
+ struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
+ ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t ring_size_dw = ring->ring_size / 4;
+ uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
+
+ /* KM (GPCOM) prepare write pointer */
+ psp_write_ptr_reg = psp_ring_get_wptr(psp);
+
+ /* Update KM RB frame pointer to new frame */
+ /* write_frame ptr increments by size of rb_frame in bytes */
+ /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
+ if ((psp_write_ptr_reg % ring_size_dw) == 0)
+ write_frame = ring_buffer_start;
+ else
+ write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
+ /* Check invalid write_frame ptr address */
+ if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
+ DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+ ring_buffer_start, ring_buffer_end, write_frame);
+ DRM_ERROR("write_frame is pointing to address out of bounds\n");
+ return -EINVAL;
+ }
+
+ /* Initialize KM RB frame */
+ memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
+
+ /* Update KM RB frame */
+ write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
+ write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
+ write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
+ write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
+ write_frame->fence_value = index;
+ amdgpu_asic_flush_hdp(adev, NULL);
+
+ /* Update the write Pointer in DWORDs */
+ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
+ psp_ring_set_wptr(psp, psp_write_ptr_reg);
+ return 0;
+}
+
static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
enum AMDGPU_UCODE_ID ucode_type)
{
@@ -1329,3 +1807,12 @@ const struct amdgpu_ip_block_version psp_v11_0_ip_block =
.rev = 0,
.funcs = &psp_ip_funcs,
};
+
+const struct amdgpu_ip_block_version psp_v12_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index e0fc2a790e53..611021514c52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -32,11 +32,13 @@
#define PSP_FENCE_BUFFER_SIZE 0x1000
#define PSP_CMD_BUFFER_SIZE 0x1000
-#define PSP_ASD_SHARED_MEM_SIZE 0x4000
#define PSP_XGMI_SHARED_MEM_SIZE 0x4000
#define PSP_RAS_SHARED_MEM_SIZE 0x4000
#define PSP_1_MEG 0x100000
#define PSP_TMR_SIZE 0x400000
+#define PSP_HDCP_SHARED_MEM_SIZE 0x4000
+#define PSP_DTM_SHARED_MEM_SIZE 0x4000
+#define PSP_SHARED_MEM_SIZE 0x4000
struct psp_context;
struct psp_xgmi_node_info;
@@ -46,6 +48,8 @@ enum psp_bootloader_cmd {
PSP_BL__LOAD_SYSDRV = 0x10000,
PSP_BL__LOAD_SOSDRV = 0x20000,
PSP_BL__LOAD_KEY_DATABASE = 0x80000,
+ PSP_BL__DRAM_LONG_TRAIN = 0x100000,
+ PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
};
enum psp_ring_type
@@ -89,10 +93,6 @@ struct psp_funcs
enum psp_ring_type ring_type);
int (*ring_destroy)(struct psp_context *psp,
enum psp_ring_type ring_type);
- int (*cmd_submit)(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index);
bool (*compare_sram_data)(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
enum AMDGPU_UCODE_ID ucode_type);
@@ -109,6 +109,11 @@ struct psp_funcs
struct ta_ras_trigger_error_input *info);
int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
int (*rlc_autoload_start)(struct psp_context *psp);
+ int (*mem_training_init)(struct psp_context *psp);
+ void (*mem_training_fini)(struct psp_context *psp);
+ int (*mem_training)(struct psp_context *psp, uint32_t ops);
+ uint32_t (*ring_get_wptr)(struct psp_context *psp);
+ void (*ring_set_wptr)(struct psp_context *psp, uint32_t value);
};
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
@@ -124,6 +129,11 @@ struct psp_xgmi_topology_info {
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};
+struct psp_asd_context {
+ bool asd_initialized;
+ uint32_t session_id;
+};
+
struct psp_xgmi_context {
uint8_t initialized;
uint32_t session_id;
@@ -143,6 +153,66 @@ struct psp_ras_context {
struct amdgpu_ras *ras;
};
+struct psp_hdcp_context {
+ bool hdcp_initialized;
+ uint32_t session_id;
+ struct amdgpu_bo *hdcp_shared_bo;
+ uint64_t hdcp_shared_mc_addr;
+ void *hdcp_shared_buf;
+};
+
+struct psp_dtm_context {
+ bool dtm_initialized;
+ uint32_t session_id;
+ struct amdgpu_bo *dtm_shared_bo;
+ uint64_t dtm_shared_mc_addr;
+ void *dtm_shared_buf;
+};
+
+#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942
+#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000
+#define GDDR6_MEM_TRAINING_OFFSET 0x8000
+/*Define the VRAM size that will be encroached by BIST training.*/
+#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
+
+enum psp_memory_training_init_flag {
+ PSP_MEM_TRAIN_NOT_SUPPORT = 0x0,
+ PSP_MEM_TRAIN_SUPPORT = 0x1,
+ PSP_MEM_TRAIN_INIT_FAILED = 0x2,
+ PSP_MEM_TRAIN_RESERVE_SUCCESS = 0x4,
+ PSP_MEM_TRAIN_INIT_SUCCESS = 0x8,
+};
+
+enum psp_memory_training_ops {
+ PSP_MEM_TRAIN_SEND_LONG_MSG = 0x1,
+ PSP_MEM_TRAIN_SAVE = 0x2,
+ PSP_MEM_TRAIN_RESTORE = 0x4,
+ PSP_MEM_TRAIN_SEND_SHORT_MSG = 0x8,
+ PSP_MEM_TRAIN_COLD_BOOT = PSP_MEM_TRAIN_SEND_LONG_MSG,
+ PSP_MEM_TRAIN_RESUME = PSP_MEM_TRAIN_SEND_SHORT_MSG,
+};
+
+struct psp_memory_training_context {
+ /*training data size*/
+ u64 train_data_size;
+ /*
+ * sys_cache
+ * cpu virtual address
+ * system memory buffer that used to store the training data.
+ */
+ void *sys_cache;
+
+ /*vram offset of the p2c training data*/
+ u64 p2c_train_data_offset;
+
+ /*vram offset of the c2p training data*/
+ u64 c2p_train_data_offset;
+ struct amdgpu_bo *c2p_bo;
+
+ enum psp_memory_training_init_flag init;
+ u32 training_cnt;
+};
+
struct psp_context
{
struct amdgpu_device *adev;
@@ -172,17 +242,13 @@ struct psp_context
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
uint64_t tmr_mc_addr;
- void *tmr_buf;
- /* asd firmware and buffer */
+ /* asd firmware */
const struct firmware *asd_fw;
uint32_t asd_fw_version;
uint32_t asd_feature_version;
uint32_t asd_ucode_size;
uint8_t *asd_start_addr;
- struct amdgpu_bo *asd_shared_bo;
- uint64_t asd_shared_mc_addr;
- void *asd_shared_buf;
/* fence buffer */
struct amdgpu_bo *fence_buf_bo;
@@ -208,9 +274,22 @@ struct psp_context
uint32_t ta_ras_ucode_version;
uint32_t ta_ras_ucode_size;
uint8_t *ta_ras_start_addr;
+
+ uint32_t ta_hdcp_ucode_version;
+ uint32_t ta_hdcp_ucode_size;
+ uint8_t *ta_hdcp_start_addr;
+
+ uint32_t ta_dtm_ucode_version;
+ uint32_t ta_dtm_ucode_size;
+ uint8_t *ta_dtm_start_addr;
+
+ struct psp_asd_context asd_context;
struct psp_xgmi_context xgmi_context;
struct psp_ras_context ras;
+ struct psp_hdcp_context hdcp_context;
+ struct psp_dtm_context dtm_context;
struct mutex mutex;
+ struct psp_memory_training_context mem_train_ctx;
};
struct amdgpu_psp_funcs {
@@ -223,8 +302,6 @@ struct amdgpu_psp_funcs {
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
-#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
- (psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
#define psp_compare_sram_data(psp, ucode, type) \
(psp)->funcs->compare_sram_data((psp), (ucode), (type))
#define psp_init_microcode(psp) \
@@ -253,6 +330,12 @@ struct amdgpu_psp_funcs {
(psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL)
#define psp_rlc_autoload(psp) \
((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0)
+#define psp_mem_training_init(psp) \
+ ((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0)
+#define psp_mem_training_fini(psp) \
+ ((psp)->funcs->mem_training_fini ? (psp)->funcs->mem_training_fini((psp)) : 0)
+#define psp_mem_training(psp, ops) \
+ ((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0)
#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
@@ -263,6 +346,9 @@ struct amdgpu_psp_funcs {
((psp)->funcs->ras_cure_posion ? \
(psp)->funcs->ras_cure_posion(psp, (addr)) : -EINVAL)
+#define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp))
+#define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value))
+
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
@@ -270,6 +356,7 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
uint32_t field_val, uint32_t mask, bool check_changed);
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v12_0_ip_block;
int psp_gpu_reset(struct amdgpu_device *adev);
int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
@@ -280,10 +367,16 @@ int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
+int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_rlc_autoload_start(struct psp_context *psp);
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
uint32_t value);
+int psp_ring_cmd_submit(struct psp_context *psp,
+ uint64_t cmd_buf_mc_addr,
+ uint64_t fence_mc_addr,
+ int index);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index fac7aa2c244f..cef94e2169fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -25,78 +25,13 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+#include <linux/reboot.h>
+#include <linux/syscalls.h>
#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "amdgpu_atomfirmware.h"
-
-struct ras_ih_data {
- /* interrupt bottom half */
- struct work_struct ih_work;
- int inuse;
- /* IP callback */
- ras_ih_cb cb;
- /* full of entries */
- unsigned char *ring;
- unsigned int ring_size;
- unsigned int element_size;
- unsigned int aligned_element_size;
- unsigned int rptr;
- unsigned int wptr;
-};
-
-struct ras_fs_data {
- char sysfs_name[32];
- char debugfs_name[32];
-};
-
-struct ras_err_data {
- unsigned long ue_count;
- unsigned long ce_count;
-};
-
-struct ras_err_handler_data {
- /* point to bad pages array */
- struct {
- unsigned long bp;
- struct amdgpu_bo *bo;
- } *bps;
- /* the count of entries */
- int count;
- /* the space can place new entries */
- int space_left;
- /* last reserved entry's index + 1 */
- int last_reserved;
-};
-
-struct ras_manager {
- struct ras_common_if head;
- /* reference count */
- int use;
- /* ras block link */
- struct list_head node;
- /* the device */
- struct amdgpu_device *adev;
- /* debugfs */
- struct dentry *ent;
- /* sysfs */
- struct device_attribute sysfs_attr;
- int attr_inuse;
-
- /* fs node name */
- struct ras_fs_data fs_data;
-
- /* IH data */
- struct ras_ih_data ih_data;
-
- struct ras_err_data err_data;
-};
-
-struct ras_badpage {
- unsigned int bp;
- unsigned int size;
- unsigned int flags;
-};
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
const char *ras_error_string[] = {
"none",
@@ -130,11 +65,19 @@ const char *ras_block_string[] = {
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
-static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size,
- struct amdgpu_bo **bo_ptr);
-static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
- struct amdgpu_bo **bo_ptr);
+/* inject address is 52 bits */
+#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52)
+
+enum amdgpu_ras_retire_page_reservation {
+ AMDGPU_RAS_RETIRE_PAGE_RESERVED,
+ AMDGPU_RAS_RETIRE_PAGE_PENDING,
+ AMDGPU_RAS_RETIRE_PAGE_FAULT,
+};
+
+atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
+
+static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t addr);
static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
@@ -196,6 +139,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
char err[9] = "ue";
int op = -1;
int block_id;
+ uint32_t sub_block;
u64 address, value;
if (*pos)
@@ -223,17 +167,23 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return -EINVAL;
data->head.block = block_id;
- data->head.type = memcmp("ue", err, 2) == 0 ?
- AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE :
- AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+ /* only ue and ce errors are supported */
+ if (!memcmp("ue", err, 2))
+ data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ else if (!memcmp("ce", err, 2))
+ data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+ else
+ return -EINVAL;
+
data->op = op;
if (op == 2) {
- if (sscanf(str, "%*s %*s %*s %llu %llu",
- &address, &value) != 2)
- if (sscanf(str, "%*s %*s %*s 0x%llx 0x%llx",
- &address, &value) != 2)
+ if (sscanf(str, "%*s %*s %*s %u %llu %llu",
+ &sub_block, &address, &value) != 3)
+ if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
+ &sub_block, &address, &value) != 3)
return -EINVAL;
+ data->head.sub_block_index = sub_block;
data->inject.address = address;
data->inject.value = value;
}
@@ -247,6 +197,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return 0;
}
+
/**
* DOC: AMDGPU RAS debugfs control interface
*
@@ -266,32 +217,46 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* As their names indicate, inject operation will write the
* value to the address.
*
- * Second member: struct ras_debug_if::op.
+ * The second member: struct ras_debug_if::op.
* It has three kinds of operations.
- * 0: disable RAS on the block. Take ::head as its data.
- * 1: enable RAS on the block. Take ::head as its data.
- * 2: inject errors on the block. Take ::inject as its data.
+ *
+ * - 0: disable RAS on the block. Take ::head as its data.
+ * - 1: enable RAS on the block. Take ::head as its data.
+ * - 2: inject errors on the block. Take ::inject as its data.
*
* How to use the interface?
- * programs:
- * copy the struct ras_debug_if in your codes and initialize it.
- * write the struct to the control node.
*
- * bash:
- * echo op block [error [address value]] > .../ras/ras_ctrl
- * op: disable, enable, inject
- * disable: only block is needed
- * enable: block and error are needed
- * inject: error, address, value are needed
- * block: umc, smda, gfx, .........
- * see ras_block_string[] for details
- * error: ue, ce
- * ue: multi_uncorrectable
- * ce: single_correctable
+ * Programs
+ *
+ * Copy the struct ras_debug_if in your codes and initialize it.
+ * Write the struct to the control node.
+ *
+ * Shells
+ *
+ * .. code-block:: bash
+ *
+ * echo op block [error [sub_block address value]] > .../ras/ras_ctrl
*
- * here are some examples for bash commands,
- * echo inject umc ue 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
- * echo inject umc ce 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ * Parameters:
+ *
+ * op: disable, enable, inject
+ * disable: only block is needed
+ * enable: block and error are needed
+ * inject: error, address, value are needed
+ * block: umc, sdma, gfx, .........
+ * see ras_block_string[] for details
+ * error: ue, ce
+ * ue: multi_uncorrectable
+ * ce: single_correctable
+ * sub_block:
+ * sub block index, pass 0 if there is no sub block
+ *
+ * here are some examples for bash commands:
+ *
+ * .. code-block:: bash
+ *
+ * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
* echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
*
* How to check the result?
@@ -302,15 +267,17 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* For inject, please check corresponding err count at
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
*
- * NOTE: operation is only allowed on blocks which are supported.
- * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ * .. note::
+ * Operations are only allowed on blocks which are supported.
+ * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ * to see which blocks support RAS on a particular asic.
+ *
*/
static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
struct ras_debug_if data;
- struct amdgpu_bo *bo;
int ret = 0;
ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
@@ -328,22 +295,27 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
- ret = amdgpu_ras_reserve_vram(adev,
- data.inject.address, PAGE_SIZE, &bo);
- if (ret) {
- /* address was offset, now it is absolute.*/
- data.inject.address += adev->gmc.vram_start;
- if (data.inject.address > adev->gmc.vram_end)
- break;
- } else
- data.inject.address = amdgpu_bo_gpu_offset(bo);
+ if ((data.inject.address >= adev->gmc.mc_vram_size) ||
+ (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ /* umc ce/ue error injection for a bad page is not allowed */
+ if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
+ amdgpu_ras_check_bad_page(adev, data.inject.address)) {
+ DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n",
+ data.inject.address);
+ break;
+ }
+
+ /* data.inject.address is offset instead of absolute gpu address */
ret = amdgpu_ras_error_inject(adev, &data.inject);
- amdgpu_ras_release_vram(adev, &bo);
break;
default:
ret = -EINVAL;
break;
- };
+ }
if (ret)
return -EINVAL;
@@ -351,6 +323,33 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
return size;
}
+/**
+ * DOC: AMDGPU RAS debugfs EEPROM table reset interface
+ *
+ * Some boards contain an EEPROM which is used to persistently store a list of
+ * bad pages which experiences ECC errors in vram. This interface provides
+ * a way to reset the EEPROM, e.g., after testing error injection.
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo 1 > ../ras/ras_eeprom_reset
+ *
+ * will reset EEPROM table to 0 entries.
+ *
+ */
+static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ int ret;
+
+ ret = amdgpu_ras_eeprom_reset_table(&adev->psp.ras.ras->eeprom_control);
+
+ return ret == 1 ? size : -EIO;
+}
+
static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
.owner = THIS_MODULE,
.read = NULL,
@@ -358,6 +357,34 @@ static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
.llseek = default_llseek
};
+static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_ras_debugfs_eeprom_write,
+ .llseek = default_llseek
+};
+
+/**
+ * DOC: AMDGPU RAS sysfs Error Count Interface
+ *
+ * It allows the user to read the error count for each IP block on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
+ *
+ * It outputs the multiple lines which report the uncorrected (ue) and corrected
+ * (ce) error counts.
+ *
+ * The format of one line is below,
+ *
+ * [ce|ue]: count
+ *
+ * Example:
+ *
+ * .. code-block:: bash
+ *
+ * ue: 0
+ * ce: 1
+ *
+ */
static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -415,7 +442,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
}
/* return an obj equal to head, or the first when head is NULL */
-static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
struct ras_common_if *head)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -536,15 +563,17 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
return 0;
- ret = psp_ras_enable_features(&adev->psp, &info, enable);
- if (ret) {
- DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
- enable ? "enable":"disable",
- ras_block_str(head->block),
- ret);
- if (ret == TA_RAS_STATUS__RESET_NEEDED)
- return -EAGAIN;
- return -EINVAL;
+ if (!amdgpu_ras_intr_triggered()) {
+ ret = psp_ras_enable_features(&adev->psp, &info, enable);
+ if (ret) {
+ DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
+ enable ? "enable":"disable",
+ ras_block_str(head->block),
+ ret);
+ if (ret == TA_RAS_STATUS__RESET_NEEDED)
+ return -EAGAIN;
+ return -EINVAL;
+ }
}
/* setup the obj */
@@ -656,17 +685,77 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
struct ras_query_if *info)
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ int i;
if (!obj)
return -EINVAL;
- /* TODO might read the register to read the count */
+
+ switch (info->head.block) {
+ case AMDGPU_RAS_BLOCK__UMC:
+ if (adev->umc.funcs->query_ras_error_count)
+ adev->umc.funcs->query_ras_error_count(adev, &err_data);
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ if (adev->umc.funcs->query_ras_error_address)
+ adev->umc.funcs->query_ras_error_address(adev, &err_data);
+ break;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ if (adev->sdma.funcs->query_ras_error_count) {
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->sdma.funcs->query_ras_error_count(adev, i,
+ &err_data);
+ }
+ break;
+ case AMDGPU_RAS_BLOCK__GFX:
+ if (adev->gfx.funcs->query_ras_error_count)
+ adev->gfx.funcs->query_ras_error_count(adev, &err_data);
+ break;
+ case AMDGPU_RAS_BLOCK__MMHUB:
+ if (adev->mmhub.funcs->query_ras_error_count)
+ adev->mmhub.funcs->query_ras_error_count(adev, &err_data);
+ break;
+ case AMDGPU_RAS_BLOCK__PCIE_BIF:
+ if (adev->nbio.funcs->query_ras_error_count)
+ adev->nbio.funcs->query_ras_error_count(adev, &err_data);
+ break;
+ default:
+ break;
+ }
+
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
info->ue_count = obj->err_data.ue_count;
info->ce_count = obj->err_data.ce_count;
+ if (err_data.ce_count) {
+ dev_info(adev->dev, "%ld correctable errors detected in %s block\n",
+ obj->err_data.ce_count, ras_block_str(info->head.block));
+ }
+ if (err_data.ue_count) {
+ dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n",
+ obj->err_data.ue_count, ras_block_str(info->head.block));
+ }
+
return 0;
}
+uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t df_inst_id;
+
+ if ((!adev->df.funcs) ||
+ (!adev->df.funcs->get_df_inst_id) ||
+ (!adev->df.funcs->get_dram_base_addr))
+ return addr;
+
+ df_inst_id = adev->df.funcs->get_df_inst_id(adev);
+
+ return addr + adev->df.funcs->get_dram_base_addr(adev, df_inst_id);
+}
+
/* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info)
@@ -684,13 +773,31 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
if (!obj)
return -EINVAL;
- if (block_info.block_id != TA_RAS_BLOCK__UMC) {
+ /* Calculate XGMI relative offset */
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ block_info.address = get_xgmi_relative_phy_addr(adev,
+ block_info.address);
+ }
+
+ switch (info->head.block) {
+ case AMDGPU_RAS_BLOCK__GFX:
+ if (adev->gfx.funcs->ras_error_inject)
+ ret = adev->gfx.funcs->ras_error_inject(adev, info);
+ else
+ ret = -EINVAL;
+ break;
+ case AMDGPU_RAS_BLOCK__UMC:
+ case AMDGPU_RAS_BLOCK__MMHUB:
+ case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+ case AMDGPU_RAS_BLOCK__PCIE_BIF:
+ ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ break;
+ default:
DRM_INFO("%s error injection is not supported yet\n",
ras_block_str(info->head.block));
- return -EINVAL;
+ ret = -EINVAL;
}
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
if (ret)
DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",
ras_block_str(info->head.block),
@@ -707,7 +814,7 @@ int amdgpu_ras_error_cure(struct amdgpu_device *adev,
}
/* get the total error counts on all IPs */
-int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
bool is_ce)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -715,7 +822,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
struct ras_err_data data = {0, 0};
if (!con)
- return -EINVAL;
+ return 0;
list_for_each_entry(obj, &con->head, node) {
struct ras_query_if info = {
@@ -723,7 +830,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
};
if (amdgpu_ras_error_query(adev, &info))
- return -EINVAL;
+ return 0;
data.ce_count += info.ce_count;
data.ue_count += info.ue_count;
@@ -742,18 +849,18 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
{
switch (flags) {
- case 0:
+ case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
return "R";
- case 1:
+ case AMDGPU_RAS_RETIRE_PAGE_PENDING:
return "P";
- case 2:
+ case AMDGPU_RAS_RETIRE_PAGE_FAULT:
default:
return "F";
};
}
-/*
- * DOC: ras sysfs gpu_vram_bad_pages interface
+/**
+ * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
*
* It allows user to read the bad pages of vram on the gpu through
* /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
@@ -765,14 +872,21 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
*
* gpu pfn and gpu page size are printed in hex format.
* flags can be one of below character,
+ *
* R: reserved, this gpu page is reserved and not able to use.
+ *
* P: pending for reserve, this gpu page is marked as bad, will be reserved
- * in next window of page_reserve.
+ * in next window of page_reserve.
+ *
* F: unable to reserve. this gpu page can't be reserved due to some reasons.
*
- * examples:
- * 0x00000001 : 0x00001000 : R
- * 0x00000002 : 0x00001000 : P
+ * Examples:
+ *
+ * .. code-block:: bash
+ *
+ * 0x00000001 : 0x00001000 : R
+ * 0x00000002 : 0x00001000 : P
+ *
*/
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
@@ -812,32 +926,8 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
{
struct amdgpu_ras *con =
container_of(attr, struct amdgpu_ras, features_attr);
- struct drm_device *ddev = dev_get_drvdata(dev);
- struct amdgpu_device *adev = ddev->dev_private;
- struct ras_common_if head;
- int ras_block_count = AMDGPU_RAS_BLOCK_COUNT;
- int i;
- ssize_t s;
- struct ras_manager *obj;
-
- s = scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
- for (i = 0; i < ras_block_count; i++) {
- head.block = i;
-
- if (amdgpu_ras_is_feature_enabled(adev, &head)) {
- obj = amdgpu_ras_find_obj(adev, &head);
- s += scnprintf(&buf[s], PAGE_SIZE - s,
- "%s: %s\n",
- ras_block_str(i),
- ras_err_str(obj->head.type));
- } else
- s += scnprintf(&buf[s], PAGE_SIZE - s,
- "%s: disabled\n",
- ras_block_str(i));
- }
-
- return s;
+ return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
}
static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
@@ -970,6 +1060,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
}
/* sysfs end */
+/**
+ * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
+ *
+ * Normally when there is an uncorrectable error, the driver will reset
+ * the GPU to recover. However, in the event of an unrecoverable error,
+ * the driver provides an interface to reboot the system automatically
+ * in that event.
+ *
+ * The following file in debugfs provides that interface:
+ * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo true > .../ras/auto_reboot
+ *
+ */
/* debugfs begin */
static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
@@ -977,8 +1085,21 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
struct drm_minor *minor = adev->ddev->primary;
con->dir = debugfs_create_dir("ras", minor->debugfs_root);
- con->ent = debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
- adev, &amdgpu_ras_debugfs_ctrl_ops);
+ debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
+ adev, &amdgpu_ras_debugfs_ctrl_ops);
+ debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
+ adev, &amdgpu_ras_debugfs_eeprom_ops);
+
+ /*
+ * After one uncorrectable error happens, usually GPU recovery will
+ * be scheduled. But due to the known problem in GPU recovery failing
+ * to bring GPU back, below interface provides one direct way to
+ * user to reboot system automatically in such case within
+ * ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine
+ * will never be called.
+ */
+ debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir,
+ &con->reboot);
}
void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
@@ -1023,10 +1144,8 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
amdgpu_ras_debugfs_remove(adev, &obj->head);
}
- debugfs_remove(con->ent);
- debugfs_remove(con->dir);
+ debugfs_remove_recursive(con->dir);
con->dir = NULL;
- con->ent = NULL;
}
/* debugfs end */
@@ -1054,6 +1173,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
struct ras_ih_data *data = &obj->ih_data;
struct amdgpu_iv_entry entry;
int ret;
+ struct ras_err_data err_data = {0, 0, 0, NULL};
while (data->rptr != data->wptr) {
rmb();
@@ -1068,19 +1188,19 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
* from the callback to udpate the error type/count, etc
*/
if (data->cb) {
- ret = data->cb(obj->adev, &entry);
+ ret = data->cb(obj->adev, &err_data, &entry);
/* ue will trigger an interrupt, and in that case
* we need do a reset to recovery the whole system.
* But leave IP do that recovery, here we just dispatch
* the error.
*/
- if (ret == AMDGPU_RAS_UE) {
- obj->err_data.ue_count++;
+ if (ret == AMDGPU_RAS_SUCCESS) {
+ /* these counts could be left as 0 if
+ * some blocks do not count error number
+ */
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
}
- /* Might need get ce count by register, but not all IP
- * saves ce count, some IP just use one bit or two bits
- * to indicate ce happened.
- */
}
}
}
@@ -1219,6 +1339,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
data = con->eh_data;
if (!data || data->count == 0) {
*bps = NULL;
+ ret = -EINVAL;
goto out;
}
@@ -1230,15 +1351,15 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
for (; i < data->count; i++) {
(*bps)[i] = (struct ras_badpage){
- .bp = data->bps[i].bp,
+ .bp = data->bps[i].retired_page,
.size = AMDGPU_GPU_PAGE_SIZE,
- .flags = 0,
+ .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
};
if (data->last_reserved <= i)
- (*bps)[i].flags = 1;
- else if (data->bps[i].bo == NULL)
- (*bps)[i].flags = 2;
+ (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
+ else if (data->bps_bo[i] == NULL)
+ (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
}
*count = data->count;
@@ -1252,109 +1373,51 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
struct amdgpu_ras *ras =
container_of(work, struct amdgpu_ras, recovery_work);
- amdgpu_device_gpu_recover(ras->adev, 0);
+ if (amdgpu_device_should_recover_gpu(ras->adev))
+ amdgpu_device_gpu_recover(ras->adev, 0);
atomic_set(&ras->in_recovery, 0);
}
-static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
- struct amdgpu_bo **bo_ptr)
-{
- /* no need to free it actually. */
- amdgpu_bo_free_kernel(bo_ptr, NULL, NULL);
- return 0;
-}
-
-/* reserve vram with size@offset */
-static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size,
- struct amdgpu_bo **bo_ptr)
-{
- struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_bo_param bp;
- int r = 0;
- int i;
- struct amdgpu_bo *bo;
-
- if (bo_ptr)
- *bo_ptr = NULL;
- memset(&bp, 0, sizeof(bp));
- bp.size = size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
-
- r = amdgpu_bo_create(adev, &bp, &bo);
- if (r)
- return -EINVAL;
-
- r = amdgpu_bo_reserve(bo, false);
- if (r)
- goto error_reserve;
-
- offset = ALIGN(offset, PAGE_SIZE);
- for (i = 0; i < bo->placement.num_placement; ++i) {
- bo->placements[i].fpfn = offset >> PAGE_SHIFT;
- bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
- }
-
- ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
- r = ttm_bo_mem_space(&bo->tbo, &bo->placement, &bo->tbo.mem, &ctx);
- if (r)
- goto error_pin;
-
- r = amdgpu_bo_pin_restricted(bo,
- AMDGPU_GEM_DOMAIN_VRAM,
- offset,
- offset + size);
- if (r)
- goto error_pin;
-
- if (bo_ptr)
- *bo_ptr = bo;
-
- amdgpu_bo_unreserve(bo);
- return r;
-
-error_pin:
- amdgpu_bo_unreserve(bo);
-error_reserve:
- amdgpu_bo_unref(&bo);
- return r;
-}
-
/* alloc/realloc bps array */
static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
struct ras_err_handler_data *data, int pages)
{
unsigned int old_space = data->count + data->space_left;
unsigned int new_space = old_space + pages;
- unsigned int align_space = ALIGN(new_space, 1024);
- void *tmp = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
-
- if (!tmp)
+ unsigned int align_space = ALIGN(new_space, 512);
+ void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
+ struct amdgpu_bo **bps_bo =
+ kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL);
+
+ if (!bps || !bps_bo) {
+ kfree(bps);
+ kfree(bps_bo);
return -ENOMEM;
+ }
if (data->bps) {
- memcpy(tmp, data->bps,
+ memcpy(bps, data->bps,
data->count * sizeof(*data->bps));
kfree(data->bps);
}
+ if (data->bps_bo) {
+ memcpy(bps_bo, data->bps_bo,
+ data->count * sizeof(*data->bps_bo));
+ kfree(data->bps_bo);
+ }
- data->bps = tmp;
+ data->bps = bps;
+ data->bps_bo = bps_bo;
data->space_left += align_space - old_space;
return 0;
}
/* it deal with vram only. */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- unsigned long *bps, int pages)
+ struct eeprom_table_record *bps, int pages)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
- int i = pages;
int ret = 0;
if (!con || !con->eh_data || !bps || pages <= 0)
@@ -1371,24 +1434,120 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
goto out;
}
- while (i--)
- data->bps[data->count++].bp = bps[i];
-
+ memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
+ data->count += pages;
data->space_left -= pages;
+
out:
mutex_unlock(&con->recovery_lock);
return ret;
}
+/*
+ * write error record array to eeprom, the function should be
+ * protected by recovery_lock
+ */
+static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ struct amdgpu_ras_eeprom_control *control;
+ int save_count;
+
+ if (!con || !con->eh_data)
+ return 0;
+
+ control = &con->eeprom_control;
+ data = con->eh_data;
+ save_count = data->count - control->num_recs;
+ /* only new entries are saved */
+ if (save_count > 0)
+ if (amdgpu_ras_eeprom_process_recods(control,
+ &data->bps[control->num_recs],
+ true,
+ save_count)) {
+ DRM_ERROR("Failed to save EEPROM table data!");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/*
+ * read error record array in eeprom and reserve enough space for
+ * storing new bad pages
+ */
+static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras_eeprom_control *control =
+ &adev->psp.ras.ras->eeprom_control;
+ struct eeprom_table_record *bps = NULL;
+ int ret = 0;
+
+ /* no bad page record, skip eeprom access */
+ if (!control->num_recs)
+ return ret;
+
+ bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL);
+ if (!bps)
+ return -ENOMEM;
+
+ if (amdgpu_ras_eeprom_process_recods(control, bps, false,
+ control->num_recs)) {
+ DRM_ERROR("Failed to load EEPROM table records!");
+ ret = -EIO;
+ goto out;
+ }
+
+ ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs);
+
+out:
+ kfree(bps);
+ return ret;
+}
+
+/*
+ * check if an address belongs to bad page
+ *
+ * Note: this check is only for umc block
+ */
+static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t addr)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ int i;
+ bool ret = false;
+
+ if (!con || !con->eh_data)
+ return ret;
+
+ mutex_lock(&con->recovery_lock);
+ data = con->eh_data;
+ if (!data)
+ goto out;
+
+ addr >>= AMDGPU_GPU_PAGE_SHIFT;
+ for (i = 0; i < data->count; i++)
+ if (addr == data->bps[i].retired_page) {
+ ret = true;
+ goto out;
+ }
+
+out:
+ mutex_unlock(&con->recovery_lock);
+ return ret;
+}
+
/* called in gpu recovery/init */
int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
uint64_t bp;
- struct amdgpu_bo *bo;
- int i;
+ struct amdgpu_bo *bo = NULL;
+ int i, ret = 0;
if (!con || !con->eh_data)
return 0;
@@ -1399,18 +1558,29 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
goto out;
/* reserve vram at driver post stage. */
for (i = data->last_reserved; i < data->count; i++) {
- bp = data->bps[i].bp;
+ bp = data->bps[i].retired_page;
- if (amdgpu_ras_reserve_vram(adev, bp << PAGE_SHIFT,
- PAGE_SIZE, &bo))
- DRM_ERROR("RAS ERROR: reserve vram %llx fail\n", bp);
+ /* There are two cases of reserve error should be ignored:
+ * 1) a ras bad page has been allocated (used by someone);
+ * 2) a ras bad page has been reserved (duplicate error injection
+ * for one page);
+ */
+ if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL))
+ DRM_WARN("RAS WARN: reserve vram for retired page %llx fail\n", bp);
- data->bps[i].bo = bo;
+ data->bps_bo[i] = bo;
data->last_reserved = i + 1;
+ bo = NULL;
}
+
+ /* continue to save bad pages to eeprom even reesrve_vram fails */
+ ret = amdgpu_ras_save_bad_pages(adev);
out:
mutex_unlock(&con->recovery_lock);
- return 0;
+ return ret;
}
/* called when driver unload */
@@ -1430,11 +1600,11 @@ static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
goto out;
for (i = data->last_reserved - 1; i >= 0; i--) {
- bo = data->bps[i].bo;
+ bo = data->bps_bo[i];
- amdgpu_ras_release_vram(adev, &bo);
+ amdgpu_bo_free_kernel(&bo, NULL, NULL);
- data->bps[i].bo = bo;
+ data->bps_bo[i] = bo;
data->last_reserved = i;
}
out:
@@ -1442,41 +1612,54 @@ out:
return 0;
}
-static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
-{
- /* TODO
- * write the array to eeprom when SMU disabled.
- */
- return 0;
-}
-
-static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
-{
- /* TODO
- * read the array to eeprom when SMU disabled.
- */
- return 0;
-}
-
-static int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct ras_err_handler_data **data = &con->eh_data;
+ struct ras_err_handler_data **data;
+ int ret;
- *data = kmalloc(sizeof(**data),
- GFP_KERNEL|__GFP_ZERO);
- if (!*data)
- return -ENOMEM;
+ if (con)
+ data = &con->eh_data;
+ else
+ return 0;
+
+ *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
+ if (!*data) {
+ ret = -ENOMEM;
+ goto out;
+ }
mutex_init(&con->recovery_lock);
INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
atomic_set(&con->in_recovery, 0);
con->adev = adev;
- amdgpu_ras_load_bad_pages(adev);
- amdgpu_ras_reserve_bad_pages(adev);
+ ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
+ if (ret)
+ goto free;
+
+ if (con->eeprom_control.num_recs) {
+ ret = amdgpu_ras_load_bad_pages(adev);
+ if (ret)
+ goto free;
+ ret = amdgpu_ras_reserve_bad_pages(adev);
+ if (ret)
+ goto release;
+ }
return 0;
+
+release:
+ amdgpu_ras_release_bad_pages(adev);
+free:
+ kfree((*data)->bps);
+ kfree((*data)->bps_bo);
+ kfree(*data);
+ con->eh_data = NULL;
+out:
+ DRM_WARN("Failed to initialize ras recovery!\n");
+
+ return ret;
}
static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
@@ -1484,13 +1667,17 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data = con->eh_data;
+ /* recovery_init failed to init it, fini is useless */
+ if (!data)
+ return 0;
+
cancel_work_sync(&con->recovery_work);
- amdgpu_ras_save_bad_pages(adev);
amdgpu_ras_release_bad_pages(adev);
mutex_lock(&con->recovery_lock);
con->eh_data = NULL;
kfree(data->bps);
+ kfree(data->bps_bo);
kfree(data);
mutex_unlock(&con->recovery_lock);
@@ -1527,7 +1714,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
*supported = 0;
if (amdgpu_sriov_vf(adev) ||
- adev->asic_type != CHIP_VEGA20)
+ (adev->asic_type != CHIP_VEGA20 &&
+ adev->asic_type != CHIP_ARCTURUS))
return;
if (adev->is_atom_fw &&
@@ -1542,6 +1730,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
int amdgpu_ras_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int r;
if (con)
return 0;
@@ -1569,8 +1758,17 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* Might need get this flag from vbios. */
con->flags = RAS_DEFAULT_FLAGS;
- if (amdgpu_ras_recovery_init(adev))
- goto recovery_out;
+ if (adev->nbio.funcs->init_ras_controller_interrupt) {
+ r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
+ r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
+ if (r)
+ return r;
+ }
amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
@@ -1582,14 +1780,84 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
con->hw_supported, con->supported);
return 0;
fs_out:
- amdgpu_ras_recovery_fini(adev);
-recovery_out:
amdgpu_ras_set_context(adev, NULL);
kfree(con);
return -EINVAL;
}
+/* helper function to handle common stuff in ip late init phase */
+int amdgpu_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block,
+ struct ras_fs_if *fs_info,
+ struct ras_ih_if *ih_info)
+{
+ int r;
+
+ /* disable RAS feature per IP block if it is not supported */
+ if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
+ return 0;
+ }
+
+ r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
+ if (r) {
+ if (r == -EAGAIN) {
+ /* request gpu reset. will run again */
+ amdgpu_ras_request_reset_on_boot(adev,
+ ras_block->block);
+ return 0;
+ } else if (adev->in_suspend || adev->in_gpu_reset) {
+ /* in resume phase, if fail to enable ras,
+ * clean up all ras fs nodes, and disable ras */
+ goto cleanup;
+ } else
+ return r;
+ }
+
+ /* in resume phase, no need to create ras fs node */
+ if (adev->in_suspend || adev->in_gpu_reset)
+ return 0;
+
+ if (ih_info->cb) {
+ r = amdgpu_ras_interrupt_add_handler(adev, ih_info);
+ if (r)
+ goto interrupt;
+ }
+
+ amdgpu_ras_debugfs_create(adev, fs_info);
+
+ r = amdgpu_ras_sysfs_create(adev, fs_info);
+ if (r)
+ goto sysfs;
+
+ return 0;
+cleanup:
+ amdgpu_ras_sysfs_remove(adev, ras_block);
+sysfs:
+ amdgpu_ras_debugfs_remove(adev, ras_block);
+ if (ih_info->cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+interrupt:
+ amdgpu_ras_feature_enable(adev, ras_block, 0);
+ return r;
+}
+
+/* helper function to remove ras fs node and interrupt handler */
+void amdgpu_ras_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block,
+ struct ras_ih_if *ih_info)
+{
+ if (!ras_block || !ih_info)
+ return;
+
+ amdgpu_ras_sysfs_remove(adev, ras_block);
+ amdgpu_ras_debugfs_remove(adev, ras_block);
+ if (ih_info->cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+ amdgpu_ras_feature_enable(adev, ras_block, 0);
+}
+
/* do some init work after IP late init as dependence.
* and it runs in resume/gpu reset/booting up cases.
*/
@@ -1632,7 +1900,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
* See feature_enable_on_boot
*/
amdgpu_ras_disable_all_features(adev, 1);
- amdgpu_ras_reset_gpu(adev, 0);
+ amdgpu_ras_reset_gpu(adev);
}
}
@@ -1683,3 +1951,18 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
return 0;
}
+
+void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+{
+ uint32_t hw_supported, supported;
+
+ amdgpu_ras_check_supported(adev, &hw_supported, &supported);
+ if (!hw_supported)
+ return;
+
+ if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
+ DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n");
+
+ amdgpu_ras_reset_gpu(adev);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index b2841195bd3b..a5fe29a9373e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -29,6 +29,7 @@
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "ta_ras_if.h"
+#include "amdgpu_ras_eeprom.h"
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
@@ -52,6 +53,236 @@ enum amdgpu_ras_block {
#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST
#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
+enum amdgpu_ras_gfx_subblock {
+ /* CPC */
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
+ AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
+ AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
+ AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ /* CPF */
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
+ AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+ /* CPG */
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+ /* GDS */
+ AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ /* SPI */
+ AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
+ /* SQ */
+ AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
+ AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
+ AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+ AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+ /* SQC (3 ranges) */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+ /* SQC range 0 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ /* SQC range 1 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ /* SQC range 2 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
+ /* TA */
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ /* TCA */
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ /* TCC (5 sub-ranges) */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+ /* TCC range 0 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ /* TCC range 1 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ /* TCC range 2 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
+ AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
+ AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ /* TCC range 3 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ /* TCC range 4 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
+ /* TCI */
+ AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
+ /* TCP */
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ /* TD */
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
+ AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+ /* EA (3 sub-ranges) */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+ /* EA range 0 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ /* EA range 1 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ /* EA range 2 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
+ /* UTC VM L2 bank */
+ AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
+ /* UTC VM walker */
+ AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
+ /* UTC ATC L2 2MB cache */
+ AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
+ /* UTC ATC L2 4KB cache */
+ AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
+ AMDGPU_RAS_BLOCK__GFX_MAX
+};
+
enum amdgpu_ras_error_type {
AMDGPU_RAS_ERROR__NONE = 0,
AMDGPU_RAS_ERROR__PARITY = 1,
@@ -76,9 +307,6 @@ struct ras_common_if {
char name[32];
};
-typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
- struct amdgpu_iv_entry *entry);
-
struct amdgpu_ras {
/* ras infrastructure */
/* for ras itself. */
@@ -89,8 +317,6 @@ struct amdgpu_ras {
struct list_head head;
/* debugfs */
struct dentry *dir;
- /* debugfs ctrl */
- struct dentry *ent;
/* sysfs */
struct device_attribute features_attr;
struct bin_attribute badpages_attr;
@@ -106,10 +332,84 @@ struct amdgpu_ras {
struct mutex recovery_lock;
uint32_t flags;
+ bool reboot;
+ struct amdgpu_ras_eeprom_control eeprom_control;
};
-/* interfaces for IP */
+struct ras_fs_data {
+ char sysfs_name[32];
+ char debugfs_name[32];
+};
+
+struct ras_err_data {
+ unsigned long ue_count;
+ unsigned long ce_count;
+ unsigned long err_addr_cnt;
+ struct eeprom_table_record *err_addr;
+};
+
+struct ras_err_handler_data {
+ /* point to bad page records array */
+ struct eeprom_table_record *bps;
+ /* point to reserved bo array */
+ struct amdgpu_bo **bps_bo;
+ /* the count of entries */
+ int count;
+ /* the space can place new entries */
+ int space_left;
+ /* last reserved entry's index + 1 */
+ int last_reserved;
+};
+
+typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+
+struct ras_ih_data {
+ /* interrupt bottom half */
+ struct work_struct ih_work;
+ int inuse;
+ /* IP callback */
+ ras_ih_cb cb;
+ /* full of entries */
+ unsigned char *ring;
+ unsigned int ring_size;
+ unsigned int element_size;
+ unsigned int aligned_element_size;
+ unsigned int rptr;
+ unsigned int wptr;
+};
+struct ras_manager {
+ struct ras_common_if head;
+ /* reference count */
+ int use;
+ /* ras block link */
+ struct list_head node;
+ /* the device */
+ struct amdgpu_device *adev;
+ /* debugfs */
+ struct dentry *ent;
+ /* sysfs */
+ struct device_attribute sysfs_attr;
+ int attr_inuse;
+
+ /* fs node name */
+ struct ras_fs_data fs_data;
+
+ /* IH data */
+ struct ras_ih_data ih_data;
+
+ struct ras_err_data err_data;
+};
+
+struct ras_badpage {
+ unsigned int bp;
+ unsigned int size;
+ unsigned int flags;
+};
+
+/* interfaces for IP */
struct ras_fs_if {
struct ras_common_if head;
char sysfs_name[32];
@@ -178,26 +478,32 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
return ras && (ras->supported & (1 << block));
}
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
unsigned int block);
void amdgpu_ras_resume(struct amdgpu_device *adev);
void amdgpu_ras_suspend(struct amdgpu_device *adev);
-int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
bool is_ce);
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- unsigned long *bps, int pages);
+ struct eeprom_table_record *bps, int pages);
int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
-static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
- bool is_baco)
+static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
{
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ /* save bad page to eeprom before gpu reset,
+ * i2c may be unstable in gpu reset
+ */
+ if (in_task())
+ amdgpu_ras_reserve_bad_pages(adev);
+
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
schedule_work(&ras->recovery_work);
return 0;
@@ -263,6 +569,13 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
int amdgpu_ras_init(struct amdgpu_device *adev);
int amdgpu_ras_fini(struct amdgpu_device *adev);
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
+int amdgpu_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block,
+ struct ras_fs_if *fs_info,
+ struct ras_ih_if *ih_info);
+void amdgpu_ras_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block,
+ struct ras_ih_if *ih_info);
int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
struct ras_common_if *head, bool enable);
@@ -296,4 +609,22 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_dispatch_if *info);
+
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+extern atomic_t amdgpu_ras_in_intr;
+
+static inline bool amdgpu_ras_intr_triggered(void)
+{
+ return !!atomic_read(&amdgpu_ras_in_intr);
+}
+
+static inline void amdgpu_ras_intr_cleared(void)
+{
+ atomic_set(&amdgpu_ras_in_intr, 0);
+}
+
+void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
new file mode 100644
index 000000000000..2a8e04895595
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_ras_eeprom.h"
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include <linux/bits.h>
+#include "smu_v11_0_i2c.h"
+
+#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8
+#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0
+
+/*
+ * The 2 macros bellow represent the actual size in bytes that
+ * those entities occupy in the EEPROM memory.
+ * EEPROM_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which
+ * uses uint64 to store 6b fields such as retired_page.
+ */
+#define EEPROM_TABLE_HEADER_SIZE 20
+#define EEPROM_TABLE_RECORD_SIZE 24
+
+#define EEPROM_ADDRESS_SIZE 0x2
+
+/* Table hdr is 'AMDR' */
+#define EEPROM_TABLE_HDR_VAL 0x414d4452
+#define EEPROM_TABLE_VER 0x00010000
+
+/* Assume 2 Mbit size */
+#define EEPROM_SIZE_BYTES 256000
+#define EEPROM_PAGE__SIZE_BYTES 256
+#define EEPROM_HDR_START 0
+#define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE)
+#define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE)
+#define EEPROM_ADDR_MSB_MASK GENMASK(17, 8)
+
+#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
+
+static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr,
+ unsigned char *buff)
+{
+ uint32_t *pp = (uint32_t *) buff;
+
+ pp[0] = cpu_to_le32(hdr->header);
+ pp[1] = cpu_to_le32(hdr->version);
+ pp[2] = cpu_to_le32(hdr->first_rec_offset);
+ pp[3] = cpu_to_le32(hdr->tbl_size);
+ pp[4] = cpu_to_le32(hdr->checksum);
+}
+
+static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr,
+ unsigned char *buff)
+{
+ uint32_t *pp = (uint32_t *)buff;
+
+ hdr->header = le32_to_cpu(pp[0]);
+ hdr->version = le32_to_cpu(pp[1]);
+ hdr->first_rec_offset = le32_to_cpu(pp[2]);
+ hdr->tbl_size = le32_to_cpu(pp[3]);
+ hdr->checksum = le32_to_cpu(pp[4]);
+}
+
+static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
+ unsigned char *buff)
+{
+ int ret = 0;
+ struct i2c_msg msg = {
+ .addr = 0,
+ .flags = 0,
+ .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
+ .buf = buff,
+ };
+
+
+ *(uint16_t *)buff = EEPROM_HDR_START;
+ __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE);
+
+ msg.addr = control->i2c_address;
+
+ ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
+ if (ret < 1)
+ DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret);
+
+ return ret;
+}
+
+
+
+static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
+{
+ int i;
+ uint32_t tbl_sum = 0;
+
+ /* Header checksum, skip checksum field in the calculation */
+ for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
+ tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
+
+ return tbl_sum;
+}
+
+static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records,
+ int num)
+{
+ int i, j;
+ uint32_t tbl_sum = 0;
+
+ /* Records checksum */
+ for (i = 0; i < num; i++) {
+ struct eeprom_table_record *record = &records[i];
+
+ for (j = 0; j < sizeof(*record); j++) {
+ tbl_sum += *(((unsigned char *)record) + j);
+ }
+ }
+
+ return tbl_sum;
+}
+
+static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, int num)
+{
+ return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
+}
+
+/* Checksum = 256 -((sum of all table entries) mod 256) */
+static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, int num,
+ uint32_t old_hdr_byte_sum)
+{
+ /*
+ * This will update the table sum with new records.
+ *
+ * TODO: What happens when the EEPROM table is to be wrapped around
+ * and old records from start will get overridden.
+ */
+
+ /* need to recalculate updated header byte sum */
+ control->tbl_byte_sum -= old_hdr_byte_sum;
+ control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
+
+ control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
+}
+
+/* table sum mod 256 + checksum must equals 256 */
+static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, int num)
+{
+ control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
+
+ if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
+ DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
+ return false;
+ }
+
+ return true;
+}
+
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
+{
+ unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ int ret = 0;
+
+ mutex_lock(&control->tbl_mutex);
+
+ hdr->header = EEPROM_TABLE_HDR_VAL;
+ hdr->version = EEPROM_TABLE_VER;
+ hdr->first_rec_offset = EEPROM_RECORD_START;
+ hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
+
+ control->tbl_byte_sum = 0;
+ __update_tbl_checksum(control, NULL, 0, 0);
+ control->next_addr = EEPROM_RECORD_START;
+
+ ret = __update_table_header(control, buff);
+
+ mutex_unlock(&control->tbl_mutex);
+
+ return ret;
+
+}
+
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct i2c_msg msg = {
+ .addr = 0,
+ .flags = I2C_M_RD,
+ .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
+ .buf = buff,
+ };
+
+ mutex_init(&control->tbl_mutex);
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ control->i2c_address = EEPROM_I2C_TARGET_ADDR_VEGA20;
+ ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor);
+ break;
+
+ case CHIP_ARCTURUS:
+ control->i2c_address = EEPROM_I2C_TARGET_ADDR_ARCTURUS;
+ ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor);
+ break;
+
+ default:
+ return 0;
+ }
+
+ if (ret) {
+ DRM_ERROR("Failed to init I2C controller, ret:%d", ret);
+ return ret;
+ }
+
+ msg.addr = control->i2c_address;
+
+ /* Read/Create table header from EEPROM address 0 */
+ ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
+ if (ret < 1) {
+ DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret);
+ return ret;
+ }
+
+ __decode_table_header_from_buff(hdr, &buff[2]);
+
+ if (hdr->header == EEPROM_TABLE_HDR_VAL) {
+ control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) /
+ EEPROM_TABLE_RECORD_SIZE;
+ control->tbl_byte_sum = __calc_hdr_byte_sum(control);
+ control->next_addr = EEPROM_RECORD_START;
+
+ DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
+ control->num_recs);
+
+ } else {
+ DRM_INFO("Creating new EEPROM table");
+
+ ret = amdgpu_ras_eeprom_reset_table(control);
+ }
+
+ return ret == 1 ? 0 : -EIO;
+}
+
+void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor);
+ break;
+ case CHIP_ARCTURUS:
+ smu_i2c_eeprom_fini(&adev->smu, &control->eeprom_accessor);
+ break;
+
+ default:
+ return;
+ }
+}
+
+static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ unsigned char *buff)
+{
+ __le64 tmp = 0;
+ int i = 0;
+
+ /* Next are all record fields according to EEPROM page spec in LE foramt */
+ buff[i++] = record->err_type;
+
+ buff[i++] = record->bank;
+
+ tmp = cpu_to_le64(record->ts);
+ memcpy(buff + i, &tmp, 8);
+ i += 8;
+
+ tmp = cpu_to_le64((record->offset & 0xffffffffffff));
+ memcpy(buff + i, &tmp, 6);
+ i += 6;
+
+ buff[i++] = record->mem_channel;
+ buff[i++] = record->mcumc_id;
+
+ tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
+ memcpy(buff + i, &tmp, 6);
+}
+
+static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ unsigned char *buff)
+{
+ __le64 tmp = 0;
+ int i = 0;
+
+ /* Next are all record fields according to EEPROM page spec in LE foramt */
+ record->err_type = buff[i++];
+
+ record->bank = buff[i++];
+
+ memcpy(&tmp, buff + i, 8);
+ record->ts = le64_to_cpu(tmp);
+ i += 8;
+
+ memcpy(&tmp, buff + i, 6);
+ record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
+ i += 6;
+
+ record->mem_channel = buff[i++];
+ record->mcumc_id = buff[i++];
+
+ memcpy(&tmp, buff + i, 6);
+ record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
+}
+
+/*
+ * When reaching end of EEPROM memory jump back to 0 record address
+ * When next record access will go beyond EEPROM page boundary modify bits A17/A8
+ * in I2C selector to go to next page
+ */
+static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
+{
+ uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE;
+
+ /* When all EEPROM memory used jump back to 0 address */
+ if (next_address > EEPROM_SIZE_BYTES) {
+ DRM_INFO("Reached end of EEPROM memory, jumping to 0 "
+ "and overriding old record");
+ return EEPROM_RECORD_START;
+ }
+
+ /*
+ * To check if we overflow page boundary compare next address with
+ * current and see if bits 17/8 of the EEPROM address will change
+ * If they do start from the next 256b page
+ *
+ * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 5.1.2
+ */
+ if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) {
+ DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx",
+ (next_address & EEPROM_ADDR_MSB_MASK));
+
+ return (next_address & EEPROM_ADDR_MSB_MASK);
+ }
+
+ return curr_address;
+}
+
+int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records,
+ bool write,
+ int num)
+{
+ int i, ret = 0;
+ struct i2c_msg *msgs, *msg;
+ unsigned char *buffs, *buff;
+ struct eeprom_table_record *record;
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+
+ if (adev->asic_type != CHIP_VEGA20 && adev->asic_type != CHIP_ARCTURUS)
+ return 0;
+
+ buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE,
+ GFP_KERNEL);
+ if (!buffs)
+ return -ENOMEM;
+
+ mutex_lock(&control->tbl_mutex);
+
+ msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL);
+ if (!msgs) {
+ ret = -ENOMEM;
+ goto free_buff;
+ }
+
+ /* In case of overflow just start from beginning to not lose newest records */
+ if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES))
+ control->next_addr = EEPROM_RECORD_START;
+
+
+ /*
+ * TODO Currently makes EEPROM writes for each record, this creates
+ * internal fragmentation. Optimized the code to do full page write of
+ * 256b
+ */
+ for (i = 0; i < num; i++) {
+ buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+ record = &records[i];
+ msg = &msgs[i];
+
+ control->next_addr = __correct_eeprom_dest_address(control->next_addr);
+
+ /*
+ * Update bits 16,17 of EEPROM address in I2C address by setting them
+ * to bits 1,2 of Device address byte
+ */
+ msg->addr = control->i2c_address |
+ ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
+ msg->flags = write ? 0 : I2C_M_RD;
+ msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
+ msg->buf = buff;
+
+ /* Insert the EEPROM dest addess, bits 0-15 */
+ buff[0] = ((control->next_addr >> 8) & 0xff);
+ buff[1] = (control->next_addr & 0xff);
+
+ /* EEPROM table content is stored in LE format */
+ if (write)
+ __encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
+
+ /*
+ * The destination EEPROM address might need to be corrected to account
+ * for page or entire memory wrapping
+ */
+ control->next_addr += EEPROM_TABLE_RECORD_SIZE;
+ }
+
+ ret = i2c_transfer(&control->eeprom_accessor, msgs, num);
+ if (ret < 1) {
+ DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret);
+
+ /* TODO Restore prev next EEPROM address ? */
+ goto free_msgs;
+ }
+
+
+ if (!write) {
+ for (i = 0; i < num; i++) {
+ buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+ record = &records[i];
+
+ __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
+ }
+ }
+
+ if (write) {
+ uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control);
+
+ /*
+ * Update table header with size and CRC and account for table
+ * wrap around where the assumption is that we treat it as empty
+ * table
+ *
+ * TODO - Check the assumption is correct
+ */
+ control->num_recs += num;
+ control->num_recs %= EEPROM_MAX_RECORD_NUM;
+ control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num;
+ if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES)
+ control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE +
+ control->num_recs * EEPROM_TABLE_RECORD_SIZE;
+
+ __update_tbl_checksum(control, records, num, old_hdr_byte_sum);
+
+ __update_table_header(control, buffs);
+ } else if (!__validate_tbl_checksum(control, records, num)) {
+ DRM_WARN("EEPROM Table checksum mismatch!");
+ /* TODO Uncomment when EEPROM read/write is relliable */
+ /* ret = -EIO; */
+ }
+
+free_msgs:
+ kfree(msgs);
+
+free_buff:
+ kfree(buffs);
+
+ mutex_unlock(&control->tbl_mutex);
+
+ return ret == num ? 0 : -EIO;
+}
+
+/* Used for testing if bugs encountered */
+#if 0
+void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control)
+{
+ int i;
+ struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL);
+
+ if (!recs)
+ return;
+
+ for (i = 0; i < 1 ; i++) {
+ recs[i].address = 0xdeadbeef;
+ recs[i].retired_page = i;
+ }
+
+ if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) {
+
+ memset(recs, 0, sizeof(*recs) * 1);
+
+ control->next_addr = EEPROM_RECORD_START;
+
+ if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) {
+ for (i = 0; i < 1; i++)
+ DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu",
+ recs[i].address, recs[i].retired_page);
+ } else
+ DRM_ERROR("Failed in reading from table");
+
+ } else
+ DRM_ERROR("Failed in writing to table");
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
new file mode 100644
index 000000000000..ca78f812d436
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _AMDGPU_RAS_EEPROM_H
+#define _AMDGPU_RAS_EEPROM_H
+
+#include <linux/i2c.h>
+
+struct amdgpu_device;
+
+enum amdgpu_ras_eeprom_err_type{
+ AMDGPU_RAS_EEPROM_ERR_PLACE_HOLDER,
+ AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
+ AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE
+};
+
+struct amdgpu_ras_eeprom_table_header {
+ uint32_t header;
+ uint32_t version;
+ uint32_t first_rec_offset;
+ uint32_t tbl_size;
+ uint32_t checksum;
+}__attribute__((__packed__));
+
+struct amdgpu_ras_eeprom_control {
+ struct amdgpu_ras_eeprom_table_header tbl_hdr;
+ struct i2c_adapter eeprom_accessor;
+ uint32_t next_addr;
+ unsigned int num_recs;
+ struct mutex tbl_mutex;
+ bool bus_locked;
+ uint32_t tbl_byte_sum;
+ uint16_t i2c_address; // 8-bit represented address
+};
+
+/*
+ * Represents single table record. Packed to be easily serialized into byte
+ * stream.
+ */
+struct eeprom_table_record {
+
+ union {
+ uint64_t address;
+ uint64_t offset;
+ };
+
+ uint64_t retired_page;
+ uint64_t ts;
+
+ enum amdgpu_ras_eeprom_err_type err_type;
+
+ union {
+ unsigned char bank;
+ unsigned char cu;
+ };
+
+ unsigned char mem_channel;
+ unsigned char mcumc_id;
+}__attribute__((__packed__));
+
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control);
+void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control);
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
+
+int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records,
+ bool write,
+ int num);
+
+void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control);
+
+#endif // _AMDGPU_RAS_EEPROM_H
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4410c97ac9b7..930316e60155 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -29,7 +29,7 @@
#include <drm/drm_print.h>
/* max number of rings */
-#define AMDGPU_MAX_RINGS 24
+#define AMDGPU_MAX_RINGS 28
#define AMDGPU_MAX_GFX_RINGS 2
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index c8793e6cc3c5..6373bfb47d55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
*/
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
{
- volatile u32 *dst_ptr;
u32 dws;
int r;
/* allocate clear state block */
adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
- r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
+ r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
@@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
return r;
}
- /* set up the cs buffer */
- dst_ptr = adev->gfx.rlc.cs_ptr;
- adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
- amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 5c13c503e61f..a2ee30b16212 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
#define AMDGPU_CSA_SDMA_SIZE 64
/* SDMA CSA reside in the 3rd page of CSA */
@@ -83,3 +84,101 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
return csa_mc_addr;
}
+
+int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
+ void *ras_ih_info)
+{
+ int r, i;
+ struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info;
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "sdma_err_count",
+ .debugfs_name = "sdma_err_inject",
+ };
+
+ if (!ih_info)
+ return -EINVAL;
+
+ if (!adev->sdma.ras_if) {
+ adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->sdma.ras_if)
+ return -ENOMEM;
+ adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA;
+ adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->sdma.ras_if->sub_block_index = 0;
+ strcpy(adev->sdma.ras_if->name, "sdma");
+ }
+ fs_info.head = ih_info->head = *adev->sdma.ras_if;
+
+ r = amdgpu_ras_late_init(adev, adev->sdma.ras_if,
+ &fs_info, ih_info);
+ if (r)
+ goto free;
+
+ if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ if (r)
+ goto late_fini;
+ }
+ } else {
+ r = 0;
+ goto free;
+ }
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info);
+free:
+ kfree(adev->sdma.ras_if);
+ adev->sdma.ras_if = NULL;
+ return r;
+}
+
+void amdgpu_sdma_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
+ adev->sdma.ras_if) {
+ struct ras_common_if *ras_if = adev->sdma.ras_if;
+ struct ras_ih_if ih_info = {
+ .head = *ras_if,
+ /* the cb member will not be used by
+ * amdgpu_ras_interrupt_remove_handler, init it only
+ * to cheat the check in ras_late_fini
+ */
+ .cb = amdgpu_sdma_process_ras_data_cb,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
+
+int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_ras_reset_gpu(adev);
+
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->sdma.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 35dd152f9d5c..485335267d78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -25,11 +25,17 @@
#define __AMDGPU_SDMA_H__
/* max number of IP instances */
-#define AMDGPU_MAX_SDMA_INSTANCES 2
+#define AMDGPU_MAX_SDMA_INSTANCES 8
enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE0 = 0,
AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_SDMA_IRQ_INSTANCE2,
+ AMDGPU_SDMA_IRQ_INSTANCE3,
+ AMDGPU_SDMA_IRQ_INSTANCE4,
+ AMDGPU_SDMA_IRQ_INSTANCE5,
+ AMDGPU_SDMA_IRQ_INSTANCE6,
+ AMDGPU_SDMA_IRQ_INSTANCE7,
AMDGPU_SDMA_IRQ_LAST
};
@@ -44,8 +50,18 @@ struct amdgpu_sdma_instance {
bool burst_nop;
};
+struct amdgpu_sdma_ras_funcs {
+ int (*ras_late_init)(struct amdgpu_device *adev,
+ void *ras_ih_info);
+ void (*ras_fini)(struct amdgpu_device *adev);
+ int (*query_ras_error_count)(struct amdgpu_device *adev,
+ uint32_t instance, void *ras_error_status);
+};
+
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+ struct drm_gpu_scheduler *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES];
+ uint32_t num_sdma_sched;
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
struct amdgpu_irq_src ecc_irq;
@@ -53,6 +69,7 @@ struct amdgpu_sdma {
uint32_t srbm_soft_reset;
bool has_page_queue;
struct ras_common_if *ras_if;
+ const struct amdgpu_sdma_ras_funcs *funcs;
};
/*
@@ -98,4 +115,13 @@ struct amdgpu_sdma_instance *
amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
+int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
+ void *ras_ih_info);
+void amdgpu_sdma_ras_fini(struct amdgpu_device *adev);
+int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 9828f3c7c655..a09b6b9c27d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -129,7 +129,8 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep,
* Tries to add the fence to an existing hash entry. Returns true when an entry
* was found, false otherwise.
*/
-static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit)
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
+ bool explicit)
{
struct amdgpu_sync_entry *e;
@@ -151,19 +152,18 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
* amdgpu_sync_fence - remember to sync to this fence
*
* @sync: sync object to add fence to
- * @fence: fence to sync to
+ * @f: fence to sync to
+ * @explicit: if this is an explicit dependency
*
+ * Add the fence to the sync object.
*/
-int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
- struct dma_fence *f, bool explicit)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ bool explicit)
{
struct amdgpu_sync_entry *e;
if (!f)
return 0;
- if (amdgpu_sync_same_dev(adev, f) &&
- amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
- amdgpu_sync_keep_later(&sync->last_vm_update, f);
if (amdgpu_sync_add_later(sync, f, explicit))
return 0;
@@ -180,6 +180,24 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
}
/**
+ * amdgpu_sync_vm_fence - remember to sync to this VM fence
+ *
+ * @adev: amdgpu device
+ * @sync: sync object to add fence to
+ * @fence: the VM fence to add
+ *
+ * Add the fence to the sync object and remember it as VM update.
+ */
+int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
+{
+ if (!fence)
+ return 0;
+
+ amdgpu_sync_keep_later(&sync->last_vm_update, fence);
+ return amdgpu_sync_fence(sync, fence, false);
+}
+
+/**
* amdgpu_sync_resv - sync to a reservation object
*
* @sync: sync object to add fences from reservation object to
@@ -190,10 +208,10 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
*/
int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
- struct reservation_object *resv,
+ struct dma_resv *resv,
void *owner, bool explicit_sync)
{
- struct reservation_object_list *flist;
+ struct dma_resv_list *flist;
struct dma_fence *f;
void *fence_owner;
unsigned i;
@@ -203,16 +221,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
return -EINVAL;
/* always sync to the exclusive fence */
- f = reservation_object_get_excl(resv);
- r = amdgpu_sync_fence(adev, sync, f, false);
+ f = dma_resv_get_excl(resv);
+ r = amdgpu_sync_fence(sync, f, false);
- flist = reservation_object_get_list(resv);
+ flist = dma_resv_get_list(resv);
if (!flist || r)
return r;
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
- reservation_object_held(resv));
+ dma_resv_held(resv));
/* We only want to trigger KFD eviction fences on
* evict or move jobs. Skip KFD fences otherwise.
*/
@@ -222,13 +240,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
continue;
if (amdgpu_sync_same_dev(adev, f)) {
- /* VM updates are only interesting
- * for other VM updates and moves.
+ /* VM updates only sync with moves but not with user
+ * command submissions or KFD evictions fences
*/
- if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
- (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
- ((owner == AMDGPU_FENCE_OWNER_VM) !=
- (fence_owner == AMDGPU_FENCE_OWNER_VM)))
+ if (owner == AMDGPU_FENCE_OWNER_VM &&
+ fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED)
continue;
/* Ignore fence from the same owner and explicit one as
@@ -239,7 +255,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
continue;
}
- r = amdgpu_sync_fence(adev, sync, f, false);
+ r = amdgpu_sync_fence(sync, f, false);
if (r)
break;
}
@@ -340,7 +356,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
- r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
+ r = amdgpu_sync_fence(clone, f, e->explicit);
if (r)
return r;
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 10cf23a57f17..d62c2b81d92b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -27,7 +27,7 @@
#include <linux/hashtable.h>
struct dma_fence;
-struct reservation_object;
+struct dma_resv;
struct amdgpu_device;
struct amdgpu_ring;
@@ -40,16 +40,18 @@ struct amdgpu_sync {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
- struct dma_fence *f, bool explicit);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ bool explicit);
+int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
- struct reservation_object *resv,
+ struct dma_resv *resv,
void *owner,
bool explicit_sync);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
-struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync,
+ bool *explicit);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index b66d29d5ffa2..b158230af8db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -138,6 +138,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
}
dma_fence_put(fence);
+ fence = NULL;
r = amdgpu_bo_kmap(vram_obj, &vram_map);
if (r) {
@@ -183,6 +184,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
}
dma_fence_put(fence);
+ fence = NULL;
r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 77674a7b9616..63e734a125fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -170,7 +170,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__field(unsigned int, context)
__field(unsigned int, seqno)
__field(struct dma_fence *, fence)
- __field(char *, ring_name)
+ __string(ring, to_amdgpu_ring(job->base.sched)->name)
__field(u32, num_ibs)
),
@@ -179,12 +179,12 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
+ __assign_str(ring, to_amdgpu_ring(job->base.sched)->name)
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
__entry->sched_job_id, __get_str(timeline), __entry->context,
- __entry->seqno, __entry->ring_name, __entry->num_ibs)
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
);
TRACE_EVENT(amdgpu_sched_run_job,
@@ -195,7 +195,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
__string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__field(unsigned int, context)
__field(unsigned int, seqno)
- __field(char *, ring_name)
+ __string(ring, to_amdgpu_ring(job->base.sched)->name)
__field(u32, num_ibs)
),
@@ -204,12 +204,12 @@ TRACE_EVENT(amdgpu_sched_run_job,
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
+ __assign_str(ring, to_amdgpu_ring(job->base.sched)->name)
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
__entry->sched_job_id, __get_str(timeline), __entry->context,
- __entry->seqno, __entry->ring_name, __entry->num_ibs)
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
);
@@ -323,14 +323,15 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
TRACE_EVENT(amdgpu_vm_set_ptes,
TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
- uint32_t incr, uint64_t flags),
- TP_ARGS(pe, addr, count, incr, flags),
+ uint32_t incr, uint64_t flags, bool direct),
+ TP_ARGS(pe, addr, count, incr, flags, direct),
TP_STRUCT__entry(
__field(u64, pe)
__field(u64, addr)
__field(u32, count)
__field(u32, incr)
__field(u64, flags)
+ __field(bool, direct)
),
TP_fast_assign(
@@ -339,28 +340,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes,
__entry->count = count;
__entry->incr = incr;
__entry->flags = flags;
+ __entry->direct = direct;
),
- TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u",
- __entry->pe, __entry->addr, __entry->incr,
- __entry->flags, __entry->count)
+ TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, "
+ "direct=%d", __entry->pe, __entry->addr, __entry->incr,
+ __entry->flags, __entry->count, __entry->direct)
);
TRACE_EVENT(amdgpu_vm_copy_ptes,
- TP_PROTO(uint64_t pe, uint64_t src, unsigned count),
- TP_ARGS(pe, src, count),
+ TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct),
+ TP_ARGS(pe, src, count, direct),
TP_STRUCT__entry(
__field(u64, pe)
__field(u64, src)
__field(u32, count)
+ __field(bool, direct)
),
TP_fast_assign(
__entry->pe = pe;
__entry->src = src;
__entry->count = count;
+ __entry->direct = direct;
),
- TP_printk("pe=%010Lx, src=%010Lx, count=%u",
- __entry->pe, __entry->src, __entry->count)
+ TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d",
+ __entry->pe, __entry->src, __entry->count,
+ __entry->direct)
);
TRACE_EVENT(amdgpu_vm_flush,
@@ -468,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
- __field(const char *,name)
+ __string(ring, sched_job->base.sched->name)
__field(uint64_t, id)
__field(struct dma_fence *, fence)
__field(uint64_t, ctx)
@@ -476,14 +481,14 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
),
TP_fast_assign(
- __entry->name = sched_job->base.sched->name;
+ __assign_str(ring, sched_job->base.sched->name)
__entry->id = sched_job->base.id;
__entry->fence = fence;
__entry->ctx = fence->context;
__entry->seqno = fence->seqno;
),
TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
- __entry->name, __entry->id,
+ __get_str(ring), __entry->id,
__entry->fence, __entry->ctx,
__entry->seqno)
);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e51b48ac48eb..dee446278417 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -35,10 +35,13 @@
#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
+#include <linux/dma-buf.h>
+#include <linux/sizes.h>
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
@@ -54,6 +57,7 @@
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
#include "bif/bif_4_1_d.h"
static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
@@ -227,7 +231,7 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
return -EPERM;
- return drm_vma_node_verify_access(&abo->gem_base.vma_node,
+ return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
filp->private_data);
}
@@ -303,7 +307,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
struct amdgpu_copy_mem *src,
struct amdgpu_copy_mem *dst,
uint64_t size,
- struct reservation_object *resv,
+ struct dma_resv *resv,
struct dma_fence **f)
{
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@@ -440,10 +444,26 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
new_mem->num_pages << PAGE_SHIFT,
- bo->resv, &fence);
+ bo->base.resv, &fence);
if (r)
goto error;
+ /* clear the space being freed */
+ if (old_mem->mem_type == TTM_PL_VRAM &&
+ (ttm_to_amdgpu_bo(bo)->flags &
+ AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
+ struct dma_fence *wipe_fence = NULL;
+
+ r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
+ NULL, &wipe_fence);
+ if (r) {
+ goto error;
+ } else if (wipe_fence) {
+ dma_fence_put(fence);
+ fence = wipe_fence;
+ }
+ }
+
/* Always block for VM page tables before committing the new location */
if (bo->type == ttm_bo_type_kernel)
r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem);
@@ -468,15 +488,12 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
{
- struct amdgpu_device *adev;
struct ttm_mem_reg *old_mem = &bo->mem;
struct ttm_mem_reg tmp_mem;
struct ttm_place placements;
struct ttm_placement placement;
int r;
- adev = amdgpu_ttm_adev(bo->bdev);
-
/* create space/pages for new_mem in GTT space */
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
@@ -527,15 +544,12 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
{
- struct amdgpu_device *adev;
struct ttm_mem_reg *old_mem = &bo->mem;
struct ttm_mem_reg tmp_mem;
struct ttm_placement placement;
struct ttm_place placements;
int r;
- adev = amdgpu_ttm_adev(bo->bdev);
-
/* make space in GTT for old_mem buffer */
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
@@ -747,6 +761,7 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
*/
struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
+ struct drm_gem_object *gobj;
u64 offset;
uint64_t userptr;
struct task_struct *usertask;
@@ -756,6 +771,20 @@ struct amdgpu_ttm_tt {
#endif
};
+#ifdef CONFIG_DRM_AMDGPU_USERPTR
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+ (1 << 0), /* HMM_PFN_VALID */
+ (1 << 1), /* HMM_PFN_WRITE */
+ 0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+ 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+ 0, /* HMM_PFN_NONE */
+ 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
/**
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
* memory and start HMM tracking CPU page table update
@@ -763,97 +792,89 @@ struct amdgpu_ttm_tt {
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
* once afterwards to stop HMM tracking
*/
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-
-#define MAX_RETRY_HMM_RANGE_FAULT 16
-
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
- struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct mm_struct *mm = gtt->usertask->mm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct hmm_range *range;
+ unsigned long timeout;
+ struct mm_struct *mm;
unsigned long i;
- uint64_t *pfns;
- int retry = 0;
int r = 0;
- if (!mm) /* Happens during process shutdown */
- return -ESRCH;
-
- if (unlikely(!mirror)) {
- DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
- r = -EFAULT;
- goto out;
+ mm = bo->notifier.mm;
+ if (unlikely(!mm)) {
+ DRM_DEBUG_DRIVER("BO is not registered?\n");
+ return -EFAULT;
}
- vma = find_vma(mm, start);
- if (unlikely(!vma || start < vma->vm_start)) {
- r = -EFAULT;
- goto out;
- }
- if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
- vma->vm_file)) {
- r = -EPERM;
- goto out;
- }
+ /* Another get_user_pages is running at the same time?? */
+ if (WARN_ON(gtt->range))
+ return -EFAULT;
+
+ if (!mmget_not_zero(mm)) /* Happens during process shutdown */
+ return -ESRCH;
range = kzalloc(sizeof(*range), GFP_KERNEL);
if (unlikely(!range)) {
r = -ENOMEM;
goto out;
}
+ range->notifier = &bo->notifier;
+ range->flags = hmm_range_flags;
+ range->values = hmm_range_values;
+ range->pfn_shift = PAGE_SHIFT;
+ range->start = bo->notifier.interval_tree.start;
+ range->end = bo->notifier.interval_tree.last + 1;
+ range->default_flags = hmm_range_flags[HMM_PFN_VALID];
+ if (!amdgpu_ttm_tt_is_readonly(ttm))
+ range->default_flags |= range->flags[HMM_PFN_WRITE];
- pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
- if (unlikely(!pfns)) {
+ range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
+ GFP_KERNEL);
+ if (unlikely(!range->pfns)) {
r = -ENOMEM;
goto out_free_ranges;
}
- amdgpu_hmm_init_range(range);
- range->default_flags = range->flags[HMM_PFN_VALID];
- range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
- 0 : range->flags[HMM_PFN_WRITE];
- range->pfn_flags_mask = 0;
- range->pfns = pfns;
- hmm_range_register(range, mirror, start,
- start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ if (unlikely(!vma || start < vma->vm_start)) {
+ r = -EFAULT;
+ goto out_unlock;
+ }
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vma->vm_file)) {
+ r = -EPERM;
+ goto out_unlock;
+ }
+ up_read(&mm->mmap_sem);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
retry:
- /*
- * Just wait for range to be valid, safe to ignore return value as we
- * will use the return value of hmm_range_fault() below under the
- * mmap_sem to ascertain the validity of the range.
- */
- hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
+ range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
down_read(&mm->mmap_sem);
-
- r = hmm_range_fault(range, true);
- if (unlikely(r < 0)) {
- if (likely(r == -EAGAIN)) {
- /*
- * return -EAGAIN, mmap_sem is dropped
- */
- if (retry++ < MAX_RETRY_HMM_RANGE_FAULT)
- goto retry;
- else
- pr_err("Retry hmm fault too many times\n");
- }
-
- goto out_up_read;
- }
-
+ r = hmm_range_fault(range, 0);
up_read(&mm->mmap_sem);
+ if (unlikely(r <= 0)) {
+ /*
+ * FIXME: This timeout should encompass the retry from
+ * mmu_interval_read_retry() as well.
+ */
+ if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_free_pfns;
+ }
for (i = 0; i < ttm->num_pages; i++) {
- pages[i] = hmm_device_entry_to_page(range, pfns[i]);
+ /* FIXME: The pages cannot be touched outside the notifier_lock */
+ pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
if (unlikely(!pages[i])) {
pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
- i, pfns[i]);
+ i, range->pfns[i]);
r = -ENOMEM;
goto out_free_pfns;
@@ -861,18 +882,18 @@ retry:
}
gtt->range = range;
+ mmput(mm);
return 0;
-out_up_read:
- if (likely(r != -EAGAIN))
- up_read(&mm->mmap_sem);
+out_unlock:
+ up_read(&mm->mmap_sem);
out_free_pfns:
- hmm_range_unregister(range);
- kvfree(pfns);
+ kvfree(range->pfns);
out_free_ranges:
kfree(range);
out:
+ mmput(mm);
return r;
}
@@ -897,15 +918,18 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
"No user pages to check\n");
if (gtt->range) {
- r = hmm_range_valid(gtt->range);
- hmm_range_unregister(gtt->range);
-
+ /*
+ * FIXME: Must always hold notifier_lock for this, and must
+ * not ignore the return code.
+ */
+ r = mmu_interval_read_retry(gtt->range->notifier,
+ gtt->range->notifier_seq);
kvfree(gtt->range->pfns);
kfree(gtt->range);
gtt->range = NULL;
}
- return r;
+ return !r;
}
#endif
@@ -986,10 +1010,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
sg_free_table(ttm->sg);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
- if (gtt->range &&
- ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
- gtt->range->pfns[0]))
- WARN_ONCE(1, "Missing get_user_page_done\n");
+ if (gtt->range) {
+ unsigned long i;
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ if (ttm->pages[i] !=
+ hmm_device_entry_to_page(gtt->range,
+ gtt->range->pfns[i]))
+ break;
+ }
+
+ WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
+ }
#endif
}
@@ -1216,16 +1248,14 @@ static struct ttm_backend_func amdgpu_backend_func = {
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
- struct amdgpu_device *adev;
struct amdgpu_ttm_tt *gtt;
- adev = amdgpu_ttm_adev(bo->bdev);
-
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
if (gtt == NULL) {
return NULL;
}
gtt->ttm.ttm.func = &amdgpu_backend_func;
+ gtt->gobj = &bo->base;
/* allocate space for the uninitialized page entries */
if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
@@ -1246,7 +1276,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
if (gtt && gtt->userptr) {
@@ -1259,7 +1288,19 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
return 0;
}
- if (slave && ttm->sg) {
+ if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
+ if (!ttm->sg) {
+ struct dma_buf_attachment *attach;
+ struct sg_table *sgt;
+
+ attach = gtt->gobj->import_attach;
+ sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ ttm->sg = sgt;
+ }
+
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address,
ttm->num_pages);
@@ -1286,9 +1327,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
*/
static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
- struct amdgpu_device *adev;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+ struct amdgpu_device *adev;
if (gtt && gtt->userptr) {
amdgpu_ttm_tt_set_user_pages(ttm, NULL);
@@ -1297,7 +1337,16 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
return;
}
- if (slave)
+ if (ttm->sg && gtt->gobj->import_attach) {
+ struct dma_buf_attachment *attach;
+
+ attach = gtt->gobj->import_attach;
+ dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
+ ttm->sg = NULL;
+ return;
+ }
+
+ if (ttm->page_flags & TTM_PAGE_FLAG_SG)
return;
adev = amdgpu_ttm_adev(ttm->bdev);
@@ -1470,26 +1519,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
{
unsigned long num_pages = bo->mem.num_pages;
struct drm_mm_node *node = bo->mem.mm_node;
- struct reservation_object_list *flist;
+ struct dma_resv_list *flist;
struct dma_fence *f;
int i;
- /* Don't evict VM page tables while they are busy, otherwise we can't
- * cleanly handle page faults.
- */
if (bo->type == ttm_bo_type_kernel &&
- !reservation_object_test_signaled_rcu(bo->resv, true))
+ !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
return false;
/* If bo is a KFD BO, check if the bo belongs to the current process.
* If true, then return false as any KFD process needs all its BOs to
* be resident to run successfully
*/
- flist = reservation_object_get_list(bo->resv);
+ flist = dma_resv_get_list(bo->base.resv);
if (flist) {
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
- reservation_object_held(bo->resv));
+ dma_resv_held(bo->base.resv));
if (amdkfd_fence_check_mm(f, current->mm))
return false;
}
@@ -1599,6 +1645,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
.move = &amdgpu_bo_move,
.verify_access = &amdgpu_verify_access,
.move_notify = &amdgpu_bo_move_notify,
+ .release_notify = &amdgpu_bo_release_notify,
.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
.io_mem_free = &amdgpu_ttm_io_mem_free,
@@ -1632,81 +1679,96 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
*/
static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
{
- struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_bo_param bp;
- int r = 0;
- int i;
- u64 vram_size = adev->gmc.visible_vram_size;
- u64 offset = adev->fw_vram_usage.start_offset;
- u64 size = adev->fw_vram_usage.size;
- struct amdgpu_bo *bo;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = adev->fw_vram_usage.size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
+ uint64_t vram_size = adev->gmc.visible_vram_size;
+
adev->fw_vram_usage.va = NULL;
adev->fw_vram_usage.reserved_bo = NULL;
- if (adev->fw_vram_usage.size > 0 &&
- adev->fw_vram_usage.size <= vram_size) {
+ if (adev->fw_vram_usage.size == 0 ||
+ adev->fw_vram_usage.size > vram_size)
+ return 0;
- r = amdgpu_bo_create(adev, &bp,
- &adev->fw_vram_usage.reserved_bo);
- if (r)
- goto error_create;
+ return amdgpu_bo_create_kernel_at(adev,
+ adev->fw_vram_usage.start_offset,
+ adev->fw_vram_usage.size,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->fw_vram_usage.reserved_bo,
+ &adev->fw_vram_usage.va);
+}
- r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
- if (r)
- goto error_reserve;
+/*
+ * Memoy training reservation functions
+ */
- /* remove the original mem node and create a new one at the
- * request position
- */
- bo = adev->fw_vram_usage.reserved_bo;
- offset = ALIGN(offset, PAGE_SIZE);
- for (i = 0; i < bo->placement.num_placement; ++i) {
- bo->placements[i].fpfn = offset >> PAGE_SHIFT;
- bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
- }
+/**
+ * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free memory training reserved vram if it has been reserved.
+ */
+static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
- ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
- r = ttm_bo_mem_space(&bo->tbo, &bo->placement,
- &bo->tbo.mem, &ctx);
- if (r)
- goto error_pin;
+ ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+ amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
+ ctx->c2p_bo = NULL;
- r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
- AMDGPU_GEM_DOMAIN_VRAM,
- adev->fw_vram_usage.start_offset,
- (adev->fw_vram_usage.start_offset +
- adev->fw_vram_usage.size));
- if (r)
- goto error_pin;
- r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
- &adev->fw_vram_usage.va);
- if (r)
- goto error_kmap;
+ return 0;
+}
+
+static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size)
+{
+ if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1) )
+ vram_size -= SZ_1M;
+
+ return ALIGN(vram_size, SZ_1M);
+}
+
+/**
+ * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from memory training.
+ */
+static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev)
+{
+ int ret;
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+
+ memset(ctx, 0, sizeof(*ctx));
+ if (!adev->fw_vram_usage.mem_train_support) {
+ DRM_DEBUG("memory training does not support!\n");
+ return 0;
+ }
+
+ ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size);
+ ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
+ ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
- amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
+
+ ret = amdgpu_bo_create_kernel_at(adev,
+ ctx->c2p_train_data_offset,
+ ctx->train_data_size,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &ctx->c2p_bo,
+ NULL);
+ if (ret) {
+ DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
+ amdgpu_ttm_training_reserve_vram_fini(adev);
+ return ret;
}
- return r;
-error_kmap:
- amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
-error_pin:
- amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
-error_reserve:
- amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
-error_create:
- adev->fw_vram_usage.va = NULL;
- adev->fw_vram_usage.reserved_bo = NULL;
- return r;
+ ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
+ return 0;
}
+
/**
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
@@ -1721,6 +1783,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
uint64_t gtt_size;
int r;
u64 vis_vram_limit;
+ void *stolen_vga_buf;
mutex_init(&adev->mman.gtt_window_lock);
@@ -1728,7 +1791,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
r = ttm_bo_device_init(&adev->mman.bdev,
&amdgpu_bo_driver,
adev->ddev->anon_inode->i_mapping,
- adev->need_dma32);
+ adev->ddev->vma_offset_manager,
+ dma_addressing_limited(adev->dev));
if (r) {
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
return r;
@@ -1768,6 +1832,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
+ /*
+ *The reserved vram for memory training must be pinned to the specified
+ *place on the VRAM, so reserve it early.
+ */
+ r = amdgpu_ttm_training_reserve_vram_init(adev);
+ if (r)
+ return r;
+
/* allocate memory as required for VGA
* This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS
@@ -1775,9 +1847,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->stolen_vga_memory,
- NULL, NULL);
+ NULL, &stolen_vga_buf);
if (r)
return r;
+
+ /*
+ * reserve one TMR (64K) memory at the top of VRAM which holds
+ * IP Discovery data and is protected by PSP.
+ */
+ r = amdgpu_bo_create_kernel_at(adev,
+ adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE,
+ DISCOVERY_TMR_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->discovery_memory,
+ NULL);
+ if (r)
+ return r;
+
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
@@ -1839,8 +1925,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
*/
void amdgpu_ttm_late_init(struct amdgpu_device *adev)
{
+ void *stolen_vga_buf;
/* return the VGA stolen memory (if any) back to VRAM */
- amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
}
/**
@@ -1852,7 +1939,11 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
return;
amdgpu_ttm_debugfs_fini(adev);
+ amdgpu_ttm_training_reserve_vram_fini(adev);
+ /* return the IP Discovery TMR memory back to VRAM */
+ amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
+
if (adev->mman.aper_base_kaddr)
iounmap(adev->mman.aper_base_kaddr);
adev->mman.aper_base_kaddr = NULL;
@@ -1888,11 +1979,13 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
if (enable) {
struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
+ struct drm_gpu_scheduler *sched;
ring = adev->mman.buffer_funcs_ring;
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
- r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
+ sched = &ring->sched;
+ r = drm_sched_entity_init(&adev->mman.entity,
+ DRM_SCHED_PRIORITY_KERNEL, &sched,
+ 1, NULL);
if (r) {
DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
r);
@@ -1948,10 +2041,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
AMDGPU_GPU_PAGE_SIZE;
- num_dw = adev->mman.buffer_funcs->copy_num_dw;
- while (num_dw & 0x7)
- num_dw++;
-
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8;
r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
@@ -1992,7 +2082,7 @@ error_free:
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
- struct reservation_object *resv,
+ struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
bool vm_needs_flush)
{
@@ -2011,11 +2101,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
- num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
-
- /* for IB padding */
- while (num_dw & 0x7)
- num_dw++;
+ num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
if (r)
@@ -2066,7 +2152,7 @@ error_free:
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
- struct reservation_object *resv,
+ struct dma_resv *resv,
struct dma_fence **fence)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index caa76c693700..0dddedc06ae3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -38,6 +38,8 @@
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
+#define AMDGPU_POISON 0xd0bed0be
+
struct amdgpu_mman {
struct ttm_bo_device bdev;
bool mem_global_referenced;
@@ -83,18 +85,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
- struct reservation_object *resv,
+ struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
bool vm_needs_flush);
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
struct amdgpu_copy_mem *src,
struct amdgpu_copy_mem *dst,
uint64_t size,
- struct reservation_object *resv,
+ struct dma_resv *resv,
struct dma_fence **f);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
- struct reservation_object *resv,
+ struct dma_resv *resv,
struct dma_fence **fence);
int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index bfaa0eac3213..9ef312428231 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -83,8 +83,8 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr)
const struct smc_firmware_header_v2_0 *v2_hdr =
container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0);
- DRM_INFO("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes));
- DRM_INFO("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes));
+ DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes));
+ DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes));
} else {
DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor);
}
@@ -269,6 +269,16 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
DRM_DEBUG("kdb_size_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes));
}
+ if (version_minor == 2) {
+ const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 =
+ container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0);
+ DRM_DEBUG("kdb_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_2->kdb_header_version));
+ DRM_DEBUG("kdb_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_2->kdb_offset_bytes));
+ DRM_DEBUG("kdb_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes));
+ }
} else {
DRM_ERROR("Unknown PSP ucode version: %u.%u\n",
version_major, version_minor);
@@ -350,11 +360,16 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_RAVEN:
case CHIP_VEGA12:
case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ case CHIP_RENOIR:
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
+
default:
DRM_ERROR("Unknown firmware load type\n");
}
@@ -432,6 +447,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
const struct common_firmware_header *header = NULL;
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
+ const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
if (NULL == ucode->fw)
return 0;
@@ -445,6 +461,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
header = (const struct common_firmware_header *)ucode->fw->data;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
+ dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
@@ -455,7 +472,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM &&
- ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV)) {
+ ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_DMCUB)) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@@ -491,6 +509,12 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(dmcu_hdr->intv_offset_bytes)),
ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCUB) {
+ ucode->ucode_size = le32_to_cpu(dmcub_hdr->inst_const_bytes);
+ memcpy(ucode->kaddr,
+ (void *)((uint8_t *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes)),
+ ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index c1fb6dc86440..b0e656409c03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -90,6 +90,15 @@ struct psp_firmware_header_v1_1 {
uint32_t kdb_size_bytes;
};
+/* version_major=1, version_minor=2 */
+struct psp_firmware_header_v1_2 {
+ struct psp_firmware_header_v1_0 v1_0;
+ uint32_t reserve[3];
+ uint32_t kdb_header_version;
+ uint32_t kdb_offset_bytes;
+ uint32_t kdb_size_bytes;
+};
+
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -99,6 +108,12 @@ struct ta_firmware_header_v1_0 {
uint32_t ta_ras_ucode_version;
uint32_t ta_ras_offset_bytes;
uint32_t ta_ras_size_bytes;
+ uint32_t ta_hdcp_ucode_version;
+ uint32_t ta_hdcp_offset_bytes;
+ uint32_t ta_hdcp_size_bytes;
+ uint32_t ta_dtm_ucode_version;
+ uint32_t ta_dtm_offset_bytes;
+ uint32_t ta_dtm_size_bytes;
};
/* version_major=1, version_minor=0 */
@@ -236,6 +251,13 @@ struct dmcu_firmware_header_v1_0 {
uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */
};
+/* version_major=1, version_minor=0 */
+struct dmcub_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t inst_const_bytes; /* size of instruction region, in bytes */
+ uint32_t bss_data_bytes; /* size of bss/data region, in bytes */
+};
+
/* header is fixed size */
union amdgpu_firmware_header {
struct common_firmware_header common;
@@ -253,6 +275,7 @@ union amdgpu_firmware_header {
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct gpu_info_firmware_header_v1_0 gpu_info;
struct dmcu_firmware_header_v1_0 dmcu;
+ struct dmcub_firmware_header_v1_0 dmcub;
uint8_t raw[0x100];
};
@@ -262,6 +285,12 @@ union amdgpu_firmware_header {
enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_SDMA0 = 0,
AMDGPU_UCODE_ID_SDMA1,
+ AMDGPU_UCODE_ID_SDMA2,
+ AMDGPU_UCODE_ID_SDMA3,
+ AMDGPU_UCODE_ID_SDMA4,
+ AMDGPU_UCODE_ID_SDMA5,
+ AMDGPU_UCODE_ID_SDMA6,
+ AMDGPU_UCODE_ID_SDMA7,
AMDGPU_UCODE_ID_CP_CE,
AMDGPU_UCODE_ID_CP_PFP,
AMDGPU_UCODE_ID_CP_ME,
@@ -271,20 +300,22 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_CP_MES,
AMDGPU_UCODE_ID_CP_MES_DATA,
- AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
+ AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_UVD,
AMDGPU_UCODE_ID_UVD1,
AMDGPU_UCODE_ID_VCE,
AMDGPU_UCODE_ID_VCN,
+ AMDGPU_UCODE_ID_VCN1,
AMDGPU_UCODE_ID_DMCU_ERAM,
AMDGPU_UCODE_ID_DMCU_INTV,
AMDGPU_UCODE_ID_VCN0_RAM,
AMDGPU_UCODE_ID_VCN1_RAM,
+ AMDGPU_UCODE_ID_DMCUB,
AMDGPU_UCODE_ID_MAXIMUM,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
new file mode 100644
index 000000000000..f4d40855147b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_ras.h"
+
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "umc_err_count",
+ .debugfs_name = "umc_err_inject",
+ };
+ struct ras_ih_if ih_info = {
+ .cb = amdgpu_umc_process_ras_data_cb,
+ };
+
+ if (!adev->umc.ras_if) {
+ adev->umc.ras_if =
+ kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->umc.ras_if)
+ return -ENOMEM;
+ adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC;
+ adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->umc.ras_if->sub_block_index = 0;
+ strcpy(adev->umc.ras_if->name, "umc");
+ }
+ ih_info.head = fs_info.head = *adev->umc.ras_if;
+
+ r = amdgpu_ras_late_init(adev, adev->umc.ras_if,
+ &fs_info, &ih_info);
+ if (r)
+ goto free;
+
+ if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) {
+ r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
+ if (r)
+ goto late_fini;
+ } else {
+ r = 0;
+ goto free;
+ }
+
+ /* ras init of specific umc version */
+ if (adev->umc.funcs && adev->umc.funcs->err_cnt_init)
+ adev->umc.funcs->err_cnt_init(adev);
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info);
+free:
+ kfree(adev->umc.ras_if);
+ adev->umc.ras_if = NULL;
+ return r;
+}
+
+void amdgpu_umc_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
+ adev->umc.ras_if) {
+ struct ras_common_if *ras_if = adev->umc.ras_if;
+ struct ras_ih_if ih_info = {
+ .head = *ras_if,
+ .cb = amdgpu_umc_process_ras_data_cb,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ if (adev->umc.funcs &&
+ adev->umc.funcs->query_ras_error_count)
+ adev->umc.funcs->query_ras_error_count(adev, ras_error_status);
+
+ if (adev->umc.funcs &&
+ adev->umc.funcs->query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if(!err_data->err_addr)
+ DRM_WARN("Failed to alloc memory for umc error address record!\n");
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ adev->umc.funcs->query_ras_error_address(adev, ras_error_status);
+ }
+
+ /* only uncorrectable error needs gpu reset */
+ if (err_data->ue_count) {
+ if (err_data->err_addr_cnt &&
+ amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
+ err_data->err_addr_cnt))
+ DRM_WARN("Failed to add ras bad page!\n");
+
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ kfree(err_data->err_addr);
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->umc.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
new file mode 100644
index 000000000000..a615a1eb750b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_UMC_H__
+#define __AMDGPU_UMC_H__
+
+struct amdgpu_umc_funcs {
+ void (*err_cnt_init)(struct amdgpu_device *adev);
+ int (*ras_late_init)(struct amdgpu_device *adev);
+ void (*query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ void (*query_ras_error_address)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ void (*init_registers)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_umc {
+ /* max error count in one ras query call */
+ uint32_t max_ras_err_cnt_per_query;
+ /* number of umc channel instance with memory map register access */
+ uint32_t channel_inst_num;
+ /* number of umc instance with memory map register access */
+ uint32_t umc_inst_num;
+ /* UMC regiser per channel offset */
+ uint32_t channel_offs;
+ /* channel index table of interleaved memory */
+ const uint32_t *channel_idx_tbl;
+ struct ras_common_if *ras_if;
+
+ const struct amdgpu_umc_funcs *funcs;
+};
+
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 5b2fea3b4a2c..a92f3b18e657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -39,6 +39,8 @@
#include "cikd.h"
#include "uvd/uvd_4_2_d.h"
+#include "amdgpu_ras.h"
+
/* 1 second timeout */
#define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000)
@@ -297,6 +299,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
int i, j;
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
drm_sched_entity_destroy(&adev->uvd.entity);
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
@@ -327,12 +330,13 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
+ struct drm_gpu_scheduler *sched;
int r;
ring = &adev->uvd.inst[0].ring;
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL);
+ sched = &ring->sched;
+ r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
if (r) {
DRM_ERROR("Failed setting up UVD kernel entity.\n");
return r;
@@ -346,6 +350,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
unsigned size;
void *ptr;
int i, j;
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -372,8 +377,16 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
if (!adev->uvd.inst[j].saved_bo)
return -ENOMEM;
- memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+ /* re-write 0 since err_event_athub will corrupt VCPU buffer */
+ if (in_ras_intr)
+ memset(adev->uvd.inst[j].saved_bo, 0, size);
+ else
+ memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
}
+
+ if (in_ras_intr)
+ DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
+
return 0;
}
@@ -1073,7 +1086,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
ib->length_dw = 16;
if (direct) {
- r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
+ r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
true, false,
msecs_to_jiffies(10));
if (r == 0)
@@ -1085,7 +1098,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (r)
goto err_free;
} else {
- r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+ r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
AMDGPU_FENCE_OWNER_UNDEFINED, false);
if (r)
goto err_free;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index b70b3c45bb29..59ddba137946 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -80,6 +80,11 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
+static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence);
+static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence);
/**
* amdgpu_vce_init - allocate memory, load vce firmware
@@ -211,6 +216,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
if (adev->vce.vcpu_bo == NULL)
return 0;
+ cancel_delayed_work_sync(&adev->vce.idle_work);
drm_sched_entity_destroy(&adev->vce.entity);
amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
@@ -234,12 +240,13 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
int amdgpu_vce_entity_init(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
+ struct drm_gpu_scheduler *sched;
int r;
ring = &adev->vce.ring[0];
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
+ sched = &ring->sched;
+ r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up VCE run queue.\n");
return r;
@@ -428,14 +435,15 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
*
* Open up a stream for HW test
*/
-int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -444,7 +452,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
/* stitch together an VCE create msg */
ib->length_dw = 0;
@@ -476,8 +484,8 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[ib->length_dw++] = 0x00000014; /* len */
ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000001;
for (i = ib->length_dw; i < ib_size_dw; ++i)
@@ -507,8 +515,8 @@ err:
*
* Close up a stream for HW test or if userspace failed to do so
*/
-int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- bool direct, struct dma_fence **fence)
+static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence)
{
const unsigned ib_size_dw = 1024;
struct amdgpu_job *job;
@@ -644,7 +652,7 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
if ((addr + (uint64_t)size) >
(mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n",
+ DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
addr, lo, hi);
return -EINVAL;
}
@@ -1110,13 +1118,20 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
/* skip vce ring1/2 ib test for now, since it's not reliable */
if (ring != &ring->adev->vce.ring[0])
return 0;
- r = amdgpu_vce_get_create_msg(ring, 1, NULL);
+ r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
@@ -1132,5 +1147,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 30ea54dd9117..d6d83a3ec803 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -58,10 +58,6 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
int amdgpu_vce_entity_init(struct amdgpu_device *adev);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
-int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence);
-int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- bool direct, struct dma_fence **fence);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 2e12eeb314a7..f96464e2c157 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -28,30 +28,29 @@
#include <linux/module.h>
#include <linux/pci.h>
-#include <drm/drm.h>
-
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"
-#include "soc15_common.h"
-
-#include "vcn/vcn_1_0_offset.h"
-#include "vcn/vcn_1_0_sh_mask.h"
-
-/* 1 second timeout */
-#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
/* Firmware Names */
#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
+#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin"
+#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin"
#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
+#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
+#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
+MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
+MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
+MODULE_FIRMWARE(FIRMWARE_NAVI14);
+MODULE_FIRMWARE(FIRMWARE_NAVI12);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@@ -61,7 +60,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
- int r;
+ int i, r;
INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
@@ -74,12 +73,36 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
else
fw_name = FIRMWARE_RAVEN;
break;
+ case CHIP_ARCTURUS:
+ fw_name = FIRMWARE_ARCTURUS;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
+ break;
+ case CHIP_RENOIR:
+ fw_name = FIRMWARE_RENOIR;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
+ break;
case CHIP_NAVI10:
fw_name = FIRMWARE_NAVI10;
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
+ case CHIP_NAVI14:
+ fw_name = FIRMWARE_NAVI14;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
+ break;
+ case CHIP_NAVI12:
+ fw_name = FIRMWARE_NAVI12;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
+ break;
default:
return -EINVAL;
}
@@ -133,22 +156,28 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
- &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
- return r;
- }
- if (adev->vcn.indirect_sram) {
- r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo,
- &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
if (r) {
- dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r);
+ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
return r;
}
+
+ if (adev->vcn.indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
+ return r;
+ }
+ }
}
return 0;
@@ -156,26 +185,29 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
- int i;
-
- kvfree(adev->vcn.saved_bo);
+ int i, j;
- if (adev->vcn.indirect_sram) {
- amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo,
- &adev->vcn.dpg_sram_gpu_addr,
- (void **)&adev->vcn.dpg_sram_cpu_addr);
- }
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
- amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
- &adev->vcn.gpu_addr,
- (void **)&adev->vcn.cpu_addr);
+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+ if (adev->vcn.indirect_sram) {
+ amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
+ &adev->vcn.inst[j].dpg_sram_gpu_addr,
+ (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
+ }
+ kvfree(adev->vcn.inst[j].saved_bo);
- amdgpu_ring_fini(&adev->vcn.ring_dec);
+ amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
+ &adev->vcn.inst[j].gpu_addr,
+ (void **)&adev->vcn.inst[j].cpu_addr);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
+ amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);
- amdgpu_ring_fini(&adev->vcn.ring_jpeg);
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
+ }
release_firmware(adev->vcn.fw);
@@ -186,21 +218,25 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
unsigned size;
void *ptr;
+ int i;
cancel_delayed_work_sync(&adev->vcn.idle_work);
- if (adev->vcn.vcpu_bo == NULL)
- return 0;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return 0;
- size = amdgpu_bo_size(adev->vcn.vcpu_bo);
- ptr = adev->vcn.cpu_addr;
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
- adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL);
- if (!adev->vcn.saved_bo)
- return -ENOMEM;
-
- memcpy_fromio(adev->vcn.saved_bo, ptr, size);
+ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->vcn.inst[i].saved_bo)
+ return -ENOMEM;
+ memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+ }
return 0;
}
@@ -208,32 +244,36 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
unsigned size;
void *ptr;
+ int i;
- if (adev->vcn.vcpu_bo == NULL)
- return -EINVAL;
-
- size = amdgpu_bo_size(adev->vcn.vcpu_bo);
- ptr = adev->vcn.cpu_addr;
-
- if (adev->vcn.saved_bo != NULL) {
- memcpy_toio(ptr, adev->vcn.saved_bo, size);
- kvfree(adev->vcn.saved_bo);
- adev->vcn.saved_bo = NULL;
- } else {
- const struct common_firmware_header *hdr;
- unsigned offset;
-
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
- le32_to_cpu(hdr->ucode_size_bytes));
- size -= le32_to_cpu(hdr->ucode_size_bytes);
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
+
+ if (adev->vcn.inst[i].saved_bo != NULL) {
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ kvfree(adev->vcn.inst[i].saved_bo);
+ adev->vcn.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned offset;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ }
+ memset_io(ptr, 0, size);
}
- memset_io(ptr, 0, size);
}
-
return 0;
}
@@ -241,39 +281,36 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vcn.idle_work.work);
- unsigned int fences = 0;
- unsigned int i;
+ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
+ unsigned int i, j;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
- }
+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- struct dpg_pause_state new_state;
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
+ }
- if (fences)
- new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else
- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
- if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
- new_state.jpeg = VCN_DPG_STATE__PAUSE;
- else
- new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+ if (fence[j])
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- adev->vcn.pause_dpg_mode(adev, &new_state);
- }
+ adev->vcn.pause_dpg_mode(adev, j, &new_state);
+ }
- fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
- fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
+ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
+ fences += fence[j];
+ }
if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
- if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
- else
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
} else {
schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
@@ -286,11 +323,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
- if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
- else
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
}
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
@@ -299,24 +333,17 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
unsigned int i;
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
}
if (fences)
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
- new_state.jpeg = VCN_DPG_STATE__PAUSE;
- else
- new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
-
if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
- new_state.jpeg = VCN_DPG_STATE__PAUSE;
- adev->vcn.pause_dpg_mode(adev, &new_state);
+ adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
}
}
@@ -332,7 +359,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
- WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD);
+ WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
@@ -340,7 +367,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->vcn.external.scratch9);
+ tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
@@ -466,9 +493,14 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
+ struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence;
long r;
+ /* temporarily disable ib test for sriov */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
if (r)
goto error;
@@ -517,13 +549,14 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
}
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -531,14 +564,14 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x0000000b;
ib->ptr[ib->length_dw++] = 0x00000014;
@@ -569,13 +602,14 @@ err:
}
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -583,14 +617,14 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x0000000b;
ib->ptr[ib->length_dw++] = 0x00000014;
@@ -622,129 +656,38 @@ err:
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
+ struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
- r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
- if (r)
- goto error;
-
- r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
- if (r)
- goto error;
-
- r = dma_fence_wait_timeout(fence, false, timeout);
- if (r == 0)
- r = -ETIMEDOUT;
- else if (r > 0)
- r = 0;
-
-error:
- dma_fence_put(fence);
- return r;
-}
-
-int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t tmp = 0;
- unsigned i;
- int r;
+ /* temporarily disable ib test for sriov */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
- WREG32(adev->vcn.external.jpeg_pitch, 0xCAFEDEAD);
- r = amdgpu_ring_alloc(ring, 3);
+ r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.jpeg_pitch, 0));
- amdgpu_ring_write(ring, 0xDEADBEEF);
- amdgpu_ring_commit(ring);
-
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->vcn.external.jpeg_pitch);
- if (tmp == 0xDEADBEEF)
- break;
- udelay(1);
- }
-
- if (i >= adev->usec_timeout)
- r = -ETIMEDOUT;
-
- return r;
-}
-
-static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
-{
- struct amdgpu_device *adev = ring->adev;
- struct amdgpu_job *job;
- struct amdgpu_ib *ib;
- struct dma_fence *f = NULL;
- const unsigned ib_size_dw = 16;
- int i, r;
-
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
- return r;
-
- ib = &job->ibs[0];
-
- ib->ptr[0] = PACKETJ(adev->vcn.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0);
- ib->ptr[1] = 0xDEADBEEF;
- for (i = 2; i < 16; i += 2) {
- ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
- ib->ptr[i+1] = 0;
- }
- ib->length_dw = 16;
-
- r = amdgpu_job_submit_direct(job, ring, &f);
- if (r)
- goto err;
-
- if (fence)
- *fence = dma_fence_get(f);
- dma_fence_put(f);
-
- return 0;
-
-err:
- amdgpu_job_free(job);
- return r;
-}
-
-int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t tmp = 0;
- unsigned i;
- struct dma_fence *fence = NULL;
- long r = 0;
+ goto error;
- r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
+ r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
if (r)
goto error;
r = dma_fence_wait_timeout(fence, false, timeout);
- if (r == 0) {
+ if (r == 0)
r = -ETIMEDOUT;
- goto error;
- } else if (r < 0) {
- goto error;
- } else {
+ else if (r > 0)
r = 0;
- }
-
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->vcn.external.jpeg_pitch);
- if (tmp == 0xDEADBEEF)
- break;
- udelay(1);
- }
-
- if (i >= adev->usec_timeout)
- r = -ETIMEDOUT;
- dma_fence_put(fence);
error:
+ dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 19661c645703..6fe057329de2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -30,6 +30,12 @@
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
#define AMDGPU_VCN_MAX_ENC_RINGS 3
+#define AMDGPU_MAX_VCN_INSTANCES 2
+#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
+
+#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
+#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
+
#define VCN_DEC_KMD_CMD 0x80000000
#define VCN_DEC_CMD_FENCE 0x00000000
#define VCN_DEC_CMD_TRAP 0x00000001
@@ -51,33 +57,41 @@
#define VCN_VID_IP_ADDRESS_2_0 0x0
#define VCN_AON_IP_ADDRESS_2_0 0x30000
-#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
- ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
+#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b
+#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1
+#define mmUVD_REG_XX_MASK 0x026c
+#define mmUVD_REG_XX_MASK_BASE_IDX 1
+
+/* 1 second timeout */
+#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+#define RREG32_SOC15_DPG_MODE(ip, inst_idx, reg, mask, sram_sel) \
+ ({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
UVD_DPG_LMA_CTL__MASK_EN_MASK | \
- ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
+ ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
- RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \
+ RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \
})
-#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
+#define WREG32_SOC15_DPG_MODE(ip, inst_idx, reg, value, mask, sram_sel) \
do { \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
- ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
+ ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
} while (0)
-#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst, reg) \
+#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst_idx, reg) \
({ \
uint32_t internal_reg_offset, addr; \
bool video_range, aon_range; \
\
- addr = (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
+ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
addr <<= 2; \
video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \
((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \
@@ -95,27 +109,27 @@
internal_reg_offset >>= 2; \
})
-#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) \
- ({ \
- WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \
- (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
- mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
- offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
- RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); \
+#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \
+ ({ \
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
+ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
})
-#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect) \
- do { \
- if (!indirect) { \
- WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); \
- WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \
- (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
- mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
- offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
- } else { \
- *adev->vcn.dpg_sram_curr_addr++ = offset; \
- *adev->vcn.dpg_sram_curr_addr++ = value; \
- } \
+#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \
+ } \
} while (0)
enum engine_status_constants {
@@ -146,36 +160,52 @@ struct amdgpu_vcn_reg{
unsigned data1;
unsigned cmd;
unsigned nop;
+ unsigned context_id;
+ unsigned ib_vmid;
+ unsigned ib_bar_low;
+ unsigned ib_bar_high;
+ unsigned ib_size;
+ unsigned gp_scratch8;
unsigned scratch9;
- unsigned jpeg_pitch;
};
-struct amdgpu_vcn {
+struct amdgpu_vcn_inst {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
- unsigned fw_version;
void *saved_bo;
- struct delayed_work idle_work;
- const struct firmware *fw; /* VCN firmware */
struct amdgpu_ring ring_dec;
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
- struct amdgpu_ring ring_jpeg;
struct amdgpu_irq_src irq;
- unsigned num_enc_rings;
- enum amd_powergating_state cur_state;
- struct dpg_pause_state pause_state;
- struct amdgpu_vcn_reg internal, external;
- int (*pause_dpg_mode)(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state);
-
- bool indirect_sram;
+ struct amdgpu_vcn_reg external;
struct amdgpu_bo *dpg_sram_bo;
+ struct dpg_pause_state pause_state;
void *dpg_sram_cpu_addr;
uint64_t dpg_sram_gpu_addr;
uint32_t *dpg_sram_curr_addr;
};
+struct amdgpu_vcn {
+ unsigned fw_version;
+ struct delayed_work idle_work;
+ const struct firmware *fw; /* VCN firmware */
+ unsigned num_enc_rings;
+ enum amd_powergating_state cur_state;
+ bool indirect_sram;
+
+ uint8_t num_vcn_inst;
+ struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
+ struct amdgpu_vcn_reg internal;
+ struct drm_gpu_scheduler *vcn_enc_sched[AMDGPU_MAX_VCN_ENC_RINGS];
+ struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
+ uint32_t num_vcn_enc_sched;
+ uint32_t num_vcn_dec_sched;
+
+ unsigned harvest_config;
+ int (*pause_dpg_mode)(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+};
+
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
int amdgpu_vcn_suspend(struct amdgpu_device *adev);
@@ -189,7 +219,4 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
-int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring);
-int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout);
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 59dd204498c5..adc813cde8e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -45,98 +45,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
adev->pg_flags = 0;
}
-uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
-{
- signed long r, cnt = 0;
- unsigned long flags;
- uint32_t seq;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *ring = &kiq->ring;
-
- BUG_ON(!ring->funcs->emit_rreg);
-
- spin_lock_irqsave(&kiq->ring_lock, flags);
- amdgpu_ring_alloc(ring, 32);
- amdgpu_ring_emit_rreg(ring, reg);
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-
- /* don't wait anymore for gpu reset case because this way may
- * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
- * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
- * never return if we keep waiting in virt_kiq_rreg, which cause
- * gpu_recover() hang there.
- *
- * also don't wait anymore for IRQ context
- * */
- if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
- goto failed_kiq_read;
-
- might_sleep();
- while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
- msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
- }
-
- if (cnt > MAX_KIQ_REG_TRY)
- goto failed_kiq_read;
-
- return adev->wb.wb[adev->virt.reg_val_offs];
-
-failed_kiq_read:
- pr_err("failed to read reg:%x\n", reg);
- return ~0;
-}
-
-void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
-{
- signed long r, cnt = 0;
- unsigned long flags;
- uint32_t seq;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *ring = &kiq->ring;
-
- BUG_ON(!ring->funcs->emit_wreg);
-
- spin_lock_irqsave(&kiq->ring_lock, flags);
- amdgpu_ring_alloc(ring, 32);
- amdgpu_ring_emit_wreg(ring, reg, v);
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-
- /* don't wait anymore for gpu reset case because this way may
- * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
- * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
- * never return if we keep waiting in virt_kiq_rreg, which cause
- * gpu_recover() hang there.
- *
- * also don't wait anymore for IRQ context
- * */
- if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
- goto failed_kiq_write;
-
- might_sleep();
- while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
-
- msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
- }
-
- if (cnt > MAX_KIQ_REG_TRY)
- goto failed_kiq_write;
-
- return;
-
-failed_kiq_write:
- pr_err("failed to write reg:%x\n", reg);
-}
-
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
@@ -379,99 +287,3 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
}
}
}
-
-static uint32_t parse_clk(char *buf, bool min)
-{
- char *ptr = buf;
- uint32_t clk = 0;
-
- do {
- ptr = strchr(ptr, ':');
- if (!ptr)
- break;
- ptr+=2;
- if (kstrtou32(ptr, 10, &clk))
- return 0;
- } while (!min);
-
- return clk * 100;
-}
-
-uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest)
-{
- char *buf = NULL;
- uint32_t clk = 0;
-
- buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf);
- clk = parse_clk(buf, lowest);
-
- kfree(buf);
-
- return clk;
-}
-
-uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
-{
- char *buf = NULL;
- uint32_t clk = 0;
-
- buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf);
- clk = parse_clk(buf, lowest);
-
- kfree(buf);
-
- return clk;
-}
-
-void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
-{
- struct amdgpu_virt *virt = &adev->virt;
-
- if (virt->ops && virt->ops->init_reg_access_mode)
- virt->ops->init_reg_access_mode(adev);
-}
-
-bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
-{
- bool ret = false;
- struct amdgpu_virt *virt = &adev->virt;
-
- if (amdgpu_sriov_vf(adev)
- && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
- ret = true;
-
- return ret;
-}
-
-bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
-{
- bool ret = false;
- struct amdgpu_virt *virt = &adev->virt;
-
- if (amdgpu_sriov_vf(adev)
- && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
- && !(amdgpu_sriov_runtime(adev)))
- ret = true;
-
- return ret;
-}
-
-bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
-{
- bool ret = false;
- struct amdgpu_virt *virt = &adev->virt;
-
- if (amdgpu_sriov_vf(adev)
- && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
- ret = true;
-
- return ret;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f5107731e9c4..daaf909d009a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -48,12 +48,6 @@ struct amdgpu_vf_error_buffer {
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
};
-/* According to the fw feature, some new reg access modes are supported */
-#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */
-#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */
-#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
-#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */
-
/**
* struct amdgpu_virt_ops - amdgpu device virt operations
*/
@@ -63,9 +57,6 @@ struct amdgpu_virt_ops {
int (*reset_gpu)(struct amdgpu_device *adev);
int (*wait_reset)(struct amdgpu_device *adev);
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
- int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
- int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
- void (*init_reg_access_mode)(struct amdgpu_device *adev);
};
/*
@@ -92,8 +83,8 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2,
/* VRAM LOST by GIM */
AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
- /* HW PERF SIM in GIM */
- AMDGIM_FEATURE_HW_PERF_SIMULATION = (1 << 3),
+ /* PP ONE VF MODE in GIM */
+ AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
};
struct amd_sriov_msg_pf2vf_info_header {
@@ -264,8 +255,6 @@ struct amdgpu_virt {
struct amdgpu_vf_error_buffer vf_errors;
struct amdgpu_virt_fw_reserve fw_reserve;
uint32_t gim_feature;
- /* protect DPM events to GIM */
- struct mutex dpm_mutex;
uint32_t reg_access_mode;
};
@@ -293,13 +282,11 @@ static inline bool is_virtual_machine(void)
#endif
}
-#define amdgim_is_hwperf(adev) \
- ((adev)->virt.gim_feature & AMDGIM_FEATURE_HW_PERF_SIMULATION)
+#define amdgpu_sriov_is_pp_one_vf(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
-uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
-void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
uint32_t ref, uint32_t mask);
@@ -313,12 +300,4 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
unsigned int key,
unsigned int chksum);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
-uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
-uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
-
-void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
-bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
-bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
-bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 24c3c05e2fb7..d16231d6a790 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -83,6 +83,32 @@ struct amdgpu_prt_cb {
};
/**
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+ mutex_lock(&vm->eviction_lock);
+ vm->saved_flags = memalloc_nofs_save();
+}
+
+static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+ if (mutex_trylock(&vm->eviction_lock)) {
+ vm->saved_flags = memalloc_nofs_save();
+ return 1;
+ }
+ return 0;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+ memalloc_nofs_restore(vm->saved_flags);
+ mutex_unlock(&vm->eviction_lock);
+}
+
+/**
* amdgpu_vm_level_shift - return the addr shift for each level
*
* @adev: amdgpu_device pointer
@@ -130,7 +156,8 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
if (level == adev->vm_manager.root_level)
/* For the root directory */
- return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift;
+ return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
+ >> shift;
else if (level != AMDGPU_VM_PTB)
/* Everything in between */
return 512;
@@ -302,7 +329,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->next = bo->vm_bo;
bo->vm_bo = base;
- if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+ if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv)
return;
vm->bulk_moveable = false;
@@ -341,7 +368,7 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
}
-/**
+/*
* amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
*/
struct amdgpu_vm_pt_cursor {
@@ -482,6 +509,7 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
*
* @adev: amdgpu_device structure
* @vm: amdgpu_vm structure
+ * @start: optional cursor to start with
* @cursor: state to initialize
*
* Starts a deep first traversal of the PD/PT tree.
@@ -535,7 +563,7 @@ static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
amdgpu_vm_pt_ancestor(cursor);
}
-/**
+/*
* for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs
*/
#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
@@ -560,12 +588,20 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
{
entry->priority = 0;
entry->tv.bo = &vm->root.base.bo->tbo;
- /* One for the VM updates, one for TTM and one for the CS job */
- entry->tv.num_shared = 3;
+ /* One for TTM and one for the CS job */
+ entry->tv.num_shared = 2;
entry->user_pages = NULL;
list_add(&entry->tv.head, validated);
}
+/**
+ * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
+ *
+ * @bo: BO which was removed from the LRU
+ *
+ * Make sure the bulk_moveable flag is updated when a BO is removed from the
+ * LRU.
+ */
void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
{
struct amdgpu_bo *abo;
@@ -583,7 +619,7 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
- if (abo->tbo.resv == vm->root.base.bo->tbo.resv)
+ if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)
vm->bulk_moveable = false;
}
@@ -600,21 +636,18 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
- struct ttm_bo_global *glob = adev->mman.bdev.glob;
struct amdgpu_vm_bo_base *bo_base;
-#if 0
if (vm->bulk_moveable) {
- spin_lock(&glob->lru_lock);
+ spin_lock(&ttm_bo_glob.lru_lock);
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&ttm_bo_glob.lru_lock);
return;
}
-#endif
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&glob->lru_lock);
+ spin_lock(&ttm_bo_glob.lru_lock);
list_for_each_entry(bo_base, &vm->idle, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
@@ -626,7 +659,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
&vm->lru_bulk_move);
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&ttm_bo_glob.lru_lock);
vm->bulk_moveable = true;
}
@@ -649,7 +682,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *param)
{
struct amdgpu_vm_bo_base *bo_base, *tmp;
- int r = 0;
+ int r;
vm->bulk_moveable &= list_empty(&vm->evicted);
@@ -658,7 +691,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
r = validate(param, bo);
if (r)
- break;
+ return r;
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
@@ -671,7 +704,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
}
- return r;
+ amdgpu_vm_eviction_lock(vm);
+ vm->evicting = false;
+ amdgpu_vm_eviction_unlock(vm);
+
+ return 0;
}
/**
@@ -695,6 +732,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
* @adev: amdgpu_device pointer
* @vm: VM to clear BO from
* @bo: BO to clear
+ * @direct: use a direct update
*
* Root PD needs to be reserved when calling this.
*
@@ -703,7 +741,8 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
*/
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
- struct amdgpu_bo *bo)
+ struct amdgpu_bo *bo,
+ bool direct)
{
struct ttm_operation_ctx ctx = { true, false };
unsigned level = adev->vm_manager.root_level;
@@ -762,6 +801,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
+ params.direct = direct;
r = vm->update_funcs->prepare(&params, AMDGPU_FENCE_OWNER_KFD, NULL);
if (r)
@@ -815,10 +855,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requesting vm
+ * @level: the page table level
+ * @direct: use a direct update
* @bp: resulting BO allocation parameters
*/
static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int level, struct amdgpu_bo_param *bp)
+ int level, bool direct,
+ struct amdgpu_bo_param *bp)
{
memset(bp, 0, sizeof(*bp));
@@ -833,8 +876,9 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
else if (!vm->root.base.bo || vm->root.base.bo->shadow)
bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
bp->type = ttm_bo_type_kernel;
+ bp->no_wait_gpu = direct;
if (vm->root.base.bo)
- bp->resv = vm->root.base.bo->tbo.resv;
+ bp->resv = vm->root.base.bo->tbo.base.resv;
}
/**
@@ -843,6 +887,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* @adev: amdgpu_device pointer
* @vm: VM to allocate page tables for
* @cursor: Which page table to allocate
+ * @direct: use a direct update
*
* Make sure a specific page table or directory is allocated.
*
@@ -852,7 +897,8 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
*/
static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *cursor)
+ struct amdgpu_vm_pt_cursor *cursor,
+ bool direct)
{
struct amdgpu_vm_pt *entry = cursor->entry;
struct amdgpu_bo_param bp;
@@ -873,7 +919,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
if (entry->base.bo)
return 0;
- amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
+ amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp);
r = amdgpu_bo_create(adev, &bp, &pt);
if (r)
@@ -885,7 +931,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
amdgpu_vm_bo_base_init(&entry->base, vm, pt);
- r = amdgpu_vm_clear_bo(adev, vm, pt);
+ r = amdgpu_vm_clear_bo(adev, vm, pt, direct);
if (r)
goto error_free_pt;
@@ -1022,7 +1068,8 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
* Returns:
* 0 on success, errno otherwise.
*/
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ bool need_pipe_sync)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
@@ -1036,10 +1083,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
id->oa_base != job->oa_base ||
id->oa_size != job->oa_size);
bool vm_flush_needed = job->vm_needs_flush;
- bool pasid_mapping_needed = id->pasid != job->pasid ||
- !id->pasid_mapping ||
- !dma_fence_is_signaled(id->pasid_mapping);
struct dma_fence *fence = NULL;
+ bool pasid_mapping_needed = false;
unsigned patch_offset = 0;
int r;
@@ -1049,6 +1094,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
pasid_mapping_needed = true;
}
+ mutex_lock(&id_mgr->lock);
+ if (id->pasid != job->pasid || !id->pasid_mapping ||
+ !dma_fence_is_signaled(id->pasid_mapping))
+ pasid_mapping_needed = true;
+ mutex_unlock(&id_mgr->lock);
+
gds_switch_needed &= !!ring->funcs->emit_gds_switch;
vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
@@ -1088,9 +1139,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
}
if (pasid_mapping_needed) {
+ mutex_lock(&id_mgr->lock);
id->pasid = job->pasid;
dma_fence_put(id->pasid_mapping);
id->pasid_mapping = dma_fence_get(fence);
+ mutex_unlock(&id_mgr->lock);
}
dma_fence_put(fence);
@@ -1174,10 +1227,10 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
return result;
}
-/*
+/**
* amdgpu_vm_update_pde - update a single level in the hierarchy
*
- * @param: parameters for the update
+ * @params: parameters for the update
* @vm: requested vm
* @entry: entry to update
*
@@ -1201,7 +1254,7 @@ static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params,
return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags);
}
-/*
+/**
* amdgpu_vm_invalidate_pds - mark all PDs as invalid
*
* @adev: amdgpu_device pointer
@@ -1220,19 +1273,20 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
amdgpu_vm_bo_relocated(&entry->base);
}
-/*
- * amdgpu_vm_update_directories - make sure that all directories are valid
+/**
+ * amdgpu_vm_update_pdes - make sure that all directories are valid
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @direct: submit directly to the paging queue
*
* Makes sure all directories are up to date.
*
* Returns:
* 0 for success, error for failure.
*/
-int amdgpu_vm_update_directories(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, bool direct)
{
struct amdgpu_vm_update_params params;
int r;
@@ -1243,6 +1297,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
+ params.direct = direct;
r = vm->update_funcs->prepare(&params, AMDGPU_FENCE_OWNER_VM, NULL);
if (r)
@@ -1270,7 +1325,7 @@ error:
return r;
}
-/**
+/*
* amdgpu_vm_update_flags - figure out flags for PTE updates
*
* Make sure to set the right flags for the PTEs at the desired level.
@@ -1393,7 +1448,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
uint64_t incr, entry_end, pe_start;
struct amdgpu_bo *pt;
- r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
+ /* make sure that the page tables covering the address range are
+ * actually allocated
+ */
+ r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor,
+ params->direct);
if (r)
return r;
@@ -1465,7 +1524,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
} while (frag_start < entry_end);
if (amdgpu_vm_pt_descendant(adev, &cursor)) {
- /* Free all child entries */
+ /* Free all child entries.
+ * Update the tables with the flags and addresses and free up subsequent
+ * tables in the case of huge pages or freed up areas.
+ * This is the maximum you can free, because all other page tables are not
+ * completely covered by the range and so potentially still in use.
+ */
while (cursor.pfn < frag_start) {
amdgpu_vm_free_pts(adev, params->vm, &cursor);
amdgpu_vm_pt_next(adev, &cursor);
@@ -1484,13 +1548,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
* amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
*
* @adev: amdgpu_device pointer
- * @exclusive: fence we need to sync to
- * @pages_addr: DMA addresses to use for mapping
* @vm: requested vm
+ * @direct: direct submission in a page fault
+ * @exclusive: fence we need to sync to
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
* @addr: addr to set the area to
+ * @pages_addr: DMA addresses to use for mapping
* @fence: optional resulting fence
*
* Fill in the page table entries between @start and @last.
@@ -1499,11 +1564,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
* 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, bool direct,
struct dma_fence *exclusive,
- dma_addr_t *pages_addr,
- struct amdgpu_vm *vm,
uint64_t start, uint64_t last,
uint64_t flags, uint64_t addr,
+ dma_addr_t *pages_addr,
struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
@@ -1513,21 +1578,32 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
+ params.direct = direct;
params.pages_addr = pages_addr;
/* sync to everything except eviction fences on unmapping */
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_KFD;
+ amdgpu_vm_eviction_lock(vm);
+ if (vm->evicting) {
+ r = -EBUSY;
+ goto error_unlock;
+ }
+
r = vm->update_funcs->prepare(&params, owner, exclusive);
if (r)
- return r;
+ goto error_unlock;
r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
if (r)
- return r;
+ goto error_unlock;
+
+ r = vm->update_funcs->commit(&params, fence);
- return vm->update_funcs->commit(&params, fence);
+error_unlock:
+ amdgpu_vm_eviction_unlock(vm);
+ return r;
}
/**
@@ -1571,27 +1647,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
flags &= ~AMDGPU_PTE_WRITEABLE;
- flags &= ~AMDGPU_PTE_EXECUTABLE;
- flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
-
- if (adev->asic_type == CHIP_NAVI10) {
- flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
- flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
- } else {
- flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
- flags |= (mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK);
- }
-
- if ((mapping->flags & AMDGPU_PTE_PRT) &&
- (adev->asic_type >= CHIP_VEGA10)) {
- flags |= AMDGPU_PTE_PRT;
- if (adev->asic_type >= CHIP_NAVI10) {
- flags |= AMDGPU_PTE_SNOOPED;
- flags |= AMDGPU_PTE_LOG;
- flags |= AMDGPU_PTE_SYSTEM;
- }
- flags &= ~AMDGPU_PTE_VALID;
- }
+ /* Apply ASIC specific mapping flags */
+ amdgpu_gmc_get_vm_pte(adev, mapping, &flags);
trace_amdgpu_vm_bo_update(mapping);
@@ -1635,7 +1692,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
dma_addr = pages_addr;
} else {
addr = pages_addr[pfn];
- max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ max_entries = count *
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
}
} else if (flags & AMDGPU_PTE_VALID) {
@@ -1644,9 +1702,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
}
last = min((uint64_t)mapping->last, start + max_entries - 1);
- r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
+ r = amdgpu_vm_bo_update_mapping(adev, vm, false, exclusive,
start, last, flags, addr,
- fence);
+ dma_addr, fence);
if (r)
return r;
@@ -1674,8 +1732,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
* Returns:
* 0 for success, -EINVAL for failure.
*/
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
+int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
bool clear)
{
struct amdgpu_bo *bo = bo_va->base.bo;
@@ -1702,7 +1759,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
pages_addr = ttm->dma_address;
}
- exclusive = reservation_object_get_excl(bo->tbo.resv);
+ exclusive = bo->tbo.moving;
}
if (bo) {
@@ -1712,7 +1769,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
flags = 0x0;
}
- if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
+ if (clear || (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv))
last_update = &vm->last_update;
else
last_update = &bo_va->last_pt_update;
@@ -1733,20 +1790,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
return r;
}
- if (vm->use_cpu_for_update) {
- /* Flush HDP */
- mb();
- amdgpu_asic_flush_hdp(adev, NULL);
- }
-
/* If the BO is not in its preferred location add it back to
* the evicted list so that it gets validated again on the
* next command submission.
*/
- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+ if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {
uint32_t mem_type = bo->tbo.mem.mem_type;
- if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
+ if (!(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(mem_type)))
amdgpu_vm_bo_evicted(&bo_va->base);
else
amdgpu_vm_bo_idle(&bo_va->base);
@@ -1879,18 +1931,18 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
*/
static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
- struct reservation_object *resv = vm->root.base.bo->tbo.resv;
+ struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
struct dma_fence *excl, **shared;
unsigned i, shared_count;
int r;
- r = reservation_object_get_fences_rcu(resv, &excl,
+ r = dma_resv_get_fences_rcu(resv, &excl,
&shared_count, &shared);
if (r) {
/* Not enough memory to grab the fence list, as last resort
* block for all the fences to complete.
*/
- reservation_object_wait_timeout_rcu(resv, true, false,
+ dma_resv_wait_timeout_rcu(resv, true, false,
MAX_SCHEDULE_TIMEOUT);
return;
}
@@ -1940,9 +1992,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
mapping->start < AMDGPU_GMC_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
- r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
+ r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL,
mapping->start, mapping->last,
- init_pte_value, 0, &f);
+ init_pte_value, 0, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
@@ -1978,7 +2030,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
struct amdgpu_bo_va *bo_va, *tmp;
- struct reservation_object *resv;
+ struct dma_resv *resv;
bool clear;
int r;
@@ -1993,11 +2045,11 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
base.vm_status);
- resv = bo_va->base.bo->tbo.resv;
+ resv = bo_va->base.bo->tbo.base.resv;
spin_unlock(&vm->invalidated_lock);
/* Try to reserve the BO to avoid clearing its ptes */
- if (!amdgpu_vm_debug && reservation_object_trylock(resv))
+ if (!amdgpu_vm_debug && dma_resv_trylock(resv))
clear = false;
/* Somebody else is using the BO right now */
else
@@ -2008,7 +2060,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
return r;
if (!clear)
- reservation_object_unlock(resv);
+ dma_resv_unlock(resv);
spin_lock(&vm->invalidated_lock);
}
spin_unlock(&vm->invalidated_lock);
@@ -2084,7 +2136,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
if (mapping->flags & AMDGPU_PTE_PRT)
amdgpu_vm_prt_get(adev);
- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
+ if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv &&
!bo_va->base.moved) {
list_move(&bo_va->base.vm_status, &vm->moved);
}
@@ -2416,7 +2468,8 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
struct amdgpu_bo *bo;
bo = mapping->bo_va->base.bo;
- if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket)
+ if (dma_resv_locking_ctx(bo->tbo.base.resv) !=
+ ticket)
continue;
}
@@ -2443,7 +2496,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_vm_bo_base **base;
if (bo) {
- if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
+ if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)
vm->bulk_moveable = false;
for (base = &bo_va->base.bo->vm_bo; *base;
@@ -2487,6 +2540,41 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vm_evictable - check if we can evict a VM
+ *
+ * @bo: A page table of the VM.
+ *
+ * Check if it is possible to evict a VM.
+ */
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
+
+ /* Page tables of a destroyed VM can go away immediately */
+ if (!bo_base || !bo_base->vm)
+ return true;
+
+ /* Don't evict VM page tables while they are busy */
+ if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
+ return false;
+
+ /* Try to block ongoing updates */
+ if (!amdgpu_vm_eviction_trylock(bo_base->vm))
+ return false;
+
+ /* Don't evict VM page tables while they are updated */
+ if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
+ !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
+ amdgpu_vm_eviction_unlock(bo_base->vm);
+ return false;
+ }
+
+ bo_base->vm->evicting = true;
+ amdgpu_vm_eviction_unlock(bo_base->vm);
+ return true;
+}
+
+/**
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
* @adev: amdgpu_device pointer
@@ -2507,7 +2595,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
- if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+ if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {
amdgpu_vm_bo_evicted(bo_base);
continue;
}
@@ -2518,7 +2606,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
if (bo->tbo.type == ttm_bo_type_kernel)
amdgpu_vm_bo_relocated(bo_base);
- else if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
+ else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)
amdgpu_vm_bo_moved(bo_base);
else
amdgpu_vm_bo_invalidated(bo_base);
@@ -2648,8 +2736,16 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
*/
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
{
- return reservation_object_wait_timeout_rcu(vm->root.base.bo->tbo.resv,
- true, true, timeout);
+ timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv,
+ true, true, timeout);
+ if (timeout <= 0)
+ return timeout;
+
+ timeout = dma_fence_wait_timeout(vm->last_direct, true, timeout);
+ if (timeout <= 0)
+ return timeout;
+
+ return dma_fence_wait_timeout(vm->last_delayed, true, timeout);
}
/**
@@ -2683,13 +2779,22 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
spin_lock_init(&vm->invalidated_lock);
INIT_LIST_HEAD(&vm->freed);
- /* create scheduler entity for page table updates */
- r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs,
- adev->vm_manager.vm_pte_num_rqs, NULL);
+
+ /* create scheduler entities for page table updates */
+ r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
if (r)
return r;
+ r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
+ if (r)
+ goto error_free_direct;
+
vm->pte_support_ats = false;
+ vm->is_compute_context = false;
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2703,7 +2808,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+ WARN_ONCE((vm->use_cpu_for_update &&
+ !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
if (vm->use_cpu_for_update)
@@ -2711,25 +2817,30 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
else
vm->update_funcs = &amdgpu_vm_sdma_funcs;
vm->last_update = NULL;
+ vm->last_direct = dma_fence_get_stub();
+ vm->last_delayed = dma_fence_get_stub();
+
+ mutex_init(&vm->eviction_lock);
+ vm->evicting = false;
- amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp);
+ amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
r = amdgpu_bo_create(adev, &bp, &root);
if (r)
- goto error_free_sched_entity;
+ goto error_free_delayed;
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_free_root;
- r = reservation_object_reserve_shared(root->tbo.resv, 1);
+ r = dma_resv_reserve_shared(root->tbo.base.resv, 1);
if (r)
goto error_unreserve;
amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
- r = amdgpu_vm_clear_bo(adev, vm, root);
+ r = amdgpu_vm_clear_bo(adev, vm, root, false);
if (r)
goto error_unreserve;
@@ -2760,8 +2871,13 @@ error_free_root:
amdgpu_bo_unref(&vm->root.base.bo);
vm->root.base.bo = NULL;
-error_free_sched_entity:
- drm_sched_entity_destroy(&vm->entity);
+error_free_delayed:
+ dma_fence_put(vm->last_direct);
+ dma_fence_put(vm->last_delayed);
+ drm_sched_entity_destroy(&vm->delayed);
+
+error_free_direct:
+ drm_sched_entity_destroy(&vm->direct);
return r;
}
@@ -2802,6 +2918,7 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @pasid: pasid to use
*
* This only works on GFX VMs that don't have any BOs added and no
* page tables allocated yet.
@@ -2817,7 +2934,8 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
* Returns:
* 0 for success, -errno for errors.
*/
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid)
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned int pasid)
{
bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
int r;
@@ -2849,7 +2967,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
*/
if (pte_support_ats != vm->pte_support_ats) {
vm->pte_support_ats = pte_support_ats;
- r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo);
+ r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false);
if (r)
goto free_idr;
}
@@ -2859,9 +2977,18 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+ WARN_ONCE((vm->use_cpu_for_update &&
+ !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
+ if (vm->use_cpu_for_update)
+ vm->update_funcs = &amdgpu_vm_cpu_funcs;
+ else
+ vm->update_funcs = &amdgpu_vm_sdma_funcs;
+ dma_fence_put(vm->last_update);
+ vm->last_update = NULL;
+ vm->is_compute_context = true;
+
if (vm->pasid) {
unsigned long flags;
@@ -2915,6 +3042,7 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
}
vm->pasid = 0;
+ vm->is_compute_context = false;
}
/**
@@ -2931,31 +3059,26 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
struct amdgpu_bo_va_mapping *mapping, *tmp;
bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
struct amdgpu_bo *root;
- int i, r;
+ int i;
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+ root = amdgpu_bo_ref(vm->root.base.bo);
+ amdgpu_bo_reserve(root, true);
if (vm->pasid) {
unsigned long flags;
spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+ vm->pasid = 0;
}
- drm_sched_entity_destroy(&vm->entity);
+ dma_fence_wait(vm->last_direct, false);
+ dma_fence_put(vm->last_direct);
+ dma_fence_wait(vm->last_delayed, false);
+ dma_fence_put(vm->last_delayed);
- if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
- dev_err(adev->dev, "still active bo inside vm\n");
- }
- rbtree_postorder_for_each_entry_safe(mapping, tmp,
- &vm->va.rb_root, rb) {
- /* Don't remove the mapping here, we don't want to trigger a
- * rebalance and the tree is about to be destroyed anyway.
- */
- list_del(&mapping->list);
- kfree(mapping);
- }
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
amdgpu_vm_prt_fini(adev, vm);
@@ -2966,16 +3089,26 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
}
- root = amdgpu_bo_ref(vm->root.base.bo);
- r = amdgpu_bo_reserve(root, true);
- if (r) {
- dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
- } else {
- amdgpu_vm_free_pts(adev, vm, NULL);
- amdgpu_bo_unreserve(root);
- }
+ amdgpu_vm_free_pts(adev, vm, NULL);
+ amdgpu_bo_unreserve(root);
amdgpu_bo_unref(&root);
WARN_ON(vm->root.base.bo);
+
+ drm_sched_entity_destroy(&vm->direct);
+ drm_sched_entity_destroy(&vm->delayed);
+
+ if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
+ dev_err(adev->dev, "still active bo inside vm\n");
+ }
+ rbtree_postorder_for_each_entry_safe(mapping, tmp,
+ &vm->va.rb_root, rb) {
+ /* Don't remove the mapping here, we don't want to trigger a
+ * rebalance and the tree is about to be destroyed anyway.
+ */
+ list_del(&mapping->list);
+ kfree(mapping);
+ }
+
dma_fence_put(vm->last_update);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
amdgpu_vmid_free_reserved(adev, vm, i);
@@ -3059,13 +3192,14 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
- /* current, we only have requirement to reserve vmid from gfxhub */
- r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
+ /* We only have requirement to reserve vmid from gfxhub */
+ r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
+ AMDGPU_GFXHUB_0);
if (r)
return r;
break;
case AMDGPU_VM_OP_UNRESERVE_VMID:
- amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
+ amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
break;
default:
return -EINVAL;
@@ -3103,13 +3237,97 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
*/
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
{
- if (!vm->task_info.pid) {
- vm->task_info.pid = current->pid;
- get_task_comm(vm->task_info.task_name, current);
+ if (vm->task_info.pid)
+ return;
- if (current->group_leader->mm == current->mm) {
- vm->task_info.tgid = current->group_leader->pid;
- get_task_comm(vm->task_info.process_name, current->group_leader);
- }
+ vm->task_info.pid = current->pid;
+ get_task_comm(vm->task_info.task_name, current);
+
+ if (current->group_leader->mm != current->mm)
+ return;
+
+ vm->task_info.tgid = current->group_leader->pid;
+ get_task_comm(vm->task_info.process_name, current->group_leader);
+}
+
+/**
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ *
+ * Try to gracefully handle a VM fault. Return true if the fault was handled and
+ * shouldn't be reported any more.
+ */
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+ uint64_t addr)
+{
+ struct amdgpu_bo *root;
+ uint64_t value, flags;
+ struct amdgpu_vm *vm;
+ long r;
+
+ spin_lock(&adev->vm_manager.pasid_lock);
+ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+ if (vm)
+ root = amdgpu_bo_ref(vm->root.base.bo);
+ else
+ root = NULL;
+ spin_unlock(&adev->vm_manager.pasid_lock);
+
+ if (!root)
+ return false;
+
+ r = amdgpu_bo_reserve(root, true);
+ if (r)
+ goto error_unref;
+
+ /* Double check that the VM still exists */
+ spin_lock(&adev->vm_manager.pasid_lock);
+ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+ if (vm && vm->root.base.bo != root)
+ vm = NULL;
+ spin_unlock(&adev->vm_manager.pasid_lock);
+ if (!vm)
+ goto error_unlock;
+
+ addr /= AMDGPU_GPU_PAGE_SIZE;
+ flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
+ AMDGPU_PTE_SYSTEM;
+
+ if (vm->is_compute_context) {
+ /* Intentionally setting invalid PTE flag
+ * combination to force a no-retry-fault
+ */
+ flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
+ AMDGPU_PTE_TF;
+ value = 0;
+
+ } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+ /* Redirect the access to the dummy page */
+ value = adev->dummy_page_addr;
+ flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
+ AMDGPU_PTE_WRITEABLE;
+
+ } else {
+ /* Let the hw retry silently on the PTE */
+ value = 0;
}
+
+ r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
+ flags, value, NULL, NULL);
+ if (r)
+ goto error_unlock;
+
+ r = amdgpu_vm_update_pdes(adev, vm, true);
+
+error_unlock:
+ amdgpu_bo_unreserve(root);
+ if (r < 0)
+ DRM_ERROR("Can't handle page fault (%ld)\n", r);
+
+error_unref:
+ amdgpu_bo_unref(&root);
+
+ return false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 489a162ca620..b4640ab38c95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -30,6 +30,7 @@
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
#include <drm/ttm/ttm_bo_driver.h>
+#include <linux/sched/mm.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
@@ -90,7 +91,7 @@ struct amdgpu_bo_list_entry;
| AMDGPU_PTE_WRITEABLE \
| AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC))
-/* NAVI10 only */
+/* gfx10 */
#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48)
#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL)
@@ -99,10 +100,14 @@ struct amdgpu_bo_list_entry;
#define AMDGPU_VM_FAULT_STOP_FIRST 1
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
+/* Reserve 4MB VRAM for page tables */
+#define AMDGPU_VM_RESERVED_VRAM (4ULL << 20)
+
/* max number of VMHUB */
-#define AMDGPU_MAX_VMHUBS 2
-#define AMDGPU_GFXHUB 0
-#define AMDGPU_MMHUB 1
+#define AMDGPU_MAX_VMHUBS 3
+#define AMDGPU_GFXHUB_0 0
+#define AMDGPU_MMHUB_0 1
+#define AMDGPU_MMHUB_1 2
/* hardcode that limit for now */
#define AMDGPU_VA_RESERVED_SIZE (1ULL << 20)
@@ -198,6 +203,11 @@ struct amdgpu_vm_update_params {
struct amdgpu_vm *vm;
/**
+ * @direct: if changes should be made directly
+ */
+ bool direct;
+
+ /**
* @pages_addr:
*
* DMA addresses to use for mapping
@@ -230,6 +240,13 @@ struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
+ /* Lock to prevent eviction while we are updating page tables
+ * use vm_eviction_lock/unlock(vm)
+ */
+ struct mutex eviction_lock;
+ bool evicting;
+ unsigned int saved_flags;
+
/* BOs who needs a validation */
struct list_head evicted;
@@ -253,8 +270,13 @@ struct amdgpu_vm {
struct amdgpu_vm_pt root;
struct dma_fence *last_update;
- /* Scheduler entity for page table updates */
- struct drm_sched_entity entity;
+ /* Scheduler entities for page table updates */
+ struct drm_sched_entity direct;
+ struct drm_sched_entity delayed;
+
+ /* Last submission to the scheduler entities */
+ struct dma_fence *last_direct;
+ struct dma_fence *last_delayed;
unsigned int pasid;
/* dedicated to vm */
@@ -288,6 +310,8 @@ struct amdgpu_vm {
struct ttm_lru_bulk_move lru_bulk_move;
/* mark whether can do the bulk move */
bool bulk_moveable;
+ /* Flag to indicate if VM is used for compute */
+ bool is_compute_context;
};
struct amdgpu_vm_manager {
@@ -307,8 +331,8 @@ struct amdgpu_vm_manager {
u64 vram_base_offset;
/* vm pte handling */
const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
- struct drm_sched_rq *vm_pte_rqs[AMDGPU_MAX_RINGS];
- unsigned vm_pte_num_rqs;
+ struct drm_gpu_scheduler *vm_pte_scheds[AMDGPU_MAX_RINGS];
+ unsigned vm_pte_num_scheds;
struct amdgpu_ring *page_fault;
/* partial resident texture handling */
@@ -356,8 +380,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*callback)(void *p, struct amdgpu_bo *bo),
void *param);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
-int amdgpu_vm_update_directories(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, bool direct);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence);
@@ -366,6 +390,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo, bool evicted);
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
@@ -403,6 +428,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
struct amdgpu_task_info *task_info);
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+ uint64_t addr);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 5222d165abfc..73fec7a0ced5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -49,13 +49,6 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
{
int r;
- /* Wait for PT BOs to be idle. PTs share the same resv. object
- * as the root PD BO
- */
- r = amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true);
- if (unlikely(r))
- return r;
-
/* Wait for any BO move to be completed */
if (exclusive) {
r = dma_fence_wait(exclusive, true);
@@ -63,7 +56,14 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
return r;
}
- return 0;
+ /* Don't wait for submissions during page fault */
+ if (p->direct)
+ return 0;
+
+ /* Wait for PT BOs to be idle. PTs share the same resv. object
+ * as the root PD BO
+ */
+ return amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true);
}
/**
@@ -89,7 +89,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
pe += (unsigned long)amdgpu_bo_kptr(bo);
- trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
+ trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
for (i = 0; i < count; i++) {
value = p->pages_addr ?
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index ddd181f5ed37..19b7f80758f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -68,17 +68,19 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
if (r)
return r;
- r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
- if (r)
- return r;
+ p->num_dw_left = ndw;
- r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
- owner, false);
+ /* Wait for moves to be completed */
+ r = amdgpu_sync_fence(&p->job->sync, exclusive, false);
if (r)
return r;
- p->num_dw_left = ndw;
- return 0;
+ /* Don't wait for any submissions during page fault handling */
+ if (p->direct)
+ return 0;
+
+ return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv,
+ owner, false);
}
/**
@@ -93,24 +95,30 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
struct dma_fence **fence)
{
- struct amdgpu_bo *root = p->vm->root.base.bo;
struct amdgpu_ib *ib = p->job->ibs;
+ struct drm_sched_entity *entity;
+ struct dma_fence *f, *tmp;
struct amdgpu_ring *ring;
- struct dma_fence *f;
int r;
- ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
+ entity = p->direct ? &p->vm->direct : &p->vm->delayed;
+ ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);
WARN_ON(ib->length_dw == 0);
amdgpu_ring_pad_ib(ring, ib);
WARN_ON(ib->length_dw > p->num_dw_left);
- r = amdgpu_job_submit(p->job, &p->vm->entity,
- AMDGPU_FENCE_OWNER_VM, &f);
+ r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f);
if (r)
goto error;
- amdgpu_bo_fence(root, f, true);
- if (fence)
+ tmp = dma_fence_get(f);
+ if (p->direct)
+ swap(p->vm->last_direct, tmp);
+ else
+ swap(p->vm->last_delayed, tmp);
+ dma_fence_put(tmp);
+
+ if (fence && !p->direct)
swap(*fence, f);
dma_fence_put(f);
return 0;
@@ -120,7 +128,6 @@ error:
return r;
}
-
/**
* amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
*
@@ -141,7 +148,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
src += p->num_dw_left * 4;
pe += amdgpu_bo_gpu_offset(bo);
- trace_amdgpu_vm_copy_ptes(pe, src, count);
+ trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct);
amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
}
@@ -168,7 +175,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
struct amdgpu_ib *ib = p->job->ibs;
pe += amdgpu_bo_gpu_offset(bo);
- trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
+ trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
if (count < 3) {
amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
count, incr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 3a9d8c15fe9f..82a3299e53c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -23,6 +23,9 @@
*/
#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_atomfirmware.h"
+#include "atom.h"
struct amdgpu_vram_mgr {
struct drm_mm mm;
@@ -101,6 +104,39 @@ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]));
}
+static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ switch (adev->gmc.vram_vendor) {
+ case SAMSUNG:
+ return snprintf(buf, PAGE_SIZE, "samsung\n");
+ case INFINEON:
+ return snprintf(buf, PAGE_SIZE, "infineon\n");
+ case ELPIDA:
+ return snprintf(buf, PAGE_SIZE, "elpida\n");
+ case ETRON:
+ return snprintf(buf, PAGE_SIZE, "etron\n");
+ case NANYA:
+ return snprintf(buf, PAGE_SIZE, "nanya\n");
+ case HYNIX:
+ return snprintf(buf, PAGE_SIZE, "hynix\n");
+ case MOSEL:
+ return snprintf(buf, PAGE_SIZE, "mosel\n");
+ case WINBOND:
+ return snprintf(buf, PAGE_SIZE, "winbond\n");
+ case ESMT:
+ return snprintf(buf, PAGE_SIZE, "esmt\n");
+ case MICRON:
+ return snprintf(buf, PAGE_SIZE, "micron\n");
+ default:
+ return snprintf(buf, PAGE_SIZE, "unknown\n");
+ }
+}
+
static DEVICE_ATTR(mem_info_vram_total, S_IRUGO,
amdgpu_mem_info_vram_total_show, NULL);
static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO,
@@ -109,6 +145,8 @@ static DEVICE_ATTR(mem_info_vram_used, S_IRUGO,
amdgpu_mem_info_vram_used_show, NULL);
static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO,
amdgpu_mem_info_vis_vram_used_show, NULL);
+static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO,
+ amdgpu_mem_info_vram_vendor, NULL);
/**
* amdgpu_vram_mgr_init - init VRAM manager and DRM MM
@@ -154,6 +192,11 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man,
DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n");
return ret;
}
+ ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor);
+ if (ret) {
+ DRM_ERROR("Failed to create device file mem_info_vram_vendor\n");
+ return ret;
+ }
return 0;
}
@@ -180,6 +223,7 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man)
device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total);
device_remove_file(adev->dev, &dev_attr_mem_info_vram_used);
device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used);
+ device_remove_file(adev->dev, &dev_attr_mem_info_vram_vendor);
return 0;
}
@@ -275,7 +319,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
struct drm_mm_node *nodes;
enum drm_mm_insert_mode mode;
unsigned long lpfn, num_nodes, pages_per_node, pages_left;
- uint64_t vis_usage = 0, mem_bytes;
+ uint64_t vis_usage = 0, mem_bytes, max_bytes;
unsigned i;
int r;
@@ -283,9 +327,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
if (!lpfn)
lpfn = man->size;
+ max_bytes = adev->gmc.mc_vram_size;
+ if (tbo->type != ttm_bo_type_kernel)
+ max_bytes -= AMDGPU_VM_RESERVED_VRAM;
+
/* bail out quickly if there's likely not enough VRAM for this BO */
mem_bytes = (u64)mem->num_pages << PAGE_SHIFT;
- if (atomic64_add_return(mem_bytes, &mgr->usage) > adev->gmc.mc_vram_size) {
+ if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) {
atomic64_sub(mem_bytes, &mgr->usage);
mem->mm_node = NULL;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index d11eba09eadd..a97af422575a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -25,7 +25,8 @@
#include "amdgpu.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_smu.h"
-
+#include "amdgpu_ras.h"
+#include "df/df_3_6_offset.h"
static DEFINE_MUTEX(xgmi_mutex);
@@ -131,9 +132,37 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
}
+#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801)
+static ssize_t amdgpu_xgmi_show_error(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in;
+ uint64_t fica_out;
+ unsigned int error_count = 0;
+
+ ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200);
+ ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208);
-static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
+ fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in);
+ if (fica_out != 0x1f)
+ pr_err("xGMI error counters not enabled!\n");
+
+ fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in);
+
+ if ((fica_out & 0xffff) == 2)
+ error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63);
+ adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", error_count);
+}
+
+
+static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
+static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
@@ -148,6 +177,12 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
return ret;
}
+ /* Create xgmi error file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_error);
+ if (ret)
+ pr_err("failed to create xgmi_error\n");
+
+
/* Create sysfs link to hive info folder on the first device */
if (adev != hive->adev) {
ret = sysfs_create_link(&adev->dev->kobj, hive->kobj,
@@ -226,6 +261,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
INIT_LIST_HEAD(&tmp->device_list);
mutex_init(&tmp->hive_lock);
mutex_init(&tmp->reset_lock);
+ task_barrier_init(&tmp->tb);
if (lock)
mutex_lock(&tmp->hive_lock);
@@ -239,22 +275,49 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
{
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+ struct amdgpu_device *tmp_adev;
+ bool update_hive_pstate = true;
+ bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20;
if (!hive)
return 0;
- if (hive->pstate == pstate)
- return 0;
+ mutex_lock(&hive->hive_lock);
+
+ if (hive->pstate == pstate) {
+ adev->pstate = is_high_pstate ? pstate : adev->pstate;
+ goto out;
+ }
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
- if (is_support_sw_smu(adev))
- ret = smu_set_xgmi_pstate(&adev->smu, pstate);
- if (ret)
+ ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate);
+ if (ret) {
dev_err(adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
+ goto out;
+ }
+
+ /* Update device pstate */
+ adev->pstate = pstate;
+
+ /*
+ * Update the hive pstate only all devices of the hive
+ * are in the same pstate
+ */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ if (tmp_adev->pstate != adev->pstate) {
+ update_hive_pstate = false;
+ break;
+ }
+ }
+ if (update_hive_pstate || is_high_pstate)
+ hive->pstate = pstate;
+
+out:
+ mutex_unlock(&hive->hive_lock);
return ret;
}
@@ -296,23 +359,28 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
struct amdgpu_xgmi *entry;
struct amdgpu_device *tmp_adev = NULL;
- int count = 0, ret = -EINVAL;
+ int count = 0, ret = 0;
if (!adev->gmc.xgmi.supported)
return 0;
- ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id);
- if (ret) {
- dev_err(adev->dev,
- "XGMI: Failed to get node id\n");
- return ret;
- }
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id);
+ if (ret) {
+ dev_err(adev->dev,
+ "XGMI: Failed to get hive id\n");
+ return ret;
+ }
- ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id);
- if (ret) {
- dev_err(adev->dev,
- "XGMI: Failed to get hive id\n");
- return ret;
+ ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id);
+ if (ret) {
+ dev_err(adev->dev,
+ "XGMI: Failed to get node id\n");
+ return ret;
+ }
+ } else {
+ adev->gmc.xgmi.hive_id = 16;
+ adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16;
}
hive = amdgpu_get_xgmi_hive(adev, 1);
@@ -324,6 +392,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
+ /* Set default device pstate */
+ adev->pstate = -1;
+
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
@@ -332,29 +403,34 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
top_info->num_nodes = count;
hive->number_devices = count;
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- /* update node list for other device in the hive */
- if (tmp_adev != adev) {
- top_info = &tmp_adev->psp.xgmi_context.top_info;
- top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id;
- top_info->num_nodes = count;
+ task_barrier_add_task(&hive->tb);
+
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ /* update node list for other device in the hive */
+ if (tmp_adev != adev) {
+ top_info = &tmp_adev->psp.xgmi_context.top_info;
+ top_info->nodes[count - 1].node_id =
+ adev->gmc.xgmi.node_id;
+ top_info->num_nodes = count;
+ }
+ ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
+ if (ret)
+ goto exit;
}
- ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
- if (ret)
- goto exit;
- }
- /* get latest topology info for each device from psp */
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
- &tmp_adev->psp.xgmi_context.top_info);
- if (ret) {
- dev_err(tmp_adev->dev,
- "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
- tmp_adev->gmc.xgmi.node_id,
- tmp_adev->gmc.xgmi.hive_id, ret);
- /* To do : continue with some node failed or disable the whole hive */
- goto exit;
+ /* get latest topology info for each device from psp */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+ tmp_adev->gmc.xgmi.node_id,
+ tmp_adev->gmc.xgmi.hive_id, ret);
+ /* To do : continue with some node failed or disable the whole hive */
+ goto exit;
+ }
}
}
@@ -391,7 +467,57 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
mutex_destroy(&hive->hive_lock);
mutex_destroy(&hive->reset_lock);
} else {
+ task_barrier_rem_task(&hive->tb);
amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
mutex_unlock(&hive->hive_lock);
}
}
+
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "xgmi_wafl_err_count",
+ .debugfs_name = "xgmi_wafl_err_inject",
+ };
+
+ if (!adev->gmc.xgmi.supported ||
+ adev->gmc.xgmi.num_physical_nodes == 0)
+ return 0;
+
+ if (!adev->gmc.xgmi.ras_if) {
+ adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->gmc.xgmi.ras_if)
+ return -ENOMEM;
+ adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+ adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gmc.xgmi.ras_if->sub_block_index = 0;
+ strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl");
+ }
+ ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
+ &fs_info, &ih_info);
+ if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
+ kfree(adev->gmc.xgmi.ras_if);
+ adev->gmc.xgmi.ras_if = NULL;
+ }
+
+ return r;
+}
+
+void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
+ adev->gmc.xgmi.ras_if) {
+ struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index fbcee31788c4..74011fbc2251 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -22,6 +22,7 @@
#ifndef __AMDGPU_XGMI_H__
#define __AMDGPU_XGMI_H__
+#include <drm/task_barrier.h>
#include "amdgpu_psp.h"
struct amdgpu_hive_info {
@@ -33,6 +34,7 @@ struct amdgpu_hive_info {
struct device_attribute dev_attr;
struct amdgpu_device *adev;
int pstate; /*0 -- low , 1 -- high , -1 unknown*/
+ struct task_barrier tb;
};
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock);
@@ -42,6 +44,8 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
new file mode 100644
index 000000000000..fda99c958c3b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "arct_ip_offset.h"
+
+int arct_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialized the block needed by our driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+ adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i]));
+ adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i]));
+ adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i]));
+ adev->reg_offset[SDMA5_HWIP][i] = (uint32_t *)(&(SDMA5_BASE.instance[i]));
+ adev->reg_offset[SDMA6_HWIP][i] = (uint32_t *)(&(SDMA6_BASE.instance[i]));
+ adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+ adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i]));
+ }
+ return 0;
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
new file mode 100644
index 000000000000..847ca9b3ce4e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "athub_v1_0.h"
+
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+
+static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if(def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_VEGA20:
+ case CHIP_RAVEN:
+ athub_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+{
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
new file mode 100644
index 000000000000..b279af59e34f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V1_0_H__
+#define __ATHUB_V1_0_H__
+
+int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
index 89b32b6b81c8..921a69abda55 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -74,10 +74,12 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
athub_v2_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
athub_v2_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 6858cde9fc5d..ea702a64f807 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -361,7 +361,6 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
struct drm_connector *connector)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- struct amdgpu_connector_atom_dig *dig_connector;
int panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
u16 dp_bridge = amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector);
u8 tmp;
@@ -369,8 +368,6 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
if (!amdgpu_connector->con_priv)
return panel_mode;
- dig_connector = amdgpu_connector->con_priv;
-
if (dp_bridge != ENCODER_OBJECT_ID_NONE) {
/* DP bridge chips */
if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux,
@@ -713,7 +710,6 @@ void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder,
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig;
struct amdgpu_connector *amdgpu_connector;
struct amdgpu_connector_atom_dig *dig_connector;
struct amdgpu_atombios_dp_link_train_info dp_info;
@@ -721,7 +717,6 @@ void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder,
if (!amdgpu_encoder->enc_priv)
return;
- dig = amdgpu_encoder->enc_priv;
amdgpu_connector = to_amdgpu_connector(connector);
if (!amdgpu_connector->con_priv)
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index 980c363b1a0a..b4cc7c55fa16 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -76,11 +76,6 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan,
}
args.lpI2CDataOut = cpu_to_le16(out);
} else {
- if (num > ATOM_MAX_HW_I2C_READ) {
- DRM_ERROR("hw i2c: tried to read too many bytes (%d vs 255)\n", num);
- r = -EINVAL;
- goto done;
- }
args.ucRegIndex = 0;
args.lpI2CDataOut = 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 1ffbc0d3d7a1..006f21ef7ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -966,6 +966,25 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
{mmGRBM_STATUS},
+ {mmGRBM_STATUS2},
+ {mmGRBM_STATUS_SE0},
+ {mmGRBM_STATUS_SE1},
+ {mmGRBM_STATUS_SE2},
+ {mmGRBM_STATUS_SE3},
+ {mmSRBM_STATUS},
+ {mmSRBM_STATUS2},
+ {mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET},
+ {mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET},
+ {mmCP_STAT},
+ {mmCP_STALLED_STAT1},
+ {mmCP_STALLED_STAT2},
+ {mmCP_STALLED_STAT3},
+ {mmCP_CPF_BUSY_STAT},
+ {mmCP_CPF_STALLED_STAT1},
+ {mmCP_CPF_STATUS},
+ {mmCP_CPC_BUSY_STAT},
+ {mmCP_CPC_STALLED_STAT1},
+ {mmCP_CPC_STATUS},
{mmGB_ADDR_CONFIG},
{mmMC_ARB_RAMCFG},
{mmGB_TILE_MODE0},
@@ -1270,15 +1289,15 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
}
/**
- * cik_asic_reset - soft reset GPU
+ * cik_asic_pci_config_reset - soft reset GPU
*
* @adev: amdgpu_device pointer
*
- * Look up which blocks are hung and attempt
- * to reset them.
+ * Use PCI Config method to reset the GPU.
+ *
* Returns 0 for success.
*/
-static int cik_asic_reset(struct amdgpu_device *adev)
+static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
{
int r;
@@ -1291,6 +1310,64 @@ static int cik_asic_reset(struct amdgpu_device *adev)
return r;
}
+static bool cik_asic_supports_baco(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ return amdgpu_dpm_is_baco_supported(adev);
+ default:
+ return false;
+ }
+}
+
+static enum amd_reset_method
+cik_asic_reset_method(struct amdgpu_device *adev)
+{
+ bool baco_reset;
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ /* disable baco reset until it works */
+ /* smu7_asic_get_baco_capability(adev, &baco_reset); */
+ baco_reset = false;
+ break;
+ default:
+ baco_reset = false;
+ break;
+ }
+
+ if (baco_reset)
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * cik_asic_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
+ */
+static int cik_asic_reset(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ if (!adev->in_suspend)
+ amdgpu_inc_vram_lost(adev);
+ r = amdgpu_dpm_baco_reset(adev);
+ } else {
+ r = cik_asic_pci_config_reset(adev);
+ }
+
+ return r;
+}
+
static u32 cik_get_config_memsize(struct amdgpu_device *adev)
{
return RREG32(mmCONFIG_MEMSIZE);
@@ -1378,7 +1455,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1413,12 +1489,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(adev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1428,14 +1499,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
@@ -1459,15 +1533,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
for (i = 0; i < 10; i++) {
/* check status */
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1480,26 +1562,45 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
msleep(100);
/* linkctl */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
/* linkctl2 */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1514,15 +1615,16 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
- tmp16 |= 3; /* gen3 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
- tmp16 |= 2; /* gen2 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1; /* gen1 */
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
@@ -1823,6 +1925,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.read_bios_from_rom = &cik_read_bios_from_rom,
.read_register = &cik_read_register,
.reset = &cik_asic_reset,
+ .reset_method = &cik_asic_reset_method,
.set_vga_state = &cik_vga_set_state,
.get_xclk = &cik_get_xclk,
.set_uvd_clocks = &cik_set_uvd_clocks,
@@ -1835,6 +1938,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.get_pcie_usage = &cik_get_pcie_usage,
.need_reset_on_init = &cik_need_reset_on_init,
.get_pcie_replay_count = &cik_get_pcie_replay_count,
+ .supports_baco = &cik_asic_supports_baco,
};
static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h
index 54c625a2e570..f91ab4c246b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik.h
@@ -31,4 +31,5 @@ void cik_srbm_select(struct amdgpu_device *adev,
int cik_set_ip_blocks(struct amdgpu_device *adev);
void legacy_doorbell_index_init(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c45304f1047c..580d3f93d670 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
u32 extra_bits = vmid & 0xf;
/* IB packet must end on a 8 DW boundary */
- cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
@@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1372,16 +1372,14 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
- struct drm_gpu_scheduler *sched;
unsigned i;
adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version cik_sdma_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 1ffd1963e765..40d2ac723dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -236,6 +236,7 @@ static void dce_v10_0_page_flip(struct amdgpu_device *adev,
int crtc_id, u64 crtc_base, bool async)
{
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
u32 tmp;
/* flip at hsync for async, default is vsync */
@@ -243,6 +244,9 @@ static void dce_v10_0_page_flip(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0);
WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
/* update the primary scanout address */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
@@ -326,9 +330,11 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -364,6 +370,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
amdgpu_irq_get(adev, &adev->hpd_irq,
amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
/**
@@ -378,9 +385,11 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -393,6 +402,7 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->hpd_irq,
amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
static u32 dce_v10_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1215,10 +1225,12 @@ static void dce_v10_0_afmt_audio_select_pin(struct drm_encoder *encoder)
static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 tmp;
int interlace = 0;
@@ -1226,12 +1238,14 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1257,10 +1271,12 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 tmp;
u8 *sadb = NULL;
@@ -1269,12 +1285,14 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1309,10 +1327,12 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder
static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
struct cea_sad *sads;
int i, sad_count;
@@ -1335,12 +1355,14 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1348,10 +1370,10 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
- if (sad_count <= 0) {
+ if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
return;
- }
BUG_ON(!sads);
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 9e0782b54066..898ef72d423c 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -254,6 +254,7 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev,
int crtc_id, u64 crtc_base, bool async)
{
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
u32 tmp;
/* flip immediate for async, default is vsync */
@@ -261,6 +262,9 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
/* update the scanout addresses */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
@@ -344,9 +348,11 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -381,6 +387,7 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
/**
@@ -395,9 +402,11 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -409,6 +418,7 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1241,10 +1251,12 @@ static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder)
static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 tmp;
int interlace = 0;
@@ -1252,12 +1264,14 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1283,10 +1297,12 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 tmp;
u8 *sadb = NULL;
@@ -1295,12 +1311,14 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1335,10 +1353,12 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder
static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
struct cea_sad *sads;
int i, sad_count;
@@ -1361,12 +1381,14 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1374,10 +1396,10 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
- if (sad_count <= 0) {
+ if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
return;
- }
BUG_ON(!sads);
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 4bf453e07dca..db15a112becc 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -191,10 +191,14 @@ static void dce_v6_0_page_flip(struct amdgpu_device *adev,
int crtc_id, u64 crtc_base, bool async)
{
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
/* flip at hsync for async, default is vsync */
WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ?
GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
/* update the scanout addresses */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
@@ -277,9 +281,11 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -305,7 +311,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
-
+ drm_connector_list_iter_end(&iter);
}
/**
@@ -320,9 +326,11 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -334,6 +342,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1120,20 +1129,24 @@ static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder)
static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
int interlace = 0;
u32 tmp;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1160,21 +1173,25 @@ static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u8 *sadb = NULL;
int sad_count;
u32 tmp;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1217,10 +1234,12 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
struct cea_sad *sads;
int i, sad_count;
@@ -1240,12 +1259,14 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
};
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1253,10 +1274,10 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
- if (sad_count <= 0) {
+ if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
return;
- }
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
u32 tmp = 0;
@@ -1628,6 +1649,7 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
int bpc = 8;
@@ -1635,12 +1657,14 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
if (!dig || !dig->afmt)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index b23418ca8f6a..f06c9022c1fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -184,10 +184,14 @@ static void dce_v8_0_page_flip(struct amdgpu_device *adev,
int crtc_id, u64 crtc_base, bool async)
{
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
/* flip at hsync for async, default is vsync */
WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ?
GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
/* update the primary scanout addresses */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
@@ -271,9 +275,11 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -299,6 +305,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
/**
@@ -313,9 +320,11 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -327,6 +336,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
static u32 dce_v8_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1153,10 +1163,12 @@ static void dce_v8_0_afmt_audio_select_pin(struct drm_encoder *encoder)
static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 tmp = 0, offset;
@@ -1165,12 +1177,14 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
offset = dig->afmt->pin->offset;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1210,10 +1224,12 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 offset, tmp;
u8 *sadb = NULL;
@@ -1224,12 +1240,14 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
offset = dig->afmt->pin->offset;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1259,11 +1277,13 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
u32 offset;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
struct cea_sad *sads;
int i, sad_count;
@@ -1288,12 +1308,14 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
offset = dig->afmt->pin->offset;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1301,10 +1323,10 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
- if (sad_count <= 0) {
+ if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
return;
- }
BUG_ON(!sads);
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 3cc0a16649f9..e4f94863332c 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -260,15 +260,14 @@ static struct drm_encoder *
dce_virtual_encoder(struct drm_connector *connector)
{
struct drm_encoder *encoder;
- int i;
- drm_connector_for_each_possible_encoder(connector, encoder, i) {
+ drm_connector_for_each_possible_encoder(connector, encoder) {
if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL)
return encoder;
}
/* pick the first one */
- drm_connector_for_each_possible_encoder(connector, encoder, i)
+ drm_connector_for_each_possible_encoder(connector, encoder)
return encoder;
return NULL;
@@ -454,13 +453,8 @@ static int dce_virtual_hw_init(void *handle)
#endif
/* no DCE */
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_NAVI10:
- break;
default:
- DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
+ break;
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
index 844c03868248..d6aca1c08068 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -31,6 +31,13 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
static void df_v1_7_sw_init(struct amdgpu_device *adev)
{
+ adev->df.hash_status.hash_64k = false;
+ adev->df.hash_status.hash_2m = false;
+ adev->df.hash_status.hash_1g = false;
+}
+
+static void df_v1_7_sw_fini(struct amdgpu_device *adev)
+{
}
static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
@@ -62,7 +69,7 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
{
int fb_channel_number;
- fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+ fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
return df_v1_7_channel_number[fb_channel_number];
}
@@ -73,7 +80,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
u32 tmp;
/* Put DF on broadcast mode */
- adev->df_funcs->enable_broadcast_mode(adev, true);
+ adev->df.funcs->enable_broadcast_mode(adev, true);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
@@ -88,7 +95,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
}
/* Exit boradcast mode */
- adev->df_funcs->enable_broadcast_mode(adev, false);
+ adev->df.funcs->enable_broadcast_mode(adev, false);
}
static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
@@ -111,6 +118,7 @@ static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
const struct amdgpu_df_funcs df_v1_7_funcs = {
.sw_init = df_v1_7_sw_init,
+ .sw_fini = df_v1_7_sw_fini,
.enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
.get_fb_channel_number = df_v1_7_get_fb_channel_number,
.get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index ef6e91f9f51c..5a1bd8ed1a6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -27,6 +27,9 @@
#include "df/df_3_6_offset.h"
#include "df/df_3_6_sh_mask.h"
+#define DF_3_6_SMN_REG_INST_DIST 0x8
+#define DF_3_6_INST_CNT 8
+
static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
16, 32, 0, 0, 0, 2, 4, 8};
@@ -93,6 +96,151 @@ const struct attribute_group *df_v3_6_attr_groups[] = {
NULL
};
+static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
+ uint32_t ficaa_val)
+{
+ unsigned long flags, address, data;
+ uint32_t ficadl_val, ficadh_val;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
+ WREG32(data, ficaa_val);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
+ ficadl_val = RREG32(data);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
+ ficadh_val = RREG32(data);
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);
+}
+
+static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
+ uint32_t ficadl_val, uint32_t ficadh_val)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
+ WREG32(data, ficaa_val);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
+ WREG32(data, ficadl_val);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
+ WREG32(data, ficadh_val);
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/*
+ * df_v3_6_perfmon_rreg - read perfmon lo and hi
+ *
+ * required to be atomic. no mmio method provided so subsequent reads for lo
+ * and hi require to preserve df finite state machine
+ */
+static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t *lo_val,
+ uint32_t hi_addr, uint32_t *hi_val)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, lo_addr);
+ *lo_val = RREG32(data);
+ WREG32(address, hi_addr);
+ *hi_val = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/*
+ * df_v3_6_perfmon_wreg - write to perfmon lo and hi
+ *
+ * required to be atomic. no mmio method provided so subsequent reads after
+ * data writes cannot occur to preserve data fabrics finite state machine.
+ */
+static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
+ uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, lo_addr);
+ WREG32(data, lo_val);
+ WREG32(address, hi_addr);
+ WREG32(data, hi_val);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/* same as perfmon_wreg but return status on write value check */
+static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t lo_val,
+ uint32_t hi_addr, uint32_t hi_val)
+{
+ unsigned long flags, address, data;
+ uint32_t lo_val_rb, hi_val_rb;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, lo_addr);
+ WREG32(data, lo_val);
+ WREG32(address, hi_addr);
+ WREG32(data, hi_val);
+
+ WREG32(address, lo_addr);
+ lo_val_rb = RREG32(data);
+ WREG32(address, hi_addr);
+ hi_val_rb = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
+ return -EBUSY;
+
+ return 0;
+}
+
+
+/*
+ * retry arming counters every 100 usecs within 1 millisecond interval.
+ * if retry fails after time out, return error.
+ */
+#define ARM_RETRY_USEC_TIMEOUT 1000
+#define ARM_RETRY_USEC_INTERVAL 100
+static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t lo_val,
+ uint32_t hi_addr, uint32_t hi_val)
+{
+ int countdown = ARM_RETRY_USEC_TIMEOUT;
+
+ while (countdown) {
+
+ if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val,
+ hi_addr, hi_val))
+ break;
+
+ countdown -= ARM_RETRY_USEC_INTERVAL;
+ udelay(ARM_RETRY_USEC_INTERVAL);
+ }
+
+ return countdown > 0 ? 0 : -ETIME;
+}
+
/* get the number of df counters available */
static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
struct device_attribute *attr,
@@ -117,6 +265,32 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
/* device attr for available perfmon counters */
static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);
+static void df_v3_6_query_hashes(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ adev->df.hash_status.hash_64k = false;
+ adev->df.hash_status.hash_2m = false;
+ adev->df.hash_status.hash_1g = false;
+
+ if (adev->asic_type != CHIP_ARCTURUS)
+ return;
+
+ /* encoding for hash-enabled on Arcturus */
+ if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
+ adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl64K);
+ adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl2M);
+ adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl1G);
+ }
+}
+
/* init perfmons */
static void df_v3_6_sw_init(struct amdgpu_device *adev)
{
@@ -128,6 +302,15 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
adev->df_perfmon_config_assign_mask[i] = 0;
+
+ df_v3_6_query_hashes(adev);
+}
+
+static void df_v3_6_sw_fini(struct amdgpu_device *adev)
+{
+
+ device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
+
}
static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
@@ -159,7 +342,7 @@ static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
{
int fb_channel_number;
- fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+ fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
fb_channel_number = 0;
@@ -171,23 +354,29 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
{
u32 tmp;
- /* Put DF on broadcast mode */
- adev->df_funcs->enable_broadcast_mode(adev, true);
-
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
- tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
- tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
- tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
- WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
- } else {
- tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
- tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
- tmp |= DF_V3_6_MGCG_DISABLE;
- WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
- }
+ if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
+ /* Put DF on broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, true);
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
- /* Exit broadcast mode */
- adev->df_funcs->enable_broadcast_mode(adev, false);
+ /* Exit broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, false);
+ }
}
static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
@@ -231,20 +420,20 @@ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
switch (target_cntr) {
case 0:
- *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0;
- *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0;
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4;
break;
case 1:
- *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1;
- *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1;
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5;
break;
case 2:
- *lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2;
- *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2;
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6;
break;
case 3:
- *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3;
- *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3;
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7;
break;
}
@@ -268,6 +457,10 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
uint32_t *lo_val,
uint32_t *hi_val)
{
+
+ uint32_t eventsel, instance, unitmask;
+ uint32_t instance_10, instance_5432, instance_76;
+
df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);
if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
@@ -276,40 +469,33 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
return -ENXIO;
}
- if (lo_val && hi_val) {
- uint32_t eventsel, instance, unitmask;
- uint32_t instance_10, instance_5432, instance_76;
+ eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
+ unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
+ instance = DF_V3_6_GET_INSTANCE(config);
- eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
- unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
- instance = DF_V3_6_GET_INSTANCE(config);
+ instance_10 = instance & 0x3;
+ instance_5432 = (instance >> 2) & 0xf;
+ instance_76 = (instance >> 6) & 0x3;
- instance_10 = instance & 0x3;
- instance_5432 = (instance >> 2) & 0xf;
- instance_76 = (instance >> 6) & 0x3;
+ *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22);
+ *hi_val = (instance_76 << 29) | instance_5432;
- *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
- *hi_val = (instance_76 << 29) | instance_5432;
- }
+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
+ config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);
return 0;
}
-/* assign df performance counters for read */
-static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
- uint64_t config,
- int *is_assigned)
+/* add df performance counters for read */
+static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
+ uint64_t config)
{
int i, target_cntr;
- *is_assigned = 0;
-
target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
- if (target_cntr >= 0) {
- *is_assigned = 1;
+ if (target_cntr >= 0)
return 0;
- }
for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
if (adev->df_perfmon_config_assign_mask[i] == 0U) {
@@ -322,6 +508,44 @@ static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
return -ENOSPC;
}
+#define DEFERRED_ARM_MASK (1 << 31)
+static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
+ uint64_t config, bool is_deferred)
+{
+ int target_cntr;
+
+ target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
+
+ if (target_cntr < 0)
+ return -EINVAL;
+
+ if (is_deferred)
+ adev->df_perfmon_config_assign_mask[target_cntr] |=
+ DEFERRED_ARM_MASK;
+ else
+ adev->df_perfmon_config_assign_mask[target_cntr] &=
+ ~DEFERRED_ARM_MASK;
+
+ return 0;
+}
+
+static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
+ uint64_t config)
+{
+ int target_cntr;
+
+ target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
+
+ /*
+ * we never get target_cntr < 0 since this funciton is only called in
+ * pmc_count for now but we should check anyways.
+ */
+ return (target_cntr >= 0 &&
+ (adev->df_perfmon_config_assign_mask[target_cntr]
+ & DEFERRED_ARM_MASK));
+
+}
+
/* release performance counter */
static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
uint64_t config)
@@ -344,72 +568,40 @@ static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
if ((lo_base_addr == 0) || (hi_base_addr == 0))
return;
- WREG32_PCIE(lo_base_addr, 0UL);
- WREG32_PCIE(hi_base_addr, 0UL);
-}
-
-
-static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev,
- uint64_t config)
-{
- uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
- int ret, is_assigned;
-
- ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);
-
- if (ret || is_assigned)
- return ret;
-
- ret = df_v3_6_pmc_get_ctrl_settings(adev,
- config,
- &lo_base_addr,
- &hi_base_addr,
- &lo_val,
- &hi_val);
-
- if (ret)
- return ret;
-
- DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
- config, lo_base_addr, hi_base_addr, lo_val, hi_val);
-
- WREG32_PCIE(lo_base_addr, lo_val);
- WREG32_PCIE(hi_base_addr, hi_val);
-
- return ret;
+ df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
}
static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
int is_enable)
{
- uint32_t lo_base_addr, hi_base_addr, lo_val;
- int ret = 0;
+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ int err = 0, ret = 0;
switch (adev->asic_type) {
case CHIP_VEGA20:
+ if (is_enable)
+ return df_v3_6_pmc_add_cntr(adev, config);
df_v3_6_reset_perfmon_cntr(adev, config);
- if (is_enable) {
- ret = df_v3_6_add_perfmon_cntr(adev, config);
- } else {
- ret = df_v3_6_pmc_get_ctrl_settings(adev,
+ ret = df_v3_6_pmc_get_ctrl_settings(adev,
config,
&lo_base_addr,
&hi_base_addr,
- NULL,
- NULL);
-
- if (ret)
- return ret;
+ &lo_val,
+ &hi_val);
- lo_val = RREG32_PCIE(lo_base_addr);
+ if (ret)
+ return ret;
- DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
- config, lo_base_addr, hi_base_addr, lo_val);
+ err = df_v3_6_perfmon_arm_with_retry(adev,
+ lo_base_addr,
+ lo_val,
+ hi_base_addr,
+ hi_val);
- WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
- }
+ if (err)
+ ret = df_v3_6_pmc_set_deferred(adev, config, true);
break;
default:
@@ -422,7 +614,7 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
int is_disable)
{
- uint32_t lo_base_addr, hi_base_addr, lo_val;
+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
int ret = 0;
switch (adev->asic_type) {
@@ -431,18 +623,13 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
config,
&lo_base_addr,
&hi_base_addr,
- NULL,
- NULL);
+ &lo_val,
+ &hi_val);
if (ret)
return ret;
- lo_val = RREG32_PCIE(lo_base_addr);
-
- DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
- config, lo_base_addr, hi_base_addr, lo_val);
-
- WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
+ df_v3_6_reset_perfmon_cntr(adev, config);
if (is_disable)
df_v3_6_pmc_release_cntr(adev, config);
@@ -459,20 +646,31 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
uint64_t config,
uint64_t *count)
{
- uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ uint32_t lo_base_addr, hi_base_addr, lo_val = 0, hi_val = 0;
*count = 0;
switch (adev->asic_type) {
case CHIP_VEGA20:
-
df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
&hi_base_addr);
if ((lo_base_addr == 0) || (hi_base_addr == 0))
return;
- lo_val = RREG32_PCIE(lo_base_addr);
- hi_val = RREG32_PCIE(hi_base_addr);
+ /* rearm the counter or throw away count value on failure */
+ if (df_v3_6_pmc_is_deferred(adev, config)) {
+ int rearm_err = df_v3_6_perfmon_arm_with_status(adev,
+ lo_base_addr, lo_val,
+ hi_base_addr, hi_val);
+
+ if (rearm_err)
+ return;
+
+ df_v3_6_pmc_set_deferred(adev, config, false);
+ }
+
+ df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
+ hi_base_addr, &hi_val);
*count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
@@ -480,17 +678,69 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
*count = 0;
DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
- config, lo_base_addr, hi_base_addr, lo_val, hi_val);
+ config, lo_base_addr, hi_base_addr, lo_val, hi_val);
break;
-
default:
break;
}
}
+static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev,
+ uint32_t df_inst)
+{
+ uint32_t base_addr_reg_val = 0;
+ uint64_t base_addr = 0;
+
+ base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 +
+ df_inst * DF_3_6_SMN_REG_INST_DIST);
+
+ if (REG_GET_FIELD(base_addr_reg_val,
+ DF_CS_UMC_AON0_DramBaseAddress0,
+ AddrRngVal) == 0) {
+ DRM_WARN("address range not valid");
+ return 0;
+ }
+
+ base_addr = REG_GET_FIELD(base_addr_reg_val,
+ DF_CS_UMC_AON0_DramBaseAddress0,
+ DramBaseAddr);
+
+ return base_addr << 28;
+}
+
+static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev)
+{
+ uint32_t xgmi_node_id = 0;
+ uint32_t df_inst_id = 0;
+
+ /* Walk through DF dst nodes to find current XGMI node */
+ for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) {
+
+ xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 +
+ df_inst_id * DF_3_6_SMN_REG_INST_DIST);
+ xgmi_node_id = REG_GET_FIELD(xgmi_node_id,
+ DF_CS_UMC_AON0_DramLimitAddress0,
+ DstFabricID);
+
+ /* TODO: establish reason dest fabric id is offset by 7 */
+ xgmi_node_id = xgmi_node_id >> 7;
+
+ if (adev->gmc.xgmi.physical_node_id == xgmi_node_id)
+ break;
+ }
+
+ if (df_inst_id == DF_3_6_INST_CNT) {
+ DRM_WARN("cant match df dst id with gpu node");
+ return 0;
+ }
+
+ return df_inst_id;
+}
+
const struct amdgpu_df_funcs df_v3_6_funcs = {
.sw_init = df_v3_6_sw_init,
+ .sw_fini = df_v3_6_sw_fini,
.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
.get_fb_channel_number = df_v3_6_get_fb_channel_number,
.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
@@ -499,5 +749,9 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
.get_clockgating_state = df_v3_6_get_clockgating_state,
.pmc_start = df_v3_6_pmc_start,
.pmc_stop = df_v3_6_pmc_stop,
- .pmc_get_count = df_v3_6_pmc_get_count
+ .pmc_get_count = df_v3_6_pmc_get_count,
+ .get_fica = df_v3_6_get_fica,
+ .set_fica = df_v3_6_set_fica,
+ .get_dram_base_addr = df_v3_6_get_dram_base_addr,
+ .get_df_inst_id = df_v3_6_get_df_inst_id
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index f41287f9000d..1785fdad6ecb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -20,8 +20,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
@@ -36,6 +40,7 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
@@ -46,9 +51,6 @@
* Navi10 has two graphic rings to share each graphic pipe.
* 1. Primary ring
* 2. Async ring
- *
- * In bring-up phase, it just used primary ring so set gfx ring number as 1 at
- * first.
*/
#define GFX10_NUM_GFX_RINGS 2
#define GFX10_MEC_HPD_SIZE 2048
@@ -56,6 +58,9 @@
#define F32_CE_PROGRAM_RAM_SIZE 65536
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define mmCGTT_GS_NGG_CLK_CTRL 0x5087
+#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1
+
MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi10_me.bin");
@@ -63,11 +68,30 @@ MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi14_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navi12_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi12_me.bin");
+MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
+
static const struct soc15_reg_golden golden_settings_gc_10_1[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -91,17 +115,20 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000)
};
static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
@@ -109,6 +136,102 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
/* Pending on emulation bring up */
};
+static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
+{
+ /* Pending on emulation bring up */
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] =
+{
+ /* Pending on emulation bring up */
+};
+
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@@ -223,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
+static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx10_kiq_set_resources,
.kiq_map_queues = gfx10_kiq_map_queues,
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
.query_status_size = 7,
+ .invalidate_tlbs_size = 2,
};
static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
@@ -250,6 +387,22 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_10_0_nv10,
(const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
break;
+ case CHIP_NAVI14:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_1,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_1));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_nv14,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
+ break;
+ case CHIP_NAVI12:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_2,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_2_nv12,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12));
+ break;
default:
break;
}
@@ -331,20 +484,12 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
if (amdgpu_emu_mode == 1)
msleep(1);
else
- DRM_UDELAY(1);
- }
- if (i < adev->usec_timeout) {
- if (amdgpu_emu_mode == 1)
- DRM_INFO("ring test on %d succeeded in %d msecs\n",
- ring->idx, i);
- else
- DRM_INFO("ring test on %d succeeded in %d usecs\n",
- ring->idx, i);
- } else {
- DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
- ring->idx, scratch, tmp);
- r = -EINVAL;
+ udelay(1);
}
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
amdgpu_gfx_scratch_free(adev, scratch);
return r;
@@ -394,14 +539,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF) {
- DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ if (tmp == 0xDEADBEEF)
r = 0;
- } else {
- DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
- scratch, tmp);
+ else
r = -EINVAL;
- }
err2:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
@@ -429,6 +570,31 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
kfree(adev->gfx.rlc.register_list_format);
}
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+ adev->gfx.cp_fw_write_wait = false;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
+ if ((adev->gfx.me_fw_version >= 0x00000046) &&
+ (adev->gfx.me_feature_version >= 27) &&
+ (adev->gfx.pfp_fw_version >= 0x00000068) &&
+ (adev->gfx.pfp_feature_version >= 27) &&
+ (adev->gfx.mec_fw_version >= 0x0000005b) &&
+ (adev->gfx.mec_feature_version >= 27))
+ adev->gfx.cp_fw_write_wait = true;
+ break;
+ default:
+ break;
+ }
+
+ if (adev->gfx.cp_fw_write_wait == false)
+ DRM_WARN_ONCE("CP firmware version too old, please update!");
+}
+
+
static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -450,11 +616,29 @@ static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
+static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
+{
+ bool ret = false;
+
+ switch (adev->pdev->revision) {
+ case 0xc2:
+ case 0xc3:
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+
+ return ret ;
+}
+
static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_NAVI10:
- adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
break;
default:
break;
@@ -464,7 +648,8 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
+ char fw_name[40];
+ char wks[10];
int err;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
@@ -477,15 +662,25 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
+ memset(wks, 0, sizeof(wks));
switch (adev->asic_type) {
case CHIP_NAVI10:
chip_name = "navi10";
break;
+ case CHIP_NAVI14:
+ chip_name = "navi14";
+ if (!(adev->pdev->device == 0x7340 &&
+ adev->pdev->revision != 0x00))
+ snprintf(wks, sizeof(wks), "_wks");
+ break;
+ case CHIP_NAVI12:
+ chip_name = "navi12";
+ break;
default:
BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -496,7 +691,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -507,7 +702,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -518,61 +713,63 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
- rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
- version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
- version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
- if (version_major == 2 && version_minor == 1)
- adev->gfx.rlc.is_rlc_v2_1 = true;
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
+ if (!amdgpu_sriov_vf(adev)) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2 && version_minor == 1)
+ adev->gfx.rlc.is_rlc_v2_1 = true;
+
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+ adev->gfx.rlc.save_and_restore_offset =
le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
+ adev->gfx.rlc.clear_state_descriptor_offset =
le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
+ adev->gfx.rlc.avail_scratch_ram_locations =
le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
+ adev->gfx.rlc.reg_restore_list_size =
le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
+ adev->gfx.rlc.reg_list_format_start =
le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
+ adev->gfx.rlc.reg_list_format_separate_start =
le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
+ adev->gfx.rlc.starting_offsets_start =
le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
+ adev->gfx.rlc.reg_list_format_size_bytes =
le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
+ adev->gfx.rlc.reg_list_size_bytes =
le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
+ adev->gfx.rlc.register_list_format =
kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
- goto out;
- }
+ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+ if (!adev->gfx.rlc.register_list_format) {
+ err = -ENOMEM;
+ goto out;
+ }
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
- if (adev->gfx.rlc.is_rlc_v2_1)
- gfx_v10_0_init_rlc_ext_microcode(adev);
+ if (adev->gfx.rlc.is_rlc_v2_1)
+ gfx_v10_0_init_rlc_ext_microcode(adev);
+ }
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -583,7 +780,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
if (!err) {
err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
@@ -625,10 +822,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
info->fw = adev->gfx.rlc_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
+ if (info->fw) {
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
if (adev->gfx.rlc.is_rlc_v2_1 &&
adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
@@ -686,6 +884,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
}
}
+ gfx_v10_0_check_fw_write_wait(adev);
out:
if (err) {
dev_err(adev->dev,
@@ -820,39 +1019,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
- AMDGPU_GEM_DOMAIN_VRAM);
- if (!r)
- adev->gfx.rlc.clear_state_gpu_addr =
- amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
- return r;
-}
-
-static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
- int r;
-
- if (!adev->gfx.rlc.clear_state_obj)
- return;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
- }
-}
-
static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1026,6 +1192,8 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -1133,6 +1301,8 @@ static int gfx_v10_0_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 2;
adev->gfx.me.num_queue_per_pipe = 1;
@@ -1292,7 +1462,7 @@ static int gfx_v10_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
gfx_v10_0_pfp_fini(adev);
@@ -1452,6 +1622,25 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
}
}
+static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < 16; vmid++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+
static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
{
int i, j, k;
@@ -1461,7 +1650,8 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
u32 utcl_invreq_disable = 0;
/*
* GCRD_TARGETS_DISABLE field contains
- * for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
+ * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
+ * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0]
*/
u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
2 * max_wgp_per_sh + /* TCP */
@@ -1469,7 +1659,8 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
4); /* GL1C */
/*
* UTCL1_UTCL0_INVREQ_DISABLE field contains
- * for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
+ * for Navi10Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
+ * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0]
*/
u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
2 * max_wgp_per_sh + /* TCP */
@@ -1477,7 +1668,9 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
4 + /* RMI */
1); /* SQG */
- if (adev->asic_type == CHIP_NAVI10) {
+ if (adev->asic_type == CHIP_NAVI10 ||
+ adev->asic_type == CHIP_NAVI14 ||
+ adev->asic_type == CHIP_NAVI12) {
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
@@ -1518,6 +1711,17 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
}
}
+static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
+{
+ /* TCCs are global (not instanced). */
+ uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) |
+ RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE);
+
+ adev->gfx.config.tcc_disabled_mask =
+ REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
+ (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
+}
+
static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
{
u32 tmp;
@@ -1529,13 +1733,14 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
gfx_v10_0_setup_rb(adev);
gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v10_0_get_tcc_info(adev);
adev->gfx.config.pa_sc_tile_steering_override =
gfx_v10_0_init_pa_sc_tile_steering_override(adev);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
nv_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
@@ -1552,6 +1757,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
gfx_v10_0_init_compute_vmid(adev);
+ gfx_v10_0_init_gds_vmid(adev);
}
@@ -1572,24 +1778,18 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}
-static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
+static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
/* csib */
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
-}
-
-static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
-{
- gfx_v10_0_init_csb(adev);
-
- amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
- /* TODO: init power gating */
- return;
+ return 0;
}
void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
@@ -1624,9 +1824,9 @@ static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
* hence no handshake between SMU & RLC
* GFXOFF will be disabled
*/
- rlc_pg_cntl |= 0x80000;
+ rlc_pg_cntl |= 0x800000;
} else
- rlc_pg_cntl &= ~0x80000;
+ rlc_pg_cntl &= ~0x800000;
WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);
}
@@ -1684,18 +1884,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
{
int r;
- if (amdgpu_sriov_vf(adev))
- return 0;
-
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+
r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
if (r)
return r;
- gfx_v10_0_init_pg(adev);
- /* enable RLC SRM */
- gfx_v10_0_rlc_enable_srm(adev);
+ gfx_v10_0_init_csb(adev);
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v10_0_rlc_enable_srm(adev);
} else {
adev->gfx.rlc.funcs->stop(adev);
@@ -1717,7 +1915,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
- gfx_v10_0_init_pg(adev);
+ gfx_v10_0_init_csb(adev);
+
adev->gfx.rlc.funcs->start(adev);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
@@ -1767,7 +1966,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4;
rlc_toc++;
- };
+ }
return 0;
}
@@ -2184,7 +2383,7 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
int i;
u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
@@ -2197,7 +2396,17 @@ static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
adev->gfx.gfx_ring[i].sched.ready = false;
}
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- udelay(50);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+ return 0;
}
static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
@@ -2254,7 +2463,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -2324,7 +2533,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@@ -2393,7 +2602,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -2568,7 +2777,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
/* Init gfx ring 0 for pipe 0 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
- mutex_unlock(&adev->srbm_mutex);
+
/* Set ring buffer size */
ring = &adev->gfx.gfx_ring[0];
rb_bufsz = order_base_2(ring->ring_size / 8);
@@ -2606,11 +2815,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1);
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
/* Init gfx ring 1 for pipe 1 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
- mutex_unlock(&adev->srbm_mutex);
ring = &adev->gfx.gfx_ring[1];
rb_bufsz = order_base_2(ring->ring_size / 8);
tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
@@ -2640,6 +2849,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
/* Switch to pipe 0 */
mutex_lock(&adev->srbm_mutex);
@@ -2714,7 +2924,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -2898,6 +3108,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0];
if (!adev->in_gpu_reset && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
@@ -2909,14 +3120,15 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
#endif
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
- memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd));
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else if (adev->in_gpu_reset) {
/* reset mqd with the backup copy */
- if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
- memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd));
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
+ adev->wb.wb[ring->wptr_offs] = 0;
amdgpu_ring_clear_ring(ring);
#ifdef BRING_UP_DEBUG
mutex_lock(&adev->srbm_mutex);
@@ -3125,8 +3337,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
- /* activate the queue */
- mqd->cp_hqd_active = 1;
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
return 0;
}
@@ -3397,23 +3612,16 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
- DRM_INFO("gfx %d ring me %d pipe %d q %d\n",
- i, ring->me, ring->pipe, ring->queue);
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
return r;
- }
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
- ring->sched.ready = true;
- DRM_INFO("compute ring %d mec %d pipe %d q %d\n",
- i, ring->me, ring->pipe, ring->queue);
- r = amdgpu_ring_test_ring(ring);
+ r = amdgpu_ring_test_helper(ring);
if (r)
- ring->sched.ready = false;
+ return r;
}
return 0;
@@ -3516,10 +3724,6 @@ static int gfx_v10_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gfx_v10_0_csb_vram_pin(adev);
- if (r)
- return r;
-
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
@@ -3602,32 +3806,23 @@ static int gfx_v10_0_hw_fini(void *handle)
if (amdgpu_gfx_disable_kcq(adev))
DRM_ERROR("KCQ disable failed\n");
if (amdgpu_sriov_vf(adev)) {
- pr_debug("For SRIOV client, shouldn't do anything.\n");
+ gfx_v10_0_cp_gfx_enable(adev, false);
return 0;
}
gfx_v10_0_cp_enable(adev, false);
gfx_v10_0_enable_gui_idle_interrupt(adev, false);
- gfx_v10_0_csb_vram_unpin(adev);
return 0;
}
static int gfx_v10_0_suspend(void *handle)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- adev->in_suspend = true;
- return gfx_v10_0_hw_fini(adev);
+ return gfx_v10_0_hw_fini(handle);
}
static int gfx_v10_0_resume(void *handle)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
-
- r = gfx_v10_0_hw_init(adev);
- adev->in_suspend = false;
- return r;
+ return gfx_v10_0_hw_init(handle);
}
static bool gfx_v10_0_is_idle(void *handle)
@@ -4034,9 +4229,10 @@ static int gfx_v10_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_PG_STATE_GATE);
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
if (!enable) {
amdgpu_gfx_off_ctrl(adev, false);
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
@@ -4056,8 +4252,10 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
gfx_v10_0_update_gfx_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -4173,7 +4371,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask, reg_mem_engine;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
switch (ring->me) {
@@ -4193,8 +4391,8 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
- adev->nbio_funcs->get_hdp_flush_req_offset(adev),
- adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
ref_and_mask, ref_and_mask, 0x20);
}
@@ -4453,7 +4651,7 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
if (ring->trail_seq ==
le32_to_cpu(*(ring->trail_fence_cpu_addr)))
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout) {
@@ -4539,6 +4737,7 @@ static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
@@ -4547,9 +4746,9 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
}
static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -4581,6 +4780,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ struct amdgpu_device *adev = ring->adev;
+ bool fw_version_ok = false;
+
+ fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+ if (fw_version_ok)
+ gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+ else
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -4927,7 +5144,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB,
+ .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v10_0_ring_get_rptr_gfx,
.get_wptr = gfx_v10_0_ring_get_wptr_gfx,
.set_wptr = gfx_v10_0_ring_set_wptr_gfx,
@@ -4971,6 +5188,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_tmz = gfx_v10_0_ring_emit_tmz,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -4978,7 +5196,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB,
+ .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v10_0_ring_get_rptr_compute,
.get_wptr = gfx_v10_0_ring_get_wptr_compute,
.set_wptr = gfx_v10_0_ring_set_wptr_compute,
@@ -5004,6 +5222,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5011,7 +5230,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB,
+ .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v10_0_ring_get_rptr_compute,
.get_wptr = gfx_v10_0_ring_get_wptr_compute,
.set_wptr = gfx_v10_0_ring_set_wptr_compute,
@@ -5034,6 +5253,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.emit_rreg = gfx_v10_0_ring_emit_rreg,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5088,6 +5308,8 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
break;
default:
@@ -5097,15 +5319,12 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
{
- /* init asic gds info */
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- default:
- adev->gds.gds_size = 0x10000;
- adev->gds.gds_compute_max_wave_id = 0x4ff;
- break;
- }
+ unsigned total_cu = adev->gfx.config.max_cu_per_sh *
+ adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines;
+ adev->gds.gds_size = 0x10000;
+ adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
adev->gds.gws_size = 64;
adev->gds.oa_size = 16;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 7f0a63628c43..31f44d05e606 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1576,7 +1576,7 @@ static void gfx_v6_0_config_init(struct amdgpu_device *adev)
static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
{
u32 gb_addr_config = 0;
- u32 mc_shared_chmap, mc_arb_ramcfg;
+ u32 mc_arb_ramcfg;
u32 sx_debug_1;
u32 hdp_host_path_cntl;
u32 tmp;
@@ -1678,7 +1678,6 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
- mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 21187275dfd3..8f20a5dd44fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1890,6 +1890,24 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
}
}
+static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < 16; vmid++) {
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
+ }
+}
+
static void gfx_v7_0_config_init(struct amdgpu_device *adev)
{
adev->gfx.config.double_offchip_lds_buf = 1;
@@ -1968,6 +1986,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
gfx_v7_0_init_compute_vmid(adev);
+ gfx_v7_0_init_gds_vmid(adev);
WREG32(mmSX_DEBUG_1, 0x20);
@@ -4239,7 +4258,7 @@ static int gfx_v7_0_late_init(void *handle)
static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
- u32 mc_shared_chmap, mc_arb_ramcfg;
+ u32 mc_arb_ramcfg;
u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
u32 tmp;
@@ -4316,7 +4335,6 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
break;
}
- mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
@@ -4535,6 +4553,8 @@ static int gfx_v7_0_hw_init(void *handle)
gfx_v7_0_constants_init(adev);
+ /* init CSB */
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* init rlc */
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index ee1ccdcf2d30..fa245973de12 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1321,39 +1321,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
- AMDGPU_GEM_DOMAIN_VRAM);
- if (!r)
- adev->gfx.rlc.clear_state_gpu_addr =
- amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
- return r;
-}
-
-static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
- int r;
-
- if (!adev->gfx.rlc.clear_state_obj)
- return;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
- }
-}
-
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1710,7 +1677,7 @@ fail:
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
- u32 mc_shared_chmap, mc_arb_ramcfg;
+ u32 mc_arb_ramcfg;
u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
u32 tmp;
int ret;
@@ -1850,7 +1817,6 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
break;
}
- mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
@@ -2103,7 +2069,7 @@ static int gfx_v8_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
gfx_v8_0_mec_fini(adev);
@@ -3750,6 +3716,24 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
}
}
+static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < 16; vmid++) {
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
+ }
+}
+
static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
@@ -3816,6 +3800,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
gfx_v8_0_init_compute_vmid(adev);
+ gfx_v8_0_init_gds_vmid(adev);
mutex_lock(&adev->grbm_idx_mutex);
/*
@@ -3898,6 +3883,7 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
WREG32(mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
@@ -4572,8 +4558,11 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
- /* activate the queue */
- mqd->cp_hqd_active = 1;
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
return 0;
}
@@ -4818,10 +4807,6 @@ static int gfx_v8_0_hw_init(void *handle)
gfx_v8_0_init_golden_registers(adev);
gfx_v8_0_constants_init(adev);
- r = gfx_v8_0_csb_vram_pin(adev);
- if (r)
- return r;
-
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@@ -4939,8 +4924,6 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_err("rlc is busy, skip halt rlc\n");
amdgpu_gfx_rlc_exit_safe_mode(adev);
- gfx_v8_0_csb_vram_unpin(adev);
-
return 0;
}
@@ -6165,7 +6148,23 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
- /* EVENT_WRITE_EOP - flush caches, send int */
+ /* Workaround for cache flush problems. First send a dummy EOP
+ * event down the pipe with seq one below.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+ DATA_SEL(1) | INT_SEL(0));
+ amdgpu_ring_write(ring, lower_32_bits(seq - 1));
+ amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+
+ /* Then send the real EOP event down the pipe:
+ * EVENT_WRITE_EOP - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
EOP_TC_ACTION_EN |
@@ -6450,6 +6449,7 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
@@ -6458,9 +6458,9 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
}
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -6907,7 +6907,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5 + /* COND_EXEC */
7 + /* PIPELINE_SYNC */
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
- 8 + /* FENCE for VM_FLUSH */
+ 12 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
the first COND_EXEC jump to the place just
@@ -6919,7 +6919,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
31 + /* DE_META */
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
- 8 + 8 + /* FENCE x2 */
+ 12 + 12 + /* FENCE x2 */
2, /* SWITCH_BUFFER */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 04b8ac4432c7..b33a4eb39193 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -36,10 +36,10 @@
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
+
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"
-#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"
@@ -48,6 +48,8 @@
#include "amdgpu_ras.h"
+#include "gfx_v9_4.h"
+
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -60,6 +62,9 @@
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L
+#define mmGCEA_PROBE_MAP 0x070c
+#define mmGCEA_PROBE_MAP_BASE_IDX 0
+
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
@@ -104,6 +109,397 @@ MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
+MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
+MODULE_FIRMWARE("amdgpu/renoir_me.bin");
+MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
+MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
+MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
+
+#define mmTCP_CHAN_STEER_0_ARCT 0x0b03
+#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
+#define mmTCP_CHAN_STEER_1_ARCT 0x0b04
+#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
+#define mmTCP_CHAN_STEER_2_ARCT 0x0b09
+#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
+#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
+#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
+#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
+#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
+#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
+#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
+
+enum ta_ras_gfx_subblock {
+ /*CPC*/
+ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
+ TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPC_UCODE,
+ TA_RAS_BLOCK__GFX_DC_STATE_ME1,
+ TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
+ TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
+ TA_RAS_BLOCK__GFX_DC_STATE_ME2,
+ TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
+ TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ /* CPF*/
+ TA_RAS_BLOCK__GFX_CPF_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
+ TA_RAS_BLOCK__GFX_CPF_TAG,
+ TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
+ /* CPG*/
+ TA_RAS_BLOCK__GFX_CPG_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
+ TA_RAS_BLOCK__GFX_CPG_TAG,
+ TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
+ /* GDS*/
+ TA_RAS_BLOCK__GFX_GDS_INDEX_START,
+ TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
+ TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
+ TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
+ TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
+ TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ /* SPI*/
+ TA_RAS_BLOCK__GFX_SPI_SR_MEM,
+ /* SQ*/
+ TA_RAS_BLOCK__GFX_SQ_INDEX_START,
+ TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
+ TA_RAS_BLOCK__GFX_SQ_LDS_D,
+ TA_RAS_BLOCK__GFX_SQ_LDS_I,
+ TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
+ TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
+ /* SQC (3 ranges)*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX_START,
+ /* SQC range 0*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
+ TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ /* SQC range 1*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
+ TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ /* SQC range 2*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
+ TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
+ /* TA*/
+ TA_RAS_BLOCK__GFX_TA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
+ TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
+ TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
+ TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ /* TCA*/
+ TA_RAS_BLOCK__GFX_TCA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ /* TCC (5 sub-ranges)*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX_START,
+ /* TCC range 0*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
+ TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
+ TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ /* TCC range 1*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
+ TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ /* TCC range 2*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
+ TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
+ TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
+ TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
+ TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
+ TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
+ TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ /* TCC range 3*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ /* TCC range 4*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
+ TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
+ TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
+ /* TCI*/
+ TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
+ /* TCP*/
+ TA_RAS_BLOCK__GFX_TCP_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
+ TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
+ TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
+ TA_RAS_BLOCK__GFX_TCP_DB_RAM,
+ TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
+ TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ /* TD*/
+ TA_RAS_BLOCK__GFX_TD_INDEX_START,
+ TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
+ TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
+ TA_RAS_BLOCK__GFX_TD_CS_FIFO,
+ TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
+ /* EA (3 sub-ranges)*/
+ TA_RAS_BLOCK__GFX_EA_INDEX_START,
+ /* EA range 0*/
+ TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
+ TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
+ TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ /* EA range 1*/
+ TA_RAS_BLOCK__GFX_EA_INDEX1_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
+ TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ /* EA range 2*/
+ TA_RAS_BLOCK__GFX_EA_INDEX2_START,
+ TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
+ TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
+ TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
+ TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
+ /* UTC VM L2 bank*/
+ TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
+ /* UTC VM walker*/
+ TA_RAS_BLOCK__UTC_VML2_WALKER,
+ /* UTC ATC L2 2MB cache*/
+ TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
+ /* UTC ATC L2 4KB cache*/
+ TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
+ TA_RAS_BLOCK__GFX_MAX
+};
+
+struct ras_gfx_subblock {
+ unsigned char *name;
+ int ta_subblock;
+ int hw_supported_error_type;
+ int sw_supported_error_type;
+};
+
+#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
+ [AMDGPU_RAS_BLOCK__##subblock] = { \
+ #subblock, \
+ TA_RAS_BLOCK__##subblock, \
+ ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
+ (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
+ }
+
+static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
+ 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
+ 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
+};
+
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
@@ -123,9 +519,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};
static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
@@ -188,9 +584,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};
static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
@@ -227,6 +623,22 @@ static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};
+static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
+};
+
static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
@@ -266,9 +678,23 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
@@ -310,19 +736,150 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
+static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status);
+static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev);
+static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
+ void *inject_if);
+
+static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ /* vmid_mask:0* queue_type:0 (KIQ) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+ amdgpu_ring_write(kiq_ring,
+ lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring,
+ upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+ /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ /* num_queues: must be 1 */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v9_0_kiq_set_resources,
+ .kiq_map_queues = gfx_v9_0_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
+ .kiq_query_status = gfx_v9_0_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
+}
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA10:
- if (!amdgpu_virt_support_skip_setting(adev)) {
- soc15_program_register_sequence(adev,
- golden_settings_gc_9_0,
- ARRAY_SIZE(golden_settings_gc_9_0));
- soc15_program_register_sequence(adev,
- golden_settings_gc_9_0_vg10,
- ARRAY_SIZE(golden_settings_gc_9_0_vg10));
- }
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0,
+ ARRAY_SIZE(golden_settings_gc_9_0));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0_vg10,
+ ARRAY_SIZE(golden_settings_gc_9_0_vg10));
break;
case CHIP_VEGA12:
soc15_program_register_sequence(adev,
@@ -340,6 +897,11 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_9_0_vg20,
ARRAY_SIZE(golden_settings_gc_9_0_vg20));
break;
+ case CHIP_ARCTURUS:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_4_1_arct,
+ ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
+ break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev, golden_settings_gc_9_1,
ARRAY_SIZE(golden_settings_gc_9_1));
@@ -352,12 +914,18 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_9_1_rv1,
ARRAY_SIZE(golden_settings_gc_9_1_rv1));
break;
+ case CHIP_RENOIR:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_1_rn,
+ ARRAY_SIZE(golden_settings_gc_9_1_rn));
+ return; /* for renoir, don't need common goldensetting */
default:
break;
}
- soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
- (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
+ if (adev->asic_type != CHIP_ARCTURUS)
+ soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
+ (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
@@ -538,6 +1106,12 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
adev->gfx.me_fw_write_wait = false;
adev->gfx.mec_fw_write_wait = false;
+ if ((adev->gfx.mec_fw_version < 0x000001a5) ||
+ (adev->gfx.mec_feature_version < 46) ||
+ (adev->gfx.pfp_fw_version < 0x000000b7) ||
+ (adev->gfx.pfp_feature_version < 46))
+ DRM_WARN_ONCE("CP firmware version too old, please update!");
+
switch (adev->asic_type) {
case CHIP_VEGA10:
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
@@ -588,66 +1162,80 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
}
}
+struct amdgpu_gfxoff_quirk {
+ u16 chip_vendor;
+ u16 chip_device;
+ u16 subsys_vendor;
+ u16 subsys_device;
+ u8 revision;
+};
+
+static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
+ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
+ { 0, 0, 0, 0, 0 },
+};
+
+static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
+{
+ const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
+
+ while (p && p->chip_device != 0) {
+ if (pdev->vendor == p->chip_vendor &&
+ pdev->device == p->chip_device &&
+ pdev->subsystem_vendor == p->subsys_vendor &&
+ pdev->subsystem_device == p->subsys_device &&
+ pdev->revision == p->revision) {
+ return true;
+ }
+ ++p;
+ }
+ return false;
+}
+
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
+ if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
break;
case CHIP_RAVEN:
- if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
- break;
- if ((adev->gfx.rlc_fw_version != 106 &&
- adev->gfx.rlc_fw_version < 531) ||
- (adev->gfx.rlc_fw_version == 53815) ||
- (adev->gfx.rlc_feature_version < 1) ||
- !adev->gfx.rlc.is_rlc_v2_1)
+ if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
+ ((adev->gfx.rlc_fw_version != 106 &&
+ adev->gfx.rlc_fw_version < 531) ||
+ (adev->gfx.rlc_fw_version == 53815) ||
+ (adev->gfx.rlc_feature_version < 1) ||
+ !adev->gfx.rlc.is_rlc_v2_1))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+
+ if (adev->pm.pp_feature & PP_GFXOFF_MASK)
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_RLC_SMU_HS;
+ break;
+ case CHIP_RENOIR:
+ if (adev->pm.pp_feature & PP_GFXOFF_MASK)
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_RLC_SMU_HS;
break;
default:
break;
}
}
-static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
+static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
+ const char *chip_name)
{
- const char *chip_name;
char fw_name[30];
int err;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
const struct gfx_firmware_header_v1_0 *cp_hdr;
- const struct rlc_firmware_header_v2_0 *rlc_hdr;
- unsigned int *tmp = NULL;
- unsigned int i = 0;
- uint16_t version_major;
- uint16_t version_minor;
- uint32_t smu_version;
-
- DRM_DEBUG("\n");
-
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- chip_name = "vega10";
- break;
- case CHIP_VEGA12:
- chip_name = "vega12";
- break;
- case CHIP_VEGA20:
- chip_name = "vega20";
- break;
- case CHIP_RAVEN:
- if (adev->rev_id >= 8)
- chip_name = "raven2";
- else if (adev->pdev->device == 0x15d8)
- chip_name = "picasso";
- else
- chip_name = "raven";
- break;
- default:
- BUG();
- }
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
@@ -682,6 +1270,58 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
+ info->fw = adev->gfx.pfp_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
+ info->fw = adev->gfx.me_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
+ info->fw = adev->gfx.ce_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+
+out:
+ if (err) {
+ dev_err(adev->dev,
+ "gfx9: Failed to load firmware \"%s\"\n",
+ fw_name);
+ release_firmware(adev->gfx.pfp_fw);
+ adev->gfx.pfp_fw = NULL;
+ release_firmware(adev->gfx.me_fw);
+ adev->gfx.me_fw = NULL;
+ release_firmware(adev->gfx.ce_fw);
+ adev->gfx.ce_fw = NULL;
+ }
+ return err;
+}
+
+static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
+ const char *chip_name)
+{
+ char fw_name[30];
+ int err;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ unsigned int *tmp = NULL;
+ unsigned int i = 0;
+ uint16_t version_major;
+ uint16_t version_minor;
+ uint32_t smu_version;
+
/*
* For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
* instead of picasso_rlc.bin.
@@ -756,57 +1396,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
if (adev->gfx.rlc.is_rlc_v2_1)
gfx_v9_0_init_rlc_ext_microcode(adev);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
-
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
- if (!err) {
- err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.mec2_fw->data;
- adev->gfx.mec2_fw_version =
- le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec2_feature_version =
- le32_to_cpu(cp_hdr->ucode_feature_version);
- } else {
- err = 0;
- adev->gfx.mec2_fw = NULL;
- }
-
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
- info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
- info->fw = adev->gfx.ce_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
info->fw = adev->gfx.rlc_fw;
@@ -836,7 +1426,58 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
}
+ }
+
+out:
+ if (err) {
+ dev_err(adev->dev,
+ "gfx9: Failed to load firmware \"%s\"\n",
+ fw_name);
+ release_firmware(adev->gfx.rlc_fw);
+ adev->gfx.rlc_fw = NULL;
+ }
+ return err;
+}
+
+static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
+ const char *chip_name)
+{
+ char fw_name[30];
+ int err;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+ err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+ err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ if (!err) {
+ err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ } else {
+ err = 0;
+ adev->gfx.mec2_fw = NULL;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
info->fw = adev->gfx.mec_fw;
@@ -859,13 +1500,19 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
- info->fw = adev->gfx.mec2_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
- }
+ /* TODO: Determine if MEC2 JT FW loading can be removed
+ for all GFX V9 asic and above */
+ if (adev->asic_type != CHIP_ARCTURUS &&
+ adev->asic_type != CHIP_RENOIR) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
+ info->fw = adev->gfx.mec2_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
+ PAGE_SIZE);
+ }
+ }
}
out:
@@ -875,14 +1522,6 @@ out:
dev_err(adev->dev,
"gfx9: Failed to load firmware \"%s\"\n",
fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
release_firmware(adev->gfx.mec_fw);
adev->gfx.mec_fw = NULL;
release_firmware(adev->gfx.mec2_fw);
@@ -891,6 +1530,59 @@ out:
return err;
}
+static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ int r;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ chip_name = "vega10";
+ break;
+ case CHIP_VEGA12:
+ chip_name = "vega12";
+ break;
+ case CHIP_VEGA20:
+ chip_name = "vega20";
+ break;
+ case CHIP_RAVEN:
+ if (adev->rev_id >= 8)
+ chip_name = "raven2";
+ else if (adev->pdev->device == 0x15d8)
+ chip_name = "picasso";
+ else
+ chip_name = "raven";
+ break;
+ case CHIP_ARCTURUS:
+ chip_name = "arcturus";
+ break;
+ case CHIP_RENOIR:
+ chip_name = "renoir";
+ break;
+ default:
+ BUG();
+ }
+
+ /* No CPG in Arcturus */
+ if (adev->asic_type != CHIP_ARCTURUS) {
+ r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
+ if (r)
+ return r;
+
+ r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
+ if (r)
+ return r;
+
+ return r;
+}
+
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
u32 count = 0;
@@ -1128,7 +1820,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return r;
}
- if (adev->asic_type == CHIP_RAVEN) {
+ if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
/* TODO: double check the cp_table_size for RV */
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
r = amdgpu_gfx_rlc_init_cpt(adev);
@@ -1150,39 +1842,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
- AMDGPU_GEM_DOMAIN_VRAM);
- if (!r)
- adev->gfx.rlc.clear_state_gpu_addr =
- amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
- return r;
-}
-
-static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
- int r;
-
- if (!adev->gfx.rlc.clear_state_obj)
- return;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
- }
-}
-
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1324,7 +1983,20 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
.read_wave_data = &gfx_v9_0_read_wave_data,
.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
- .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
+ .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+ .ras_error_inject = &gfx_v9_0_ras_error_inject,
+ .query_ras_error_count = &gfx_v9_0_query_ras_error_count
+};
+
+static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v9_0_select_se_sh,
+ .read_wave_data = &gfx_v9_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+ .ras_error_inject = &gfx_v9_4_ras_error_inject,
+ .query_ras_error_count = &gfx_v9_4_query_ras_error_count
};
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
@@ -1377,6 +2049,27 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
else
gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
break;
+ case CHIP_ARCTURUS:
+ adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ gb_addr_config &= ~0xf3e777ff;
+ gb_addr_config |= 0x22014042;
+ break;
+ case CHIP_RENOIR:
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ gb_addr_config &= ~0xf3e777ff;
+ gb_addr_config |= 0x22010042;
+ break;
default:
BUG();
break;
@@ -1422,190 +2115,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
- struct amdgpu_ngg_buf *ngg_buf,
- int size_se,
- int default_size_se)
-{
- int r;
-
- if (size_se < 0) {
- dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
- return -EINVAL;
- }
- size_se = size_se ? size_se : default_size_se;
-
- ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
- r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &ngg_buf->bo,
- &ngg_buf->gpu_addr,
- NULL);
- if (r) {
- dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
- return r;
- }
- ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
-
- return r;
-}
-
-static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < NGG_BUF_MAX; i++)
- amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
- &adev->gfx.ngg.buf[i].gpu_addr,
- NULL);
-
- memset(&adev->gfx.ngg.buf[0], 0,
- sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
-
- adev->gfx.ngg.init = false;
-
- return 0;
-}
-
-static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
-{
- int r;
-
- if (!amdgpu_ngg || adev->gfx.ngg.init == true)
- return 0;
-
- /* GDS reserve memory: 64 bytes alignment */
- adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
- adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
- adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
- adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
-
- /* Primitive Buffer */
- r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
- amdgpu_prim_buf_per_se,
- 64 * 1024);
- if (r) {
- dev_err(adev->dev, "Failed to create Primitive Buffer\n");
- goto err;
- }
-
- /* Position Buffer */
- r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
- amdgpu_pos_buf_per_se,
- 256 * 1024);
- if (r) {
- dev_err(adev->dev, "Failed to create Position Buffer\n");
- goto err;
- }
-
- /* Control Sideband */
- r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
- amdgpu_cntl_sb_buf_per_se,
- 256);
- if (r) {
- dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
- goto err;
- }
-
- /* Parameter Cache, not created by default */
- if (amdgpu_param_buf_per_se <= 0)
- goto out;
-
- r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
- amdgpu_param_buf_per_se,
- 512 * 1024);
- if (r) {
- dev_err(adev->dev, "Failed to create Parameter Cache\n");
- goto err;
- }
-
-out:
- adev->gfx.ngg.init = true;
- return 0;
-err:
- gfx_v9_0_ngg_fini(adev);
- return r;
-}
-
-static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
- int r;
- u32 data, base;
-
- if (!amdgpu_ngg)
- return 0;
-
- /* Program buffer size */
- data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
- adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
- data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
- adev->gfx.ngg.buf[NGG_POS].size >> 8);
- WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
-
- data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
- adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
- data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
- adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
- WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
-
- /* Program buffer base address */
- base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
- data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
- WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
-
- base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
- data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
- WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
-
- base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
- data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
- WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
-
- base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
- data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
- WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
-
- base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
- data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
- WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
-
- base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
- data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
- WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
-
- /* Clear GDS reserved memory */
- r = amdgpu_ring_alloc(ring, 17);
- if (r) {
- DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
- ring->name, r);
- return r;
- }
-
- gfx_v9_0_write_data_to_reg(ring, 0, false,
- SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
- (adev->gds.gds_size +
- adev->gfx.ngg.gds_reserve_size));
-
- amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
- amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
- PACKET3_DMA_DATA_DST_SEL(1) |
- PACKET3_DMA_DATA_SRC_SEL(2)));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
- adev->gfx.ngg.gds_reserve_size);
-
- gfx_v9_0_write_data_to_reg(ring, 0, false,
- SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
-
- amdgpu_ring_commit(ring);
-
- return 0;
-}
-
static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
int mec, int pipe, int queue)
{
@@ -1653,6 +2162,8 @@ static int gfx_v9_0_sw_init(void *handle)
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_ARCTURUS:
+ case CHIP_RENOIR:
adev->gfx.mec.num_mec = 2;
break;
default:
@@ -1771,10 +2282,6 @@ static int gfx_v9_0_sw_init(void *handle)
if (r)
return r;
- r = gfx_v9_0_ngg_init(adev);
- if (r)
- return r;
-
return 0;
}
@@ -1784,19 +2291,7 @@ static int gfx_v9_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
- adev->gfx.ras_if) {
- struct ras_common_if *ras_if = adev->gfx.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- };
-
- amdgpu_ras_debugfs_remove(adev, ras_if);
- amdgpu_ras_sysfs_remove(adev, ras_if);
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
- amdgpu_ras_feature_enable(adev, ras_if, 0);
- kfree(ras_if);
- }
+ amdgpu_gfx_ras_fini(adev);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -1804,13 +2299,12 @@ static int gfx_v9_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
gfx_v9_0_mec_fini(adev);
- gfx_v9_0_ngg_fini(adev);
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
- if (adev->asic_type == CHIP_RAVEN) {
+ if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
@@ -1929,6 +2423,40 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
}
}
+static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < 16; vmid++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
+ tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
+ DISABLE_BARRIER_WAITCNT, 1);
+ WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
+ break;
+ default:
+ break;
+ };
+}
+
static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
{
u32 tmp;
@@ -1945,7 +2473,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
soc15_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
if (i == 0) {
@@ -1973,6 +2501,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
gfx_v9_0_init_compute_vmid(adev);
+ gfx_v9_0_init_gds_vmid(adev);
+ gfx_v9_0_init_sq_config(adev);
}
static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
@@ -2028,6 +2558,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
adev->gfx.rlc.clear_state_gpu_addr >> 32);
@@ -2357,7 +2888,10 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
* And it's needed by gfxoff feature.
*/
if (adev->gfx.rlc.is_rlc_v2_1) {
- gfx_v9_1_init_rlc_save_restore_list(adev);
+ if (adev->asic_type == CHIP_VEGA12 ||
+ (adev->asic_type == CHIP_RAVEN &&
+ adev->rev_id >= 8))
+ gfx_v9_1_init_rlc_save_restore_list(adev);
gfx_v9_0_enable_save_restore_machine(adev);
}
@@ -2768,74 +3302,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}
-static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- uint64_t queue_mask = 0;
- int r, i;
-
- for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- if (!test_bit(i, adev->gfx.mec.queue_bitmap))
- continue;
-
- /* This situation may be hit in the future if a new HW
- * generation exposes more than 64 queues. If so, the
- * definition of queue_mask needs updating */
- if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
- DRM_ERROR("Invalid KCQ enabled: %d\n", i);
- break;
- }
-
- queue_mask |= (1ull << i);
- }
-
- r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- /* set resources */
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
- amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
- PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
- amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
- amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* oac mask */
- amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
- uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
- uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
- /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
- PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
- PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
- PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
- PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
- PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
- PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
- PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
- PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
- PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
- amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
- amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
- amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
- }
-
- r = amdgpu_ring_test_helper(kiq_ring);
- if (r)
- DRM_ERROR("KCQ enable failed\n");
-
- return r;
-}
-
static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -2849,6 +3315,10 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
mqd->compute_misc_reserved = 0x00000003;
mqd->dynamic_cu_mask_addr_lo =
@@ -2968,8 +3438,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
- /* activate the queue */
- mqd->cp_hqd_active = 1;
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
return 0;
}
@@ -3238,7 +3711,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
goto done;
}
- r = gfx_v9_0_kiq_kcq_enable(adev);
+ r = amdgpu_gfx_enable_kcq(adev);
done:
return r;
}
@@ -3252,10 +3725,12 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
gfx_v9_0_enable_gui_idle_interrupt(adev, false);
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- /* legacy firmware loading */
- r = gfx_v9_0_cp_gfx_load_microcode(adev);
- if (r)
- return r;
+ if (adev->asic_type != CHIP_ARCTURUS) {
+ /* legacy firmware loading */
+ r = gfx_v9_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+ }
r = gfx_v9_0_cp_compute_load_microcode(adev);
if (r)
@@ -3266,18 +3741,22 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
if (r)
return r;
- r = gfx_v9_0_cp_gfx_resume(adev);
- if (r)
- return r;
+ if (adev->asic_type != CHIP_ARCTURUS) {
+ r = gfx_v9_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ }
r = gfx_v9_0_kcq_resume(adev);
if (r)
return r;
- ring = &adev->gfx.gfx_ring[0];
- r = amdgpu_ring_test_helper(ring);
- if (r)
- return r;
+ if (adev->asic_type != CHIP_ARCTURUS) {
+ ring = &adev->gfx.gfx_ring[0];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
@@ -3289,9 +3768,27 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return 0;
}
+static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ if (adev->asic_type != CHIP_ARCTURUS)
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
+ tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
+ adev->df.hash_status.hash_64k);
+ tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
+ adev->df.hash_status.hash_2m);
+ tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
+ adev->df.hash_status.hash_1g);
+ WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
+}
+
static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
- gfx_v9_0_cp_gfx_enable(adev, enable);
+ if (adev->asic_type != CHIP_ARCTURUS)
+ gfx_v9_0_cp_gfx_enable(adev, enable);
gfx_v9_0_cp_compute_enable(adev, enable);
}
@@ -3300,13 +3797,12 @@ static int gfx_v9_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- gfx_v9_0_init_golden_registers(adev);
+ if (!amdgpu_sriov_vf(adev))
+ gfx_v9_0_init_golden_registers(adev);
gfx_v9_0_constants_init(adev);
- r = gfx_v9_0_csb_vram_pin(adev);
- if (r)
- return r;
+ gfx_v9_0_init_tcp_config(adev);
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
@@ -3316,40 +3812,6 @@ static int gfx_v9_0_hw_init(void *handle)
if (r)
return r;
- r = gfx_v9_0_ngg_en(adev);
- if (r)
- return r;
-
- return r;
-}
-
-static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
-{
- int r, i;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-
- r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
- if (r)
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
- PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
- PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
- PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
- PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
- amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
- amdgpu_ring_write(kiq_ring, 0);
- amdgpu_ring_write(kiq_ring, 0);
- amdgpu_ring_write(kiq_ring, 0);
- }
- r = amdgpu_ring_test_helper(kiq_ring);
- if (r)
- DRM_ERROR("KCQ disable failed\n");
-
return r;
}
@@ -3361,8 +3823,10 @@ static int gfx_v9_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
- /* disable KCQ to avoid CPC touch memory not valid anymore */
- gfx_v9_0_kcq_disable(adev);
+ /* DF freeze and kcq disable will fail */
+ if (!amdgpu_ras_intr_triggered())
+ /* disable KCQ to avoid CPC touch memory not valid anymore */
+ amdgpu_gfx_disable_kcq(adev);
if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false);
@@ -3391,8 +3855,6 @@ static int gfx_v9_0_hw_fini(void *handle)
gfx_v9_0_cp_enable(adev, false);
adev->gfx.rlc.funcs->stop(adev);
- gfx_v9_0_csb_vram_unpin(adev);
-
return 0;
}
@@ -3466,8 +3928,9 @@ static int gfx_v9_0_soft_reset(void *handle)
/* stop the rlc */
adev->gfx.rlc.funcs->stop(adev);
- /* Disable GFX parsing/prefetching */
- gfx_v9_0_cp_gfx_enable(adev, false);
+ if (adev->asic_type != CHIP_ARCTURUS)
+ /* Disable GFX parsing/prefetching */
+ gfx_v9_0_cp_gfx_enable(adev, false);
/* Disable MEC parsing/prefetching */
gfx_v9_0_cp_compute_enable(adev, false);
@@ -3497,9 +3960,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
uint64_t clock;
mutex_lock(&adev->gfx.gpu_clock_mutex);
- WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
- clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
- ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
+ uint32_t tmp, lsb, msb, i = 0;
+ do {
+ if (i != 0)
+ udelay(1);
+ tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+ lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
+ msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+ i++;
+ } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
+ clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
+ } else {
+ WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ }
mutex_unlock(&adev->gfx.gpu_clock_mutex);
return clock;
}
@@ -3567,33 +4043,61 @@ static const u32 sgpr_init_compute_shader[] =
0xbe800080, 0xbf810000,
};
+/* When below register arrays changed, please update gpr_reg_size,
+ and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
+ to cover all gfx9 ASICs */
static const struct soc15_reg_entry vgpr_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static const struct soc15_reg_entry sgpr1_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
};
-static const struct soc15_reg_entry sgpr_init_regs[] = {
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+static const struct soc15_reg_entry sgpr2_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
};
-static const struct soc15_reg_entry sec_ded_counter_registers[] = {
+static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
{ SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
{ SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
@@ -3614,6 +4118,7 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
{ SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
{ SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
{ SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
@@ -3626,6 +4131,7 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
{ SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
+ { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
};
static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
@@ -3633,6 +4139,10 @@ static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
int i, r;
+ /* only support when RAS is enabled */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return 0;
+
r = amdgpu_ring_alloc(ring, 7);
if (r) {
DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
@@ -3676,10 +4186,16 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- int r, i, j, k;
+ int r, i;
unsigned total_size, vgpr_offset, sgpr_offset;
u64 gpu_addr;
+ int compute_dim_x = adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_cu_per_sh *
+ adev->gfx.config.max_sh_per_se;
+ int sgpr_work_group_size = 5;
+ int gpr_reg_size = compute_dim_x / 16 + 6;
+
/* only support when RAS is enabled */
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
return 0;
@@ -3689,9 +4205,11 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
return 0;
total_size =
- ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+ (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
+ total_size +=
+ (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
total_size +=
- ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+ (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
total_size = ALIGN(total_size, 256);
vgpr_offset = total_size;
total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
@@ -3718,7 +4236,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* VGPR */
/* write the register state for the compute dispatch */
- for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
+ for (i = 0; i < gpr_reg_size; i++) {
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
- PACKET3_SET_SH_REG_START;
@@ -3734,7 +4252,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* write dispatch packet */
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
- ib.ptr[ib.length_dw++] = 128; /* x */
+ ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
ib.ptr[ib.length_dw++] = 1; /* y */
ib.ptr[ib.length_dw++] = 1; /* z */
ib.ptr[ib.length_dw++] =
@@ -3744,13 +4262,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
- /* SGPR */
+ /* SGPR1 */
/* write the register state for the compute dispatch */
- for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
+ for (i = 0; i < gpr_reg_size; i++) {
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
- ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
- PACKET3_SET_SH_REG_START;
- ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
+ ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
}
/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
@@ -3762,7 +4280,35 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* write dispatch packet */
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
- ib.ptr[ib.length_dw++] = 128; /* x */
+ ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* SGPR2 */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < gpr_reg_size; i++) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
ib.ptr[ib.length_dw++] = 1; /* y */
ib.ptr[ib.length_dw++] = 1; /* z */
ib.ptr[ib.length_dw++] =
@@ -3786,18 +4332,17 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
goto fail;
}
- /* read back registers to clear the counters */
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
- for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
- for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
- gfx_v9_0_select_se_sh(adev, j, 0x0, k);
- RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
- }
- }
+ switch (adev->asic_type)
+ {
+ case CHIP_VEGA20:
+ gfx_v9_0_clear_ras_edc_counter(adev);
+ break;
+ case CHIP_ARCTURUS:
+ gfx_v9_4_clear_ras_edc_counter(adev);
+ break;
+ default:
+ break;
}
- WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
- mutex_unlock(&adev->grbm_idx_mutex);
fail:
amdgpu_ib_free(adev, &ib, NULL);
@@ -3810,8 +4355,12 @@ static int gfx_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
+ if (adev->asic_type == CHIP_ARCTURUS)
+ adev->gfx.num_gfx_rings = 0;
+ else
+ adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ gfx_v9_0_set_kiq_pm4_funcs(adev);
gfx_v9_0_set_ring_funcs(adev);
gfx_v9_0_set_irq_funcs(adev);
gfx_v9_0_set_gds_init(adev);
@@ -3820,108 +4369,33 @@ static int gfx_v9_0_early_init(void *handle)
return 0;
}
-static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
- struct amdgpu_iv_entry *entry);
-
static int gfx_v9_0_ecc_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ras_common_if **ras_if = &adev->gfx.ras_if;
- struct ras_ih_if ih_info = {
- .cb = gfx_v9_0_process_ras_data_cb,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = "gfx_err_count",
- .debugfs_name = "gfx_err_inject",
- };
- struct ras_common_if ras_block = {
- .block = AMDGPU_RAS_BLOCK__GFX,
- .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
- .sub_block_index = 0,
- .name = "gfx",
- };
int r;
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
- amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
- return 0;
+ /*
+ * Temp workaround to fix the issue that CP firmware fails to
+ * update read pointer when CPDMA is writing clearing operation
+ * to GDS in suspend/resume sequence on several cards. So just
+ * limit this operation in cold boot sequence.
+ */
+ if (!adev->in_suspend) {
+ r = gfx_v9_0_do_edc_gds_workarounds(adev);
+ if (r)
+ return r;
}
- r = gfx_v9_0_do_edc_gds_workarounds(adev);
- if (r)
- return r;
-
/* requires IBs so do in late init after IB pool is initialized */
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
if (r)
return r;
- /* handle resume path. */
- if (*ras_if) {
- /* resend ras TA enable cmd during resume.
- * prepare to handle failure.
- */
- ih_info.head = **ras_if;
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- /* request a gpu reset. will run again. */
- amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__GFX);
- return 0;
- }
- /* fail to enable ras, cleanup all. */
- goto irq;
- }
- /* enable successfully. continue. */
- goto resume;
- }
-
- *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
- if (!*ras_if)
- return -ENOMEM;
-
- **ras_if = ras_block;
-
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__GFX);
- r = 0;
- }
- goto feature;
- }
-
- ih_info.head = **ras_if;
- fs_info.head = **ras_if;
-
- r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
- if (r)
- goto interrupt;
-
- amdgpu_ras_debugfs_create(adev, &fs_info);
-
- r = amdgpu_ras_sysfs_create(adev, &fs_info);
- if (r)
- goto sysfs;
-resume:
- r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ r = amdgpu_gfx_ras_late_init(adev);
if (r)
- goto irq;
+ return r;
return 0;
-irq:
- amdgpu_ras_sysfs_remove(adev, *ras_if);
-sysfs:
- amdgpu_ras_debugfs_remove(adev, *ras_if);
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-interrupt:
- amdgpu_ras_feature_enable(adev, *ras_if, 0);
-feature:
- kfree(*ras_if);
- *ras_if = NULL;
- return r;
}
static int gfx_v9_0_late_init(void *handle)
@@ -3992,7 +4466,8 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
} else {
gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
- gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+ gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
amdgpu_gfx_rlc_exit_safe_mode(adev);
@@ -4097,6 +4572,9 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
{
uint32_t data, def;
+ if (adev->asic_type == CHIP_ARCTURUS)
+ return;
+
amdgpu_gfx_rlc_enter_safe_mode(adev);
/* Enable 3D CGCG/CGLS */
@@ -4162,8 +4640,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
/* enable cgcg FSM(0x0000363F) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
- data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
- RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ if (adev->asic_type == CHIP_ARCTURUS)
+ data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ else
+ data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
@@ -4231,10 +4713,11 @@ static int gfx_v9_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_PG_STATE_GATE);
switch (adev->asic_type) {
case CHIP_RAVEN:
+ case CHIP_RENOIR:
if (!enable) {
amdgpu_gfx_off_ctrl(adev, false);
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
@@ -4289,8 +4772,10 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_ARCTURUS:
+ case CHIP_RENOIR:
gfx_v9_0_update_gfx_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -4307,12 +4792,12 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
*flags = 0;
/* AMD_CG_SUPPORT_GFX_MGCG */
- data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
*flags |= AMD_CG_SUPPORT_GFX_MGCG;
/* AMD_CG_SUPPORT_GFX_CGCG */
- data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_CGCG;
@@ -4321,23 +4806,25 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
*flags |= AMD_CG_SUPPORT_GFX_CGLS;
/* AMD_CG_SUPPORT_GFX_RLC_LS */
- data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
/* AMD_CG_SUPPORT_GFX_CP_LS */
- data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
- /* AMD_CG_SUPPORT_GFX_3D_CGCG */
- data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
- if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
- *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+ if (adev->asic_type != CHIP_ARCTURUS) {
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
- /* AMD_CG_SUPPORT_GFX_3D_CGLS */
- if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
- *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+ }
}
static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
@@ -4379,7 +4866,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask, reg_mem_engine;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
switch (ring->me) {
@@ -4399,8 +4886,8 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
- adev->nbio_funcs->get_hdp_flush_req_offset(adev),
- adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
ref_and_mask, ref_and_mask, 0x20);
}
@@ -4801,6 +5288,7 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
@@ -4809,9 +5297,9 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
}
static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -5132,31 +5620,788 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
- struct amdgpu_iv_entry *entry)
+
+static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
+ { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
+ },
+ { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
+ },
+ { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
+ 0, 0
+ },
+ { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
+ },
+ { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
+ SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
+ 0, 0
+ },
+ { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
+ SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
+ SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
+ },
+ { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
+ },
+ { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
+ },
+ { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
+ 0, 0
+ },
+ { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
+ },
+ { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
+ },
+ { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
+ 0, 0
+ },
+ { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
+ },
+ { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
+ },
+ { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
+ },
+ { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
+ },
+ { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
+ },
+ { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
+ },
+ { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
+ },
+ { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
+ },
+ { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
+ },
+ { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
+ },
+ { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
+ 0, 0
+ },
+ { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
+ },
+ { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
+ },
+ { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
+ 0, 0
+ },
+ { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
+ },
+ { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
+ },
+ { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
+ },
+ { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
+ },
+ { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
+ },
+ { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
+ },
+ { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
+ },
+ { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
+ },
+ { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
+ },
+ { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
+ },
+ { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
+ },
+ { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
+ },
+ { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
+ },
+ { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
+ },
+ { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
+ },
+ { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
+ },
+ { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
+ },
+ { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
+ },
+ { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
+ },
+ { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
+ },
+ { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
+ },
+ { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
+ },
+ { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
+ },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0
+ },
+ { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
+ },
+ { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
+ },
+ { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
+ },
+ { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ 0, 0
+ }
+};
+
+static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
+ void *inject_if)
+{
+ struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
+ int ret;
+ struct ta_ras_trigger_error_input block_info = { 0 };
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return -EINVAL;
+
+ if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
+ return -EINVAL;
+
+ if (!ras_gfx_subblocks[info->head.sub_block_index].name)
+ return -EPERM;
+
+ if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
+ info->head.type)) {
+ DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
+ ras_gfx_subblocks[info->head.sub_block_index].name,
+ info->head.type);
+ return -EPERM;
+ }
+
+ if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
+ info->head.type)) {
+ DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
+ ras_gfx_subblocks[info->head.sub_block_index].name,
+ info->head.type);
+ return -EPERM;
+ }
+
+ block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
+ block_info.sub_block_index =
+ ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
+ block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
+ block_info.address = info->address;
+ block_info.value = info->value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return ret;
+}
+
+static const char *vml2_mems[] = {
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM1",
+};
+
+static const char *vml2_walker_mems[] = {
+ "UTC_VML2_CACHE_PDE0_MEM0",
+ "UTC_VML2_CACHE_PDE0_MEM1",
+ "UTC_VML2_CACHE_PDE1_MEM0",
+ "UTC_VML2_CACHE_PDE1_MEM1",
+ "UTC_VML2_CACHE_PDE2_MEM0",
+ "UTC_VML2_CACHE_PDE2_MEM1",
+ "UTC_VML2_RDIF_LOG_FIFO",
+};
+
+static const char *atc_l2_cache_2m_mems[] = {
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
+};
+
+static const char *atc_l2_cache_4k_mems[] = {
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
+};
+
+static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
+ struct ras_err_data *err_data)
+{
+ uint32_t i, data;
+ uint32_t sec_count, ded_count;
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
+
+ sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ vml2_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ vml2_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
+
+ sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
+ SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ vml2_walker_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
+ DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ vml2_walker_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
+
+ sec_count = (data & 0x00006000L) >> 0xd;
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ atc_l2_cache_2m_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
+
+ sec_count = (data & 0x00006000L) >> 0xd;
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ atc_l2_cache_4k_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = (data & 0x00018000L) >> 0xf;
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ atc_l2_cache_4k_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+
+ return 0;
+}
+
+static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
+ uint32_t se_id, uint32_t inst_id, uint32_t value,
+ uint32_t *sec_count, uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
+ if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
+ gfx_v9_0_ras_fields[i].seg != reg->seg ||
+ gfx_v9_0_ras_fields[i].inst != reg->inst)
+ continue;
+
+ sec_cnt = (value &
+ gfx_v9_0_ras_fields[i].sec_count_mask) >>
+ gfx_v9_0_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
+ gfx_v9_0_ras_fields[i].name,
+ se_id, inst_id,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ gfx_v9_0_ras_fields[i].ded_count_mask) >>
+ gfx_v9_0_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
+ gfx_v9_0_ras_fields[i].name,
+ se_id, inst_id,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev)
{
- /* TODO ue will trigger an interrupt. */
- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- amdgpu_ras_reset_gpu(adev, 0);
- return AMDGPU_RAS_UE;
+ int i, j, k;
+
+ /* read back registers to clear the counters */
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
+ gfx_v9_0_select_se_sh(adev, j, 0x0, k);
+ RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
+ }
+ }
+ }
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
+ }
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
}
-static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
+static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
{
- struct ras_common_if *ras_if = adev->gfx.ras_if;
- struct ras_dispatch_if ih_data = {
- .entry = entry,
- };
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i, j, k;
+ uint32_t reg_value;
- if (!ras_if)
- return 0;
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return -EINVAL;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
+ gfx_v9_0_select_se_sh(adev, j, 0, k);
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
+ if (reg_value)
+ gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
+ j, k, reg_value,
+ &sec_count, &ded_count);
+ }
+ }
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
- ih_data.head = *ras_if;
+ gfx_v9_0_query_utc_edc_status(adev, err_data);
- DRM_ERROR("CP ECC ERROR IRQ\n");
- amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
@@ -5183,7 +6428,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB,
+ .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
@@ -5234,7 +6479,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB,
+ .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v9_0_ring_get_rptr_compute,
.get_wptr = gfx_v9_0_ring_get_wptr_compute,
.set_wptr = gfx_v9_0_ring_set_wptr_compute,
@@ -5269,7 +6514,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB,
+ .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v9_0_ring_get_rptr_compute,
.get_wptr = gfx_v9_0_ring_get_wptr_compute,
.set_wptr = gfx_v9_0_ring_set_wptr_compute,
@@ -5323,7 +6568,7 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
.set = gfx_v9_0_set_cp_ecc_error_state,
- .process = gfx_v9_0_cp_ecc_error_irq,
+ .process = amdgpu_gfx_cp_ecc_error_irq,
};
@@ -5349,6 +6594,8 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_ARCTURUS:
+ case CHIP_RENOIR:
adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
break;
default:
@@ -5366,6 +6613,7 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
adev->gds.gds_size = 0x10000;
break;
case CHIP_RAVEN:
+ case CHIP_ARCTURUS:
adev->gds.gds_size = 0x1000;
break;
default:
@@ -5387,6 +6635,9 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
else
adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
break;
+ case CHIP_ARCTURUS:
+ adev->gds.gds_compute_max_wave_id = 0xfff;
+ break;
default:
/* this really depends on the chip */
adev->gds.gds_compute_max_wave_id = 0x7ff;
@@ -5431,12 +6682,21 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
{
int i, j, k, counter, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
- unsigned disable_masks[4 * 2];
+ unsigned disable_masks[4 * 4];
if (!adev || !cu_info)
return -EINVAL;
- amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+ /*
+ * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
+ */
+ if (adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se > 16)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks,
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
@@ -5445,11 +6705,23 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
ao_bitmap = 0;
counter = 0;
gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
- if (i < 4 && j < 2)
- gfx_v9_0_set_user_cu_inactive_bitmap(
- adev, disable_masks[i * 2 + j]);
+ gfx_v9_0_set_user_cu_inactive_bitmap(
+ adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
- cu_info->bitmap[i][j] = bitmap;
+
+ /*
+ * The bitmap(and ao_cu_bitmap) in cu_info structure is
+ * 4x4 size array, and it's usually suitable for Vega
+ * ASICs which has 4*2 SE/SH layout.
+ * But for Arcturus, SE/SH layout is changed to 8*1.
+ * To mostly reduce the impact, we make it compatible
+ * with current bitmap array as below:
+ * SE4,SH0 --> bitmap[0][1]
+ * SE5,SH0 --> bitmap[1][1]
+ * SE6,SH0 --> bitmap[2][1]
+ * SE7,SH0 --> bitmap[3][1]
+ */
+ cu_info->bitmap[i % 4][j + i / 4] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {
@@ -5462,7 +6734,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
active_cu_number += counter;
if (i < 2 && j < 2)
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
- cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
+ cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
}
}
gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
new file mode 100644
index 000000000000..f099f13d7f1e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -0,0 +1,978 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_pm.h"
+
+#include "gc/gc_9_4_1_offset.h"
+#include "gc/gc_9_4_1_sh_mask.h"
+#include "soc15_common.h"
+
+#include "gfx_v9_4.h"
+#include "amdgpu_ras.h"
+
+static const struct soc15_reg_entry gfx_v9_4_edc_counter_regs[] = {
+ /* CPC */
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1 },
+ /* DC */
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1 },
+ /* CPF */
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1 },
+ /* GDS */
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1 },
+ /* SPI */
+ { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1 },
+ /* SQ */
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16 },
+ /* SQC */
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), 0, 4, 6 },
+ /* TA */
+ { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16 },
+ /* TCA */
+ { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2 },
+ /* TCC */
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16 },
+ /* TCI */
+ { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72 },
+ /* TCP */
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16 },
+ /* TD */
+ { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16 },
+ /* GCEA */
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32 },
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32 },
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 1, 32 },
+ /* RLC */
+ { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), 0, 1, 1 },
+};
+
+static void gfx_v9_4_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
+}
+
+static const struct soc15_ras_field_entry gfx_v9_4_ras_fields[] = {
+ /* CPC */
+ { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
+ { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) },
+ { "CPC_DC_STATE_RAM_ME1", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_CSINVOC_RAM_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_RESTORE_RAM_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_CSINVOC_RAM1_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT1_ME1),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT1_ME1) },
+ { "CPC_DC_RESTORE_RAM1_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT1_ME1),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT1_ME1) },
+
+ /* CPF */
+ { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME2),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME2) },
+ { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME1) },
+ { "CPF_TCIU_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) },
+
+ /* GDS */
+ { "GDS_GRBM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT),
+ SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, SEC),
+ SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, DED) },
+ { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) },
+ { "GDS_PHY_CMD_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
+ { "GDS_PHY_DATA_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_DED) },
+ { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE0_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE1_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE2_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE3_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
+
+ /* SPI */
+ { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_DED_COUNT) },
+ { "SPI_GDS_EXPREQ", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_DED_COUNT) },
+ { "SPI_WB_GRANT_30", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_DED_COUNT) },
+ { "SPI_WB_GRANT_61", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_DED_COUNT) },
+ { "SPI_LIFE_CNT", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_DED_COUNT) },
+
+ /* SQ */
+ { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) },
+ { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) },
+ { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) },
+ { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) },
+ { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) },
+ { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) },
+ { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) },
+
+ /* SQC */
+ { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKA_UTCL1_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKA_UTCL1_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKA_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_HIT_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ DATA_BANKA_MISS_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKB_UTCL1_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKB_UTCL1_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKB_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_HIT_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKB_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ DATA_BANKB_MISS_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
+
+ /* TA */
+ { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
+ { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_DED_COUNT) },
+ { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_DED_COUNT) },
+ { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_DED_COUNT) },
+ { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_DED_COUNT) },
+
+ /* TCA */
+ { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_DED_COUNT) },
+ { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_DED_COUNT) },
+
+ /* TCC */
+ { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
+ { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
+ { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
+ { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
+ { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_DED_COUNT) },
+ { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_DED_COUNT) },
+ { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_DED_COUNT) },
+ { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_DED_COUNT) },
+ { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_DED_COUNT) },
+ { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_DED_COUNT) },
+ { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_DED_COUNT) },
+ { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
+ { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_DED_COUNT) },
+ { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_DED_COUNT) },
+ { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_DED_COUNT) },
+
+ /* TCI */
+ { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_DED_COUNT) },
+
+ /* TCP */
+ { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
+ { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
+ { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_DED_COUNT) },
+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_DED_COUNT) },
+ { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0, 0 },
+ { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
+ { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
+
+ /* TD */
+ { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
+ { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
+ { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_DED_COUNT) },
+
+ /* EA */
+ { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
+ { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
+ { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
+ { "EA_GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
+ { "EA_GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
+ { "EA_GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT) },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT) },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0, 0 },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IORD_CMDMEM_DED_COUNT) },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0, 0 },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_CMDMEM_DED_COUNT) },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0, 0 },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_DATAMEM_DED_COUNT) },
+ { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT) },
+ { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT) },
+ { "EA_MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_DED_COUNT) },
+ { "EA_MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_DED_COUNT) },
+ { "EA_MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_DED_COUNT) },
+ { "EA_MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_DED_COUNT) },
+ { "EA_MAM_A0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_DED_COUNT) },
+ { "EA_MAM_A1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_DED_COUNT) },
+ { "EA_MAM_A2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_DED_COUNT) },
+ { "EA_MAM_A3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_DED_COUNT) },
+ { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, MAM_AFMEM_SEC_COUNT), 0, 0 },
+ { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_AFMEM_DED_COUNT) },
+
+ /* RLC */
+ { "RLCG_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_DED_COUNT) },
+ { "RLCG_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_DED_COUNT) },
+ { "RLCV_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_DED_COUNT) },
+ { "RLCV_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_TCTAG_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_DED_COUNT) },
+ { "RLC_SPM_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SRM_DATA_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_DED_COUNT) },
+ { "RLC_SRM_ADDR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_DED_COUNT) },
+ { "RLC_SPM_SE0_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE1_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE2_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE3_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE4_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE5_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE6_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE7_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_DED_COUNT) },
+};
+
+static const char * const vml2_mems[] = {
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM1",
+ "UTC_VML2_IFIFO_GROUP0",
+ "UTC_VML2_IFIFO_GROUP1",
+ "UTC_VML2_IFIFO_GROUP2",
+ "UTC_VML2_IFIFO_GROUP3",
+ "UTC_VML2_IFIFO_GROUP4",
+ "UTC_VML2_IFIFO_GROUP5",
+ "UTC_VML2_IFIFO_GROUP6",
+ "UTC_VML2_IFIFO_GROUP7",
+ "UTC_VML2_IFIFO_GROUP8",
+ "UTC_VML2_IFIFO_GROUP9",
+ "UTC_VML2_IFIFO_GROUP10",
+ "UTC_VML2_IFIFO_GROUP11",
+ "UTC_VML2_IFIFO_GROUP12",
+ "UTC_VML2_IFIFO_GROUP13",
+ "UTC_VML2_IFIFO_GROUP14",
+ "UTC_VML2_IFIFO_GROUP15",
+ "UTC_VML2_IFIFO_GROUP16",
+ "UTC_VML2_IFIFO_GROUP17",
+ "UTC_VML2_IFIFO_GROUP18",
+ "UTC_VML2_IFIFO_GROUP19",
+ "UTC_VML2_IFIFO_GROUP20",
+ "UTC_VML2_IFIFO_GROUP21",
+ "UTC_VML2_IFIFO_GROUP22",
+ "UTC_VML2_IFIFO_GROUP23",
+ "UTC_VML2_IFIFO_GROUP24",
+};
+
+static const char * const vml2_walker_mems[] = {
+ "UTC_VML2_CACHE_PDE0_MEM0",
+ "UTC_VML2_CACHE_PDE0_MEM1",
+ "UTC_VML2_CACHE_PDE1_MEM0",
+ "UTC_VML2_CACHE_PDE1_MEM1",
+ "UTC_VML2_CACHE_PDE2_MEM0",
+ "UTC_VML2_CACHE_PDE2_MEM1",
+ "UTC_VML2_RDIF_ARADDRS",
+ "UTC_VML2_RDIF_LOG_FIFO",
+ "UTC_VML2_QUEUE_REQ",
+ "UTC_VML2_QUEUE_RET",
+};
+
+static const char * const utcl2_router_mems[] = {
+ "UTCL2_ROUTER_GROUP0_VML2_REQ_FIFO0",
+ "UTCL2_ROUTER_GROUP1_VML2_REQ_FIFO1",
+ "UTCL2_ROUTER_GROUP2_VML2_REQ_FIFO2",
+ "UTCL2_ROUTER_GROUP3_VML2_REQ_FIFO3",
+ "UTCL2_ROUTER_GROUP4_VML2_REQ_FIFO4",
+ "UTCL2_ROUTER_GROUP5_VML2_REQ_FIFO5",
+ "UTCL2_ROUTER_GROUP6_VML2_REQ_FIFO6",
+ "UTCL2_ROUTER_GROUP7_VML2_REQ_FIFO7",
+ "UTCL2_ROUTER_GROUP8_VML2_REQ_FIFO8",
+ "UTCL2_ROUTER_GROUP9_VML2_REQ_FIFO9",
+ "UTCL2_ROUTER_GROUP10_VML2_REQ_FIFO10",
+ "UTCL2_ROUTER_GROUP11_VML2_REQ_FIFO11",
+ "UTCL2_ROUTER_GROUP12_VML2_REQ_FIFO12",
+ "UTCL2_ROUTER_GROUP13_VML2_REQ_FIFO13",
+ "UTCL2_ROUTER_GROUP14_VML2_REQ_FIFO14",
+ "UTCL2_ROUTER_GROUP15_VML2_REQ_FIFO15",
+ "UTCL2_ROUTER_GROUP16_VML2_REQ_FIFO16",
+ "UTCL2_ROUTER_GROUP17_VML2_REQ_FIFO17",
+ "UTCL2_ROUTER_GROUP18_VML2_REQ_FIFO18",
+ "UTCL2_ROUTER_GROUP19_VML2_REQ_FIFO19",
+ "UTCL2_ROUTER_GROUP20_VML2_REQ_FIFO20",
+ "UTCL2_ROUTER_GROUP21_VML2_REQ_FIFO21",
+ "UTCL2_ROUTER_GROUP22_VML2_REQ_FIFO22",
+ "UTCL2_ROUTER_GROUP23_VML2_REQ_FIFO23",
+ "UTCL2_ROUTER_GROUP24_VML2_REQ_FIFO24",
+};
+
+static const char * const atc_l2_cache_2m_mems[] = {
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
+};
+
+static const char * const atc_l2_cache_4k_mems[] = {
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
+};
+
+static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
+ struct ras_err_data *err_data)
+{
+ uint32_t i, data;
+ uint32_t sec_count, ded_count;
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0);
+
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL);
+
+ sec_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ vml2_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ vml2_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL);
+
+ sec_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL,
+ SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ vml2_walker_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL,
+ DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ vml2_walker_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) {
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL);
+
+ sec_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ utcl2_router_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ utcl2_router_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL);
+
+ sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL,
+ SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ atc_l2_cache_2m_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL,
+ DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ atc_l2_cache_2m_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL);
+
+ sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL,
+ SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ atc_l2_cache_4k_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL,
+ DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ atc_l2_cache_4k_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+
+ return 0;
+}
+
+static int gfx_v9_4_ras_error_count(const struct soc15_reg_entry *reg,
+ uint32_t se_id, uint32_t inst_id,
+ uint32_t value, uint32_t *sec_count,
+ uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_ras_fields); i++) {
+ if (gfx_v9_4_ras_fields[i].reg_offset != reg->reg_offset ||
+ gfx_v9_4_ras_fields[i].seg != reg->seg ||
+ gfx_v9_4_ras_fields[i].inst != reg->inst)
+ continue;
+
+ sec_cnt = (value & gfx_v9_4_ras_fields[i].sec_count_mask) >>
+ gfx_v9_4_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
+ gfx_v9_4_ras_fields[i].name, se_id, inst_id,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value & gfx_v9_4_ras_fields[i].ded_count_mask) >>
+ gfx_v9_4_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
+ gfx_v9_4_ras_fields[i].name, se_id, inst_id,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i, j, k;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return -EINVAL;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance;
+ k++) {
+ gfx_v9_4_select_se_sh(adev, j, 0, k);
+ reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_edc_counter_regs[i]));
+ if (reg_value)
+ gfx_v9_4_ras_error_count(
+ &gfx_v9_4_edc_counter_regs[i],
+ j, k, reg_value, &sec_count,
+ &ded_count);
+ }
+ }
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+
+ gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v9_4_query_utc_edc_status(adev, err_data);
+
+ return 0;
+}
+
+void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev)
+{
+ int i, j, k;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance;
+ k++) {
+ gfx_v9_4_select_se_sh(adev, j, 0x0, k);
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_edc_counter_regs[i]));
+ }
+ }
+ }
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0);
+
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) {
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL);
+ }
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+}
+
+int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+ struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
+ int ret;
+ struct ta_ras_trigger_error_input block_info = { 0 };
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return -EINVAL;
+
+ block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
+ block_info.sub_block_index = info->head.sub_block_index;
+ block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
+ block_info.address = info->address;
+ block_info.value = info->value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
new file mode 100644
index 000000000000..2e3f6f755ad4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_4_H__
+#define __GFX_V9_4_H__
+
+void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev);
+
+int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status);
+
+int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
+ void *inject_if);
+
+#endif /* __GFX_V9_4_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 15986748f59f..1a2f18b908fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -75,40 +75,45 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
- /* Program the system aperture low logical page number. */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
-
- if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
- /*
- * Raven2 has a HW issue that it is unable to use the vram which
- * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
- * workaround that increase system aperture high address (add 1)
- * to get rid of the VM fault and hardware hang.
- */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- max((adev->gmc.fb_end >> 18) + 0x1,
- adev->gmc.agp_end >> 18));
- else
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
-
- /* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
- + adev->vm_manager.vram_base_offset;
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
- (u32)(value >> 12));
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
- (u32)(value >> 44));
-
- /* Program "protection fault". */
- WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
- (u32)(adev->dummy_page_addr >> 12));
- WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
- (u32)((u64)adev->dummy_page_addr >> 44));
-
- WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
- ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
+ /*
+ * Raven2 has a HW issue that it is unable to use the
+ * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
+ * So here is the workaround that increase system
+ * aperture high address (add 1) to get rid of the VM
+ * fault and hardware hang.
+ */
+ WREG32_SOC15_RLC(GC, 0,
+ mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.fb_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15_RLC(
+ GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ }
}
static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
@@ -178,6 +183,8 @@ static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp);
}
@@ -262,7 +269,7 @@ static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
{
- if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) {
/*
* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
* VF copy registers so vbios post doesn't program them, for
@@ -278,10 +285,12 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
gfxhub_v1_0_init_gart_aperture_regs(adev);
gfxhub_v1_0_init_system_aperture_regs(adev);
gfxhub_v1_0_init_tlb_regs(adev);
- gfxhub_v1_0_init_cache_regs(adev);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_0_init_cache_regs(adev);
gfxhub_v1_0_enable_system_domain(adev);
- gfxhub_v1_0_disable_identity_aperture(adev);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_0_disable_identity_aperture(adev);
gfxhub_v1_0_setup_vmid_config(adev);
gfxhub_v1_0_program_invalidation(adev);
@@ -357,7 +366,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
void gfxhub_v1_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
@@ -365,6 +374,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
index 5e9ab8eb214a..c0ab71df0d90 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
@@ -33,16 +33,31 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
u32 max_region =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+ u32 max_num_physical_nodes = 0;
+ u32 max_physical_node_id = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ max_num_physical_nodes = 4;
+ max_physical_node_id = 3;
+ break;
+ case CHIP_ARCTURUS:
+ max_num_physical_nodes = 8;
+ max_physical_node_id = 7;
+ break;
+ default:
+ return -EINVAL;
+ }
/* PF_MAX_REGION=0 means xgmi is disabled */
if (max_region) {
adev->gmc.xgmi.num_physical_nodes = max_region + 1;
- if (adev->gmc.xgmi.num_physical_nodes > 4)
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
return -EINVAL;
adev->gmc.xgmi.physical_node_id =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
- if (adev->gmc.xgmi.physical_node_id > 3)
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
return -EINVAL;
adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index d605b4963f8a..b70c7b483c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -46,21 +46,25 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev)
return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24;
}
-static void gfxhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev)
+void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
{
- uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
+ /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */
+ int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
+ - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ offset * vmid, lower_32_bits(page_table_base));
- WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
- lower_32_bits(value));
-
- WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
- upper_32_bits(value));
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ offset * vmid, upper_32_bits(page_table_base));
}
static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
- gfxhub_v2_0_init_gart_pt_regs(adev);
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
@@ -140,7 +144,7 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
- tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp);
@@ -151,6 +155,15 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp);
tmp = mmGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp);
tmp = mmGCVM_L2_CNTL4_DEFAULT;
@@ -166,6 +179,8 @@ static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp);
}
@@ -333,7 +348,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
void gfxhub_v2_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
@@ -341,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
index 06807940748b..392b8cd94fc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
@@ -31,5 +31,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value);
void gfxhub_v2_0_init(struct amdgpu_device *adev);
u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev);
+void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 5eeb72fcc123..9775eca6fe43 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
#include "hdp/hdp_5_0_0_sh_mask.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
#include "navi10_enum.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "nbio_v2_3.h"
@@ -62,7 +65,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_vmhub *hub;
u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i;
- bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ bits[AMDGPU_GFXHUB_0] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
@@ -70,7 +73,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
- bits[AMDGPU_MMHUB] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ bits[AMDGPU_MMHUB_0] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
@@ -81,39 +84,39 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
/* MM HUB */
- hub = &adev->vmhub[AMDGPU_MMHUB];
+ hub = &adev->vmhub[AMDGPU_MMHUB_0];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
tmp = RREG32(reg);
- tmp &= ~bits[AMDGPU_MMHUB];
+ tmp &= ~bits[AMDGPU_MMHUB_0];
WREG32(reg, tmp);
}
/* GFX HUB */
- hub = &adev->vmhub[AMDGPU_GFXHUB];
+ hub = &adev->vmhub[AMDGPU_GFXHUB_0];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
tmp = RREG32(reg);
- tmp &= ~bits[AMDGPU_GFXHUB];
+ tmp &= ~bits[AMDGPU_GFXHUB_0];
WREG32(reg, tmp);
}
break;
case AMDGPU_IRQ_STATE_ENABLE:
/* MM HUB */
- hub = &adev->vmhub[AMDGPU_MMHUB];
+ hub = &adev->vmhub[AMDGPU_MMHUB_0];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
tmp = RREG32(reg);
- tmp |= bits[AMDGPU_MMHUB];
+ tmp |= bits[AMDGPU_MMHUB_0];
WREG32(reg, tmp);
}
/* GFX HUB */
- hub = &adev->vmhub[AMDGPU_GFXHUB];
+ hub = &adev->vmhub[AMDGPU_GFXHUB_0];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
tmp = RREG32(reg);
- tmp |= bits[AMDGPU_GFXHUB];
+ tmp |= bits[AMDGPU_GFXHUB_0];
WREG32(reg, tmp);
}
break;
@@ -136,22 +139,53 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if (entry->vmid_src == AMDGPU_GFXHUB_0)
+ RREG32(hub->vm_l2_pro_fault_status);
+
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
}
if (printk_ratelimit()) {
+ struct amdgpu_task_info task_info;
+
+ memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
dev_err(adev->dev,
- "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, "
+ "for process %s pid %d thread %s pid %d)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid);
- dev_err(adev->dev, " at page 0x%016llx from %d\n",
+ entry->pasid, task_info.process_name, task_info.tgid,
+ task_info.task_name, task_info.pid);
+ dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
addr, entry->client_id);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev)) {
dev_err(adev->dev,
- "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+ }
}
return 0;
@@ -188,6 +222,34 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
return req;
}
+/**
+ * gmc_v10_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1) &&
+ (!amdgpu_sriov_vf(adev)));
+}
+
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -198,13 +260,44 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
unsigned int vmhub, uint32_t flush_type)
{
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
- u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+ u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+ u32 tmp;
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned int i;
- WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+ spin_lock(&adev->gmc.invalidate_lock);
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
+
+ /*
+ * Issue a dummy read to wait for the ACK register to be cleared
+ * to avoid a false ACK due to the new fast GRBM interface.
+ */
+ if (vmhub == AMDGPU_GFXHUB_0)
+ RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
@@ -216,6 +309,16 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
udelay(1);
}
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
if (i < adev->usec_timeout)
return;
@@ -230,8 +333,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
*
* Flush the TLB for the requested page table.
*/
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type)
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct dma_fence *fence;
@@ -240,15 +343,23 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
int r;
/* flush hdp cache */
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
mutex_lock(&adev->mman.gtt_window_lock);
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0);
+ if (vmhub == AMDGPU_MMHUB_0) {
+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ return;
+ }
+
+ BUG_ON(vmhub != AMDGPU_GFXHUB_0);
+
if (!adev->mman.buffer_funcs_enabled ||
!adev->ib_pool_ready ||
- adev->in_gpu_reset) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0);
+ adev->in_gpu_reset ||
+ ring->sched.ready == false) {
+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
mutex_unlock(&adev->mman.gtt_window_lock);
return;
}
@@ -264,6 +375,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
job->vm_needs_flush = true;
+ job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
@@ -285,24 +397,102 @@ error_alloc:
DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
}
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid, i;
+ signed long r;
+ uint32_t seq;
+ uint16_t queried_pasid;
+ bool ret;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ if (amdgpu_emu_mode == 0 && ring->sched.ready) {
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ /* 2 dwords flush + 8 dwords fence */
+ amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, flush_type, all_hub);
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+ }
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried_pasid);
+ if (ret && queried_pasid == pasid) {
+ if (all_hub) {
+ for (i = 0; i < adev->num_vmhubs; i++)
+ gmc_v10_0_flush_gpu_tlb(adev, vmid,
+ i, flush_type);
+ } else {
+ gmc_v10_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB_0, flush_type);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
lower_32_bits(pd_addr));
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
upper_32_bits(pd_addr));
- amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+ hub->vm_inv_eng0_ack + eng,
+ req, 1 << vmid);
- /* wait for the invalidate to complete */
- amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
- 1 << vmid, 1 << vmid);
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
return pd_addr;
}
@@ -313,7 +503,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
- if (ring->funcs->vmhub == AMDGPU_GFXHUB)
+ if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
@@ -352,43 +542,23 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
* 1 system
* 0 valid
*/
-static uint64_t gmc_v10_0_get_vm_pte_flags(struct amdgpu_device *adev,
- uint32_t flags)
-{
- uint64_t pte_flag = 0;
- if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
- pte_flag |= AMDGPU_PTE_EXECUTABLE;
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
-
- switch (flags & AMDGPU_VM_MTYPE_MASK) {
+static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
+{
+ switch (flags) {
case AMDGPU_VM_MTYPE_DEFAULT:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
case AMDGPU_VM_MTYPE_NC:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
case AMDGPU_VM_MTYPE_WC:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
case AMDGPU_VM_MTYPE_CC:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
case AMDGPU_VM_MTYPE_UC:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
default:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
}
-
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- return pte_flag;
}
static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
@@ -415,12 +585,33 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
}
+static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+
+ *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
+ *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+
+ if (mapping->flags & AMDGPU_PTE_PRT) {
+ *flags |= AMDGPU_PTE_PRT;
+ *flags |= AMDGPU_PTE_SNOOPED;
+ *flags |= AMDGPU_PTE_LOG;
+ *flags |= AMDGPU_PTE_SYSTEM;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+}
+
static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
- .get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags,
- .get_vm_pde = gmc_v10_0_get_vm_pde
+ .map_mtype = gmc_v10_0_map_mtype,
+ .get_vm_pde = gmc_v10_0_get_vm_pde,
+ .get_vm_pte = gmc_v10_0_get_vm_pte
};
static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -449,22 +640,13 @@ static int gmc_v10_0_early_init(void *handle)
static int gmc_v10_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
- unsigned i;
+ int r;
- for(i = 0; i < adev->num_rings; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
- unsigned vmhub = ring->funcs->vmhub;
+ amdgpu_bo_late_init(adev);
- ring->vm_inv_eng = vm_inv_eng[vmhub]++;
- dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
- ring->idx, ring->name, ring->vm_inv_eng,
- ring->funcs->vmhub);
- }
-
- /* Engine 17 is used for GART flushes */
- for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
- BUG_ON(vm_inv_eng[i] > 17);
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+ if (r)
+ return r;
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}
@@ -474,8 +656,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
{
u64 base = 0;
- if (!amdgpu_sriov_vf(adev))
- base = gfxhub_v2_0_get_fb_location(adev);
+ base = gfxhub_v2_0_get_fb_location(adev);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_gmc_gart_location(adev, mc);
@@ -495,24 +676,13 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
{
- int chansize, numchan;
-
- if (!amdgpu_emu_mode)
- adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
- else {
- /* hard code vram_width for emulation */
- chansize = 128;
- numchan = 1;
- adev->gmc.vram_width = numchan * chansize;
- }
-
/* Could aper size report 0 ? */
adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
/* size in MB on si */
adev->gmc.mc_vram_size =
- adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
adev->gmc.visible_vram_size = adev->gmc.aper_size;
@@ -524,6 +694,8 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@@ -589,8 +761,7 @@ static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
static int gmc_v10_0_sw_init(void *handle)
{
- int r;
- int dma_bits;
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gfxhub_v2_0_init(adev);
@@ -598,12 +769,23 @@ static int gmc_v10_0_sw_init(void *handle)
spin_lock_init(&adev->gmc.invalidate_lock);
- adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ if (!amdgpu_emu_mode)
+ adev->gmc.vram_width = vram_width;
+ else
+ adev->gmc.vram_width = 1 * 128; /* numchan * chansize */
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
+ adev->num_vmhubs = 2;
/*
* To fulfill 4-level page support,
- * vm size is 256TB (48bit), maximum size of Navi10,
+ * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12,
* block size 512 (9bit)
*/
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
@@ -616,6 +798,10 @@ static int gmc_v10_0_sw_init(void *handle)
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
UTCL2_1_0__SRCID__FAULT,
&adev->gmc.vm_fault);
@@ -628,35 +814,10 @@ static int gmc_v10_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- /*
- * Reserve 8M stolen memory for navi10 like vega10
- * TODO: will check if it's really needed on asic.
- */
- if (amdgpu_emu_mode == 1)
- adev->gmc.stolen_size = 0;
- else
- adev->gmc.stolen_size = 9 * 1024 *1024;
-
- /*
- * Set DMA mask + need_dma32 flags.
- * PCIE - can handle 44-bits.
- * IGP - can handle 44-bits
- * PCI - dma32 for legacy pci gart, 44 bits on navi10
- */
- adev->need_dma32 = false;
- dma_bits = adev->need_dma32 ? 32 : 44;
-
- r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
- adev->need_dma32 = true;
- dma_bits = 32;
printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
- }
-
- r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
- if (r) {
- pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
- printk(KERN_WARNING "amdgpu: No coherent DMA available.\n");
+ return r;
}
r = gmc_v10_0_mc_init(adev);
@@ -680,8 +841,8 @@ static int gmc_v10_0_sw_init(void *handle)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
- adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
- adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
amdgpu_vm_manager_init(adev);
@@ -717,6 +878,8 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
break;
default:
break;
@@ -759,14 +922,15 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
/* Flush HDP after it is initialized */
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
gfxhub_v2_0_set_fault_enable_default(adev, value);
mmhub_v2_0_set_fault_enable_default(adev, value);
- gmc_v10_0_flush_gpu_tlb(adev, 0, 0);
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index ca8dbe91cc8b..b205039350b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -362,8 +362,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
return 0;
}
-static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type)
+static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}
@@ -386,27 +386,20 @@ static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
return pd_addr;
}
-static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
- uint32_t flags)
-{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- return pte_flag;
-}
-
static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
}
+static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags &= ~AMDGPU_PTE_PRT;
+}
+
static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
@@ -571,7 +564,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
else
gmc_v6_0_set_fault_enable_default(adev, true);
- gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
+ gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0);
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
@@ -839,9 +832,10 @@ static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
static int gmc_v6_0_sw_init(void *handle)
{
int r;
- int dma_bits;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->num_vmhubs = 1;
+
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
} else {
@@ -862,20 +856,12 @@ static int gmc_v6_0_sw_init(void *handle)
adev->gmc.mc_mask = 0xffffffffffULL;
- adev->need_dma32 = false;
- dma_bits = adev->need_dma32 ? 32 : 40;
- r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
- adev->need_dma32 = true;
- dma_bits = 32;
dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
+ return r;
}
- r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
- if (r) {
- pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
- dev_warn(adev->dev, "amdgpu: No coherent DMA available.\n");
- }
- adev->need_swiotlb = drm_need_swiotlb(dma_bits);
+ adev->need_swiotlb = drm_need_swiotlb(44);
r = gmc_v6_0_init_microcode(adev);
if (r) {
@@ -1160,7 +1146,7 @@ static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = {
.emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb,
.set_prt = gmc_v6_0_set_prt,
.get_vm_pde = gmc_v6_0_get_vm_pde,
- .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
+ .get_vm_pte = gmc_v6_0_get_vm_pte,
};
static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 57f80065d57a..9da9596a3638 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -381,7 +381,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU) {
+ if (adev->flags & AMD_IS_APU &&
+ adev->gmc.real_vram_size > adev->gmc.aper_size) {
adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
@@ -418,6 +419,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid;
+ unsigned int tmp;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -433,8 +466,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
*
* Flush the TLB for the requested page table (CIK).
*/
-static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type)
+static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@@ -463,27 +496,20 @@ static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
}
-static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
- uint32_t flags)
-{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- return pte_flag;
-}
-
static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
}
+static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags &= ~AMDGPU_PTE_PRT;
+}
+
/**
* gmc_v8_0_set_fault_enable_default - update VM fault handling
*
@@ -677,7 +703,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmCHUB_CONTROL, tmp);
}
- gmc_v7_0_flush_gpu_tlb(adev, 0, 0);
+ gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
@@ -959,9 +985,10 @@ static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
static int gmc_v7_0_sw_init(void *handle)
{
int r;
- int dma_bits;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->num_vmhubs = 1;
+
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
} else {
@@ -990,25 +1017,12 @@ static int gmc_v7_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
- /* set DMA mask + need_dma32 flags.
- * PCIE - can handle 40-bits.
- * IGP - can handle 40-bits
- * PCI - dma32 for legacy pci gart, 40 bits on newer asics
- */
- adev->need_dma32 = false;
- dma_bits = adev->need_dma32 ? 32 : 40;
- r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));
if (r) {
- adev->need_dma32 = true;
- dma_bits = 32;
pr_warn("amdgpu: No suitable DMA available\n");
+ return r;
}
- r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
- if (r) {
- pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
- pr_warn("amdgpu: No coherent DMA available\n");
- }
- adev->need_swiotlb = drm_need_swiotlb(dma_bits);
+ adev->need_swiotlb = drm_need_swiotlb(40);
r = gmc_v7_0_init_microcode(adev);
if (r) {
@@ -1352,11 +1366,12 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
.set_prt = gmc_v7_0_set_prt,
- .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
- .get_vm_pde = gmc_v7_0_get_vm_pde
+ .get_vm_pde = gmc_v7_0_get_vm_pde,
+ .get_vm_pte = gmc_v7_0_get_vm_pte
};
static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 9238280d1ff7..27d83204fa2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid;
+ unsigned int tmp;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -635,8 +668,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
*
* Flush the TLB for the requested page table (VI).
*/
-static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type)
+static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@@ -686,29 +719,21 @@ static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
* 0 valid
*/
-static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
- uint32_t flags)
-{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
- pte_flag |= AMDGPU_PTE_EXECUTABLE;
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- return pte_flag;
-}
-
static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
}
+static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ *flags &= ~AMDGPU_PTE_PRT;
+}
+
/**
* gmc_v8_0_set_fault_enable_default - update VM fault handling
*
@@ -921,7 +946,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
else
gmc_v8_0_set_fault_enable_default(adev, true);
- gmc_v8_0_flush_gpu_tlb(adev, 0, 0);
+ gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
@@ -1079,9 +1104,10 @@ static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
static int gmc_v8_0_sw_init(void *handle)
{
int r;
- int dma_bits;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->num_vmhubs = 1;
+
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
} else {
@@ -1116,25 +1142,12 @@ static int gmc_v8_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
- /* set DMA mask + need_dma32 flags.
- * PCIE - can handle 40-bits.
- * IGP - can handle 40-bits
- * PCI - dma32 for legacy pci gart, 40 bits on newer asics
- */
- adev->need_dma32 = false;
- dma_bits = adev->need_dma32 ? 32 : 40;
- r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));
if (r) {
- adev->need_dma32 = true;
- dma_bits = 32;
pr_warn("amdgpu: No suitable DMA available\n");
+ return r;
}
- r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
- if (r) {
- pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
- pr_warn("amdgpu: No coherent DMA available\n");
- }
- adev->need_swiotlb = drm_need_swiotlb(dma_bits);
+ adev->need_swiotlb = drm_need_swiotlb(40);
r = gmc_v8_0_init_microcode(adev);
if (r) {
@@ -1720,11 +1733,12 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
.set_prt = gmc_v8_0_set_prt,
- .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
- .get_vm_pde = gmc_v8_0_get_vm_pde
+ .get_vm_pde = gmc_v8_0_get_vm_pde,
+ .get_vm_pte = gmc_v8_0_get_vm_pte
};
static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 73f3b79ab131..90216abf14a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -38,20 +38,27 @@
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
+#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
+#include "mmhub_v9_4.h"
+#include "umc_v6_1.h"
+#include "umc_v6_0.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
@@ -202,6 +209,11 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
{
u32 bits, i, tmp, reg;
+ /* Devices newer then VEGA10/12 shall have these programming
+ sequences performed by PSP BL */
+ if (adev->asic_type >= CHIP_VEGA20)
+ return 0;
+
bits = 0x7f;
switch (state) {
@@ -240,32 +252,6 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
return 0;
}
-static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
- struct amdgpu_iv_entry *entry)
-{
- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- amdgpu_ras_reset_gpu(adev, 0);
- return AMDGPU_RAS_UE;
-}
-
-static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- struct ras_common_if *ras_if = adev->gmc.ras_if;
- struct ras_dispatch_if ih_data = {
- .entry = entry,
- };
-
- if (!ras_if)
- return 0;
-
- ih_data.head = *ras_if;
-
- amdgpu_ras_interrupt_dispatch(adev, &ih_data);
- return 0;
-}
-
static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
@@ -284,7 +270,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) {
+ for (j = 0; j < adev->num_vmhubs; j++) {
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
@@ -295,7 +281,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
}
break;
case AMDGPU_IRQ_STATE_ENABLE:
- for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) {
+ for (j = 0; j < adev->num_vmhubs; j++) {
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
@@ -315,10 +301,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
+ struct amdgpu_vmhub *hub;
bool retry_fault = !!(entry->src_data[1] & 0x80);
uint32_t status = 0;
u64 addr;
+ char hub_name[10];
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
@@ -327,8 +314,31 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
entry->timestamp))
return 1; /* This also prevents sending it to KFD */
+ if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
+ snprintf(hub_name, sizeof(hub_name), "mmhub0");
+ hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
+ snprintf(hub_name, sizeof(hub_name), "mmhub1");
+ hub = &adev->vmhub[AMDGPU_MMHUB_1];
+ } else {
+ snprintf(hub_name, sizeof(hub_name), "gfxhub0");
+ hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ }
+
/* If it's the first fault for this address, process it normally */
+ if (retry_fault && !in_interrupt() &&
+ amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+ return 1; /* This also prevents sending it to KFD */
+
if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if (entry->vmid_src == AMDGPU_GFXHUB_0)
+ RREG32(hub->vm_l2_pro_fault_status);
+
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
}
@@ -342,17 +352,33 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
dev_err(adev->dev,
"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
"pasid:%u, for process %s pid %d thread %s pid %d)\n",
- entry->vmid_src ? "mmhub" : "gfxhub",
- retry_fault ? "retry" : "no-retry",
+ hub_name, retry_fault ? "retry" : "no-retry",
entry->src_id, entry->ring_id, entry->vmid,
entry->pasid, task_info.process_name, task_info.tgid,
task_info.task_name, task_info.pid);
- dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n",
+ dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
addr, entry->client_id);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev)) {
dev_err(adev->dev,
"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ VM_L2_PROTECTION_FAULT_STATUS, RW));
+
+ }
}
return 0;
@@ -366,7 +392,7 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
.set = gmc_v9_0_ecc_interrupt_state,
- .process = gmc_v9_0_process_ecc_irq,
+ .process = amdgpu_umc_process_ecc_irq,
};
static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
@@ -374,8 +400,10 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.num_types = 1;
adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
- adev->gmc.ecc_irq.num_types = 1;
- adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
+ }
}
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
@@ -397,6 +425,36 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
return req;
}
+/**
+ * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1) &&
+ (!amdgpu_sriov_vf(adev)) &&
+ (!(adev->asic_type == CHIP_RAVEN &&
+ adev->rev_id < 0x8 &&
+ adev->pdev->device == 0x15d8)));
+}
+
+static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -413,54 +471,172 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
*
* Flush the TLB for the requested page table using certain type.
*/
-static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type)
+static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
const unsigned eng = 17;
- unsigned i, j;
+ u32 j, inv_req, tmp;
+ struct amdgpu_vmhub *hub;
- for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
- struct amdgpu_vmhub *hub = &adev->vmhub[i];
- u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+ BUG_ON(vmhub >= adev->num_vmhubs);
- /* This is necessary for a HW workaround under SRIOV as well
- * as GFXOFF under bare metal
- */
- if (adev->gfx.kiq.ring.sched.ready &&
- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
- !adev->in_gpu_reset) {
- uint32_t req = hub->vm_inv_eng0_req + eng;
- uint32_t ack = hub->vm_inv_eng0_ack + eng;
-
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
- 1 << vmid);
- continue;
- }
+ hub = &adev->vmhub[vmhub];
+ inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+
+ /* This is necessary for a HW workaround under SRIOV as well
+ * as GFXOFF under bare metal
+ */
+ if (adev->gfx.kiq.ring.sched.ready &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+ !adev->in_gpu_reset) {
+ uint32_t req = hub->vm_inv_eng0_req + eng;
+ uint32_t ack = hub->vm_inv_eng0_ack + eng;
+
+ amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid);
+ return;
+ }
+
+ spin_lock(&adev->gmc.invalidate_lock);
+
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
- spin_lock(&adev->gmc.invalidate_lock);
- WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
for (j = 0; j < adev->usec_timeout; j++) {
- tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
- if (tmp & (1 << vmid))
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+ if (tmp & 0x1)
break;
udelay(1);
}
- spin_unlock(&adev->gmc.invalidate_lock);
- if (j < adev->usec_timeout)
- continue;
- DRM_ERROR("Timeout waiting for VM flush ACK!\n");
+ if (j >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
+
+ /*
+ * Issue a dummy read to wait for the ACK register to be cleared
+ * to avoid a false ACK due to the new fast GRBM interface.
+ */
+ if (vmhub == AMDGPU_GFXHUB_0)
+ RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+ if (tmp & (1 << vmid))
+ break;
+ udelay(1);
}
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
+ if (j < adev->usec_timeout)
+ return;
+
+ DRM_ERROR("Timeout waiting for VM flush ACK!\n");
+}
+
+/**
+ * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid, i;
+ signed long r;
+ uint32_t seq;
+ uint16_t queried_pasid;
+ bool ret;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ if (ring->sched.ready) {
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ /* 2 dwords flush + 8 dwords fence */
+ amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, flush_type, all_hub);
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+ }
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried_pasid);
+ if (ret && queried_pasid == pasid) {
+ if (all_hub) {
+ for (i = 0; i < adev->num_vmhubs; i++)
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ i, flush_type);
+ } else {
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB_0, flush_type);
+ }
+ break;
+ }
+ }
+
+ return 0;
+
}
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
struct amdgpu_device *adev = ring->adev;
struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
lower_32_bits(pd_addr));
@@ -471,6 +647,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
hub->vm_inv_eng0_ack + eng,
req, 1 << vmid);
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
+
return pd_addr;
}
@@ -480,7 +664,11 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
- if (ring->funcs->vmhub == AMDGPU_GFXHUB)
+ /* Do nothing because there's no lut register for mmhub1. */
+ if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ return;
+
+ if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
@@ -520,44 +708,25 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
* 0 valid
*/
-static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
- uint32_t flags)
+static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
- pte_flag |= AMDGPU_PTE_EXECUTABLE;
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
-
- switch (flags & AMDGPU_VM_MTYPE_MASK) {
+ switch (flags) {
case AMDGPU_VM_MTYPE_DEFAULT:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
case AMDGPU_VM_MTYPE_NC:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
case AMDGPU_VM_MTYPE_WC:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
+ case AMDGPU_VM_MTYPE_RW:
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
case AMDGPU_VM_MTYPE_CC:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
case AMDGPU_VM_MTYPE_UC:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
default:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
}
-
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- return pte_flag;
}
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
@@ -584,12 +753,35 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
}
+static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+
+ *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
+ *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
+
+ if (mapping->flags & AMDGPU_PTE_PRT) {
+ *flags |= AMDGPU_PTE_PRT;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+
+ if (adev->asic_type == CHIP_ARCTURUS &&
+ !(*flags & AMDGPU_PTE_SYSTEM) &&
+ mapping->bo_va->is_xgmi)
+ *flags |= AMDGPU_PTE_SNOOPED;
+}
+
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
- .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
- .get_vm_pde = gmc_v9_0_get_vm_pde
+ .map_mtype = gmc_v9_0_map_mtype,
+ .get_vm_pde = gmc_v9_0_get_vm_pde,
+ .get_vm_pte = gmc_v9_0_get_vm_pte
};
static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -597,12 +789,55 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}
+static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ adev->umc.funcs = &umc_v6_0_funcs;
+ break;
+ case CHIP_VEGA20:
+ adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
+ adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+ adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
+ adev->umc.funcs = &umc_v6_1_funcs;
+ break;
+ case CHIP_ARCTURUS:
+ adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
+ adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
+ adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
+ adev->umc.funcs = &umc_v6_1_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ adev->mmhub.funcs = &mmhub_v1_0_funcs;
+ break;
+ case CHIP_ARCTURUS:
+ adev->mmhub.funcs = &mmhub_v9_4_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
static int gmc_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gmc_v9_0_set_gmc_funcs(adev);
gmc_v9_0_set_irq_funcs(adev);
+ gmc_v9_0_set_umc_funcs(adev);
+ gmc_v9_0_set_mmhub_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
@@ -629,6 +864,8 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_RAVEN:
+ case CHIP_ARCTURUS:
+ case CHIP_RENOIR:
return true;
case CHIP_VEGA12:
case CHIP_VEGA20:
@@ -637,136 +874,15 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
}
}
-static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring;
- unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
- {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP};
- unsigned i;
- unsigned vmhub, inv_eng;
-
- for (i = 0; i < adev->num_rings; ++i) {
- ring = adev->rings[i];
- vmhub = ring->funcs->vmhub;
-
- inv_eng = ffs(vm_inv_engs[vmhub]);
- if (!inv_eng) {
- dev_err(adev->dev, "no VM inv eng for ring %s\n",
- ring->name);
- return -EINVAL;
- }
-
- ring->vm_inv_eng = inv_eng - 1;
- vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
-
- dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
- ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
- }
-
- return 0;
-}
-
-static int gmc_v9_0_ecc_late_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ras_common_if **ras_if = &adev->gmc.ras_if;
- struct ras_ih_if ih_info = {
- .cb = gmc_v9_0_process_ras_data_cb,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = "umc_err_count",
- .debugfs_name = "umc_err_inject",
- };
- struct ras_common_if ras_block = {
- .block = AMDGPU_RAS_BLOCK__UMC,
- .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
- .sub_block_index = 0,
- .name = "umc",
- };
- int r;
-
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
- amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
- return 0;
- }
- /* handle resume path. */
- if (*ras_if) {
- /* resend ras TA enable cmd during resume.
- * prepare to handle failure.
- */
- ih_info.head = **ras_if;
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- /* request a gpu reset. will run again. */
- amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__UMC);
- return 0;
- }
- /* fail to enable ras, cleanup all. */
- goto irq;
- }
- /* enable successfully. continue. */
- goto resume;
- }
-
- *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
- if (!*ras_if)
- return -ENOMEM;
-
- **ras_if = ras_block;
-
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__UMC);
- r = 0;
- }
- goto feature;
- }
-
- ih_info.head = **ras_if;
- fs_info.head = **ras_if;
-
- r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
- if (r)
- goto interrupt;
-
- amdgpu_ras_debugfs_create(adev, &fs_info);
-
- r = amdgpu_ras_sysfs_create(adev, &fs_info);
- if (r)
- goto sysfs;
-resume:
- r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
- if (r)
- goto irq;
-
- return 0;
-irq:
- amdgpu_ras_sysfs_remove(adev, *ras_if);
-sysfs:
- amdgpu_ras_debugfs_remove(adev, *ras_if);
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-interrupt:
- amdgpu_ras_feature_enable(adev, *ras_if, 0);
-feature:
- kfree(*ras_if);
- *ras_if = NULL;
- return r;
-}
-
-
static int gmc_v9_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool r;
+ int r;
if (!gmc_v9_0_keep_stolen_memory(adev))
amdgpu_bo_late_init(adev);
- r = gmc_v9_0_allocate_vm_inv_eng(adev);
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
if (r)
return r;
/* Check if ecc is available */
@@ -774,11 +890,12 @@ static int gmc_v9_0_late_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
r = amdgpu_atomfirmware_mem_ecc_supported(adev);
if (!r) {
DRM_INFO("ECC is not present.\n");
- if (adev->df_funcs->enable_ecc_force_par_wr_rmw)
- adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false);
+ if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
+ adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
} else {
DRM_INFO("ECC is active.\n");
}
@@ -795,7 +912,7 @@ static int gmc_v9_0_late_init(void *handle)
}
}
- r = gmc_v9_0_ecc_late_init(handle);
+ r = amdgpu_gmc_ras_late_init(adev);
if (r)
return r;
@@ -806,14 +923,17 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
{
u64 base = 0;
- if (!amdgpu_sriov_vf(adev))
+
+ if (adev->asic_type == CHIP_ARCTURUS)
+ base = mmhub_v9_4_get_fb_location(adev);
+ else if (!amdgpu_sriov_vf(adev))
base = mmhub_v1_0_get_fb_location(adev);
+
/* add the xgmi offset of the physical node */
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
amdgpu_gmc_vram_location(adev, mc, base);
amdgpu_gmc_gart_location(adev, mc);
- if (!amdgpu_sriov_vf(adev))
- amdgpu_gmc_agp_location(adev, mc);
+ amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
@@ -833,33 +953,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
- int chansize, numchan;
int r;
- if (amdgpu_sriov_vf(adev)) {
- /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
- * and DF related registers is not readable, seems hardcord is the
- * only way to set the correct vram_width
- */
- adev->gmc.vram_width = 2048;
- } else if (amdgpu_emu_mode != 1) {
- adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
- }
-
- if (!adev->gmc.vram_width) {
- /* hbm memory channel size */
- if (adev->flags & AMD_IS_APU)
- chansize = 64;
- else
- chansize = 128;
-
- numchan = adev->df_funcs->get_hbm_channel_number(adev);
- adev->gmc.vram_width = numchan * chansize;
- }
-
/* size in MB on si */
adev->gmc.mc_vram_size =
- adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
if (!(adev->flags & AMD_IS_APU)) {
@@ -887,10 +985,12 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
case CHIP_VEGA10: /* all engines support GPUVM */
case CHIP_VEGA12: /* all engines support GPUVM */
case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
default:
adev->gmc.gart_size = 512ULL << 20;
break;
case CHIP_RAVEN: /* DCE SG support */
+ case CHIP_RENOIR:
adev->gmc.gart_size = 1024ULL << 20;
break;
}
@@ -923,7 +1023,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
- u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
+ u32 d1vga_control;
unsigned size;
/*
@@ -933,6 +1033,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
if (gmc_v9_0_keep_stolen_memory(adev))
return 9 * 1024 * 1024;
+ d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
} else {
@@ -940,6 +1041,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_RAVEN:
+ case CHIP_RENOIR:
viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
size = (REG_GET_FIELD(viewport,
HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
@@ -967,18 +1069,47 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
static int gmc_v9_0_sw_init(void *handle)
{
- int r;
- int dma_bits;
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gfxhub_v1_0_init(adev);
- mmhub_v1_0_init(adev);
+ if (adev->asic_type == CHIP_ARCTURUS)
+ mmhub_v9_4_init(adev);
+ else
+ mmhub_v1_0_init(adev);
spin_lock_init(&adev->gmc.invalidate_lock);
- adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ if (amdgpu_sriov_vf(adev))
+ /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
+ * and DF related registers is not readable, seems hardcord is the
+ * only way to set the correct vram_width
+ */
+ adev->gmc.vram_width = 2048;
+ else if (amdgpu_emu_mode != 1)
+ adev->gmc.vram_width = vram_width;
+
+ if (!adev->gmc.vram_width) {
+ int chansize, numchan;
+
+ /* hbm memory channel size */
+ if (adev->flags & AMD_IS_APU)
+ chansize = 64;
+ else
+ chansize = 128;
+
+ numchan = adev->df.funcs->get_hbm_channel_number(adev);
+ adev->gmc.vram_width = numchan * chansize;
+ }
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
switch (adev->asic_type) {
case CHIP_RAVEN:
+ adev->num_vmhubs = 2;
+
if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
} else {
@@ -991,6 +1122,10 @@ static int gmc_v9_0_sw_init(void *handle)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
+ case CHIP_RENOIR:
+ adev->num_vmhubs = 2;
+
+
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size of Vega10,
@@ -1002,6 +1137,12 @@ static int gmc_v9_0_sw_init(void *handle)
else
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
break;
+ case CHIP_ARCTURUS:
+ adev->num_vmhubs = 3;
+
+ /* Keep the vm size same with Vega20 */
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ break;
default:
break;
}
@@ -1012,17 +1153,26 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+ }
+
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
&adev->gmc.vm_fault);
if (r)
return r;
- /* interrupt sent to DF. */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
- &adev->gmc.ecc_irq);
- if (r)
- return r;
+ if (!amdgpu_sriov_vf(adev)) {
+ /* interrupt sent to DF. */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ if (r)
+ return r;
+ }
/* Set the internal MC address mask
* This is the max address of the GPU's
@@ -1030,25 +1180,12 @@ static int gmc_v9_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- /* set DMA mask + need_dma32 flags.
- * PCIE - can handle 44-bits.
- * IGP - can handle 44-bits
- * PCI - dma32 for legacy pci gart, 44 bits on vega10
- */
- adev->need_dma32 = false;
- dma_bits = adev->need_dma32 ? 32 : 44;
- r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
- adev->need_dma32 = true;
- dma_bits = 32;
printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ return r;
}
- r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
- if (r) {
- pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
- printk(KERN_WARNING "amdgpu: No coherent DMA available.\n");
- }
- adev->need_swiotlb = drm_need_swiotlb(dma_bits);
+ adev->need_swiotlb = drm_need_swiotlb(44);
if (adev->gmc.xgmi.supported) {
r = gfxhub_v1_1_get_xgmi_info(adev);
@@ -1077,8 +1214,9 @@ static int gmc_v9_0_sw_init(void *handle)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
- adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
- adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS;
amdgpu_vm_manager_init(adev);
@@ -1088,28 +1226,14 @@ static int gmc_v9_0_sw_init(void *handle)
static int gmc_v9_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ void *stolen_vga_buf;
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
- adev->gmc.ras_if) {
- struct ras_common_if *ras_if = adev->gmc.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- };
-
- /*remove fs first*/
- amdgpu_ras_debugfs_remove(adev, ras_if);
- amdgpu_ras_sysfs_remove(adev, ras_if);
- /*remove the IH*/
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
- amdgpu_ras_feature_enable(adev, ras_if, 0);
- kfree(ras_if);
- }
-
+ amdgpu_gmc_ras_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
if (gmc_v9_0_keep_stolen_memory(adev))
- amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
@@ -1123,7 +1247,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
- if (amdgpu_virt_support_skip_setting(adev))
+ if (amdgpu_sriov_vf(adev))
break;
/* fall through */
case CHIP_VEGA20:
@@ -1137,6 +1261,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
case CHIP_VEGA12:
break;
case CHIP_RAVEN:
+ /* TODO for renoir */
soc15_program_register_sequence(adev,
golden_settings_athub_1_0_0,
ARRAY_SIZE(golden_settings_athub_1_0_0));
@@ -1154,12 +1279,6 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
int r;
- bool value;
- u32 tmp;
-
- amdgpu_device_program_register_sequence(adev,
- golden_settings_vega10_hdp,
- ARRAY_SIZE(golden_settings_vega10_hdp));
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
@@ -1169,42 +1288,17 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
if (r)
return r;
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- mmhub_v1_0_update_power_gating(adev, true);
- break;
- default:
- break;
- }
-
r = gfxhub_v1_0_gart_enable(adev);
if (r)
return r;
- r = mmhub_v1_0_gart_enable(adev);
+ if (adev->asic_type == CHIP_ARCTURUS)
+ r = mmhub_v9_4_gart_enable(adev);
+ else
+ r = mmhub_v1_0_gart_enable(adev);
if (r)
return r;
- WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
-
- tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
- WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
-
- WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
- WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
-
- /* After HDP is initialized, flush HDP.*/
- adev->nbio_funcs->hdp_flush(adev, NULL);
-
- if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
- value = false;
- else
- value = true;
-
- gfxhub_v1_0_set_fault_enable_default(adev, value);
- mmhub_v1_0_set_fault_enable_default(adev, value);
- gmc_v9_0_flush_gpu_tlb(adev, 0, 0);
-
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
@@ -1214,20 +1308,69 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
static int gmc_v9_0_hw_init(void *handle)
{
- int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool value;
+ int r, i;
+ u32 tmp;
/* The sequence of these two function calls matters.*/
gmc_v9_0_init_golden_registers(adev);
if (adev->mode_info.num_crtc) {
- /* Lockout access through VGA aperture*/
- WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+ if (adev->asic_type != CHIP_ARCTURUS) {
+ /* Lockout access through VGA aperture*/
+ WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
- /* disable VGA render */
- WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ /* disable VGA render */
+ WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ }
}
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_vega10_hdp,
+ ARRAY_SIZE(golden_settings_vega10_hdp));
+
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
+ /* TODO for renoir */
+ mmhub_v1_0_update_power_gating(adev, true);
+ break;
+ case CHIP_ARCTURUS:
+ WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+
+ tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
+ WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
+
+ WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
+ WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
+
+ /* After HDP is initialized, flush HDP.*/
+ adev->nbio.funcs->hdp_flush(adev, NULL);
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+ value = false;
+ else
+ value = true;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ gfxhub_v1_0_set_fault_enable_default(adev, value);
+ if (adev->asic_type == CHIP_ARCTURUS)
+ mmhub_v9_4_set_fault_enable_default(adev, value);
+ else
+ mmhub_v1_0_set_fault_enable_default(adev, value);
+ }
+ for (i = 0; i < adev->num_vmhubs; ++i)
+ gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
+
+ if (adev->umc.funcs && adev->umc.funcs->init_registers)
+ adev->umc.funcs->init_registers(adev);
+
r = gmc_v9_0_gart_enable(adev);
return r;
@@ -1243,7 +1386,10 @@ static int gmc_v9_0_hw_init(void *handle)
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
gfxhub_v1_0_gart_disable(adev);
- mmhub_v1_0_gart_disable(adev);
+ if (adev->asic_type == CHIP_ARCTURUS)
+ mmhub_v9_4_gart_disable(adev);
+ else
+ mmhub_v1_0_gart_disable(adev);
amdgpu_gart_table_vram_unpin(adev);
}
@@ -1308,14 +1454,26 @@ static int gmc_v9_0_set_clockgating_state(void *handle,
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- return mmhub_v1_0_set_clockgating(adev, state);
+ if (adev->asic_type == CHIP_ARCTURUS)
+ mmhub_v9_4_set_clockgating(adev, state);
+ else
+ mmhub_v1_0_set_clockgating(adev, state);
+
+ athub_v1_0_set_clockgating(adev, state);
+
+ return 0;
}
static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- mmhub_v1_0_get_clockgating(adev, flags);
+ if (adev->asic_type == CHIP_ARCTURUS)
+ mmhub_v9_4_get_clockgating(adev, flags);
+ else
+ mmhub_v1_0_get_clockgating(adev, flags);
+
+ athub_v1_0_get_clockgating(adev, flags);
}
static int gmc_v9_0_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
index 5c8deac65580..e0585e8c6c1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
@@ -24,17 +24,6 @@
#ifndef __GMC_V9_0_H__
#define __GMC_V9_0_H__
- /*
- * The latest engine allocation on gfx9 is:
- * Engine 2, 3: firmware
- * Engine 0, 1, 4~16: amdgpu ring,
- * subject to change when ring number changes
- * Engine 17: Gart flushes
- */
-#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
-#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
-
extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
new file mode 100644
index 000000000000..0debfd9f428c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -0,0 +1,586 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "vcn_v1_0.h"
+
+#include "vcn/vcn_1_0_offset.h"
+#include "vcn/vcn_1_0_sh_mask.h"
+
+static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+ ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ ring->ring[(*ptr)++] = 0;
+ ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
+ } else {
+ ring->ring[(*ptr)++] = reg_offset;
+ ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
+ }
+ ring->ring[(*ptr)++] = val;
+}
+
+static void jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ uint32_t reg, reg_offset, val, mask, i;
+
+ // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
+ reg_offset = (reg << 2);
+ val = lower_32_bits(ring->gpu_addr);
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
+ reg_offset = (reg << 2);
+ val = upper_32_bits(ring->gpu_addr);
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 3rd to 5th: issue MEM_READ commands
+ for (i = 0; i <= 2; i++) {
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
+ ring->ring[ptr++] = 0;
+ }
+
+ // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x13;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 7th: program mmUVD_JRBC_RB_REF_DATA
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA);
+ reg_offset = (reg << 2);
+ val = 0x1;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x1;
+ mask = 0x1;
+
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
+ ring->ring[ptr++] = 0x01400200;
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
+ ring->ring[ptr++] = val;
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ ring->ring[ptr++] = 0;
+ ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
+ } else {
+ ring->ring[ptr++] = reg_offset;
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
+ }
+ ring->ring[ptr++] = mask;
+
+ //9th to 21st: insert no-op
+ for (i = 0; i <= 12; i++) {
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ring->ring[ptr++] = 0;
+ }
+
+ //22nd: reset mmUVD_JRBC_RB_RPTR
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+ reg_offset = (reg << 2);
+ val = 0;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x12;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * jpeg_v1_0_decode_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0xffffffff);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+
+ /* emit trap */
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+static void jpeg_v1_0_decode_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static int jpeg_v1_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG decode TRAP\n");
+
+ switch (entry->src_id) {
+ case 126:
+ amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v1_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+int jpeg_v1_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->jpeg.num_jpeg_inst = 1;
+
+ jpeg_v1_0_set_dec_ring_funcs(adev);
+ jpeg_v1_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v1_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+int jpeg_v1_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ ring = &adev->jpeg.inst->ring_dec;
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch = adev->jpeg.inst->external.jpeg_pitch =
+ SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+
+ return 0;
+}
+
+/**
+ * jpeg_v1_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG free up sw allocation
+ */
+void jpeg_v1_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_ring_fini(&adev->jpeg.inst[0].ring_dec);
+}
+
+/**
+ * jpeg_v1_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+void jpeg_v1_0_start(struct amdgpu_device *adev, int mode)
+{
+ struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+
+ if (mode == 0) {
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ }
+
+ /* initialize wptr */
+ ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+
+ /* copy patch commands to the jpeg ring */
+ jpeg_v1_0_decode_ring_set_patch_ring(ring,
+ (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+}
+
+static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .nop = PACKET0(0x81ff, 0),
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .vmhub = AMDGPU_MMHUB_0,
+ .extra_dw = 64,
+ .get_rptr = jpeg_v1_0_decode_ring_get_rptr,
+ .get_wptr = jpeg_v1_0_decode_ring_get_wptr,
+ .set_wptr = jpeg_v1_0_decode_ring_set_wptr,
+ .emit_frame_size =
+ 6 + 6 + /* hdp invalidate / flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v1_0_decode_ring_emit_vm_flush */
+ 26 + 26 + /* jpeg_v1_0_decode_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 22, /* jpeg_v1_0_decode_ring_emit_ib */
+ .emit_ib = jpeg_v1_0_decode_ring_emit_ib,
+ .emit_fence = jpeg_v1_0_decode_ring_emit_fence,
+ .emit_vm_flush = jpeg_v1_0_decode_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v1_0_decode_ring_nop,
+ .insert_start = jpeg_v1_0_decode_ring_insert_start,
+ .insert_end = jpeg_v1_0_decode_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = vcn_v1_0_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = jpeg_v1_0_decode_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec.funcs = &jpeg_v1_0_decode_ring_vm_funcs;
+ DRM_INFO("JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = {
+ .set = jpeg_v1_0_set_interrupt_state,
+ .process = jpeg_v1_0_process_interrupt,
+};
+
+static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
new file mode 100644
index 000000000000..bbf33a6a3972
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V1_0_H__
+#define __JPEG_V1_0_H__
+
+int jpeg_v1_0_early_init(void *handle);
+int jpeg_v1_0_sw_init(void *handle);
+void jpeg_v1_0_sw_fini(void *handle);
+void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
+
+#endif /*__JPEG_V1_0_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
new file mode 100644
index 000000000000..ff2e6e1ccde7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -0,0 +1,827 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+#include "vcn/vcn_2_0_0_offset.h"
+#include "vcn/vcn_2_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
+#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029
+#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a
+#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb
+#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf
+#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9
+#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed
+#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
+#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
+#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+
+static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+
+/**
+ * jpeg_v2_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v2_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->jpeg.num_jpeg_inst = 1;
+
+ jpeg_v2_0_set_dec_ring_funcs(adev);
+ jpeg_v2_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v2_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = &adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v2_0_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_0_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v2_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ int r;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (!r)
+ DRM_INFO("JPEG decode initialized successfully.\n");
+
+ return r;
+}
+
+/**
+ * jpeg_v2_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v2_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
+ jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ ring->sched.ready = false;
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v2_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v2_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_0_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v2_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v2_0_hw_init(adev);
+
+ return r;
+}
+
+static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+ int r = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+ SOC15_WAIT_ON_RREG(JPEG, 0,
+ mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
+ return r;
+ }
+ }
+
+ /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */
+ data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data);
+
+ return 0;
+}
+
+static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev)
+{
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data;
+ int r = 0;
+
+ data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS));
+ data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
+ data |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data);
+
+ data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+ SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
+ (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device* adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JPEG_ENC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device* adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v2_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v2_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ /* disable power gating */
+ r = jpeg_v2_0_disable_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v2_0_disable_clock_gating(adev);
+
+ WREG32_SOC15(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v2_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable JPEG CGC */
+ jpeg_v2_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v2_0_enable_power_gating(adev);
+ if (r)
+ return r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+/**
+ * jpeg_v2_0_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ jpeg_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static bool jpeg_v2_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+static int jpeg_v2_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+
+ SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret);
+
+ return ret;
+}
+
+static int jpeg_v2_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ if (enable) {
+ if (jpeg_v2_0_is_idle(handle))
+ return -EBUSY;
+ jpeg_v2_0_enable_clock_gating(adev);
+ } else {
+ jpeg_v2_0_disable_clock_gating(adev);
+ }
+
+ return 0;
+}
+
+static int jpeg_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v2_0_stop(adev);
+ else
+ ret = jpeg_v2_0_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v2_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
+ .name = "jpeg_v2_0",
+ .early_init = jpeg_v2_0_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v2_0_sw_init,
+ .sw_fini = jpeg_v2_0_sw_fini,
+ .hw_init = jpeg_v2_0_hw_init,
+ .hw_fini = jpeg_v2_0_hw_fini,
+ .suspend = jpeg_v2_0_suspend,
+ .resume = jpeg_v2_0_resume,
+ .is_idle = jpeg_v2_0_is_idle,
+ .wait_for_idle = jpeg_v2_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v2_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v2_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .vmhub = AMDGPU_MMHUB_0,
+ .get_rptr = jpeg_v2_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v2_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v2_0_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec.funcs = &jpeg_v2_0_dec_ring_vm_funcs;
+ DRM_INFO("JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v2_0_irq_funcs = {
+ .set = jpeg_v2_0_set_interrupt_state,
+ .process = jpeg_v2_0_process_interrupt,
+};
+
+static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v2_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v2_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v2_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
new file mode 100644
index 000000000000..15a344ed340f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V2_0_H__
+#define __JPEG_V2_0_H__
+
+void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
+void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
+void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags);
+void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr);
+void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
+
+extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block;
+
+#endif /* __JPEG_V2_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
new file mode 100644
index 000000000000..c6d046df4b70
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -0,0 +1,641 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+
+#include "vcn/vcn_2_5_offset.h"
+#include "vcn/vcn_2_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+#define JPEG25_MAX_HW_INSTANCES_ARCTURUS 2
+
+static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+
+static int amdgpu_ih_clientid_jpeg[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+/**
+ * jpeg_v2_5_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v2_5_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ u32 harvest;
+ int i;
+
+ adev->jpeg.num_jpeg_inst = JPEG25_MAX_HW_INSTANCES_ARCTURUS;
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ harvest = RREG32_SOC15(JPEG, i, mmCC_UVD_HARVESTING);
+ if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+ adev->jpeg.harvest_config |= 1 << i;
+ }
+
+ if (adev->jpeg.harvest_config == (AMDGPU_JPEG_HARVEST_JPEG0 |
+ AMDGPU_JPEG_HARVEST_JPEG1))
+ return -ENOENT;
+ } else
+ adev->jpeg.num_jpeg_inst = 1;
+
+ jpeg_v2_5_set_dec_ring_funcs(adev);
+ jpeg_v2_5_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v2_5_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->jpeg.inst[i].ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
+ sprintf(ring->name, "jpeg_dec_%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, 0);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v2_5_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_5_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v2_5_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->jpeg.inst[i].ring_dec;
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ DRM_INFO("JPEG decode initialized successfully.\n");
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v2_5_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->jpeg.inst[i].ring_dec;
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
+ jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ ring->sched.ready = false;
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v2_5_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v2_5_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_5_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v2_5_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v2_5_hw_init(adev);
+
+ return r;
+}
+
+static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device* adev, int inst)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JPEG_ENC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL);
+ data &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JMCIF_MODE_MASK
+ | JPEG_CGC_CTRL__JRBBM_MODE_MASK);
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data);
+}
+
+static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device* adev, int inst)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v2_5_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v2_5_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->jpeg.inst[i].ring_dec;
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v2_5_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v2_5_stop(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v2_5_enable_clock_gating(adev, i);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v2_5_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v2_5_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v2_5_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ret &= (((RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+ }
+
+ return ret;
+}
+
+static int jpeg_v2_5_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int jpeg_v2_5_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (enable) {
+ if (jpeg_v2_5_is_idle(handle))
+ return -EBUSY;
+ jpeg_v2_5_enable_clock_gating(adev, i);
+ } else {
+ jpeg_v2_5_disable_clock_gating(adev, i);
+ }
+ }
+
+ return 0;
+}
+
+static int jpeg_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if(state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v2_5_stop(adev);
+ else
+ ret = jpeg_v2_5_start(adev);
+
+ if(!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v2_5_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
+ .name = "jpeg_v2_5",
+ .early_init = jpeg_v2_5_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v2_5_sw_init,
+ .sw_fini = jpeg_v2_5_sw_fini,
+ .hw_init = jpeg_v2_5_hw_init,
+ .hw_fini = jpeg_v2_5_hw_fini,
+ .suspend = jpeg_v2_5_suspend,
+ .resume = jpeg_v2_5_resume,
+ .is_idle = jpeg_v2_5_is_idle,
+ .wait_for_idle = jpeg_v2_5_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v2_5_set_clockgating_state,
+ .set_powergating_state = jpeg_v2_5_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .vmhub = AMDGPU_MMHUB_1,
+ .get_rptr = jpeg_v2_5_dec_ring_get_rptr,
+ .get_wptr = jpeg_v2_5_dec_ring_get_wptr,
+ .set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v2_5_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v2_5_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v2_5_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_5_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec.me = i;
+ DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i);
+ }
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v2_5_irq_funcs = {
+ .set = jpeg_v2_5_set_interrupt_state,
+ .process = jpeg_v2_5_process_interrupt,
+};
+
+static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ adev->jpeg.inst[i].irq.num_types = 1;
+ adev->jpeg.inst[i].irq.funcs = &jpeg_v2_5_irq_funcs;
+ }
+}
+
+const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 2,
+ .minor = 5,
+ .rev = 0,
+ .funcs = &jpeg_v2_5_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
new file mode 100644
index 000000000000..2b4087c02620
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V2_5_H__
+#define __JPEG_V2_5_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block;
+
+#endif /* __JPEG_V2_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index dc5ce03034d3..49a3a56ec017 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -21,15 +21,14 @@
*
*/
#include "amdgpu.h"
+#include "amdgpu_ras.h"
#include "mmhub_v1_0.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"
#include "mmhub/mmhub_1_0_default.h"
-#include "athub/athub_1_0_offset.h"
-#include "athub/athub_1_0_sh_mask.h"
#include "vega10_enum.h"
-
+#include "soc15.h"
#include "soc15_common.h"
#define mmDAGB0_CNTL_MISC2_RV 0x008f
@@ -111,7 +110,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
- if (amdgpu_virt_support_skip_setting(adev))
+ if (amdgpu_sriov_vf(adev))
return;
/* Set default page address. */
@@ -159,7 +158,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
- if (amdgpu_virt_support_skip_setting(adev))
+ if (amdgpu_sriov_vf(adev))
return;
/* Setup L2 cache */
@@ -203,12 +202,14 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp);
}
static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
{
- if (amdgpu_virt_support_skip_setting(adev))
+ if (amdgpu_sriov_vf(adev))
return;
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
@@ -348,7 +349,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
0);
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
- if (!amdgpu_virt_support_skip_setting(adev)) {
+ if (!amdgpu_sriov_vf(adev)) {
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
@@ -367,7 +368,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
{
u32 tmp;
- if (amdgpu_virt_support_skip_setting(adev))
+ if (amdgpu_sriov_vf(adev))
return;
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
@@ -407,7 +408,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
void mmhub_v1_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
@@ -415,6 +416,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
@@ -491,22 +494,6 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2);
}
-static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
- bool enable)
-{
- uint32_t def, data;
-
- def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
-
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
- data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
- else
- data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
-
- if (def != data)
- WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
-}
-
static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
@@ -523,23 +510,6 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data);
}
-static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
- bool enable)
-{
- uint32_t def, data;
-
- def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
-
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
- (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
- data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
- else
- data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
-
- if(def != data)
- WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
-}
-
int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
@@ -551,14 +521,11 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_RENOIR:
mmhub_v1_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- athub_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
mmhub_v1_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- athub_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -569,18 +536,218 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
{
- int data;
+ int data, data1;
if (amdgpu_sriov_vf(adev))
*flags = 0;
+ data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
+
+ data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+
/* AMD_CG_SUPPORT_MC_MGCG */
- data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
- if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ if ((data & ATC_L2_MISC_CG__ENABLE_MASK) &&
+ !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)))
*flags |= AMD_CG_SUPPORT_MC_MGCG;
/* AMD_CG_SUPPORT_MC_LS */
- data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_MC_LS;
}
+
+static const struct soc15_ras_field_entry mmhub_v1_0_ras_fields[] = {
+ { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ }
+};
+
+static const struct soc15_reg_entry mmhub_v1_0_edc_cnt_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), 0, 0, 0},
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), 0, 0, 0},
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), 0, 0, 0},
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), 0, 0, 0},
+};
+
+static int mmhub_v1_0_get_ras_error_count(const struct soc15_reg_entry *reg,
+ uint32_t value, uint32_t *sec_count, uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_ras_fields); i++) {
+ if(mmhub_v1_0_ras_fields[i].reg_offset != reg->reg_offset)
+ continue;
+
+ sec_cnt = (value &
+ mmhub_v1_0_ras_fields[i].sec_count_mask) >>
+ mmhub_v1_0_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("MMHUB SubBlock %s, SEC %d\n",
+ mmhub_v1_0_ras_fields[i].name,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ mmhub_v1_0_ras_fields[i].ded_count_mask) >>
+ mmhub_v1_0_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ DRM_INFO("MMHUB SubBlock %s, DED %d\n",
+ mmhub_v1_0_ras_fields[i].name,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i;
+ uint32_t reg_value;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_edc_cnt_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i]));
+ if (reg_value)
+ mmhub_v1_0_get_ras_error_count(&mmhub_v1_0_edc_cnt_regs[i],
+ reg_value, &sec_count, &ded_count);
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
+ .ras_late_init = amdgpu_mmhub_ras_late_init,
+ .query_ras_error_count = mmhub_v1_0_query_ras_error_count,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
index 0de0fdf98c00..c43319e8f945 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
@@ -23,6 +23,8 @@
#ifndef __MMHUB_V1_0_H__
#define __MMHUB_V1_0_H__
+extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs;
+
u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev);
int mmhub_v1_0_gart_enable(struct amdgpu_device *adev);
void mmhub_v1_0_gart_disable(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 0f9549f19ade..bde189680521 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -31,20 +31,25 @@
#include "soc15_common.h"
-static void mmhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev)
+void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
{
- uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
+ /* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */
+ int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
+ - mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
- WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
- lower_32_bits(value));
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ offset * vmid, lower_32_bits(page_table_base));
- WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
- upper_32_bits(value));
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ offset * vmid, upper_32_bits(page_table_base));
}
static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
- mmhub_v2_0_init_gart_pt_regs(adev);
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
@@ -126,7 +131,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);
- tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp);
@@ -137,6 +142,15 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp);
tmp = mmMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp);
tmp = mmMMVM_L2_CNTL4_DEFAULT;
@@ -152,6 +166,8 @@ static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp);
}
@@ -324,7 +340,7 @@ void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
void mmhub_v2_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
@@ -332,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
@@ -406,10 +424,12 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
mmhub_v2_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
mmhub_v2_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
index db16f3ece218..3ea4344f0315 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
@@ -31,5 +31,7 @@ void mmhub_v2_0_init(struct amdgpu_device *adev);
int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
new file mode 100644
index 000000000000..a5281df8d84f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -0,0 +1,1602 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "mmhub_v9_4.h"
+
+#include "mmhub/mmhub_9_4_1_offset.h"
+#include "mmhub/mmhub_9_4_1_sh_mask.h"
+#include "mmhub/mmhub_9_4_1_default.h"
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "soc15.h"
+#include "soc15_common.h"
+
+#define MMHUB_NUM_INSTANCES 2
+#define MMHUB_INSTANCE_REGISTER_OFFSET 0x3000
+
+u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
+{
+ /* The base should be same b/t 2 mmhubs on Acrturus. Read one here. */
+ u64 base = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE);
+ u64 top = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP);
+
+ base &= VMSHAREDVC0_MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ top &= VMSHAREDVC0_MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
+ top <<= 24;
+
+ adev->gmc.fb_start = base;
+ adev->gmc.fb_end = top;
+
+ return base;
+}
+
+static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid,
+ uint32_t vmid, uint64_t value)
+{
+ /* two registers distance between mmVML2VC0_VM_CONTEXT0_* to
+ * mmVML2VC0_VM_CONTEXT1_*
+ */
+ int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
+ - mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ lower_32_bits(value));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ upper_32_bits(value));
+
+}
+
+static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev,
+ int hubid)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base);
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++)
+ mmhub_v9_4_setup_hubid_vm_pt_regs(adev, i, vmid,
+ page_table_base);
+}
+
+static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
+ int hubid)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BASE,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_TOP,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ adev->gmc.agp_end >> 24);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BOT,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ adev->gmc.agp_start >> 24);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
+ mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(value >> 12));
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
+ mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15_OFFSET(
+ MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ tmp);
+ }
+}
+
+static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ATC_EN, 1);
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+}
+
+static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2,
+ INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2,
+ INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+
+ tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+
+ tmp = mmVML2PF0_VM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL4,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+}
+
+static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev,
+ int hubid)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+}
+
+static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev,
+ int hubid)
+{
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0XFFFFFFFF);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0x0000000F);
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+}
+
+static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
+{
+ uint32_t tmp;
+ int i;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i,
+ tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+}
+
+static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev,
+ int hubid)
+{
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i,
+ 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i,
+ 0x1f);
+ }
+}
+
+int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+ /* GART Enable. */
+ mmhub_v9_4_init_gart_aperture_regs(adev, i);
+ mmhub_v9_4_init_system_aperture_regs(adev, i);
+ mmhub_v9_4_init_tlb_regs(adev, i);
+ if (!amdgpu_sriov_vf(adev))
+ mmhub_v9_4_init_cache_regs(adev, i);
+
+ mmhub_v9_4_enable_system_domain(adev, i);
+ if (!amdgpu_sriov_vf(adev))
+ mmhub_v9_4_disable_identity_aperture(adev, i);
+ mmhub_v9_4_setup_vmid_config(adev, i);
+ mmhub_v9_4_program_invalidation(adev, i);
+ }
+
+ return 0;
+}
+
+void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ u32 i, j;
+
+ for (j = 0; j < MMHUB_NUM_INSTANCES; j++) {
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_CNTL,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET +
+ i, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ ENABLE_L2_CACHE, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+ }
+}
+
+/**
+ * mmhub_v1_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp,
+ VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp,
+ VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp,
+ VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+ }
+}
+
+void mmhub_v9_4_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] =
+ {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]};
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+ hub[i]->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_SEM) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_REQ) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_ACK) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_CNTL) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_STATUS) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ }
+}
+
+static void mmhub_v9_4_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data, def1, data1;
+ int i, j;
+ int dist = mmDAGB1_CNTL_MISC2 - mmDAGB0_CNTL_MISC2;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+ def = data = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmATCL2_0_ATC_L2_MISC_CG,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ data |= ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK;
+ else
+ data &= ~ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET, data);
+
+ for (j = 0; j < 5; j++) {
+ def1 = data1 = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_CNTL_MISC2,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET +
+ j * dist);
+ if (enable &&
+ (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+ data1 &=
+ ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ } else {
+ data1 |=
+ (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ }
+
+ if (def1 != data1)
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_CNTL_MISC2,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET +
+ j * dist, data1);
+
+ if (i == 1 && j == 3)
+ break;
+ }
+ }
+}
+
+static void mmhub_v9_4_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+ def = data = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmATCL2_0_ATC_L2_MISC_CG,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+ data |= ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET, data);
+ }
+}
+
+int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ mmhub_v9_4_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v9_4_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+{
+ int data, data1;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ data = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG);
+
+ data1 = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG);
+
+ if ((data & ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK) &&
+ !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)))
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+static const struct soc15_ras_field_entry mmhub_v9_4_ras_fields[] = {
+ /* MMHUB Range 0 */
+ { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 1 */
+ { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHAB Range 2*/
+ { "MMEA2_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA2_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Rang 3 */
+ { "MMEA3_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA3_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 4 */
+ { "MMEA4_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA4_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUAB Range 5 */
+ { "MMEA5_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA5_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 6 */
+ { "MMEA6_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA6_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA6_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 7*/
+ { "MMEA7_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA7_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA7_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ }
+};
+
+static const struct soc15_reg_entry mmhub_v9_4_edc_cnt_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), 0, 0, 0 },
+};
+
+static int mmhub_v9_4_get_ras_error_count(const struct soc15_reg_entry *reg,
+ uint32_t value, uint32_t *sec_count, uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) {
+ if(mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset)
+ continue;
+
+ sec_cnt = (value &
+ mmhub_v9_4_ras_fields[i].sec_count_mask) >>
+ mmhub_v9_4_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("MMHUB SubBlock %s, SEC %d\n",
+ mmhub_v9_4_ras_fields[i].name,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ mmhub_v9_4_ras_fields[i].ded_count_mask) >>
+ mmhub_v9_4_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ DRM_INFO("MMHUB SubBlock %s, DED %d\n",
+ mmhub_v9_4_ras_fields[i].name,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void mmhub_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i;
+ uint32_t reg_value;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i]));
+ if (reg_value)
+ mmhub_v9_4_get_ras_error_count(&mmhub_v9_4_edc_cnt_regs[i],
+ reg_value, &sec_count, &ded_count);
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
+ .ras_late_init = amdgpu_mmhub_ras_late_init,
+ .query_ras_error_count = mmhub_v9_4_query_ras_error_count,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
new file mode 100644
index 000000000000..1b979773776c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V9_4_H__
+#define __MMHUB_V9_4_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs;
+
+u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev);
+int mmhub_v9_4_gart_enable(struct amdgpu_device *adev);
+void mmhub_v9_4_gart_disable(struct amdgpu_device *adev);
+void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value);
+void mmhub_v9_4_init(struct amdgpu_device *adev);
+int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
index 8af0bddf85e4..20958639b601 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
@@ -47,6 +47,18 @@ struct mmsch_v1_0_init_header {
uint32_t uvd_table_size;
};
+struct mmsch_vf_eng_init_header {
+ uint32_t init_status;
+ uint32_t table_offset;
+ uint32_t table_size;
+};
+
+struct mmsch_v1_1_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_vf_eng_init_header eng[2];
+};
+
struct mmsch_v1_0_cmd_direct_reg_header {
uint32_t reg_offset : 28;
uint32_t command_type : 4;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 235548c0b41f..5fd67e1cc2a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -158,82 +158,6 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
xgpu_ai_mailbox_set_valid(adev, false);
}
-static int xgpu_ai_get_pp_clk(struct amdgpu_device *adev, u32 type, char *buf)
-{
- int r = 0;
- u32 req, val, size;
-
- if (!amdgim_is_hwperf(adev) || buf == NULL)
- return -EBADRQC;
-
- switch(type) {
- case PP_SCLK:
- req = IDH_IRQ_GET_PP_SCLK;
- break;
- case PP_MCLK:
- req = IDH_IRQ_GET_PP_MCLK;
- break;
- default:
- return -EBADRQC;
- }
-
- mutex_lock(&adev->virt.dpm_mutex);
-
- xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0);
-
- r = xgpu_ai_poll_msg(adev, IDH_SUCCESS);
- if (!r && adev->fw_vram_usage.va != NULL) {
- val = RREG32_NO_KIQ(
- SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW1));
- size = strnlen((((char *)adev->virt.fw_reserve.p_pf2vf) +
- val), PAGE_SIZE);
-
- if (size < PAGE_SIZE)
- strcpy(buf,((char *)adev->virt.fw_reserve.p_pf2vf + val));
- else
- size = 0;
-
- r = size;
- goto out;
- }
-
- r = xgpu_ai_poll_msg(adev, IDH_FAIL);
- if(r)
- pr_info("%s DPM request failed",
- (type == PP_SCLK)? "SCLK" : "MCLK");
-
-out:
- mutex_unlock(&adev->virt.dpm_mutex);
- return r;
-}
-
-static int xgpu_ai_force_dpm_level(struct amdgpu_device *adev, u32 level)
-{
- int r = 0;
- u32 req = IDH_IRQ_FORCE_DPM_LEVEL;
-
- if (!amdgim_is_hwperf(adev))
- return -EBADRQC;
-
- mutex_lock(&adev->virt.dpm_mutex);
- xgpu_ai_mailbox_trans_msg(adev, req, level, 0, 0);
-
- r = xgpu_ai_poll_msg(adev, IDH_SUCCESS);
- if (!r)
- goto out;
-
- r = xgpu_ai_poll_msg(adev, IDH_FAIL);
- if (!r)
- pr_info("DPM request failed");
- else
- pr_info("Mailbox is broken");
-
-out:
- mutex_unlock(&adev->virt.dpm_mutex);
- return r;
-}
-
static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
enum idh_request req)
{
@@ -326,7 +250,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
*/
locked = mutex_trylock(&adev->lock_reset);
if (locked)
- adev->in_gpu_reset = 1;
+ adev->in_gpu_reset = true;
do {
if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
@@ -338,7 +262,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
flr_done:
if (locked) {
- adev->in_gpu_reset = 0;
+ adev->in_gpu_reset = false;
mutex_unlock(&adev->lock_reset);
}
@@ -449,27 +373,10 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
-static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev)
-{
- adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY;
-
- /* Enable L1 security reg access mode by defaul, as non-security VF
- * will no longer be supported.
- */
- adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC;
-
- adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH;
-
- adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING;
-}
-
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.req_full_gpu = xgpu_ai_request_full_gpu_access,
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
.reset_gpu = xgpu_ai_request_reset,
.wait_reset = NULL,
.trans_msg = xgpu_ai_mailbox_trans_msg,
- .get_pp_clk = xgpu_ai_get_pp_clk,
- .force_dpm_level = xgpu_ai_force_dpm_level,
- .init_reg_access_mode = xgpu_ai_init_reg_access_mode,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 077e91a33d62..37dbe0f2142f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -35,10 +35,6 @@ enum idh_request {
IDH_REL_GPU_FINI_ACCESS,
IDH_REQ_GPU_RESET_ACCESS,
- IDH_IRQ_FORCE_DPM_LEVEL = 10,
- IDH_IRQ_GET_PP_SCLK,
- IDH_IRQ_GET_PP_MCLK,
-
IDH_LOG_VF_ERROR = 200,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
new file mode 100644
index 000000000000..237fa5e16b7c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "nbio/nbio_2_3_offset.h"
+#include "nbio/nbio_2_3_sh_mask.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "soc15.h"
+#include "navi10_ih.h"
+#include "soc15_common.h"
+#include "mxgpu_nv.h"
+#include "mxgpu_ai.h"
+
+static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev)
+{
+ WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
+}
+
+static void xgpu_nv_mailbox_set_valid(struct amdgpu_device *adev, bool val)
+{
+ WREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
+}
+
+/*
+ * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine
+ * RCV_MSG_VALID filed of BIF_BX_PF_MAILBOX_CONTROL must already be set to 1
+ * by host.
+ *
+ * if called no in IRQ routine, this peek_msg cannot guaranteed to return the
+ * correct value since it doesn't return the RCV_DW0 under the case that
+ * RCV_MSG_VALID is set by host.
+ */
+static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
+{
+ return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
+}
+
+
+static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
+ enum idh_event event)
+{
+ u32 reg;
+
+ reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
+ if (reg != event)
+ return -ENOENT;
+
+ xgpu_nv_mailbox_send_ack(adev);
+
+ return 0;
+}
+
+static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
+{
+ return RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2;
+}
+
+static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
+{
+ int timeout = NV_MAILBOX_POLL_ACK_TIMEDOUT;
+ u8 reg;
+
+ do {
+ reg = RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE);
+ if (reg & 2)
+ return 0;
+
+ mdelay(5);
+ timeout -= 5;
+ } while (timeout > 1);
+
+ pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
+
+ return -ETIME;
+}
+
+static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
+{
+ int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT;
+
+ do {
+ r = xgpu_nv_mailbox_rcv_msg(adev, event);
+ if (!r)
+ return 0;
+
+ msleep(10);
+ timeout -= 10;
+ } while (timeout > 1);
+
+ pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+
+ return -ETIME;
+}
+
+static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
+ enum idh_request req, u32 data1, u32 data2, u32 data3)
+{
+ u32 reg;
+ int r;
+ uint8_t trn;
+
+ /* IMPORTANT:
+ * clear TRN_MSG_VALID valid to clear host's RCV_MSG_ACK
+ * and with host's RCV_MSG_ACK cleared hw automatically clear host's RCV_MSG_ACK
+ * which lead to VF's TRN_MSG_ACK cleared, otherwise below xgpu_nv_poll_ack()
+ * will return immediatly
+ */
+ do {
+ xgpu_nv_mailbox_set_valid(adev, false);
+ trn = xgpu_nv_peek_ack(adev);
+ if (trn) {
+ pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ msleep(1);
+ }
+ } while (trn);
+
+ reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0));
+ reg = REG_SET_FIELD(reg, BIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0,
+ MSGBUF_DATA, req);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0),
+ reg);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW1),
+ data1);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW2),
+ data2);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW3),
+ data3);
+
+ xgpu_nv_mailbox_set_valid(adev, true);
+
+ /* start to poll ack */
+ r = xgpu_nv_poll_ack(adev);
+ if (r)
+ pr_err("Doesn't get ack from pf, continue\n");
+
+ xgpu_nv_mailbox_set_valid(adev, false);
+}
+
+static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
+ enum idh_request req)
+{
+ int r;
+
+ xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
+
+ /* start to check msg if request is idh_req_gpu_init_access */
+ if (req == IDH_REQ_GPU_INIT_ACCESS ||
+ req == IDH_REQ_GPU_FINI_ACCESS ||
+ req == IDH_REQ_GPU_RESET_ACCESS) {
+ r = xgpu_nv_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
+ if (r) {
+ pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
+ return r;
+ }
+ /* Retrieve checksum from mailbox2 */
+ if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
+ adev->virt.fw_reserve.checksum_key =
+ RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW2));
+ }
+ }
+
+ return 0;
+}
+
+static int xgpu_nv_request_reset(struct amdgpu_device *adev)
+{
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+}
+
+static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev,
+ bool init)
+{
+ enum idh_request req;
+
+ req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS;
+ return xgpu_nv_send_access_requests(adev, req);
+}
+
+static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
+ bool init)
+{
+ enum idh_request req;
+ int r = 0;
+
+ req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS;
+ r = xgpu_nv_send_access_requests(adev, req);
+
+ return r;
+}
+
+static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("get ack intr and do nothing.\n");
+ return 0;
+}
+
+static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL));
+
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, ACK_INT_EN,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp);
+
+ return 0;
+}
+
+static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
+ int locked;
+
+ /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
+ * otherwise the mailbox msg will be ruined/reseted by
+ * the VF FLR.
+ *
+ * we can unlock the lock_reset to allow "amdgpu_job_timedout"
+ * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
+ * which means host side had finished this VF's FLR.
+ */
+ locked = mutex_trylock(&adev->lock_reset);
+ if (locked)
+ adev->in_gpu_reset = true;
+
+ do {
+ if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
+ goto flr_done;
+
+ msleep(10);
+ timeout -= 10;
+ } while (timeout > 1);
+
+flr_done:
+ if (locked) {
+ adev->in_gpu_reset = false;
+ mutex_unlock(&adev->lock_reset);
+ }
+
+ /* Trigger recovery for world switch failure if no TDR */
+ if (amdgpu_device_should_recover_gpu(adev)
+ && (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
+ amdgpu_device_gpu_recover(adev, NULL);
+}
+
+static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL));
+
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, VALID_INT_EN,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp);
+
+ return 0;
+}
+
+static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);
+
+ switch (event) {
+ case IDH_FLR_NOTIFICATION:
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.flr_work);
+ break;
+ /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
+ * it byfar since that polling thread will handle it,
+ * other msg like flr complete is not handled here.
+ */
+ case IDH_CLR_MSG_BUF:
+ case IDH_FLR_NOTIFICATION_CMPL:
+ case IDH_READY_TO_ACCESS_GPU:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_ack_irq_funcs = {
+ .set = xgpu_nv_set_mailbox_ack_irq,
+ .process = xgpu_nv_mailbox_ack_irq,
+};
+
+static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_rcv_irq_funcs = {
+ .set = xgpu_nv_set_mailbox_rcv_irq,
+ .process = xgpu_nv_mailbox_rcv_irq,
+};
+
+void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->virt.ack_irq.num_types = 1;
+ adev->virt.ack_irq.funcs = &xgpu_nv_mailbox_ack_irq_funcs;
+ adev->virt.rcv_irq.num_types = 1;
+ adev->virt.rcv_irq.funcs = &xgpu_nv_mailbox_rcv_irq_funcs;
+}
+
+int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
+ if (r) {
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+ return r;
+ }
+
+ return 0;
+}
+
+int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
+ if (r)
+ return r;
+ r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
+ if (r) {
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+ return r;
+ }
+
+ INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
+
+ return 0;
+}
+
+void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
+{
+ amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+}
+
+const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
+ .req_full_gpu = xgpu_nv_request_full_gpu_access,
+ .rel_full_gpu = xgpu_nv_release_full_gpu_access,
+ .reset_gpu = xgpu_nv_request_reset,
+ .wait_reset = NULL,
+ .trans_msg = xgpu_nv_mailbox_trans_msg,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
new file mode 100644
index 000000000000..99b15f6865cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MXGPU_NV_H__
+#define __MXGPU_NV_H__
+
+#define NV_MAILBOX_POLL_ACK_TIMEDOUT 500
+#define NV_MAILBOX_POLL_MSG_TIMEDOUT 12000
+#define NV_MAILBOX_POLL_FLR_TIMEDOUT 500
+
+extern const struct amdgpu_virt_ops xgpu_nv_virt_ops;
+
+void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev);
+int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev);
+int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev);
+void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev);
+
+#define NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4)
+#define NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4 + 1)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index e963746be11c..cf557a428298 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -21,7 +21,8 @@
*
*/
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
@@ -109,14 +110,13 @@ static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl
static int navi10_ih_irq_init(struct amdgpu_device *adev)
{
struct amdgpu_ih_ring *ih = &adev->irq.ih;
- int ret = 0;
u32 ih_rb_cntl, ih_doorbell_rtpr, ih_chicken;
u32 tmp;
/* disable irqs */
navi10_ih_disable_interrupts(adev);
- adev->nbio_funcs->ih_control(adev);
+ adev->nbio.funcs->ih_control(adev);
/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8);
@@ -161,7 +161,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
}
WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr);
- adev->nbio_funcs->ih_doorbell_range(adev, ih->use_doorbell,
+ adev->nbio.funcs->ih_doorbell_range(adev, ih->use_doorbell,
ih->doorbell_index);
tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
@@ -178,7 +178,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
navi10_ih_enable_interrupts(adev);
- return ret;
+ return 0;
}
/**
@@ -426,7 +426,7 @@ static int navi10_ih_set_clockgating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
navi10_ih_update_clockgating_state(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
index 55014ce8670a..88efaecf9f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
@@ -24,25 +24,12 @@
#include "nv.h"
#include "soc15_common.h"
-#include "soc15_hw_ip.h"
#include "navi10_ip_offset.h"
int navi10_reg_base_init(struct amdgpu_device *adev)
{
- int r, i;
+ int i;
- if (amdgpu_discovery) {
- r = amdgpu_discovery_reg_base_init(adev);
- if (r) {
- DRM_WARN("failed to init reg base from ip discovery table, "
- "fallback to legacy init method\n");
- goto legacy_init;
- }
-
- return 0;
- }
-
-legacy_init:
for (i = 0 ; i < MAX_INSTANCE ; ++i) {
adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
new file mode 100644
index 000000000000..a786d159e5e9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "navi12_ip_offset.h"
+
+int navi12_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialized the blocks needed by driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
new file mode 100644
index 000000000000..4ea1e8fbb601
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "navi14_ip_offset.h"
+
+int navi14_reg_base_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 835d7b1a841f..f3a3fe746222 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -27,11 +27,21 @@
#include "nbio/nbio_2_3_default.h"
#include "nbio/nbio_2_3_offset.h"
#include "nbio/nbio_2_3_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
#define smnPCIE_CONFIG_CNTL 0x11180044
#define smnCPM_CONTROL 0x11180460
#define smnPCIE_CNTL2 0x11180070
+
+static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -56,10 +66,9 @@ static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
- WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
else
- amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
- NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
}
static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev)
@@ -92,7 +101,7 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan
}
static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
- int doorbell_index)
+ int doorbell_index, int instance)
{
u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
@@ -311,7 +320,6 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
}
const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
- .hdp_flush_reg = &nbio_v2_3_hdp_flush_reg,
.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v2_3_get_pcie_index_offset,
@@ -331,4 +339,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.ih_control = nbio_v2_3_ih_control,
.init_registers = nbio_v2_3_init_registers,
.detect_hw_virt = nbio_v2_3_detect_hw_virt,
+ .remap_hdp_registers = nbio_v2_3_remap_hdp_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
index 5ae52085f6b7..a43b60acf7f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
@@ -26,6 +26,7 @@
#include "soc15_common.h"
+extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 6590143c3f75..635d9e1fc0a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -226,7 +226,7 @@ static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev)
return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
}
-static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
+const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
@@ -277,7 +277,6 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
}
const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
- .hdp_flush_reg = &nbio_v6_1_hdp_flush_reg,
.get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
index 0743a6f016f3..6dc743b73218 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
@@ -26,6 +26,7 @@
#include "soc15_common.h"
+extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 73419fa38159..d6cbf26074bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -91,6 +91,26 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan
WREG32(reg, doorbell_range);
}
+static void nbio_v7_0_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32(reg, doorbell_range);
+}
+
static void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
@@ -272,7 +292,6 @@ static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
}
const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
- .hdp_flush_reg = &nbio_v7_0_hdp_flush_reg,
.get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset,
@@ -282,6 +301,7 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
.hdp_flush = nbio_v7_0_hdp_flush,
.get_memsize = nbio_v7_0_get_memsize,
.sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_0_vcn_doorbell_range,
.enable_doorbell_aperture = nbio_v7_0_enable_doorbell_aperture,
.enable_doorbell_selfring_aperture = nbio_v7_0_enable_doorbell_selfring_aperture,
.ih_doorbell_range = nbio_v7_0_ih_doorbell_range,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
index 508d549c5029..e7aefb252550 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
@@ -26,6 +26,7 @@
#include "soc15_common.h"
+extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index bfaaa327ae3c..65eb378fa035 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -23,14 +23,38 @@
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "nbio_v7_4.h"
+#include "amdgpu_ras.h"
#include "nbio/nbio_7_4_offset.h"
#include "nbio/nbio_7_4_sh_mask.h"
#include "nbio/nbio_7_4_0_smn.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include <uapi/linux/kfd_ioctl.h>
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
+/*
+ * These are nbio v7_4_1 registers mask. Temporarily define these here since
+ * nbio v7_4_1 header is incomplete.
+ */
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L
+
+#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01dc
+#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2
+//BIF_MMSCH1_DOORBELL_RANGE
+#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET__SHIFT 0x2
+#define BIF_MMSCH1_DOORBELL_RANGE__SIZE__SHIFT 0x10
+#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL
+#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L
+
+static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status);
+
static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -75,10 +99,24 @@ static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index, int doorbell_size)
{
- u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
- SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
+ u32 reg, doorbell_range;
- u32 doorbell_range = RREG32(reg);
+ if (instance < 2)
+ reg = instance +
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE);
+ else
+ /*
+ * These registers address of SDMA2~7 is not consecutive
+ * from SDMA0~1. Need plus 4 dwords offset.
+ *
+ * BIF_SDMA0_DOORBELL_RANGE: 0x3bc0
+ * BIF_SDMA1_DOORBELL_RANGE: 0x3bc4
+ * BIF_SDMA2_DOORBELL_RANGE: 0x3bd8
+ */
+ reg = instance + 0x4 +
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE);
+
+ doorbell_range = RREG32(reg);
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
@@ -89,6 +127,32 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan
WREG32(reg, doorbell_range);
}
+static void nbio_v7_4_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg;
+ u32 doorbell_range;
+
+ if (instance)
+ reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE);
+ else
+ reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
+
+ doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32(reg, doorbell_range);
+}
+
static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
@@ -207,7 +271,7 @@ static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev)
return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
}
-static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
+const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
@@ -220,6 +284,12 @@ static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK,
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+ .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK,
+ .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
+ .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
+ .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
+ .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
+ .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
};
static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev)
@@ -241,17 +311,224 @@ static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev)
static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
{
- uint32_t def, data;
- def = data = RREG32_PCIE(smnPCIE_CI_CNTL);
- data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1);
+}
- if (def != data)
- WREG32_PCIE(smnPCIE_CI_CNTL, data);
+static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
+
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_CNTLR_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ /*
+ * clear error status after ras_controller_intr according to
+ * hw team and count ue number for query
+ */
+ nbio_v7_4_query_ras_error_count(adev, &obj->err_data);
+
+ DRM_WARN("RAS controller interrupt triggered by NBIF error\n");
+
+ /* ras_controller_int is dedicated for nbif ras error,
+ * not the global interrupt for sync flood
+ */
+ amdgpu_ras_reset_gpu(adev);
+ }
+}
+
+static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+
+static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable ras feature. Driver only need to set the correct interrupt
+ * vector for bare-metal and sriov use case respectively
+ */
+ uint32_t bif_intr_cntl;
+
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+ if (state == AMDGPU_IRQ_STATE_ENABLE) {
+ /* set interrupt vector select bit to 0 to select
+ * vetcor 1 for bare metal case */
+ bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
+ BIF_INTR_CNTL,
+ RAS_INTR_VEC_SEL, 0);
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+ }
+
+ return 0;
+}
+
+static int nbio_v7_4_process_ras_controller_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for ras_controller_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be involked. Just left it as a dummy one.
+ */
+ return 0;
+}
+
+static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable ras feature. Driver only need to set the correct interrupt
+ * vector for bare-metal and sriov use case respectively
+ */
+ uint32_t bif_intr_cntl;
+
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+ if (state == AMDGPU_IRQ_STATE_ENABLE) {
+ /* set interrupt vector select bit to 0 to select
+ * vetcor 1 for bare metal case */
+ bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
+ BIF_INTR_CNTL,
+ RAS_INTR_VEC_SEL, 0);
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+ }
+
+ return 0;
+}
+
+static int nbio_v7_4_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be involked. Just left it as a dummy one.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_controller_irq_funcs = {
+ .set = nbio_v7_4_set_ras_controller_irq_state,
+ .process = nbio_v7_4_process_ras_controller_irq,
+};
+
+static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_err_event_athub_irq_funcs = {
+ .set = nbio_v7_4_set_ras_err_event_athub_irq_state,
+ .process = nbio_v7_4_process_err_event_athub_irq,
+};
+
+static int nbio_v7_4_init_ras_controller_interrupt (struct amdgpu_device *adev)
+{
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_controller_irq.funcs =
+ &nbio_v7_4_ras_controller_irq_funcs;
+ adev->nbio.ras_controller_irq.num_types = 1;
+
+ /* register ras controller interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT,
+ &adev->nbio.ras_controller_irq);
+
+ return r;
+}
+
+static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev)
+{
+
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbio_v7_4_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+#define smnPARITY_ERROR_STATUS_UNCORR_GRP2 0x13a20030
+
+static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ uint32_t global_sts, central_sts, int_eoi, parity_sts;
+ uint32_t corr, fatal, non_fatal;
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO);
+ corr = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrCorr);
+ fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal);
+ non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO,
+ ParityErrNonFatal);
+ parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2);
+
+ if (corr)
+ err_data->ce_count++;
+ if (fatal)
+ err_data->ue_count++;
+
+ if (corr || fatal || non_fatal) {
+ central_sts = RREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS);
+ /* clear error status register */
+ WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts);
+
+ if (fatal)
+ /* clear parity fatal error indication field */
+ WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2,
+ parity_sts);
+
+ if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS,
+ BIFL_RasContller_Intr_Recv)) {
+ /* clear interrupt status register */
+ WREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS, central_sts);
+ int_eoi = RREG32_PCIE(smnIOHC_INTERRUPT_EOI);
+ int_eoi = REG_SET_FIELD(int_eoi,
+ IOHC_INTERRUPT_EOI, SMI_EOI, 1);
+ WREG32_PCIE(smnIOHC_INTERRUPT_EOI, int_eoi);
+ }
+ }
+}
+
+static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL,
+ DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
}
const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
- .hdp_flush_reg = &nbio_v7_4_hdp_flush_reg,
.get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset,
@@ -261,9 +538,11 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.hdp_flush = nbio_v7_4_hdp_flush,
.get_memsize = nbio_v7_4_get_memsize,
.sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_4_vcn_doorbell_range,
.enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture,
.enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture,
.ih_doorbell_range = nbio_v7_4_ih_doorbell_range,
+ .enable_doorbell_interrupt = nbio_v7_4_enable_doorbell_interrupt,
.update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating,
.update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep,
.get_clockgating_state = nbio_v7_4_get_clockgating_state,
@@ -271,4 +550,10 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.init_registers = nbio_v7_4_init_registers,
.detect_hw_virt = nbio_v7_4_detect_hw_virt,
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
+ .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
+ .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
+ .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
+ .query_ras_error_count = nbio_v7_4_query_ras_error_count,
+ .ras_late_init = amdgpu_nbio_ras_late_init,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
index c442865bac4f..b1ac82872752 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
@@ -26,6 +26,7 @@
#include "soc15_common.h"
+extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 662612f89c70..2d1bebdf1603 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -23,7 +23,8 @@
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
@@ -39,19 +40,23 @@
#include "gc/gc_10_1_0_sh_mask.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
+#include "smuio/smuio_11_0_0_offset.h"
#include "soc15.h"
#include "soc15_common.h"
#include "gmc_v10_0.h"
#include "gfxhub_v2_0.h"
#include "mmhub_v2_0.h"
+#include "nbio_v2_3.h"
#include "nv.h"
#include "navi10_ih.h"
#include "gfx_v10_0.h"
#include "sdma_v5_0.h"
#include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
#include "dce_virtual.h"
#include "mes_v10_1.h"
+#include "mxgpu_nv.h"
static const struct amd_ip_funcs nv_common_ip_funcs;
@@ -62,8 +67,8 @@ static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags, address, data;
u32 r;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, reg);
@@ -77,8 +82,8 @@ static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags, address, data;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, reg);
@@ -118,7 +123,7 @@ static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
static u32 nv_get_config_memsize(struct amdgpu_device *adev)
{
- return adev->nbio_funcs->get_memsize(adev);
+ return adev->nbio.funcs->get_memsize(adev);
}
static u32 nv_get_xclk(struct amdgpu_device *adev)
@@ -153,8 +158,27 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev)
static bool nv_read_bios_from_rom(struct amdgpu_device *adev,
u8 *bios, u32 length_bytes)
{
- /* TODO: will implement it when SMU header is available */
- return false;
+ u32 *dw_ptr;
+ u32 i, length_dw;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+
+ /* set rom index to 0 */
+ WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0);
+ /* read out the rom data */
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA));
+
+ return true;
}
static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
@@ -175,6 +199,7 @@ static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
@@ -278,7 +303,7 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)
/* wait for asic to come out of reset */
for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = adev->nbio_funcs->get_memsize(adev);
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
if (memsize != 0xffffffff)
break;
@@ -289,6 +314,28 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)
return ret;
}
+
+static bool nv_asic_supports_baco(struct amdgpu_device *adev)
+{
+ struct smu_context *smu = &adev->smu;
+
+ if (smu_baco_is_support(smu))
+ return true;
+ else
+ return false;
+}
+
+static enum amd_reset_method
+nv_asic_reset_method(struct amdgpu_device *adev)
+{
+ struct smu_context *smu = &adev->smu;
+
+ if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu))
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_MODE1;
+}
+
static int nv_asic_reset(struct amdgpu_device *adev)
{
@@ -303,10 +350,20 @@ static int nv_asic_reset(struct amdgpu_device *adev)
int ret = 0;
struct smu_context *smu = &adev->smu;
- if (smu_baco_is_support(smu))
- ret = smu_baco_reset(smu);
- else
+ if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ if (!adev->in_suspend)
+ amdgpu_inc_vram_lost(adev);
+ ret = smu_baco_enter(smu);
+ if (ret)
+ return ret;
+ ret = smu_baco_exit(smu);
+ if (ret)
+ return ret;
+ } else {
+ if (!adev->in_suspend)
+ amdgpu_inc_vram_lost(adev);
ret = nv_asic_mode1_reset(adev);
+ }
return ret;
}
@@ -350,8 +407,8 @@ static void nv_program_aspm(struct amdgpu_device *adev)
static void nv_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
- adev->nbio_funcs->enable_doorbell_aperture(adev, enable);
- adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable);
+ adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
}
static const struct amdgpu_ip_block_version nv_common_ip_block =
@@ -363,29 +420,65 @@ static const struct amdgpu_ip_block_version nv_common_ip_block =
.funcs = &nv_common_ip_funcs,
};
-int nv_set_ip_blocks(struct amdgpu_device *adev)
+static int nv_reg_base_init(struct amdgpu_device *adev)
{
- /* Set IP register base before any HW register access */
+ int r;
+
+ if (amdgpu_discovery) {
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r) {
+ DRM_WARN("failed to init reg base from ip discovery table, "
+ "fallback to legacy init method\n");
+ goto legacy_init;
+ }
+
+ return 0;
+ }
+
+legacy_init:
switch (adev->asic_type) {
case CHIP_NAVI10:
navi10_reg_base_init(adev);
break;
+ case CHIP_NAVI14:
+ navi14_reg_base_init(adev);
+ break;
+ case CHIP_NAVI12:
+ navi12_reg_base_init(adev);
+ break;
default:
return -EINVAL;
}
- adev->nbio_funcs = &nbio_v2_3_funcs;
+ return 0;
+}
- adev->nbio_funcs->detect_hw_virt(adev);
+int nv_set_ip_blocks(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* Set IP register base before any HW register access */
+ r = nv_reg_base_init(adev);
+ if (r)
+ return r;
+
+ adev->nbio.funcs = &nbio_v2_3_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
+
+ adev->nbio.funcs->detect_hw_virt(adev);
+
+ if (amdgpu_sriov_vf(adev))
+ adev->virt.ops = &xgpu_nv_virt_ops;
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
- is_support_sw_smu(adev))
+ !amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -396,12 +489,35 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
- is_support_sw_smu(adev))
+ !amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
if (adev->enable_mes)
amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
break;
+ case CHIP_NAVI12:
+ amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ !amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
+ !amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
+ break;
default:
return -EINVAL;
}
@@ -411,12 +527,12 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
static uint32_t nv_get_rev_id(struct amdgpu_device *adev)
{
- return adev->nbio_funcs->get_rev_id(adev);
+ return adev->nbio.funcs->get_rev_id(adev);
}
static void nv_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
- adev->nbio_funcs->hdp_flush(adev, ring);
+ adev->nbio.funcs->hdp_flush(adev, ring);
}
static void nv_invalidate_hdp(struct amdgpu_device *adev,
@@ -461,6 +577,16 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
+static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+
+ /* TODO
+ * dummy implement for pcie_replay_count sysfs interface
+ * */
+
+ return 0;
+}
+
static void nv_init_doorbell_index(struct amdgpu_device *adev)
{
adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
@@ -496,6 +622,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
.read_bios_from_rom = &nv_read_bios_from_rom,
.read_register = &nv_read_register,
.reset = &nv_asic_reset,
+ .reset_method = &nv_asic_reset_method,
.set_vga_state = &nv_vga_set_state,
.get_xclk = &nv_get_xclk,
.set_uvd_clocks = &nv_set_uvd_clocks,
@@ -507,13 +634,17 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
.need_full_reset = &nv_need_full_reset,
.get_pcie_usage = &nv_get_pcie_usage,
.need_reset_on_init = &nv_need_reset_on_init,
+ .get_pcie_replay_count = &nv_get_pcie_replay_count,
+ .supports_baco = &nv_asic_supports_baco,
};
static int nv_common_early_init(void *handle)
{
- bool psp_enabled = false;
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &nv_pcie_rreg;
@@ -528,10 +659,6 @@ static int nv_common_early_init(void *handle)
adev->asic_funcs = &nv_asic_funcs;
- if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) &&
- (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP)))
- psp_enabled = true;
-
adev->rev_id = nv_get_rev_id(adev);
adev->external_rev_id = 0xff;
switch (adev->asic_type) {
@@ -548,29 +675,95 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_BIF_MGCG |
AMD_CG_SUPPORT_BIF_LS;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
- AMD_PG_SUPPORT_MMHUB |
+ AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_ATHUB;
adev->external_rev_id = adev->rev_id + 0x1;
break;
+ case CHIP_NAVI14:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 20;
+ break;
+ case CHIP_NAVI12:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB;
+ /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0,
+ * as a consequence, the rev_id and external_rev_id are wrong.
+ * workaround it by hardcoding rev_id to 0 (default value).
+ */
+ if (amdgpu_sriov_vf(adev))
+ adev->rev_id = 0;
+ adev->external_rev_id = adev->rev_id + 0xa;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
}
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_setting(adev);
+ xgpu_nv_mailbox_set_irq_funcs(adev);
+ }
+
return 0;
}
static int nv_common_late_init(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_get_irq(adev);
+
return 0;
}
static int nv_common_sw_init(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_add_irq_id(adev);
+
return 0;
}
@@ -588,7 +781,13 @@ static int nv_common_hw_init(void *handle)
/* enable aspm */
nv_program_aspm(adev);
/* setup nbio registers */
- adev->nbio_funcs->init_registers(adev);
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers)
+ adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
nv_enable_doorbell_aperture(adev, true);
@@ -748,14 +947,16 @@ static int nv_common_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_NAVI10:
- adev->nbio_funcs->update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- adev->nbio_funcs->update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
nv_update_hdp_mem_power_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
nv_update_hdp_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -778,7 +979,7 @@ static void nv_common_get_clockgating_state(void *handle, u32 *flags)
if (amdgpu_sriov_vf(adev))
*flags = 0;
- adev->nbio_funcs->get_clockgating_state(adev, flags);
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
/* AMD_CG_SUPPORT_HDP_MGCG */
tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL);
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h
index 639c54933cc5..82e6cb432f3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -30,4 +30,6 @@ void nv_grbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
int nv_set_ip_blocks(struct amdgpu_device *adev);
int navi10_reg_base_init(struct amdgpu_device *adev);
+int navi14_reg_base_init(struct amdgpu_device *adev);
+int navi12_reg_base_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 5080a73a95a5..36b65797434e 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -233,8 +233,16 @@ enum psp_gfx_fw_type {
GFX_FW_TYPE_RLCP_CAM = 46, /* RLCP CAM NV */
GFX_FW_TYPE_RLC_SPP_CAM_EXT = 47, /* RLC SPP CAM EXT NV */
GFX_FW_TYPE_RLX6_DRAM_BOOT = 48, /* RLX6 DRAM BOOT NV */
- GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV */
- GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV */
+ GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV + RN */
+ GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV + RN */
+ GFX_FW_TYPE_DMUB = 51, /* DMUB RN */
+ GFX_FW_TYPE_SDMA2 = 52, /* SDMA2 MI */
+ GFX_FW_TYPE_SDMA3 = 53, /* SDMA3 MI */
+ GFX_FW_TYPE_SDMA4 = 54, /* SDMA4 MI */
+ GFX_FW_TYPE_SDMA5 = 55, /* SDMA5 MI */
+ GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */
+ GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */
+ GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */
GFX_FW_TYPE_MAX
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index ce1ea31feee0..7539104175e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -40,6 +40,9 @@
MODULE_FIRMWARE("amdgpu/raven_asd.bin");
MODULE_FIRMWARE("amdgpu/picasso_asd.bin");
MODULE_FIRMWARE("amdgpu/raven2_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven_ta.bin");
static int psp_v10_0_init_microcode(struct psp_context *psp)
{
@@ -48,7 +51,7 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
char fw_name[30];
int err = 0;
const struct psp_firmware_header_v1_0 *hdr;
-
+ const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
switch (adev->asic_type) {
@@ -79,7 +82,45 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
adev->psp.asd_start_addr = (uint8_t *)hdr +
le32_to_cpu(hdr->header.ucode_array_offset_bytes);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
+ err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
+ if (err) {
+ release_firmware(adev->psp.ta_fw);
+ adev->psp.ta_fw = NULL;
+ dev_info(adev->dev,
+ "psp v10.0: Failed to load firmware \"%s\"\n",
+ fw_name);
+ } else {
+ err = amdgpu_ucode_validate(adev->psp.ta_fw);
+ if (err)
+ goto out2;
+
+ ta_hdr = (const struct ta_firmware_header_v1_0 *)
+ adev->psp.ta_fw->data;
+ adev->psp.ta_hdcp_ucode_version =
+ le32_to_cpu(ta_hdr->ta_hdcp_ucode_version);
+ adev->psp.ta_hdcp_ucode_size =
+ le32_to_cpu(ta_hdr->ta_hdcp_size_bytes);
+ adev->psp.ta_hdcp_start_addr =
+ (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+
+ adev->psp.ta_dtm_ucode_version =
+ le32_to_cpu(ta_hdr->ta_dtm_ucode_version);
+ adev->psp.ta_dtm_ucode_size =
+ le32_to_cpu(ta_hdr->ta_dtm_size_bytes);
+ adev->psp.ta_dtm_start_addr =
+ (uint8_t *)adev->psp.ta_hdcp_start_addr +
+ le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
+ }
+
return 0;
+
+out2:
+ release_firmware(adev->psp.ta_fw);
+ adev->psp.ta_fw = NULL;
out:
if (err) {
dev_err(adev->dev,
@@ -189,54 +230,6 @@ static int psp_v10_0_ring_destroy(struct psp_context *psp,
return ret;
}
-static int psp_v10_0_cmd_submit(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index)
-{
- unsigned int psp_write_ptr_reg = 0;
- struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
- struct psp_ring *ring = &psp->km_ring;
- struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
- struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
- ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
- struct amdgpu_device *adev = psp->adev;
- uint32_t ring_size_dw = ring->ring_size / 4;
- uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
- /* KM (GPCOM) prepare write pointer */
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
- /* Update KM RB frame pointer to new frame */
- if ((psp_write_ptr_reg % ring_size_dw) == 0)
- write_frame = ring_buffer_start;
- else
- write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
- /* Check invalid write_frame ptr address */
- if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
- DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
- ring_buffer_start, ring_buffer_end, write_frame);
- DRM_ERROR("write_frame is pointing to address out of bounds\n");
- return -EINVAL;
- }
-
- /* Initialize KM RB frame */
- memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
- /* Update KM RB frame */
- write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
- write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
- write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
- write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
- write_frame->fence_value = index;
-
- /* Update the write Pointer in DWORDs */
- psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
- return 0;
-}
-
static int
psp_v10_0_sram_map(struct amdgpu_device *adev,
unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -366,15 +359,30 @@ static int psp_v10_0_mode1_reset(struct psp_context *psp)
return -EINVAL;
}
+static uint32_t psp_v10_0_ring_get_wptr(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ return RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+}
+
+static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
static const struct psp_funcs psp_v10_0_funcs = {
.init_microcode = psp_v10_0_init_microcode,
.ring_init = psp_v10_0_ring_init,
.ring_create = psp_v10_0_ring_create,
.ring_stop = psp_v10_0_ring_stop,
.ring_destroy = psp_v10_0_ring_destroy,
- .cmd_submit = psp_v10_0_cmd_submit,
.compare_sram_data = psp_v10_0_compare_sram_data,
.mode1_reset = psp_v10_0_mode1_reset,
+ .ring_get_wptr = psp_v10_0_ring_get_wptr,
+ .ring_set_wptr = psp_v10_0_ring_set_wptr,
};
void psp_v10_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 41b72588adcf..0829188c1a5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -22,6 +22,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
+#include <linux/vmalloc.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
@@ -43,6 +44,16 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
MODULE_FIRMWARE("amdgpu/navi10_sos.bin");
MODULE_FIRMWARE("amdgpu/navi10_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi10_ta.bin");
+MODULE_FIRMWARE("amdgpu/navi14_sos.bin");
+MODULE_FIRMWARE("amdgpu/navi14_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ta.bin");
+MODULE_FIRMWARE("amdgpu/navi12_sos.bin");
+MODULE_FIRMWARE("amdgpu/navi12_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi12_ta.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_sos.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
@@ -51,6 +62,8 @@ MODULE_FIRMWARE("amdgpu/navi10_asd.bin");
#define mmRLC_GPM_UCODE_DATA_NV10 0x5b62
#define mmSDMA0_UCODE_ADDR_NV10 0x5880
#define mmSDMA0_UCODE_DATA_NV10 0x5881
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
static int psp_v11_0_init_microcode(struct psp_context *psp)
{
@@ -60,6 +73,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
int err = 0;
const struct psp_firmware_header_v1_0 *sos_hdr;
const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
+ const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
const struct psp_firmware_header_v1_0 *asd_hdr;
const struct ta_firmware_header_v1_0 *ta_hdr;
@@ -72,6 +86,15 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
case CHIP_NAVI10:
chip_name = "navi10";
break;
+ case CHIP_NAVI14:
+ chip_name = "navi14";
+ break;
+ case CHIP_NAVI12:
+ chip_name = "navi12";
+ break;
+ case CHIP_ARCTURUS:
+ chip_name = "arcturus";
+ break;
default:
BUG();
}
@@ -107,6 +130,12 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes);
}
+ if (sos_hdr->header.header_version_minor == 2) {
+ sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data;
+ adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes);
+ adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes);
+ }
break;
default:
dev_err(adev->dev,
@@ -133,6 +162,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
switch (adev->asic_type) {
case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
if (err) {
@@ -158,6 +188,33 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
}
break;
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
+ err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
+ if (err) {
+ release_firmware(adev->psp.ta_fw);
+ adev->psp.ta_fw = NULL;
+ dev_info(adev->dev,
+ "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
+ } else {
+ err = amdgpu_ucode_validate(adev->psp.ta_fw);
+ if (err)
+ goto out2;
+
+ ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
+ adev->psp.ta_hdcp_ucode_version = le32_to_cpu(ta_hdr->ta_hdcp_ucode_version);
+ adev->psp.ta_hdcp_ucode_size = le32_to_cpu(ta_hdr->ta_hdcp_size_bytes);
+ adev->psp.ta_hdcp_start_addr = (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+
+ adev->psp.ta_dtm_ucode_version = le32_to_cpu(ta_hdr->ta_dtm_ucode_version);
+ adev->psp.ta_dtm_ucode_size = le32_to_cpu(ta_hdr->ta_dtm_size_bytes);
+ adev->psp.ta_dtm_start_addr = (uint8_t *)adev->psp.ta_hdcp_start_addr +
+ le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
+ }
break;
default:
BUG();
@@ -180,26 +237,55 @@ out:
return err;
}
+int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ int ret;
+ int retry_loop;
+
+ for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ /* Wait for bootloader to signify that is
+ ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000,
+ 0x80000000,
+ false);
+
+ if (ret == 0)
+ return 0;
+ }
+
+ return ret;
+}
+
+static bool psp_v11_0_is_sos_alive(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+
+ return sol_reg != 0x0;
+}
+
static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
{
int ret;
uint32_t psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
- uint32_t sol_reg;
/* Check tOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
- sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
- if (sol_reg) {
+ if (psp_v11_0_is_sos_alive(psp)) {
psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version);
return 0;
}
- /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_v11_0_wait_for_bootloader(psp);
if (ret)
return ret;
@@ -208,16 +294,14 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
/* Copy PSP KDB binary to memory */
memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size);
- /* Provide the sys driver to bootloader */
+ /* Provide the PSP KDB to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
- /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1*/
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_v11_0_wait_for_bootloader(psp);
return ret;
}
@@ -227,21 +311,17 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
int ret;
uint32_t psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
- uint32_t sol_reg;
/* Check sOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
- sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
- if (sol_reg) {
+ if (psp_v11_0_is_sos_alive(psp)) {
psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version);
return 0;
}
- /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_v11_0_wait_for_bootloader(psp);
if (ret)
return ret;
@@ -260,8 +340,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_v11_0_wait_for_bootloader(psp);
return ret;
}
@@ -271,18 +350,14 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
int ret;
unsigned int psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
- uint32_t sol_reg;
/* Check sOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
- sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
- if (sol_reg)
+ if (psp_v11_0_is_sos_alive(psp))
return 0;
- /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_v11_0_wait_for_bootloader(psp);
if (ret)
return ret;
@@ -373,6 +448,34 @@ static bool psp_v11_0_support_vmr_ring(struct psp_context *psp)
return false;
}
+static int psp_v11_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Write the ring destroy command*/
+ if (psp_v11_0_support_vmr_ring(psp))
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) */
+ if (psp_v11_0_support_vmr_ring(psp))
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ else
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
static int psp_v11_0_ring_create(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -382,6 +485,12 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
struct amdgpu_device *adev = psp->adev;
if (psp_v11_0_support_vmr_ring(psp)) {
+ ret = psp_v11_0_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
/* Write low address of the ring to C2PMSG_102 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
@@ -401,6 +510,14 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
0x80000000, 0x8000FFFF, false);
} else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ if (ret) {
+ DRM_ERROR("Failed to wait for sOS ready for ring creation\n");
+ return ret;
+ }
+
/* Write low address of the ring to C2PMSG_69 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
@@ -426,33 +543,6 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
return ret;
}
-static int psp_v11_0_ring_stop(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct amdgpu_device *adev = psp->adev;
-
- /* Write the ring destroy command*/
- if (psp_v11_0_support_vmr_ring(psp))
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
- GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
- else
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
- GFX_CTRL_CMD_ID_DESTROY_RINGS);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) */
- if (psp_v11_0_support_vmr_ring(psp))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
- else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
-
- return ret;
-}
static int psp_v11_0_ring_destroy(struct psp_context *psp,
enum psp_ring_type ring_type)
@@ -472,63 +562,6 @@ static int psp_v11_0_ring_destroy(struct psp_context *psp,
return ret;
}
-static int psp_v11_0_cmd_submit(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index)
-{
- unsigned int psp_write_ptr_reg = 0;
- struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem;
- struct psp_ring *ring = &psp->km_ring;
- struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
- struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
- ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
- struct amdgpu_device *adev = psp->adev;
- uint32_t ring_size_dw = ring->ring_size / 4;
- uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
- /* KM (GPCOM) prepare write pointer */
- if (psp_v11_0_support_vmr_ring(psp))
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
- else
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
- /* Update KM RB frame pointer to new frame */
- /* write_frame ptr increments by size of rb_frame in bytes */
- /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
- if ((psp_write_ptr_reg % ring_size_dw) == 0)
- write_frame = ring_buffer_start;
- else
- write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
- /* Check invalid write_frame ptr address */
- if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
- DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
- ring_buffer_start, ring_buffer_end, write_frame);
- DRM_ERROR("write_frame is pointing to address out of bounds\n");
- return -EINVAL;
- }
-
- /* Initialize KM RB frame */
- memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
- /* Update KM RB frame */
- write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
- write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
- write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
- write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
- write_frame->fence_value = index;
-
- /* Update the write Pointer in DWORDs */
- psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
- if (psp_v11_0_support_vmr_ring(psp)) {
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
- } else
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
- return 0;
-}
-
static int
psp_v11_0_sram_map(struct amdgpu_device *adev,
unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -865,6 +898,216 @@ static int psp_v11_0_rlc_autoload_start(struct psp_context *psp)
return psp_rlc_autoload_start(psp);
}
+static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+ int ret;
+ int i;
+ uint32_t data_32;
+ int max_wait;
+ struct amdgpu_device *adev = psp->adev;
+
+ data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg);
+
+ max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+ for (i = 0; i < max_wait; i++) {
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret == 0)
+ break;
+ }
+ if (i < max_wait)
+ ret = 0;
+ else
+ ret = -ETIME;
+
+ DRM_DEBUG("training %s %s, cost %d @ %d ms\n",
+ (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+ (ret == 0) ? "succeed" : "failed",
+ i, adev->usec_timeout/1000);
+ return ret;
+}
+
+static void psp_v11_0_memory_training_fini(struct psp_context *psp)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+ ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+ kfree(ctx->sys_cache);
+ ctx->sys_cache = NULL;
+}
+
+static int psp_v11_0_memory_training_init(struct psp_context *psp)
+{
+ int ret;
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+ if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
+ DRM_DEBUG("memory training is not supported!\n");
+ return 0;
+ }
+
+ ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
+ if (ctx->sys_cache == NULL) {
+ DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
+ ret = -ENOMEM;
+ goto Err_out;
+ }
+
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
+ ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
+ return 0;
+
+Err_out:
+ psp_v11_0_memory_training_fini(psp);
+ return ret;
+}
+
+/*
+ * save and restore proces
+ */
+static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+ uint32_t *pcache = (uint32_t*)ctx->sys_cache;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t p2c_header[4];
+ uint32_t sz;
+ void *buf;
+ int ret;
+
+ if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+ DRM_DEBUG("Memory training is not supported.\n");
+ return 0;
+ } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+ DRM_ERROR("Memory training initialization failure.\n");
+ return -EINVAL;
+ }
+
+ if (psp_v11_0_is_sos_alive(psp)) {
+ DRM_DEBUG("SOS is alive, skip memory training.\n");
+ return 0;
+ }
+
+ amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+ DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+ pcache[0], pcache[1], pcache[2], pcache[3],
+ p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ DRM_DEBUG("Short training depends on restore.\n");
+ ops |= PSP_MEM_TRAIN_RESTORE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_RESTORE) &&
+ pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ DRM_DEBUG("sys_cache[0] is invalid, restore depends on save.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ pcache[3] == p2c_header[3])) {
+ DRM_DEBUG("sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_SAVE) &&
+ p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ DRM_DEBUG("p2c_header[0] is invalid, save depends on long training.\n");
+ ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ DRM_DEBUG("Memory training ops:%x.\n", ops);
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ /*
+ * Long traing will encroach certain mount of bottom VRAM,
+ * saving the content of this bottom VRAM to system memory
+ * before training, and restoring it after training to avoid
+ * VRAM corruption.
+ */
+ sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+
+ if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
+ DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+ adev->gmc.visible_vram_size,
+ adev->mman.aper_base_kaddr);
+ return -EINVAL;
+ }
+
+ buf = vmalloc(sz);
+ if (!buf) {
+ DRM_ERROR("failed to allocate system memory.\n");
+ return -ENOMEM;
+ }
+
+ memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
+ ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+ if (ret) {
+ DRM_ERROR("Send long training msg failed.\n");
+ vfree(buf);
+ return ret;
+ }
+
+ memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
+ vfree(buf);
+ }
+
+ if (ops & PSP_MEM_TRAIN_SAVE) {
+ amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+ }
+
+ if (ops & PSP_MEM_TRAIN_RESTORE) {
+ amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ ret = psp_v11_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
+ PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
+ if (ret) {
+ DRM_ERROR("send training msg failed.\n");
+ return ret;
+ }
+ }
+ ctx->training_cnt++;
+ return 0;
+}
+
+static uint32_t psp_v11_0_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v11_0_support_vmr_ring(psp))
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v11_0_support_vmr_ring(psp)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
static const struct psp_funcs psp_v11_0_funcs = {
.init_microcode = psp_v11_0_init_microcode,
.bootloader_load_kdb = psp_v11_0_bootloader_load_kdb,
@@ -874,7 +1117,6 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ring_create = psp_v11_0_ring_create,
.ring_stop = psp_v11_0_ring_stop,
.ring_destroy = psp_v11_0_ring_destroy,
- .cmd_submit = psp_v11_0_cmd_submit,
.compare_sram_data = psp_v11_0_compare_sram_data,
.mode1_reset = psp_v11_0_mode1_reset,
.xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info,
@@ -885,6 +1127,11 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ras_trigger_error = psp_v11_0_ras_trigger_error,
.ras_cure_posion = psp_v11_0_ras_cure_posion,
.rlc_autoload_start = psp_v11_0_rlc_autoload_start,
+ .mem_training_init = psp_v11_0_memory_training_init,
+ .mem_training_fini = psp_v11_0_memory_training_fini,
+ .mem_training = psp_v11_0_memory_training,
+ .ring_get_wptr = psp_v11_0_ring_get_wptr,
+ .ring_set_wptr = psp_v11_0_ring_set_wptr,
};
void psp_v11_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
new file mode 100644
index 000000000000..58d8b6d732e8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -0,0 +1,534 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v12_0.h"
+
+#include "mp/mp_12_0_0_offset.h"
+#include "mp/mp_12_0_0_sh_mask.h"
+#include "gc/gc_9_0_offset.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "nbio/nbio_7_4_offset.h"
+
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/renoir_asd.bin");
+/* address block */
+#define smnMP1_FIRMWARE_FLAGS 0x3010024
+
+static int psp_v12_0_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ const char *chip_name;
+ char fw_name[30];
+ int err = 0;
+ const struct psp_firmware_header_v1_0 *asd_hdr;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_RENOIR:
+ chip_name = "renoir";
+ break;
+ default:
+ BUG();
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
+ err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
+ if (err)
+ goto out1;
+
+ err = amdgpu_ucode_validate(adev->psp.asd_fw);
+ if (err)
+ goto out1;
+
+ asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
+ adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
+ adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version);
+ adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
+ adev->psp.asd_start_addr = (uint8_t *)asd_hdr +
+ le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
+
+ return 0;
+
+out1:
+ release_firmware(adev->psp.asd_fw);
+ adev->psp.asd_fw = NULL;
+
+ return err;
+}
+
+static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg) {
+ psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
+ printk("sos fw version = 0x%x.\n", psp->sos_fw_version);
+ return 0;
+ }
+
+ /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy PSP System Driver binary to memory */
+ memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
+
+ /* Provide the sys driver to bootloader */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = 1 << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
+static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return 0;
+
+ /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy Secure OS binary to PSP memory */
+ memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
+
+ /* Provide the PSP secure OS to bootloader */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = 2 << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
+ 0, true);
+
+ return ret;
+}
+
+static void psp_v12_0_reroute_ih(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t tmp;
+
+ /* Change IH ring for VMC */
+ tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
+
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
+
+ mdelay(20);
+ psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+
+ /* Change IH ring for UMC */
+ tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
+
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
+
+ mdelay(20);
+ psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+}
+
+static int psp_v12_0_ring_init(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ psp_v12_0_reroute_ih(psp);
+
+ ring = &psp->km_ring;
+
+ ring->ring_type = ring_type;
+
+ /* allocate 4k Page of Local Frame Buffer memory for ring */
+ ring->ring_size = 0x1000;
+ ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+ if (ret) {
+ ring->ring_size = 0;
+ return ret;
+ }
+
+ return 0;
+}
+
+static bool psp_v12_0_support_vmr_ring(struct psp_context *psp)
+{
+ if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045)
+ return true;
+ return false;
+}
+
+static int psp_v12_0_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v12_0_support_vmr_ring(psp)) {
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x8000FFFF, false);
+
+ } else {
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+ }
+
+ return ret;
+}
+
+static int psp_v12_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Write the ring destroy command*/
+ if (psp_v12_0_support_vmr_ring(psp))
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) */
+ if (psp_v12_0_support_vmr_ring(psp))
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ else
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
+static int psp_v12_0_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v12_0_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static int
+psp_v12_0_sram_map(struct amdgpu_device *adev,
+ unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
+ unsigned int *sram_data_reg_offset,
+ enum AMDGPU_UCODE_ID ucode_id)
+{
+ int ret = 0;
+
+ switch (ucode_id) {
+/* TODO: needs to confirm */
+#if 0
+ case AMDGPU_UCODE_ID_SMC:
+ *sram_offset = 0;
+ *sram_addr_reg_offset = 0;
+ *sram_data_reg_offset = 0;
+ break;
+#endif
+
+ case AMDGPU_UCODE_ID_CP_CE:
+ *sram_offset = 0x0;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_CP_PFP:
+ *sram_offset = 0x0;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_CP_ME:
+ *sram_offset = 0x0;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ *sram_offset = 0x10000;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ *sram_offset = 0x10000;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_RLC_G:
+ *sram_offset = 0x2000;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_SDMA0:
+ *sram_offset = 0x0;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
+ break;
+
+/* TODO: needs to confirm */
+#if 0
+ case AMDGPU_UCODE_ID_SDMA1:
+ *sram_offset = ;
+ *sram_addr_reg_offset = ;
+ break;
+
+ case AMDGPU_UCODE_ID_UVD:
+ *sram_offset = ;
+ *sram_addr_reg_offset = ;
+ break;
+
+ case AMDGPU_UCODE_ID_VCE:
+ *sram_offset = ;
+ *sram_addr_reg_offset = ;
+ break;
+#endif
+
+ case AMDGPU_UCODE_ID_MAXIMUM:
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static bool psp_v12_0_compare_sram_data(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ enum AMDGPU_UCODE_ID ucode_type)
+{
+ int err = 0;
+ unsigned int fw_sram_reg_val = 0;
+ unsigned int fw_sram_addr_reg_offset = 0;
+ unsigned int fw_sram_data_reg_offset = 0;
+ unsigned int ucode_size;
+ uint32_t *ucode_mem = NULL;
+ struct amdgpu_device *adev = psp->adev;
+
+ err = psp_v12_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
+ &fw_sram_data_reg_offset, ucode_type);
+ if (err)
+ return false;
+
+ WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val);
+
+ ucode_size = ucode->ucode_size;
+ ucode_mem = (uint32_t *)ucode->kaddr;
+ while (ucode_size) {
+ fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
+
+ if (*ucode_mem != fw_sram_reg_val)
+ return false;
+
+ ucode_mem++;
+ /* 4 bytes */
+ ucode_size -= 4;
+ }
+
+ return true;
+}
+
+static int psp_v12_0_mode1_reset(struct psp_context *psp)
+{
+ int ret;
+ uint32_t offset;
+ struct amdgpu_device *adev = psp->adev;
+
+ offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
+
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+
+ if (ret) {
+ DRM_INFO("psp is not working correctly before mode1 reset!\n");
+ return -EINVAL;
+ }
+
+ /*send the mode 1 reset command*/
+ WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
+
+ msleep(500);
+
+ offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
+
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+
+ if (ret) {
+ DRM_INFO("psp mode 1 reset failed!\n");
+ return -EINVAL;
+ }
+
+ DRM_INFO("psp mode1 reset succeed \n");
+
+ return 0;
+}
+
+static uint32_t psp_v12_0_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v12_0_support_vmr_ring(psp))
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v12_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v12_0_support_vmr_ring(psp)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
+static const struct psp_funcs psp_v12_0_funcs = {
+ .init_microcode = psp_v12_0_init_microcode,
+ .bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv,
+ .bootloader_load_sos = psp_v12_0_bootloader_load_sos,
+ .ring_init = psp_v12_0_ring_init,
+ .ring_create = psp_v12_0_ring_create,
+ .ring_stop = psp_v12_0_ring_stop,
+ .ring_destroy = psp_v12_0_ring_destroy,
+ .compare_sram_data = psp_v12_0_compare_sram_data,
+ .mode1_reset = psp_v12_0_mode1_reset,
+ .ring_get_wptr = psp_v12_0_ring_get_wptr,
+ .ring_set_wptr = psp_v12_0_ring_set_wptr,
+};
+
+void psp_v12_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v12_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h
new file mode 100644
index 000000000000..241693ab1990
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V12_0_H__
+#define __PSP_V12_0_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v12_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 019c47feee42..735c43c7daab 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -179,7 +179,7 @@ static bool psp_v3_1_match_version(struct amdgpu_device *adev, uint32_t ver)
* Double check if the latest four legacy versions.
* If yes, it is still the right version.
*/
- for (i = 0; i < sizeof(sos_old_versions) / sizeof(uint32_t); i++) {
+ for (i = 0; i < ARRAY_SIZE(sos_old_versions); i++) {
if (sos_old_versions[i] == adev->psp.sos_fw_version)
return true;
}
@@ -410,65 +410,6 @@ static int psp_v3_1_ring_destroy(struct psp_context *psp,
return ret;
}
-static int psp_v3_1_cmd_submit(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index)
-{
- unsigned int psp_write_ptr_reg = 0;
- struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
- struct psp_ring *ring = &psp->km_ring;
- struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
- struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
- ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
- struct amdgpu_device *adev = psp->adev;
- uint32_t ring_size_dw = ring->ring_size / 4;
- uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
- /* KM (GPCOM) prepare write pointer */
- if (psp_v3_1_support_vmr_ring(psp))
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
- else
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
- /* Update KM RB frame pointer to new frame */
- /* write_frame ptr increments by size of rb_frame in bytes */
- /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
- if ((psp_write_ptr_reg % ring_size_dw) == 0)
- write_frame = ring_buffer_start;
- else
- write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
- /* Check invalid write_frame ptr address */
- if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
- DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
- ring_buffer_start, ring_buffer_end, write_frame);
- DRM_ERROR("write_frame is pointing to address out of bounds\n");
- return -EINVAL;
- }
-
- /* Initialize KM RB frame */
- memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
- /* Update KM RB frame */
- write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
- write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
- write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
- write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
- write_frame->fence_value = index;
-
- /* Update the write Pointer in DWORDs */
- psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
- if (psp_v3_1_support_vmr_ring(psp)) {
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
- /* send interrupt to PSP for SRIOV ring write pointer update */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
- GFX_CTRL_CMD_ID_CONSUME_CMD);
- } else
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
- return 0;
-}
-
static int
psp_v3_1_sram_map(struct amdgpu_device *adev,
unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -636,12 +577,37 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
{
- if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
+ if (amdgpu_sriov_vf(psp->adev))
return true;
return false;
}
+static uint32_t psp_v3_1_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v3_1_support_vmr_ring(psp))
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+ return data;
+}
+
+static void psp_v3_1_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v3_1_support_vmr_ring(psp)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ /* send interrupt to PSP for SRIOV ring write pointer update */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
static const struct psp_funcs psp_v3_1_funcs = {
.init_microcode = psp_v3_1_init_microcode,
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
@@ -650,11 +616,12 @@ static const struct psp_funcs psp_v3_1_funcs = {
.ring_create = psp_v3_1_ring_create,
.ring_stop = psp_v3_1_ring_stop,
.ring_destroy = psp_v3_1_ring_destroy,
- .cmd_submit = psp_v3_1_cmd_submit,
.compare_sram_data = psp_v3_1_compare_sram_data,
.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
.mode1_reset = psp_v3_1_mode1_reset,
.support_vmr_ring = psp_v3_1_support_vmr_ring,
+ .ring_get_wptr = psp_v3_1_ring_get_wptr,
+ .ring_set_wptr = psp_v3_1_ring_set_wptr,
};
void psp_v3_1_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index a10175838013..7d509a40076f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
- sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1260,16 +1260,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
{
- struct drm_gpu_scheduler *sched;
unsigned i;
adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 5f4e2c616241..b6109a99fc43 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
- sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1698,16 +1698,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
- struct drm_gpu_scheduler *sched;
unsigned i;
adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version sdma_v3_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4428018672d3..e55884d204bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -34,6 +34,18 @@
#include "sdma0/sdma0_4_2_sh_mask.h"
#include "sdma1/sdma1_4_2_offset.h"
#include "sdma1/sdma1_4_2_sh_mask.h"
+#include "sdma2/sdma2_4_2_2_offset.h"
+#include "sdma2/sdma2_4_2_2_sh_mask.h"
+#include "sdma3/sdma3_4_2_2_offset.h"
+#include "sdma3/sdma3_4_2_2_sh_mask.h"
+#include "sdma4/sdma4_4_2_2_offset.h"
+#include "sdma4/sdma4_4_2_2_sh_mask.h"
+#include "sdma5/sdma5_4_2_2_offset.h"
+#include "sdma5/sdma5_4_2_2_sh_mask.h"
+#include "sdma6/sdma6_4_2_2_offset.h"
+#include "sdma6/sdma6_4_2_2_sh_mask.h"
+#include "sdma7/sdma7_4_2_2_offset.h"
+#include "sdma7/sdma7_4_2_2_sh_mask.h"
#include "hdp/hdp_4_0_offset.h"
#include "sdma0/sdma0_4_1_default.h"
@@ -55,6 +67,8 @@ MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");
+MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
@@ -68,6 +82,7 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev);
static const struct soc15_reg_golden golden_settings_sdma_4[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
@@ -202,25 +217,232 @@ static const struct soc15_reg_golden golden_settings_sdma_rv2[] =
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)
};
+static const struct soc15_reg_golden golden_settings_sdma_arct[] =
+{
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002)
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
+};
+
+static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = {
+ { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
+ 0, 0,
+ },
+ { "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED),
+ 0, 0,
+ },
+};
+
static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
u32 instance, u32 offset)
{
- return ( 0 == instance ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) :
- (adev->reg_offset[SDMA1_HWIP][0][0] + offset));
+ switch (instance) {
+ case 0:
+ return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
+ case 1:
+ return (adev->reg_offset[SDMA1_HWIP][0][0] + offset);
+ case 2:
+ return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
+ case 3:
+ return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
+ case 4:
+ return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
+ case 5:
+ return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
+ case 6:
+ return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
+ case 7:
+ return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
+ default:
+ break;
+ }
+ return 0;
+}
+
+static unsigned sdma_v4_0_seq_to_irq_id(int seq_num)
+{
+ switch (seq_num) {
+ case 0:
+ return SOC15_IH_CLIENTID_SDMA0;
+ case 1:
+ return SOC15_IH_CLIENTID_SDMA1;
+ case 2:
+ return SOC15_IH_CLIENTID_SDMA2;
+ case 3:
+ return SOC15_IH_CLIENTID_SDMA3;
+ case 4:
+ return SOC15_IH_CLIENTID_SDMA4;
+ case 5:
+ return SOC15_IH_CLIENTID_SDMA5;
+ case 6:
+ return SOC15_IH_CLIENTID_SDMA6;
+ case 7:
+ return SOC15_IH_CLIENTID_SDMA7;
+ default:
+ break;
+ }
+ return -EINVAL;
+}
+
+static int sdma_v4_0_irq_id_to_seq(unsigned client_id)
+{
+ switch (client_id) {
+ case SOC15_IH_CLIENTID_SDMA0:
+ return 0;
+ case SOC15_IH_CLIENTID_SDMA1:
+ return 1;
+ case SOC15_IH_CLIENTID_SDMA2:
+ return 2;
+ case SOC15_IH_CLIENTID_SDMA3:
+ return 3;
+ case SOC15_IH_CLIENTID_SDMA4:
+ return 4;
+ case SOC15_IH_CLIENTID_SDMA5:
+ return 5;
+ case SOC15_IH_CLIENTID_SDMA6:
+ return 6;
+ case SOC15_IH_CLIENTID_SDMA7:
+ return 7;
+ default:
+ break;
+ }
+ return -EINVAL;
}
static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA10:
- if (!amdgpu_virt_support_skip_setting(adev)) {
- soc15_program_register_sequence(adev,
- golden_settings_sdma_4,
- ARRAY_SIZE(golden_settings_sdma_4));
- soc15_program_register_sequence(adev,
- golden_settings_sdma_vg10,
- ARRAY_SIZE(golden_settings_sdma_vg10));
- }
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4,
+ ARRAY_SIZE(golden_settings_sdma_4));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_vg10,
+ ARRAY_SIZE(golden_settings_sdma_vg10));
break;
case CHIP_VEGA12:
soc15_program_register_sequence(adev,
@@ -241,6 +463,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma1_4_2,
ARRAY_SIZE(golden_settings_sdma1_4_2));
break;
+ case CHIP_ARCTURUS:
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_arct,
+ ARRAY_SIZE(golden_settings_sdma_arct));
+ break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
golden_settings_sdma_4_1,
@@ -254,11 +481,53 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_rv1,
ARRAY_SIZE(golden_settings_sdma_rv1));
break;
+ case CHIP_RENOIR:
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4_3,
+ ARRAY_SIZE(golden_settings_sdma_4_3));
+ break;
default:
break;
}
}
+static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
+{
+ int err = 0;
+ const struct sdma_firmware_header_v1_0 *hdr;
+
+ err = amdgpu_ucode_validate(sdma_inst->fw);
+ if (err)
+ return err;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
+
+ if (sdma_inst->feature_version >= 20)
+ sdma_inst->burst_nop = true;
+
+ return 0;
+}
+
+static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.instance[i].fw != NULL)
+ release_firmware(adev->sdma.instance[i].fw);
+
+ /* arcturus shares the same FW memory across
+ all SDMA isntances */
+ if (adev->asic_type == CHIP_ARCTURUS)
+ break;
+ }
+
+ memset((void*)adev->sdma.instance, 0,
+ sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
+}
+
/**
* sdma_v4_0_init_microcode - load ucode images from disk
*
@@ -278,7 +547,6 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
int err = 0, i;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
- const struct sdma_firmware_header_v1_0 *hdr;
DRM_DEBUG("\n");
@@ -300,30 +568,52 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
else
chip_name = "raven";
break;
+ case CHIP_ARCTURUS:
+ chip_name = "arcturus";
+ break;
+ case CHIP_RENOIR:
+ chip_name = "renoir";
+ break;
default:
BUG();
}
- for (i = 0; i < adev->sdma.num_instances; i++) {
- if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
- else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
- if (err)
- goto out;
- hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
- adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
- adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
- if (adev->sdma.instance[i].feature_version >= 20)
- adev->sdma.instance[i].burst_nop = true;
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+
+ err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]);
+ if (err)
+ goto out;
+
+ for (i = 1; i < adev->sdma.num_instances; i++) {
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* Acturus will leverage the same FW memory
+ for every SDMA instance */
+ memcpy((void*)&adev->sdma.instance[i],
+ (void*)&adev->sdma.instance[0],
+ sizeof(struct amdgpu_sdma_instance));
+ }
+ else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
+
+ err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]);
+ if (err)
+ goto out;
+ }
+ }
+
+ DRM_DEBUG("psp_load == '%s'\n",
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
info->fw = adev->sdma.instance[i].fw;
@@ -332,13 +622,11 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
}
}
+
out:
if (err) {
DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+ sdma_v4_0_destroy_inst_ctx(adev);
}
return err;
}
@@ -510,7 +798,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
- sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -559,16 +847,13 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask = 0;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- if (ring->me == 0)
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
- else
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
sdma_v4_0_wait_reg_mem(ring, 0, 1,
- adev->nbio_funcs->get_hdp_flush_done_offset(adev),
- adev->nbio_funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
ref_and_mask, ref_and_mask, 10);
}
@@ -620,26 +905,27 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
- int i;
+ int i, unset = 0;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sdma[i] = &adev->sdma.instance[i].ring;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
+ if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ unset = 1;
+ }
- for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
- }
- sdma0->sched.ready = false;
- sdma1->sched.ready = false;
+ sdma[i]->sched.ready = false;
+ }
}
/**
@@ -663,16 +949,20 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
*/
static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page;
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
int i;
-
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ bool unset = false;
for (i = 0; i < adev->sdma.num_instances; i++) {
+ sdma[i] = &adev->sdma.instance[i].page;
+
+ if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
+ (unset == false)) {
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ unset = true;
+ }
+
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
RB_ENABLE, 0);
@@ -681,10 +971,9 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
IB_ENABLE, 0);
WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
- }
- sdma0->sched.ready = false;
- sdma1->sched.ready = false;
+ sdma[i]->sched.ready = false;
+ }
}
/**
@@ -1018,6 +1307,7 @@ static void sdma_v4_0_init_pg(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_RAVEN:
+ case CHIP_RENOIR:
sdma_v4_1_init_power_gating(adev);
sdma_v4_1_update_power_gating(adev, true);
break;
@@ -1389,7 +1679,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1473,8 +1763,10 @@ static int sdma_v4_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- if (adev->asic_type == CHIP_RAVEN)
+ if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR)
adev->sdma.num_instances = 1;
+ else if (adev->asic_type == CHIP_ARCTURUS)
+ adev->sdma.num_instances = 8;
else
adev->sdma.num_instances = 2;
@@ -1494,109 +1786,33 @@ static int sdma_v4_0_early_init(void *handle)
sdma_v4_0_set_buffer_funcs(adev);
sdma_v4_0_set_vm_pte_funcs(adev);
sdma_v4_0_set_irq_funcs(adev);
+ sdma_v4_0_set_ras_funcs(adev);
return 0;
}
static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
struct amdgpu_iv_entry *entry);
static int sdma_v4_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ras_common_if **ras_if = &adev->sdma.ras_if;
struct ras_ih_if ih_info = {
.cb = sdma_v4_0_process_ras_data_cb,
};
- struct ras_fs_if fs_info = {
- .sysfs_name = "sdma_err_count",
- .debugfs_name = "sdma_err_inject",
- };
- struct ras_common_if ras_block = {
- .block = AMDGPU_RAS_BLOCK__SDMA,
- .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
- .sub_block_index = 0,
- .name = "sdma",
- };
- int r;
-
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
- amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
- return 0;
- }
-
- /* handle resume path. */
- if (*ras_if) {
- /* resend ras TA enable cmd during resume.
- * prepare to handle failure.
- */
- ih_info.head = **ras_if;
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- /* request a gpu reset. will run again. */
- amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__SDMA);
- return 0;
- }
- /* fail to enable ras, cleanup all. */
- goto irq;
- }
- /* enable successfully. continue. */
- goto resume;
- }
-
- *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
- if (!*ras_if)
- return -ENOMEM;
-
- **ras_if = ras_block;
+ int i;
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__SDMA);
- r = 0;
- }
- goto feature;
+ /* read back edc counter registers to clear the counters */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
}
- ih_info.head = **ras_if;
- fs_info.head = **ras_if;
-
- r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
- if (r)
- goto interrupt;
-
- amdgpu_ras_debugfs_create(adev, &fs_info);
-
- r = amdgpu_ras_sysfs_create(adev, &fs_info);
- if (r)
- goto sysfs;
-resume:
- r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0);
- if (r)
- goto irq;
-
- r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1);
- if (r) {
- amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0);
- goto irq;
- }
-
- return 0;
-irq:
- amdgpu_ras_sysfs_remove(adev, *ras_if);
-sysfs:
- amdgpu_ras_debugfs_remove(adev, *ras_if);
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-interrupt:
- amdgpu_ras_feature_enable(adev, *ras_if, 0);
-feature:
- kfree(*ras_if);
- *ras_if = NULL;
- return r;
+ if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
+ return adev->sdma.funcs->ras_late_init(adev, &ih_info);
+ else
+ return 0;
}
static int sdma_v4_0_sw_init(void *handle)
@@ -1606,28 +1822,22 @@ static int sdma_v4_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP,
- &adev->sdma.trap_irq);
- if (r)
- return r;
-
- /* SDMA trap event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP,
- &adev->sdma.trap_irq);
- if (r)
- return r;
-
- /* SDMA SRAM ECC event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
- &adev->sdma.ecc_irq);
- if (r)
- return r;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+ }
/* SDMA SRAM ECC event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC,
- &adev->sdma.ecc_irq);
- if (r)
- return r;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
+ &adev->sdma.ecc_irq);
+ if (r)
+ return r;
+ }
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
@@ -1641,11 +1851,8 @@ static int sdma_v4_0_sw_init(void *handle)
ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
sprintf(ring->name, "sdma%d", i);
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->sdma.trap_irq,
- (i == 0) ?
- AMDGPU_SDMA_IRQ_INSTANCE0 :
- AMDGPU_SDMA_IRQ_INSTANCE1);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
if (r)
return r;
@@ -1663,9 +1870,7 @@ static int sdma_v4_0_sw_init(void *handle)
sprintf(ring->name, "page%d", i);
r = amdgpu_ring_init(adev, ring, 1024,
&adev->sdma.trap_irq,
- (i == 0) ?
- AMDGPU_SDMA_IRQ_INSTANCE0 :
- AMDGPU_SDMA_IRQ_INSTANCE1);
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
if (r)
return r;
}
@@ -1679,21 +1884,8 @@ static int sdma_v4_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
- adev->sdma.ras_if) {
- struct ras_common_if *ras_if = adev->sdma.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- };
-
- /*remove fs first*/
- amdgpu_ras_debugfs_remove(adev, ras_if);
- amdgpu_ras_sysfs_remove(adev, ras_if);
- /*remove the IH*/
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
- amdgpu_ras_feature_enable(adev, ras_if, 0);
- kfree(ras_if);
- }
+ if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
+ adev->sdma.funcs->ras_fini(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
@@ -1701,10 +1893,7 @@ static int sdma_v4_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->sdma.instance[i].page);
}
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+ sdma_v4_0_destroy_inst_ctx(adev);
return 0;
}
@@ -1714,11 +1903,13 @@ static int sdma_v4_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->set_powergating_by_smu)
+ if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu) ||
+ (adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset))
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
- sdma_v4_0_init_golden_registers(adev);
+ if (!amdgpu_sriov_vf(adev))
+ sdma_v4_0_init_golden_registers(adev);
r = sdma_v4_0_start(adev);
@@ -1728,18 +1919,22 @@ static int sdma_v4_0_hw_init(void *handle)
static int sdma_v4_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
if (amdgpu_sriov_vf(adev))
return 0;
- amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0);
- amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
- if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
- && adev->powerplay.pp_funcs->set_powergating_by_smu)
+ if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
+ && adev->powerplay.pp_funcs->set_powergating_by_smu) ||
+ adev->asic_type == CHIP_RENOIR)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
return 0;
@@ -1776,15 +1971,17 @@ static bool sdma_v4_0_is_idle(void *handle)
static int sdma_v4_0_wait_for_idle(void *handle)
{
- unsigned i;
- u32 sdma0, sdma1;
+ unsigned i, j;
+ u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->usec_timeout; i++) {
- sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG);
- sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG);
-
- if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
+ for (j = 0; j < adev->sdma.num_instances; j++) {
+ sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG);
+ if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK))
+ break;
+ }
+ if (j == adev->sdma.num_instances)
return 0;
udelay(1);
}
@@ -1820,17 +2017,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
uint32_t instance;
DRM_DEBUG("IH: SDMA trap\n");
- switch (entry->client_id) {
- case SOC15_IH_CLIENTID_SDMA0:
- instance = 0;
- break;
- case SOC15_IH_CLIENTID_SDMA1:
- instance = 1;
- break;
- default:
- return 0;
- }
-
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
switch (entry->ring_id) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[instance].ring);
@@ -1851,55 +2038,26 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
}
static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
struct amdgpu_iv_entry *entry)
{
- uint32_t instance, err_source;
-
- switch (entry->client_id) {
- case SOC15_IH_CLIENTID_SDMA0:
- instance = 0;
- break;
- case SOC15_IH_CLIENTID_SDMA1:
- instance = 1;
- break;
- default:
- return 0;
- }
-
- switch (entry->src_id) {
- case SDMA0_4_0__SRCID__SDMA_SRAM_ECC:
- err_source = 0;
- break;
- case SDMA0_4_0__SRCID__SDMA_ECC:
- err_source = 1;
- break;
- default:
- return 0;
- }
-
- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
-
- amdgpu_ras_reset_gpu(adev, 0);
-
- return AMDGPU_RAS_UE;
-}
+ int instance;
-static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- struct ras_common_if *ras_if = adev->sdma.ras_if;
- struct ras_dispatch_if ih_data = {
- .entry = entry,
- };
+ /* When “Full RAS” is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood
+ */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ goto out;
- if (!ras_if)
- return 0;
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
+ goto out;
- ih_data.head = *ras_if;
+ amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
- amdgpu_ras_interrupt_dispatch(adev, &ih_data);
- return 0;
+out:
+ return AMDGPU_RAS_SUCCESS;
}
static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
@@ -1910,16 +2068,9 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
DRM_ERROR("Illegal instruction in SDMA command stream\n");
- switch (entry->client_id) {
- case SOC15_IH_CLIENTID_SDMA0:
- instance = 0;
- break;
- case SOC15_IH_CLIENTID_SDMA1:
- instance = 1;
- break;
- default:
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
return 0;
- }
switch (entry->ring_id) {
case 0:
@@ -1936,14 +2087,10 @@ static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
{
u32 sdma_edc_config;
- u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ?
- sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG) :
- sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG);
-
- sdma_edc_config = RREG32(reg_offset);
+ sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG);
sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,
state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
- WREG32(reg_offset, sdma_edc_config);
+ WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config);
return 0;
}
@@ -1953,61 +2100,35 @@ static void sdma_v4_0_update_medium_grain_clock_gating(
bool enable)
{
uint32_t data, def;
+ int i;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
- /* enable sdma0 clock gating */
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
- data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
- if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
-
- if (adev->sdma.num_instances > 1) {
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
- data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
+ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
+ WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
}
} else {
- /* disable sdma0 clock gating */
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
- data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
-
- if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
-
- if (adev->sdma.num_instances > 1) {
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
- data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
+ data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
+ WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
}
}
}
@@ -2018,34 +2139,23 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
bool enable)
{
uint32_t data, def;
+ int i;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
- /* 1-not override: enable sdma0 mem light sleep */
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
- data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
- if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
-
- /* 1-not override: enable sdma1 mem light sleep */
- if (adev->sdma.num_instances > 1) {
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
- data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* 1-not override: enable sdma mem light sleep */
+ def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL);
+ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
+ WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data);
}
} else {
- /* 0-override:disable sdma0 mem light sleep */
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
- data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
- if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
-
- /* 0-override:disable sdma1 mem light sleep */
- if (adev->sdma.num_instances > 1) {
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
- data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* 0-override:disable sdma mem light sleep */
+ def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL);
+ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
+ WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data);
}
}
}
@@ -2063,10 +2173,12 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_ARCTURUS:
+ case CHIP_RENOIR:
sdma_v4_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
sdma_v4_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -2133,7 +2245,43 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB,
+ .vmhub = AMDGPU_MMHUB_0,
+ .get_rptr = sdma_v4_0_ring_get_rptr,
+ .get_wptr = sdma_v4_0_ring_get_wptr,
+ .set_wptr = sdma_v4_0_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_0_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
+ /* sdma_v4_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
+ .emit_ib = sdma_v4_0_ring_emit_ib,
+ .emit_fence = sdma_v4_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_0_ring_test_ring,
+ .test_ib = sdma_v4_0_ring_test_ib,
+ .insert_nop = sdma_v4_0_ring_insert_nop,
+ .pad_ib = sdma_v4_0_ring_pad_ib,
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/*
+ * On Arcturus, SDMA instance 5~7 has a different vmhub type(AMDGPU_MMHUB_1).
+ * So create a individual constant ring_funcs for those instances.
+ */
+static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .vmhub = AMDGPU_MMHUB_1,
.get_rptr = sdma_v4_0_ring_get_rptr,
.get_wptr = sdma_v4_0_ring_get_wptr,
.set_wptr = sdma_v4_0_ring_set_wptr,
@@ -2165,7 +2313,39 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB,
+ .vmhub = AMDGPU_MMHUB_0,
+ .get_rptr = sdma_v4_0_ring_get_rptr,
+ .get_wptr = sdma_v4_0_page_ring_get_wptr,
+ .set_wptr = sdma_v4_0_page_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_0_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
+ /* sdma_v4_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
+ .emit_ib = sdma_v4_0_ring_emit_ib,
+ .emit_fence = sdma_v4_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_0_ring_test_ring,
+ .test_ib = sdma_v4_0_ring_test_ib,
+ .insert_nop = sdma_v4_0_ring_insert_nop,
+ .pad_ib = sdma_v4_0_ring_pad_ib,
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .vmhub = AMDGPU_MMHUB_1,
.get_rptr = sdma_v4_0_ring_get_rptr,
.get_wptr = sdma_v4_0_page_ring_get_wptr,
.set_wptr = sdma_v4_0_page_ring_set_wptr,
@@ -2197,10 +2377,20 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
+ if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
+ adev->sdma.instance[i].ring.funcs =
+ &sdma_v4_0_ring_funcs_2nd_mmhub;
+ else
+ adev->sdma.instance[i].ring.funcs =
+ &sdma_v4_0_ring_funcs;
adev->sdma.instance[i].ring.me = i;
if (adev->sdma.has_page_queue) {
- adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs;
+ if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
+ adev->sdma.instance[i].page.funcs =
+ &sdma_v4_0_page_ring_funcs_2nd_mmhub;
+ else
+ adev->sdma.instance[i].page.funcs =
+ &sdma_v4_0_page_ring_funcs;
adev->sdma.instance[i].page.me = i;
}
}
@@ -2217,17 +2407,30 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
.set = sdma_v4_0_set_ecc_irq_state,
- .process = sdma_v4_0_process_ecc_irq,
+ .process = amdgpu_sdma_process_ecc_irq,
};
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ switch (adev->sdma.num_instances) {
+ case 1:
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
+ break;
+ case 8:
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ break;
+ case 2:
+ default:
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
+ break;
+ }
adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
- adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
}
@@ -2293,8 +2496,8 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
{
adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
- if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1)
- adev->mman.buffer_funcs_ring = &adev->sdma.instance[1].page;
+ if (adev->sdma.has_page_queue)
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
else
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
@@ -2313,21 +2516,77 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
unsigned i;
adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
- if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) {
- for (i = 1; i < adev->sdma.num_instances; i++) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.has_page_queue)
sched = &adev->sdma.instance[i].page.sched;
- adev->vm_manager.vm_pte_rqs[i - 1] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
- }
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances - 1;
- adev->vm_manager.page_fault = &adev->sdma.instance[0].page;
- } else {
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ else
sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] = sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+static void sdma_v4_0_get_ras_error_count(uint32_t value,
+ uint32_t instance,
+ uint32_t *sec_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt;
+
+ /* double bits error (multiple bits) error detection is not supported */
+ for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) {
+ /* the SDMA_EDC_COUNTER register in each sdma instance
+ * shares the same sed shift_mask
+ * */
+ sec_cnt = (value &
+ sdma_v4_0_ras_fields[i].sec_count_mask) >>
+ sdma_v4_0_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("Detected %s in SDMA%d, SED %d\n",
+ sdma_v4_0_ras_fields[i].name,
+ instance, sec_cnt);
+ *sec_count += sec_cnt;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ }
+}
+
+static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t instance, void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0;
+ uint32_t reg_value = 0;
+
+ reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER);
+ /* double bit error is not supported */
+ if (reg_value)
+ sdma_v4_0_get_ras_error_count(reg_value,
+ instance, &sec_count);
+ /* err_data->ce_count should be initialized to 0
+ * before calling into this function */
+ err_data->ce_count += sec_count;
+ /* double bit error is not supported
+ * set ue count to 0 */
+ err_data->ue_count = 0;
+
+ return 0;
+};
+
+static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
+ .ras_late_init = amdgpu_sdma_ras_late_init,
+ .ras_fini = amdgpu_sdma_ras_fini,
+ .query_ras_error_count = sdma_v4_0_query_ras_error_count,
+};
+
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ adev->sdma.funcs = &sdma_v4_0_ras_funcs;
+ break;
+ default:
+ break;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 3747c3f1f0cc..67b9830b7c7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -21,8 +21,11 @@
*
*/
+#include <linux/delay.h>
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
@@ -42,6 +45,12 @@
MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/navi14_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin");
+
+MODULE_FIRMWARE("amdgpu/navi12_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin");
+
#define SDMA1_REG_OFFSET 0x600
#define SDMA0_HYP_DEC_REG_START 0x5880
#define SDMA0_HYP_DEC_REG_END 0x5893
@@ -59,7 +68,7 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
@@ -71,7 +80,7 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
@@ -80,6 +89,18 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = {
};
static const struct soc15_reg_golden golden_settings_sdma_nv10[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_nv14[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_nv12[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
};
static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
@@ -111,6 +132,22 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_nv10,
(const u32)ARRAY_SIZE(golden_settings_sdma_nv10));
break;
+ case CHIP_NAVI14:
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_5,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_5));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_nv14,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_nv14));
+ break;
+ case CHIP_NAVI12:
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_5,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_5));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_nv12,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_nv12));
+ break;
default:
break;
}
@@ -143,6 +180,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
case CHIP_NAVI10:
chip_name = "navi10";
break;
+ case CHIP_NAVI14:
+ chip_name = "navi14";
+ break;
+ case CHIP_NAVI12:
+ chip_name = "navi12";
+ break;
default:
BUG();
}
@@ -339,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
- /* IB packet must end on a 8 DW boundary */
- sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ /* An IB packet must end on a 8 DW boundary--the next dword
+ * must be on a 8-dword boundary. Our IB packet below is 6
+ * dwords long, thus add x number of NOPs, such that, in
+ * modular arithmetic,
+ * wptr + 6 + x = 8k, k >= 0, which in C is,
+ * (wptr + 6 + x) % 8 = 0.
+ * The expression below, is a solution of x.
+ */
+ sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -363,7 +413,7 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask = 0;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
if (ring->me == 0)
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
@@ -373,8 +423,8 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
- amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
- amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
amdgpu_ring_write(ring, ref_and_mask); /* reference */
amdgpu_ring_write(ring, ref_and_mask); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
@@ -640,7 +690,7 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
- adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
ring->doorbell_index, 20);
if (amdgpu_sriov_vf(adev))
@@ -861,19 +911,12 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
if (amdgpu_emu_mode == 1)
msleep(1);
else
- DRM_UDELAY(1);
+ udelay(1);
}
- if (i < adev->usec_timeout) {
- if (amdgpu_emu_mode == 1)
- DRM_INFO("ring test on %d succeeded in %d msecs\n", ring->idx, i);
- else
- DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
- } else {
- DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
- ring->idx, tmp);
- r = -EINVAL;
- }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
amdgpu_device_wb_free(adev, index);
return r;
@@ -938,13 +981,10 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
- if (tmp == 0xDEADBEEF) {
- DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ if (tmp == 0xDEADBEEF)
r = 0;
- } else {
- DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+ else
r = -EINVAL;
- }
err1:
amdgpu_ib_free(adev, &ib, NULL);
@@ -1043,10 +1083,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
}
/**
- * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
- *
+ * sdma_v5_0_ring_pad_ib - pad the IB
* @ib: indirect buffer to fill with padding
*
+ * Pad the IB with NOPs to a boundary multiple of 8.
*/
static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
@@ -1054,7 +1094,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 0x7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1086,7 +1126,7 @@ static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
amdgpu_ring_write(ring, seq); /* reference */
- amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}
@@ -1130,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
static int sdma_v5_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1316,7 +1366,7 @@ static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
if (ring->trail_seq ==
le32_to_cpu(*(ring->trail_fence_cpu_addr)))
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout) {
@@ -1472,10 +1522,12 @@ static int sdma_v5_0_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
sdma_v5_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
sdma_v5_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -1532,7 +1584,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB,
+ .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = sdma_v5_0_ring_get_rptr,
.get_wptr = sdma_v5_0_ring_get_wptr,
.set_wptr = sdma_v5_0_ring_set_wptr,
@@ -1543,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
6 + /* sdma_v5_0_ring_emit_pipeline_sync */
/* sdma_v5_0_ring_emit_vm_flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1557,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.pad_ib = sdma_v5_0_ring_pad_ib,
.emit_wreg = sdma_v5_0_ring_emit_wreg,
.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
.preempt_ib = sdma_v5_0_ring_preempt_ib,
@@ -1583,7 +1636,8 @@ static const struct amdgpu_irq_src_funcs sdma_v5_0_illegal_inst_irq_funcs = {
static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
+ adev->sdma.num_instances;
adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs;
}
@@ -1664,17 +1718,15 @@ static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = {
static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
- struct drm_gpu_scheduler *sched;
unsigned i;
if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 4d74453f3cfb..4d415bfdb42f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -975,6 +975,17 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
{GRBM_STATUS},
+ {mmGRBM_STATUS2},
+ {mmGRBM_STATUS_SE0},
+ {mmGRBM_STATUS_SE1},
+ {mmSRBM_STATUS},
+ {mmSRBM_STATUS2},
+ {DMA_STATUS_REG + DMA0_REGISTER_OFFSET},
+ {DMA_STATUS_REG + DMA1_REGISTER_OFFSET},
+ {mmCP_STAT},
+ {mmCP_STALLED_STAT1},
+ {mmCP_STALLED_STAT2},
+ {mmCP_STALLED_STAT3},
{GB_ADDR_CONFIG},
{MC_ARB_RAMCFG},
{GB_TILE_MODE0},
@@ -1186,6 +1197,17 @@ static int si_asic_reset(struct amdgpu_device *adev)
return 0;
}
+static bool si_asic_supports_baco(struct amdgpu_device *adev)
+{
+ return false;
+}
+
+static enum amd_reset_method
+si_asic_reset_method(struct amdgpu_device *adev)
+{
+ return AMD_RESET_METHOD_LEGACY;
+}
+
static u32 si_get_config_memsize(struct amdgpu_device *adev)
{
return RREG32(mmCONFIG_MEMSIZE);
@@ -1394,6 +1416,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.read_bios_from_rom = &si_read_bios_from_rom,
.read_register = &si_read_register,
.reset = &si_asic_reset,
+ .reset_method = &si_asic_reset_method,
.set_vga_state = &si_vga_set_state,
.get_xclk = &si_get_xclk,
.set_uvd_clocks = &si_set_uvd_clocks,
@@ -1407,6 +1430,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.get_pcie_usage = &si_get_pcie_usage,
.need_reset_on_init = &si_need_reset_on_init,
.get_pcie_replay_count = &si_get_pcie_replay_count,
+ .supports_baco = &si_asic_supports_baco,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
@@ -1626,7 +1650,6 @@ static void si_init_golden_registers(struct amdgpu_device *adev)
static void si_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1661,12 +1684,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(adev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1675,14 +1693,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(PCIE_LC_STATUS1);
max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -1699,15 +1720,23 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
}
for (i = 0; i < 10; i++) {
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp |= LC_SET_QUIESCE;
@@ -1719,25 +1748,44 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
mdelay(100);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
-
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp &= ~LC_SET_QUIESCE;
@@ -1750,15 +1798,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
- tmp16 |= 3;
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
- tmp16 |= 2;
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
@@ -1881,7 +1930,7 @@ static void si_program_aspm(struct amdgpu_device *adev)
if (orig != data)
si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
- if ((adev->family != CHIP_OLAND) && (adev->family != CHIP_HAINAN)) {
+ if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) {
orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
data &= ~PLL_RAMP_UP_TIME_0_MASK;
if (orig != data)
@@ -1930,14 +1979,14 @@ static void si_program_aspm(struct amdgpu_device *adev)
orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL);
data &= ~LS2_EXIT_TIME_MASK;
- if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN))
+ if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
data |= LS2_EXIT_TIME(5);
if (orig != data)
si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data);
orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL);
data &= ~LS2_EXIT_TIME_MASK;
- if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN))
+ if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
data |= LS2_EXIT_TIME(5);
if (orig != data)
si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index bdda8b4e03f0..42d5601b6bf3 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -648,7 +648,7 @@ static int si_dma_set_clockgating_state(void *handle,
bool enable;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ enable = (state == AMD_CG_STATE_GATE);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -834,16 +834,14 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
- struct drm_gpu_scheduler *sched;
unsigned i;
adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version si_dma_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 57bb5f9e08b2..88ae27a5a03d 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -64,7 +64,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev)
u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
si_ih_disable_interrupts(adev);
- WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8);
+ /* set dummy read address to dummy page address */
+ WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32(INTERRUPT_CNTL);
interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
new file mode 100644
index 000000000000..c902f26cf50d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -0,0 +1,732 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "smuio/smuio_11_0_0_offset.h"
+#include "smuio/smuio_11_0_0_sh_mask.h"
+
+#include "smu_v11_0_i2c.h"
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include <drm/drm_fixed.h>
+#include <drm/drm_drv.h>
+#include "amdgpu_amdkfd.h"
+#include <linux/i2c.h>
+#include <linux/pci.h>
+#include "amdgpu_ras.h"
+
+/* error codes */
+#define I2C_OK 0
+#define I2C_NAK_7B_ADDR_NOACK 1
+#define I2C_NAK_TXDATA_NOACK 2
+#define I2C_TIMEOUT 4
+#define I2C_SW_TIMEOUT 8
+#define I2C_ABORT 0x10
+
+/* I2C transaction flags */
+#define I2C_NO_STOP 1
+#define I2C_RESTART 2
+
+#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control.eeprom_accessor))->adev
+#define to_eeprom_control(x) container_of(x, struct amdgpu_ras_eeprom_control, eeprom_accessor)
+
+static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint32_t reg = RREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT);
+
+ reg = REG_SET_FIELD(reg, SMUIO_PWRMGT, i2c_clk_gate_en, en ? 1 : 0);
+ WREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT, reg);
+}
+
+
+static void smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0);
+}
+
+static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ /* do */
+ {
+ RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_INTR);
+
+ } /* while (reg_CKSVII2C_ic_clr_intr == 0) */
+}
+
+static void smu_v11_0_i2c_configure(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint32_t reg = 0;
+
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_SLAVE_DISABLE, 1);
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_RESTART_EN, 1);
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_MASTER, 0);
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_SLAVE, 0);
+ /* Standard mode */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, 2);
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MASTER_MODE, 1);
+
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CON, reg);
+}
+
+static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+
+ /*
+ * Standard mode speed, These values are taken from SMUIO MAS,
+ * but are different from what is given is
+ * Synopsys spec. The values here are based on assumption
+ * that refclock is 100MHz
+ *
+ * Configuration for standard mode; Speed = 100kbps
+ * Scale linearly, for now only support standard speed clock
+ * This will work only with 100M ref clock
+ *
+ * TBD:Change the calculation to take into account ref clock values also.
+ */
+
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_FS_SPKLEN, 2);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_HCNT, 120);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_LCNT, 130);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SDA_HOLD, 20);
+}
+
+static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, uint8_t address)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+
+ /* Convert fromr 8-bit to 7-bit address */
+ address >>= 1;
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, (address & 0xFF));
+}
+
+static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint32_t ret = I2C_OK;
+ uint32_t reg, reg_c_tx_abrt_source;
+
+ /*Check if transmission is completed */
+ unsigned long timeout_counter = jiffies + msecs_to_jiffies(20);
+
+ do {
+ if (time_after(jiffies, timeout_counter)) {
+ ret |= I2C_SW_TIMEOUT;
+ break;
+ }
+
+ reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
+
+ } while (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFE) == 0);
+
+ if (ret != I2C_OK)
+ return ret;
+
+ /* This only checks if NAK is received and transaction got aborted */
+ reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_INTR_STAT);
+
+ if (REG_GET_FIELD(reg, CKSVII2C_IC_INTR_STAT, R_TX_ABRT) == 1) {
+ reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE);
+ DRM_INFO("TX was terminated, IC_TX_ABRT_SOURCE val is:%x", reg_c_tx_abrt_source);
+
+ /* Check for stop due to NACK */
+ if (REG_GET_FIELD(reg_c_tx_abrt_source,
+ CKSVII2C_IC_TX_ABRT_SOURCE,
+ ABRT_TXDATA_NOACK) == 1) {
+
+ ret |= I2C_NAK_TXDATA_NOACK;
+
+ } else if (REG_GET_FIELD(reg_c_tx_abrt_source,
+ CKSVII2C_IC_TX_ABRT_SOURCE,
+ ABRT_7B_ADDR_NOACK) == 1) {
+
+ ret |= I2C_NAK_7B_ADDR_NOACK;
+ } else {
+ ret |= I2C_ABORT;
+ }
+
+ smu_v11_0_i2c_clear_status(control);
+ }
+
+ return ret;
+}
+
+static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint32_t ret = I2C_OK;
+ uint32_t reg_ic_status, reg_c_tx_abrt_source;
+
+ reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE);
+
+ /* If slave is not present */
+ if (REG_GET_FIELD(reg_c_tx_abrt_source,
+ CKSVII2C_IC_TX_ABRT_SOURCE,
+ ABRT_7B_ADDR_NOACK) == 1) {
+ ret |= I2C_NAK_7B_ADDR_NOACK;
+
+ smu_v11_0_i2c_clear_status(control);
+ } else { /* wait till some data is there in RXFIFO */
+ /* Poll for some byte in RXFIFO */
+ unsigned long timeout_counter = jiffies + msecs_to_jiffies(20);
+
+ do {
+ if (time_after(jiffies, timeout_counter)) {
+ ret |= I2C_SW_TIMEOUT;
+ break;
+ }
+
+ reg_ic_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
+
+ } while (REG_GET_FIELD(reg_ic_status, CKSVII2C_IC_STATUS, RFNE) == 0);
+ }
+
+ return ret;
+}
+
+
+
+
+/**
+ * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device.
+ *
+ * @address: The I2C address of the slave device.
+ * @data: The data to transmit over the bus.
+ * @numbytes: The amount of data to transmit.
+ * @i2c_flag: Flags for transmission
+ *
+ * Returns 0 on success or error.
+ */
+static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control,
+ uint8_t address, uint8_t *data,
+ uint32_t numbytes, uint32_t i2c_flag)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint32_t bytes_sent, reg, ret = 0;
+ unsigned long timeout_counter;
+
+ bytes_sent = 0;
+
+ DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ",
+ (uint16_t)address, numbytes);
+
+ if (drm_debug_enabled(DRM_UT_DRIVER)) {
+ print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
+ 16, 1, data, numbytes, false);
+ }
+
+ /* Set the I2C slave address */
+ smu_v11_0_i2c_set_address(control, address);
+ /* Enable I2C */
+ smu_v11_0_i2c_enable(control, true);
+
+ /* Clear status bits */
+ smu_v11_0_i2c_clear_status(control);
+
+
+ timeout_counter = jiffies + msecs_to_jiffies(20);
+
+ while (numbytes > 0) {
+ reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
+ if (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) {
+ do {
+ reg = 0;
+ /*
+ * Prepare transaction, no need to set RESTART. I2C engine will send
+ * START as soon as it sees data in TXFIFO
+ */
+ if (bytes_sent == 0)
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART,
+ (i2c_flag & I2C_RESTART) ? 1 : 0);
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, data[bytes_sent]);
+
+ /* determine if we need to send STOP bit or not */
+ if (numbytes == 1)
+ /* Final transaction, so send stop unless I2C_NO_STOP */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP,
+ (i2c_flag & I2C_NO_STOP) ? 0 : 1);
+ /* Write */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg);
+
+ /* Record that the bytes were transmitted */
+ bytes_sent++;
+ numbytes--;
+
+ reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
+
+ } while (numbytes && REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF));
+ }
+
+ /*
+ * We waited too long for the transmission FIFO to become not-full.
+ * Exit the loop with error.
+ */
+ if (time_after(jiffies, timeout_counter)) {
+ ret |= I2C_SW_TIMEOUT;
+ goto Err;
+ }
+ }
+
+ ret = smu_v11_0_i2c_poll_tx_status(control);
+
+Err:
+ /* Any error, no point in proceeding */
+ if (ret != I2C_OK) {
+ if (ret & I2C_SW_TIMEOUT)
+ DRM_ERROR("TIMEOUT ERROR !!!");
+
+ if (ret & I2C_NAK_7B_ADDR_NOACK)
+ DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!");
+
+
+ if (ret & I2C_NAK_TXDATA_NOACK)
+ DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!");
+ }
+
+ return ret;
+}
+
+
+/**
+ * smu_v11_0_i2c_receive - Receive a block of data over the I2C bus from a slave device.
+ *
+ * @address: The I2C address of the slave device.
+ * @numbytes: The amount of data to transmit.
+ * @i2c_flag: Flags for transmission
+ *
+ * Returns 0 on success or error.
+ */
+static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
+ uint8_t address, uint8_t *data,
+ uint32_t numbytes, uint8_t i2c_flag)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint32_t bytes_received, ret = I2C_OK;
+
+ bytes_received = 0;
+
+ /* Set the I2C slave address */
+ smu_v11_0_i2c_set_address(control, address);
+
+ /* Enable I2C */
+ smu_v11_0_i2c_enable(control, true);
+
+ while (numbytes > 0) {
+ uint32_t reg = 0;
+
+ smu_v11_0_i2c_clear_status(control);
+
+
+ /* Prepare transaction */
+
+ /* Each time we disable I2C, so this is not a restart */
+ if (bytes_received == 0)
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART,
+ (i2c_flag & I2C_RESTART) ? 1 : 0);
+
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, 0);
+ /* Read */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 1);
+
+ /* Transmitting last byte */
+ if (numbytes == 1)
+ /* Final transaction, so send stop if requested */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP,
+ (i2c_flag & I2C_NO_STOP) ? 0 : 1);
+
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg);
+
+ ret = smu_v11_0_i2c_poll_rx_status(control);
+
+ /* Any error, no point in proceeding */
+ if (ret != I2C_OK) {
+ if (ret & I2C_SW_TIMEOUT)
+ DRM_ERROR("TIMEOUT ERROR !!!");
+
+ if (ret & I2C_NAK_7B_ADDR_NOACK)
+ DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!");
+
+ if (ret & I2C_NAK_TXDATA_NOACK)
+ DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!");
+
+ break;
+ }
+
+ reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD);
+ data[bytes_received] = REG_GET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT);
+
+ /* Record that the bytes were received */
+ bytes_received++;
+ numbytes--;
+ }
+
+ DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :",
+ (uint16_t)address, bytes_received);
+
+ if (drm_debug_enabled(DRM_UT_DRIVER)) {
+ print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
+ 16, 1, data, bytes_received, false);
+ }
+
+ return ret;
+}
+
+static void smu_v11_0_i2c_abort(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint32_t reg = 0;
+
+ /* Enable I2C engine; */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ENABLE, 1);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg);
+
+ /* Abort previous transaction */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ABORT, 1);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg);
+
+ DRM_DEBUG_DRIVER("I2C_Abort() Done.");
+}
+
+
+static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+
+ const uint32_t IDLE_TIMEOUT = 1024;
+ uint32_t timeout_count = 0;
+ uint32_t reg_ic_enable, reg_ic_enable_status, reg_ic_clr_activity;
+
+ reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS);
+ reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE);
+
+
+ if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) &&
+ (REG_GET_FIELD(reg_ic_enable_status, CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) {
+ /*
+ * Nobody is using I2C engine, but engine remains active because
+ * someone missed to send STOP
+ */
+ smu_v11_0_i2c_abort(control);
+ } else if (REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) {
+ /* Nobody is using I2C engine */
+ return true;
+ }
+
+ /* Keep reading activity bit until it's cleared */
+ do {
+ reg_ic_clr_activity = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_ACTIVITY);
+
+ if (REG_GET_FIELD(reg_ic_clr_activity,
+ CKSVII2C_IC_CLR_ACTIVITY, CLR_ACTIVITY) == 0)
+ return true;
+
+ ++timeout_count;
+
+ } while (timeout_count < IDLE_TIMEOUT);
+
+ return false;
+}
+
+static void smu_v11_0_i2c_init(struct i2c_adapter *control)
+{
+ /* Disable clock gating */
+ smu_v11_0_i2c_set_clock_gating(control, false);
+
+ if (!smu_v11_0_i2c_activity_done(control))
+ DRM_WARN("I2C busy !");
+
+ /* Disable I2C */
+ smu_v11_0_i2c_enable(control, false);
+
+ /* Configure I2C to operate as master and in standard mode */
+ smu_v11_0_i2c_configure(control);
+
+ /* Initialize the clock to 50 kHz default */
+ smu_v11_0_i2c_set_clock(control);
+
+}
+
+static void smu_v11_0_i2c_fini(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint32_t reg_ic_enable_status, reg_ic_enable;
+
+ smu_v11_0_i2c_enable(control, false);
+
+ /* Double check if disabled, else force abort */
+ reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS);
+ reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE);
+
+ if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) &&
+ (REG_GET_FIELD(reg_ic_enable_status,
+ CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) {
+ /*
+ * Nobody is using I2C engine, but engine remains active because
+ * someone missed to send STOP
+ */
+ smu_v11_0_i2c_abort(control);
+ }
+
+ /* Restore clock gating */
+
+ /*
+ * TODO Reenabling clock gating seems to break subsequent SMU operation
+ * on the I2C bus. My guess is that SMU doesn't disable clock gating like
+ * we do here before working with the bus. So for now just don't restore
+ * it but later work with SMU to see if they have this issue and can
+ * update their code appropriately
+ */
+ /* smu_v11_0_i2c_set_clock_gating(control, true); */
+
+}
+
+static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+
+ /* Send PPSMC_MSG_RequestI2CBus */
+ if (!adev->powerplay.pp_funcs->smu_i2c_bus_access)
+ goto Fail;
+
+
+ if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle, true))
+ return true;
+
+Fail:
+ return false;
+}
+
+static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+
+ /* Send PPSMC_MSG_RequestI2CBus */
+ if (!adev->powerplay.pp_funcs->smu_i2c_bus_access)
+ goto Fail;
+
+ /* Send PPSMC_MSG_ReleaseI2CBus */
+ if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle,
+ false))
+ return true;
+
+Fail:
+ return false;
+}
+
+/***************************** EEPROM I2C GLUE ****************************/
+
+static uint32_t smu_v11_0_i2c_eeprom_read_data(struct i2c_adapter *control,
+ uint8_t address,
+ uint8_t *data,
+ uint32_t numbytes)
+{
+ uint32_t ret = 0;
+
+ /* First 2 bytes are dummy write to set EEPROM address */
+ ret = smu_v11_0_i2c_transmit(control, address, data, 2, I2C_NO_STOP);
+ if (ret != I2C_OK)
+ goto Fail;
+
+ /* Now read data starting with that address */
+ ret = smu_v11_0_i2c_receive(control, address, data + 2, numbytes - 2,
+ I2C_RESTART);
+
+Fail:
+ if (ret != I2C_OK)
+ DRM_ERROR("ReadData() - I2C error occurred :%x", ret);
+
+ return ret;
+}
+
+static uint32_t smu_v11_0_i2c_eeprom_write_data(struct i2c_adapter *control,
+ uint8_t address,
+ uint8_t *data,
+ uint32_t numbytes)
+{
+ uint32_t ret;
+
+ ret = smu_v11_0_i2c_transmit(control, address, data, numbytes, 0);
+
+ if (ret != I2C_OK)
+ DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret);
+ else
+ /*
+ * According to EEPROM spec there is a MAX of 10 ms required for
+ * EEPROM to flush internal RX buffer after STOP was issued at the
+ * end of write transaction. During this time the EEPROM will not be
+ * responsive to any more commands - so wait a bit more.
+ *
+ * TODO Improve to wait for first ACK for slave address after
+ * internal write cycle done.
+ */
+ msleep(10);
+
+ return ret;
+
+}
+
+static void lock_bus(struct i2c_adapter *i2c, unsigned int flags)
+{
+ struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c);
+
+ if (!smu_v11_0_i2c_bus_lock(i2c)) {
+ DRM_ERROR("Failed to lock the bus from SMU");
+ return;
+ }
+
+ control->bus_locked = true;
+}
+
+static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags)
+{
+ WARN_ONCE(1, "This operation not supposed to run in atomic context!");
+ return false;
+}
+
+static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags)
+{
+ struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c);
+
+ if (!smu_v11_0_i2c_bus_unlock(i2c)) {
+ DRM_ERROR("Failed to unlock the bus from SMU");
+ return;
+ }
+
+ control->bus_locked = false;
+}
+
+static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = {
+ .lock_bus = lock_bus,
+ .trylock_bus = trylock_bus,
+ .unlock_bus = unlock_bus,
+};
+
+static int smu_v11_0_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap,
+ struct i2c_msg *msgs, int num)
+{
+ int i, ret;
+ struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c_adap);
+
+ if (!control->bus_locked) {
+ DRM_ERROR("I2C bus unlocked, stopping transaction!");
+ return -EIO;
+ }
+
+ smu_v11_0_i2c_init(i2c_adap);
+
+ for (i = 0; i < num; i++) {
+ if (msgs[i].flags & I2C_M_RD)
+ ret = smu_v11_0_i2c_eeprom_read_data(i2c_adap,
+ (uint8_t)msgs[i].addr,
+ msgs[i].buf, msgs[i].len);
+ else
+ ret = smu_v11_0_i2c_eeprom_write_data(i2c_adap,
+ (uint8_t)msgs[i].addr,
+ msgs[i].buf, msgs[i].len);
+
+ if (ret != I2C_OK) {
+ num = -EIO;
+ break;
+ }
+ }
+
+ smu_v11_0_i2c_fini(i2c_adap);
+ return num;
+}
+
+static u32 smu_v11_0_i2c_eeprom_i2c_func(struct i2c_adapter *adap)
+{
+ return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+
+static const struct i2c_algorithm smu_v11_0_i2c_eeprom_i2c_algo = {
+ .master_xfer = smu_v11_0_i2c_eeprom_i2c_xfer,
+ .functionality = smu_v11_0_i2c_eeprom_i2c_func,
+};
+
+int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int res;
+
+ control->owner = THIS_MODULE;
+ control->class = I2C_CLASS_SPD;
+ control->dev.parent = &adev->pdev->dev;
+ control->algo = &smu_v11_0_i2c_eeprom_i2c_algo;
+ snprintf(control->name, sizeof(control->name), "RAS EEPROM");
+ control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops;
+
+ res = i2c_add_adapter(control);
+ if (res)
+ DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
+
+ return res;
+}
+
+void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control)
+{
+ i2c_del_adapter(control);
+}
+
+/*
+ * Keep this for future unit test if bugs arise
+ */
+#if 0
+#define I2C_TARGET_ADDR 0xA0
+
+bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control)
+{
+
+ uint32_t ret = I2C_OK;
+ uint8_t data[6] = {0xf, 0, 0xde, 0xad, 0xbe, 0xef};
+
+
+ DRM_INFO("Begin");
+
+ if (!smu_v11_0_i2c_bus_lock(control)) {
+ DRM_ERROR("Failed to lock the bus!.");
+ return false;
+ }
+
+ smu_v11_0_i2c_init(control);
+
+ /* Write 0xde to address 0x0000 on the EEPROM */
+ ret = smu_v11_0_i2c_eeprom_write_data(control, I2C_TARGET_ADDR, data, 6);
+
+ ret = smu_v11_0_i2c_eeprom_read_data(control, I2C_TARGET_ADDR, data, 6);
+
+ smu_v11_0_i2c_fini(control);
+
+ smu_v11_0_i2c_bus_unlock(control);
+
+
+ DRM_INFO("End");
+ return true;
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
new file mode 100644
index 000000000000..973f28d68e70
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef SMU_V11_I2C_CONTROL_H
+#define SMU_V11_I2C_CONTROL_H
+
+#include <linux/types.h>
+
+struct i2c_adapter;
+
+int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control);
+void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 23265414d448..15f3424a1ff7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -58,11 +58,18 @@
#include "mmhub_v1_0.h"
#include "df_v1_7.h"
#include "df_v3_6.h"
+#include "nbio_v6_1.h"
+#include "nbio_v7_0.h"
+#include "nbio_v7_4.h"
#include "vega10_ih.h"
#include "sdma_v4_0.h"
#include "uvd_v7_0.h"
#include "vce_v4_0.h"
#include "vcn_v1_0.h"
+#include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
+#include "vcn_v2_5.h"
+#include "jpeg_v2_5.h"
#include "dce_virtual.h"
#include "mxgpu_ai.h"
#include "amdgpu_smu.h"
@@ -89,8 +96,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags, address, data;
u32 r;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, reg);
@@ -104,8 +111,8 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags, address, data;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, reg);
@@ -115,6 +122,49 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
+static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u64 r;
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ /* read low 32 bit */
+ WREG32(address, reg);
+ (void)RREG32(address);
+ r = RREG32(data);
+
+ /* read high 32 bit*/
+ WREG32(address, reg + 4);
+ (void)RREG32(address);
+ r |= ((u64)RREG32(data) << 32);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ /* write low 32 bit */
+ WREG32(address, reg);
+ (void)RREG32(address);
+ WREG32(data, (u32)(v & 0xffffffffULL));
+ (void)RREG32(data);
+
+ /* write high 32 bit */
+ WREG32(address, reg + 4);
+ (void)RREG32(address);
+ WREG32(data, (u32)(v >> 32));
+ (void)RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags, address, data;
@@ -217,7 +267,7 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
static u32 soc15_get_config_memsize(struct amdgpu_device *adev)
{
- return adev->nbio_funcs->get_memsize(adev);
+ return adev->nbio.funcs->get_memsize(adev);
}
static u32 soc15_get_xclk(struct amdgpu_device *adev)
@@ -291,6 +341,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
@@ -416,7 +467,7 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
/* wait for asic to come out of reset */
for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = adev->nbio_funcs->get_memsize(adev);
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
if (memsize != 0xffffffff)
break;
@@ -428,76 +479,96 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
return ret;
}
-static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap)
-{
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
- if (!pp_funcs || !pp_funcs->get_asic_baco_capability) {
- *cap = false;
- return -ENOENT;
- }
-
- return pp_funcs->get_asic_baco_capability(pp_handle, cap);
-}
-
static int soc15_asic_baco_reset(struct amdgpu_device *adev)
{
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
- if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
- return -ENOENT;
-
- /* enter BACO state */
- if (pp_funcs->set_asic_baco_state(pp_handle, 1))
- return -EIO;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
- /* exit BACO state */
- if (pp_funcs->set_asic_baco_state(pp_handle, 0))
- return -EIO;
+ /* avoid NBIF got stuck when do RAS recovery in BACO reset */
+ if (ras && ras->supported)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
- dev_info(adev->dev, "GPU BACO reset\n");
+ ret = amdgpu_dpm_baco_reset(adev);
+ if (ret)
+ return ret;
- adev->in_baco_reset = 1;
+ /* re-enable doorbell interrupt after BACO exit */
+ if (ras && ras->supported)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
return 0;
}
-static int soc15_asic_reset(struct amdgpu_device *adev)
+static enum amd_reset_method
+soc15_asic_reset_method(struct amdgpu_device *adev)
{
- int ret;
- bool baco_reset;
+ bool baco_reset = false;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (adev->asic_type) {
+ case CHIP_RAVEN:
+ case CHIP_RENOIR:
+ return AMD_RESET_METHOD_MODE2;
case CHIP_VEGA10:
case CHIP_VEGA12:
- soc15_asic_get_baco_capability(adev, &baco_reset);
+ case CHIP_ARCTURUS:
+ baco_reset = amdgpu_dpm_is_baco_supported(adev);
break;
case CHIP_VEGA20:
if (adev->psp.sos_fw_version >= 0x80067)
- soc15_asic_get_baco_capability(adev, &baco_reset);
- else
- baco_reset = false;
- if (baco_reset) {
- struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ baco_reset = amdgpu_dpm_is_baco_supported(adev);
- if (hive || (ras && ras->supported))
- baco_reset = false;
- }
+ /*
+ * 1. PMFW version > 0x284300: all cases use baco
+ * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
+ */
+ if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
+ baco_reset = false;
break;
default:
- baco_reset = false;
break;
}
if (baco_reset)
- ret = soc15_asic_baco_reset(adev);
+ return AMD_RESET_METHOD_BACO;
else
- ret = soc15_asic_mode1_reset(adev);
+ return AMD_RESET_METHOD_MODE1;
+}
- return ret;
+static int soc15_asic_reset(struct amdgpu_device *adev)
+{
+ /* original raven doesn't have full asic reset */
+ if (adev->pdev->device == 0x15dd && adev->rev_id < 0x8)
+ return 0;
+
+ switch (soc15_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_BACO:
+ if (!adev->in_suspend)
+ amdgpu_inc_vram_lost(adev);
+ return soc15_asic_baco_reset(adev);
+ case AMD_RESET_METHOD_MODE2:
+ return amdgpu_dpm_mode2_reset(adev);
+ default:
+ if (!adev->in_suspend)
+ amdgpu_inc_vram_lost(adev);
+ return soc15_asic_mode1_reset(adev);
+ }
+}
+
+static bool soc15_supports_baco(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_ARCTURUS:
+ return amdgpu_dpm_is_baco_supported(adev);
+ case CHIP_VEGA20:
+ if (adev->psp.sos_fw_version >= 0x80067)
+ return amdgpu_dpm_is_baco_supported(adev);
+ return false;
+ default:
+ return false;
+ }
}
/*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
@@ -556,8 +627,8 @@ static void soc15_program_aspm(struct amdgpu_device *adev)
static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
- adev->nbio_funcs->enable_doorbell_aperture(adev, enable);
- adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable);
+ adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
}
static const struct amdgpu_ip_block_version vega10_common_ip_block =
@@ -571,7 +642,7 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block =
static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
{
- return adev->nbio_funcs->get_rev_id(adev);
+ return adev->nbio.funcs->get_rev_id(adev);
}
int soc15_set_ip_blocks(struct amdgpu_device *adev)
@@ -581,32 +652,41 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_RAVEN:
+ case CHIP_RENOIR:
vega10_reg_base_init(adev);
break;
case CHIP_VEGA20:
vega20_reg_base_init(adev);
break;
+ case CHIP_ARCTURUS:
+ arct_reg_base_init(adev);
+ break;
default:
return -EINVAL;
}
- if (adev->asic_type == CHIP_VEGA20)
+ if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
adev->gmc.xgmi.supported = true;
- if (adev->flags & AMD_IS_APU)
- adev->nbio_funcs = &nbio_v7_0_funcs;
- else if (adev->asic_type == CHIP_VEGA20)
- adev->nbio_funcs = &nbio_v7_4_funcs;
- else
- adev->nbio_funcs = &nbio_v6_1_funcs;
+ if (adev->flags & AMD_IS_APU) {
+ adev->nbio.funcs = &nbio_v7_0_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg;
+ } else if (adev->asic_type == CHIP_VEGA20 ||
+ adev->asic_type == CHIP_ARCTURUS) {
+ adev->nbio.funcs = &nbio_v7_4_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
+ } else {
+ adev->nbio.funcs = &nbio_v6_1_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg;
+ }
- if (adev->asic_type == CHIP_VEGA20)
- adev->df_funcs = &df_v3_6_funcs;
+ if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
+ adev->df.funcs = &df_v3_6_funcs;
else
- adev->df_funcs = &df_v1_7_funcs;
+ adev->df.funcs = &df_v1_7_funcs;
adev->rev_id = soc15_get_rev_id(adev);
- adev->nbio_funcs->detect_hw_virt(adev);
+ adev->nbio.funcs->detect_hw_virt(adev);
if (amdgpu_sriov_vf(adev))
adev->virt.ops = &xgpu_ai_virt_ops;
@@ -638,11 +718,11 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
}
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
- if (!amdgpu_sriov_vf(adev)) {
- if (is_support_sw_smu(adev))
+ if (is_support_sw_smu(adev)) {
+ if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- else
- amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ } else {
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
}
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -672,6 +752,53 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
#endif
amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
break;
+ case CHIP_ARCTURUS:
+ amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ } else {
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ }
+
+ if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+ amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+ } else {
+ amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+ }
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block);
+ break;
+ case CHIP_RENOIR:
+ amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+ amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
+ if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
+ break;
default:
return -EINVAL;
}
@@ -681,14 +808,14 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
- adev->nbio_funcs->hdp_flush(adev, ring);
+ adev->nbio.funcs->hdp_flush(adev, ring);
}
static void soc15_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
- WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+ WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
else
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
@@ -714,14 +841,9 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
/* Set the 2 events that we wish to watch, defined above */
/* Reg 40 is # received msgs */
+ /* Reg 104 is # of posted requests sent */
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
- /* Pre-VG20, Reg 104 is # of posted requests sent. On VG20 it's 108 */
- if (adev->asic_type == CHIP_VEGA20)
- perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
- EVENT1_SEL, 108);
- else
- perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
- EVENT1_SEL, 104);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
/* Write to enable desired perf counters */
WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr);
@@ -751,6 +873,55 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
*count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
+static void vega20_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs */
+ /* Reg 108 is # of posted requests sent on VG20 */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3,
+ EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3,
+ EVENT1_SEL, 108);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK3) | (cnt1_of << 32);
+}
+
static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
{
u32 sol_reg;
@@ -792,6 +963,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.read_bios_from_rom = &soc15_read_bios_from_rom,
.read_register = &soc15_read_register,
.reset = &soc15_asic_reset,
+ .reset_method = &soc15_asic_reset_method,
.set_vga_state = &soc15_vga_set_state,
.get_xclk = &soc15_get_xclk,
.set_uvd_clocks = &soc15_set_uvd_clocks,
@@ -804,6 +976,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.get_pcie_usage = &soc15_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
+ .supports_baco = &soc15_supports_baco,
};
static const struct amdgpu_asic_funcs vega20_asic_funcs =
@@ -812,6 +985,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.read_bios_from_rom = &soc15_read_bios_from_rom,
.read_register = &soc15_read_register,
.reset = &soc15_asic_reset,
+ .reset_method = &soc15_asic_reset_method,
.set_vga_state = &soc15_vga_set_state,
.get_xclk = &soc15_get_xclk,
.set_uvd_clocks = &soc15_set_uvd_clocks,
@@ -821,9 +995,10 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.invalidate_hdp = &soc15_invalidate_hdp,
.need_full_reset = &soc15_need_full_reset,
.init_doorbell_index = &vega20_doorbell_index_init,
- .get_pcie_usage = &soc15_get_pcie_usage,
+ .get_pcie_usage = &vega20_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
+ .supports_baco = &soc15_supports_baco,
};
static int soc15_common_early_init(void *handle)
@@ -837,6 +1012,8 @@ static int soc15_common_early_init(void *handle)
adev->smc_wreg = NULL;
adev->pcie_rreg = &soc15_pcie_rreg;
adev->pcie_wreg = &soc15_pcie_wreg;
+ adev->pcie_rreg64 = &soc15_pcie_rreg64;
+ adev->pcie_wreg64 = &soc15_pcie_wreg64;
adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;
adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg;
adev->didt_rreg = &soc15_didt_rreg;
@@ -992,11 +1169,53 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
}
-
- if (adev->pm.pp_feature & PP_GFXOFF_MASK)
- adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
- AMD_PG_SUPPORT_CP |
- AMD_PG_SUPPORT_RLC_SMU_HS;
+ break;
+ case CHIP_ARCTURUS:
+ adev->asic_funcs = &vega20_asic_funcs;
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x32;
+ break;
+ case CHIP_RENOIR:
+ adev->asic_funcs = &soc15_asic_funcs;
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_DF_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 0x91;
break;
default:
/* FIXME: not supported yet */
@@ -1014,11 +1233,15 @@ static int soc15_common_early_init(void *handle)
static int soc15_common_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r = 0;
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_get_irq(adev);
- return 0;
+ if (adev->nbio.funcs->ras_late_init)
+ r = adev->nbio.funcs->ras_late_init(adev);
+
+ return r;
}
static int soc15_common_sw_init(void *handle)
@@ -1028,13 +1251,17 @@ static int soc15_common_sw_init(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_add_irq_id(adev);
- adev->df_funcs->sw_init(adev);
+ adev->df.funcs->sw_init(adev);
return 0;
}
static int soc15_common_sw_fini(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_nbio_ras_fini(adev);
+ adev->df.funcs->sw_fini(adev);
return 0;
}
@@ -1043,21 +1270,18 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
int i;
struct amdgpu_ring *ring;
- /* Two reasons to skip
- * 1, Host driver already programmed them
- * 2, To avoid registers program violations in SR-IOV
- */
- if (!amdgpu_virt_support_skip_setting(adev)) {
+ /* sdma/ih doorbell range are programed by hypervisor */
+ if (!amdgpu_sriov_vf(adev)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
- adev->nbio_funcs->sdma_doorbell_range(adev, i,
+ adev->nbio.funcs->sdma_doorbell_range(adev, i,
ring->use_doorbell, ring->doorbell_index,
adev->doorbell_index.sdma_doorbell_range);
}
- }
- adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
adev->irq.ih.doorbell_index);
+ }
}
static int soc15_common_hw_init(void *handle)
@@ -1069,13 +1293,13 @@ static int soc15_common_hw_init(void *handle)
/* enable aspm */
soc15_program_aspm(adev);
/* setup nbio registers */
- adev->nbio_funcs->init_registers(adev);
+ adev->nbio.funcs->init_registers(adev);
/* remap HDP registers to a hole in mmio space,
* for the purpose of expose those registers
* to process space
*/
- if (adev->nbio_funcs->remap_hdp_registers)
- adev->nbio_funcs->remap_hdp_registers(adev);
+ if (adev->nbio.funcs->remap_hdp_registers)
+ adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
soc15_enable_doorbell_aperture(adev, true);
@@ -1098,6 +1322,14 @@ static int soc15_common_hw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_put_irq(adev);
+ if (adev->nbio.ras_if &&
+ amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+ if (adev->nbio.funcs->init_ras_controller_interrupt)
+ amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
+ if (adev->nbio.funcs->init_ras_err_event_athub_interrupt)
+ amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
return 0;
}
@@ -1134,7 +1366,8 @@ static void soc15_update_hdp_light_sleep(struct amdgpu_device *adev, bool enable
{
uint32_t def, data;
- if (adev->asic_type == CHIP_VEGA20) {
+ if (adev->asic_type == CHIP_VEGA20 ||
+ adev->asic_type == CHIP_ARCTURUS) {
def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL));
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
@@ -1237,34 +1470,39 @@ static int soc15_common_set_clockgating_state(void *handle,
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
- adev->nbio_funcs->update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- adev->nbio_funcs->update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
soc15_update_hdp_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_drm_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_drm_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_rom_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- adev->df_funcs->update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
+ adev->df.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
break;
case CHIP_RAVEN:
- adev->nbio_funcs->update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- adev->nbio_funcs->update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ case CHIP_RENOIR:
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
soc15_update_hdp_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_drm_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_drm_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_rom_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
+ break;
+ case CHIP_ARCTURUS:
+ soc15_update_hdp_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -1280,7 +1518,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
if (amdgpu_sriov_vf(adev))
*flags = 0;
- adev->nbio_funcs->get_clockgating_state(adev, flags);
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
/* AMD_CG_SUPPORT_HDP_LS */
data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
@@ -1302,7 +1540,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
*flags |= AMD_CG_SUPPORT_ROM_MGCG;
- adev->df_funcs->get_clockgating_state(adev, flags);
+ adev->df.funcs->get_clockgating_state(adev, flags);
}
static int soc15_common_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index 7a6b2cc6d9f5..d0fb7a67c1a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -28,8 +28,8 @@
#include "nbio_v7_0.h"
#include "nbio_v7_4.h"
-#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4
-#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1
+#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6
+#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3
extern const struct amd_ip_funcs soc15_common_ip_funcs;
@@ -60,6 +60,18 @@ struct soc15_allowed_register_entry {
bool grbm_indexed;
};
+struct soc15_ras_field_entry {
+ const char *name;
+ uint32_t hwip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+ uint32_t sec_count_mask;
+ uint32_t sec_count_shift;
+ uint32_t ded_count_mask;
+ uint32_t ded_count_shift;
+};
+
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
@@ -67,6 +79,8 @@ struct soc15_allowed_register_entry {
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
+#define SOC15_REG_FIELD(reg, field) reg##__##field##_MASK, reg##__##field##__SHIFT
+
void soc15_grbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
int soc15_set_ip_blocks(struct amdgpu_device *adev);
@@ -77,6 +91,7 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
int vega10_reg_base_init(struct amdgpu_device *adev);
int vega20_reg_base_init(struct amdgpu_device *adev);
+int arct_reg_base_init(struct amdgpu_device *adev);
void vega10_doorbell_index_init(struct amdgpu_device *adev);
void vega20_doorbell_index_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 47f74dab365d..19e870c79896 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -52,6 +52,7 @@
uint32_t old_ = 0; \
uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
uint32_t loop = adev->usec_timeout; \
+ ret = 0; \
while ((tmp_ & (mask)) != (expected_value)) { \
if (old_ != tmp_) { \
loop = adev->usec_timeout; \
@@ -69,9 +70,10 @@
} \
} while (0)
+#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a)))
#define WREG32_RLC(reg, value) \
do { \
- if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
+ if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \
uint32_t i = 0; \
uint32_t retries = 50000; \
uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
@@ -96,7 +98,7 @@
#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
do { \
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
- if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
+ if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \
uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
new file mode 100644
index 000000000000..0d6b50528d76
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v6_0.h"
+#include "amdgpu.h"
+
+static void umc_v6_0_init_registers(struct amdgpu_device *adev)
+{
+ unsigned i,j;
+
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++)
+ WREG32((i*0x100000 + 0x5010c + j*0x2000)/4, 0x1002);
+}
+
+const struct amdgpu_umc_funcs umc_v6_0_funcs = {
+ .init_registers = umc_v6_0_init_registers,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h
new file mode 100644
index 000000000000..109f1a57a46e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V6_0_H__
+#define __UMC_V6_0_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+extern const struct amdgpu_umc_funcs umc_v6_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
new file mode 100644
index 000000000000..793bf70e64b1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v6_1.h"
+#include "amdgpu_ras.h"
+#include "amdgpu.h"
+
+#include "rsmu/rsmu_0_0_2_offset.h"
+#include "rsmu/rsmu_0_0_2_sh_mask.h"
+#include "umc/umc_6_1_1_offset.h"
+#include "umc/umc_6_1_1_sh_mask.h"
+#include "umc/umc_6_1_2_offset.h"
+
+#define UMC_6_INST_DIST 0x40000
+
+/*
+ * (addr / 256) * 8192, the higher 26 bits in ErrorAddr
+ * is the index of 8KB block
+ */
+#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
+/* channel index is the index of 256B block */
+#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
+/* offset in 256B block */
+#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL)
+
+#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++)
+#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
+#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
+
+const uint32_t
+ umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
+ {2, 18, 11, 27}, {4, 20, 13, 29},
+ {1, 17, 8, 24}, {7, 23, 14, 30},
+ {10, 26, 3, 19}, {12, 28, 5, 21},
+ {9, 25, 0, 16}, {15, 31, 6, 22}
+};
+
+static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN, 1);
+}
+
+static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN, 0);
+}
+
+static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
+{
+ uint32_t rsmu_umc_index;
+
+ rsmu_umc_index = RREG32_SOC15(RSMU, 0,
+ mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+
+ return REG_GET_FIELD(rsmu_umc_index,
+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN);
+}
+
+static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst;
+}
+
+static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ }
+
+ /* select the lower chip and check the error count */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
+ UMC_V6_1_CE_CNT_INIT);
+ /* clear the lower chip err count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
+
+ /* select the higher chip and check the err counter */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
+ UMC_V6_1_CE_CNT_INIT);
+ /* clear the higher chip err count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
+
+ /* check for SRAM correctable error
+ MCUMC_STATUS is a 64 bit register */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ }
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v6_1_querry_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+ }
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
+}
+
+static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ uint32_t umc_reg_offset,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
+{
+ uint32_t lsb, mc_umc_status_addr;
+ uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
+ struct eeprom_table_record *err_rec;
+ uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
+ }
+
+ /* skip error address process if -ENOMEM */
+ if (!err_data->err_addr) {
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+ return;
+ }
+
+ err_rec = &err_data->err_addr[err_data->err_addr_cnt];
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+
+ /* calculate error address if ue/ce error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ /* the lowest lsb bits should be ignored */
+ lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ err_addr &= ~((0x1ULL << lsb) - 1);
+
+ /* translate umc channel address to soc pa, 3 parts are included */
+ retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* we only save ue error information currently, ce is skipped */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
+ == 1) {
+ err_rec->address = err_addr;
+ /* page frame address is saved */
+ err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+ err_rec->ts = (uint64_t)ktime_get_real_seconds();
+ err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+ err_rec->cu = 0;
+ err_rec->mem_channel = channel_index;
+ err_rec->mcumc_id = umc_inst;
+
+ err_data->err_addr_cnt++;
+ }
+ }
+
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+}
+
+static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_query_error_address(adev,
+ err_data,
+ umc_reg_offset,
+ ch_inst,
+ umc_inst);
+ }
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
+}
+
+static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt_addr;
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ }
+
+ /* select the lower chip and check the error count */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ /* set ce error interrupt type to APIC based interrupt */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrInt, 0x1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ /* set error count to initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
+
+ /* select the higher chip and check the err counter */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
+}
+
+static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
+{
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset);
+ }
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
+}
+
+const struct amdgpu_umc_funcs umc_v6_1_funcs = {
+ .err_cnt_init = umc_v6_1_err_cnt_init,
+ .ras_late_init = amdgpu_umc_ras_late_init,
+ .query_ras_error_count = umc_v6_1_query_ras_error_count,
+ .query_ras_error_address = umc_v6_1_query_ras_error_address,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
new file mode 100644
index 000000000000..0ce1d323cfdd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V6_1_H__
+#define __UMC_V6_1_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* HBM Memory Channel Width */
+#define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH 128
+/* number of umc channel instance with memory map register access */
+#define UMC_V6_1_CHANNEL_INSTANCE_NUM 4
+/* number of umc instance with memory map register access */
+#define UMC_V6_1_UMC_INSTANCE_NUM 8
+/* total channel instances in one umc block */
+#define UMC_V6_1_TOTAL_CHANNEL_NUM (UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM)
+/* UMC regiser per channel offset */
+#define UMC_V6_1_PER_CHANNEL_OFFSET_VG20 0x800
+#define UMC_V6_1_PER_CHANNEL_OFFSET_ARCT 0x400
+
+/* EccErrCnt max value */
+#define UMC_V6_1_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V6_1_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
+
+extern const struct amdgpu_umc_funcs umc_v6_1_funcs;
+extern const uint32_t
+ umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 01e62fb8e6e0..0fa8aae2d78e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -763,7 +763,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 670784a78512..e0aadcaf6c8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -206,13 +206,14 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
* Open up a stream for HW test
*/
static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -220,15 +221,15 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00010000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -268,13 +269,14 @@ err:
*/
static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -282,15 +284,15 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00010000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -327,13 +329,20 @@ err:
static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
- r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
+ r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
- r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence);
+ r = uvd_v6_0_enc_get_destroy_msg(ring, 1, bo, &fence);
if (r)
goto error;
@@ -345,6 +354,8 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
@@ -1410,7 +1421,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index a6bfe7651d07..0995378d8263 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -214,13 +214,14 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
* Open up a stream for HW test
*/
static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -228,15 +229,15 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00000000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -275,13 +276,14 @@ err:
* Close up a stream for HW test or if userspace failed to do so
*/
static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -289,15 +291,15 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00000000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002;
@@ -334,13 +336,20 @@ err:
static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
- r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
+ r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
- r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence);
+ r = uvd_v7_0_enc_get_destroy_msg(ring, 1, bo, &fence);
if (r)
goto error;
@@ -352,6 +361,8 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
@@ -1763,7 +1774,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB,
+ .vmhub = AMDGPU_MMHUB_0,
.get_rptr = uvd_v7_0_ring_get_rptr,
.get_wptr = uvd_v7_0_ring_get_wptr,
.set_wptr = uvd_v7_0_ring_set_wptr,
@@ -1796,7 +1807,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.nop = HEVC_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB,
+ .vmhub = AMDGPU_MMHUB_0,
.get_rptr = uvd_v7_0_enc_ring_get_rptr,
.get_wptr = uvd_v7_0_enc_ring_get_wptr,
.set_wptr = uvd_v7_0_enc_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 475ae68f38f5..217db187207c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -739,7 +739,7 @@ static int vce_v3_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
int i;
if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index eafbe8d8248d..3fd102efb7af 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -887,7 +887,7 @@ static int vce_v4_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
int i;
if ((adev->asic_type == CHIP_POLARIS10) ||
@@ -1070,7 +1070,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB,
+ .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vce_v4_0_ring_get_rptr,
.get_wptr = vce_v4_0_ring_get_wptr,
.set_wptr = vce_v4_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index dde22b7d140d..71f61afdc655 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
@@ -36,21 +37,22 @@
#include "mmhub/mmhub_9_1_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_1_0.h"
+#include "jpeg_v1_0.h"
-#define mmUVD_RBC_XX_IB_REG_CHECK 0x05ab
-#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1
-#define mmUVD_REG_XX_MASK 0x05ac
-#define mmUVD_REG_XX_MASK_BASE_IDX 1
+#define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab
+#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1
+#define mmUVD_REG_XX_MASK_1_0 0x05ac
+#define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1
static int vcn_v1_0_stop(struct amdgpu_device *adev);
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
-static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
-static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state);
+ int inst_idx, struct dpg_pause_state *new_state);
+
+static void vcn_v1_0_idle_work_handler(struct work_struct *work);
/**
* vcn_v1_0_early_init - set function pointers
@@ -63,13 +65,15 @@ static int vcn_v1_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->vcn.num_vcn_inst = 1;
adev->vcn.num_enc_rings = 2;
vcn_v1_0_set_dec_ring_funcs(adev);
vcn_v1_0_set_enc_ring_funcs(adev);
- vcn_v1_0_set_jpeg_ring_funcs(adev);
vcn_v1_0_set_irq_funcs(adev);
+ jpeg_v1_0_early_init(handle);
+
return 0;
}
@@ -87,27 +91,26 @@ static int vcn_v1_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* VCN DEC TRAP */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst->irq);
if (r)
return r;
/* VCN ENC TRAP */
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
- &adev->vcn.irq);
+ &adev->vcn.inst->irq);
if (r)
return r;
}
- /* VCN JPEG TRAP */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq);
- if (r)
- return r;
-
r = amdgpu_vcn_sw_init(adev);
if (r)
return r;
+ /* Override the work func */
+ adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler;
+
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr;
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
@@ -122,42 +125,36 @@ static int vcn_v1_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->vcn.ring_dec;
+ ring = &adev->vcn.inst->ring_dec;
sprintf(ring->name, "vcn_dec");
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
if (r)
return r;
- adev->vcn.internal.scratch9 = adev->vcn.external.scratch9 =
+ adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = adev->vcn.external.data0 =
+ adev->vcn.internal.data0 = adev->vcn.inst->external.data0 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = adev->vcn.external.data1 =
+ adev->vcn.internal.data1 = adev->vcn.inst->external.data1 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = adev->vcn.external.cmd =
+ adev->vcn.internal.cmd = adev->vcn.inst->external.cmd =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = adev->vcn.external.nop =
+ adev->vcn.internal.nop = adev->vcn.inst->external.nop =
SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- ring = &adev->vcn.ring_enc[i];
+ ring = &adev->vcn.inst->ring_enc[i];
sprintf(ring->name, "vcn_enc%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
if (r)
return r;
}
- ring = &adev->vcn.ring_jpeg;
- sprintf(ring->name, "vcn_jpeg");
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
- if (r)
- return r;
-
adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
- adev->vcn.internal.jpeg_pitch = adev->vcn.external.jpeg_pitch =
- SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
- return 0;
+ r = jpeg_v1_0_sw_init(handle);
+
+ return r;
}
/**
@@ -176,6 +173,8 @@ static int vcn_v1_0_sw_fini(void *handle)
if (r)
return r;
+ jpeg_v1_0_sw_fini(handle);
+
r = amdgpu_vcn_sw_fini(adev);
return r;
@@ -191,7 +190,7 @@ static int vcn_v1_0_sw_fini(void *handle)
static int vcn_v1_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
r = amdgpu_ring_test_helper(ring);
@@ -199,14 +198,13 @@ static int vcn_v1_0_hw_init(void *handle)
goto done;
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- ring = &adev->vcn.ring_enc[i];
- ring->sched.ready = true;
+ ring = &adev->vcn.inst->ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
- ring = &adev->vcn.ring_jpeg;
+ ring = &adev->jpeg.inst->ring_dec;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
@@ -229,7 +227,7 @@ done:
static int vcn_v1_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
RREG32_SOC15(VCN, 0, mmUVD_STATUS))
@@ -304,9 +302,9 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
offset = 0;
} else {
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr));
+ lower_32_bits(adev->vcn.inst->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr));
+ upper_32_bits(adev->vcn.inst->gpu_addr));
offset = size;
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
@@ -316,17 +314,17 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
/* cache window 1: stack */
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr + offset));
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr + offset));
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset));
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
/* cache window 2: context */
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
@@ -374,9 +372,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
offset = 0;
} else {
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0);
+ lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0);
+ upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);
offset = size;
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0);
@@ -386,9 +384,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
/* cache window 1: stack */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0);
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0);
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0,
0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE,
@@ -396,10 +394,10 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
/* cache window 2: context */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE,
@@ -779,7 +777,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
*/
static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
int i, j, r;
@@ -837,9 +835,9 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
vcn_v1_0_mc_resume_spg_mode(adev);
- WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10);
- WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK,
- RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3);
+ WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0,
+ RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0) | 0x3);
/* enable VCPU clock */
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
@@ -932,43 +930,28 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
- ring = &adev->vcn.ring_enc[0];
+ ring = &adev->vcn.inst->ring_enc[0];
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
- ring = &adev->vcn.ring_enc[1];
+ ring = &adev->vcn.inst->ring_enc[1];
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
- ring = &adev->vcn.ring_jpeg;
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
- UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
-
- /* initialize wptr */
- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-
- /* copy patch commands to the jpeg ring */
- vcn_v1_0_jpeg_ring_set_patch_ring(ring,
- (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+ jpeg_v1_0_start(adev, 0);
return 0;
}
static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -1105,13 +1088,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
- /* initialize JPEG wptr */
- ring = &adev->vcn.ring_jpeg;
- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-
- /* copy patch commands to the jpeg ring */
- vcn_v1_0_jpeg_ring_set_patch_ring(ring,
- (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+ jpeg_v1_0_start(adev, 1);
return 0;
}
@@ -1222,7 +1199,7 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
}
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state)
+ int inst_idx, struct dpg_pause_state *new_state)
{
int ret_code;
uint32_t reg_data = 0;
@@ -1230,9 +1207,10 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
struct amdgpu_ring *ring;
/* pause/unpause if state is changed */
- if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
- adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+ adev->vcn.inst[inst_idx].pause_state.fw_based,
+ adev->vcn.inst[inst_idx].pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
@@ -1255,21 +1233,21 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
/* Restore */
- ring = &adev->vcn.ring_enc[0];
+ ring = &adev->vcn.inst->ring_enc[0];
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- ring = &adev->vcn.ring_enc[1];
+ ring = &adev->vcn.inst->ring_enc[1];
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- ring = &adev->vcn.ring_dec;
+ ring = &adev->vcn.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
@@ -1281,13 +1259,14 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
- adev->vcn.pause_state.fw_based = new_state->fw_based;
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
/* pause/unpause if state is changed */
- if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
+ if (adev->vcn.inst[inst_idx].pause_state.jpeg != new_state->jpeg) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
- adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+ adev->vcn.inst[inst_idx].pause_state.fw_based,
+ adev->vcn.inst[inst_idx].pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
@@ -1315,7 +1294,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
/* Restore */
- ring = &adev->vcn.ring_jpeg;
+ ring = &adev->jpeg.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
@@ -1329,7 +1308,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
- ring = &adev->vcn.ring_dec;
+ ring = &adev->vcn.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
@@ -1341,7 +1320,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
- adev->vcn.pause_state.jpeg = new_state->jpeg;
+ adev->vcn.inst[inst_idx].pause_state.jpeg = new_state->jpeg;
}
return 0;
@@ -1369,7 +1348,7 @@ static int vcn_v1_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
@@ -1596,7 +1575,7 @@ static uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vcn.ring_enc[0])
+ if (ring == &adev->vcn.inst->ring_enc[0])
return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
else
return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
@@ -1613,7 +1592,7 @@ static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vcn.ring_enc[0])
+ if (ring == &adev->vcn.inst->ring_enc[0])
return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
else
return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
@@ -1630,7 +1609,7 @@ static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vcn.ring_enc[0])
+ if (ring == &adev->vcn.inst->ring_enc[0])
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
lower_32_bits(ring->wptr));
else
@@ -1715,389 +1694,6 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-
-/**
- * vcn_v1_0_jpeg_ring_get_rptr - get read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware read pointer
- */
-static uint64_t vcn_v1_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_get_wptr - get write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware write pointer
- */
-static uint64_t vcn_v1_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_set_wptr - set write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the write pointer to the hardware
- */
-static void vcn_v1_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
-}
-
-/**
- * vcn_v1_0_jpeg_ring_insert_start - insert a start command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a start command to the ring.
- */
-static void vcn_v1_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x68e04);
-
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x80010000);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_insert_end - insert a end command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a end command to the ring.
- */
-static void vcn_v1_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x68e04);
-
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x00010000);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_emit_fence - emit an fence & trap command
- *
- * @ring: amdgpu_ring pointer
- * @fence: fence to emit
- *
- * Write a fence and a trap command to the ring.
- */
-static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
- unsigned flags)
-{
- struct amdgpu_device *adev = ring->adev;
-
- WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, seq);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, seq);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x8);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
- amdgpu_ring_write(ring, 0);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x01400200);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, seq);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
- amdgpu_ring_write(ring, 0xffffffff);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x3fbc);
-
- amdgpu_ring_write(ring,
- PACKETJ(0, 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x1);
-
- /* emit trap */
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
- amdgpu_ring_write(ring, 0);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_emit_ib - execute indirect buffer
- *
- * @ring: amdgpu_ring pointer
- * @ib: indirect buffer to execute
- *
- * Write ring commands to execute the indirect buffer.
- */
-static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib,
- uint32_t flags)
-{
- struct amdgpu_device *adev = ring->adev;
- unsigned vmid = AMDGPU_JOB_GET_VMID(job);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, ib->length_dw);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
- amdgpu_ring_write(ring, 0);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x01400200);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x2);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
- amdgpu_ring_write(ring, 0x2);
-}
-
-static void vcn_v1_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring,
- uint32_t reg, uint32_t val,
- uint32_t mask)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t reg_offset = (reg << 2);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x01400200);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, val);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
- if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
- ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring,
- PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
- } else {
- amdgpu_ring_write(ring, reg_offset);
- amdgpu_ring_write(ring,
- PACKETJ(0, 0, 0, PACKETJ_TYPE3));
- }
- amdgpu_ring_write(ring, mask);
-}
-
-static void vcn_v1_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
-{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t data0, data1, mask;
-
- pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
-
- /* wait for register write */
- data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
- data1 = lower_32_bits(pd_addr);
- mask = 0xffffffff;
- vcn_v1_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);
-}
-
-static void vcn_v1_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring,
- uint32_t reg, uint32_t val)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t reg_offset = (reg << 2);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
- if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
- ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring,
- PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
- } else {
- amdgpu_ring_write(ring, reg_offset);
- amdgpu_ring_write(ring,
- PACKETJ(0, 0, 0, PACKETJ_TYPE0));
- }
- amdgpu_ring_write(ring, val);
-}
-
-static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
-{
- int i;
-
- WARN_ON(ring->wptr % 2 || count % 2);
-
- for (i = 0; i < count / 2; i++) {
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
- amdgpu_ring_write(ring, 0);
- }
-}
-
-static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
-{
- struct amdgpu_device *adev = ring->adev;
- ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
- if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
- ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
- ring->ring[(*ptr)++] = 0;
- ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
- } else {
- ring->ring[(*ptr)++] = reg_offset;
- ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
- }
- ring->ring[(*ptr)++] = val;
-}
-
-static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
-{
- struct amdgpu_device *adev = ring->adev;
-
- uint32_t reg, reg_offset, val, mask, i;
-
- // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
- reg_offset = (reg << 2);
- val = lower_32_bits(ring->gpu_addr);
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
- // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
- reg_offset = (reg << 2);
- val = upper_32_bits(ring->gpu_addr);
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
- // 3rd to 5th: issue MEM_READ commands
- for (i = 0; i <= 2; i++) {
- ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
- ring->ring[ptr++] = 0;
- }
-
- // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
- reg_offset = (reg << 2);
- val = 0x13;
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
- // 7th: program mmUVD_JRBC_RB_REF_DATA
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA);
- reg_offset = (reg << 2);
- val = 0x1;
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
- // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
- reg_offset = (reg << 2);
- val = 0x1;
- mask = 0x1;
-
- ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
- ring->ring[ptr++] = 0x01400200;
- ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
- ring->ring[ptr++] = val;
- ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
- if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
- ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
- ring->ring[ptr++] = 0;
- ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
- } else {
- ring->ring[ptr++] = reg_offset;
- ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
- }
- ring->ring[ptr++] = mask;
-
- //9th to 21st: insert no-op
- for (i = 0; i <= 12; i++) {
- ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
- ring->ring[ptr++] = 0;
- }
-
- //22nd: reset mmUVD_JRBC_RB_RPTR
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR);
- reg_offset = (reg << 2);
- val = 0;
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
- //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
- reg_offset = (reg << 2);
- val = 0x12;
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-}
-
static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -2114,16 +1710,13 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case 124:
- amdgpu_fence_process(&adev->vcn.ring_dec);
+ amdgpu_fence_process(&adev->vcn.inst->ring_dec);
break;
case 119:
- amdgpu_fence_process(&adev->vcn.ring_enc[0]);
+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]);
break;
case 120:
- amdgpu_fence_process(&adev->vcn.ring_enc[1]);
- break;
- case 126:
- amdgpu_fence_process(&adev->vcn.ring_jpeg);
+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -2173,6 +1766,86 @@ static int vcn_v1_0_set_powergating_state(void *handle,
return ret;
}
+static void vcn_v1_0_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ unsigned int fences = 0, i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
+
+ if (fences)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+ adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ }
+
+ fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec);
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec);
+
+ if (fences == 0) {
+ amdgpu_gfx_off_ctrl(adev, true);
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+ else
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
+ } else {
+ schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ }
+}
+
+void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ if (set_clocks) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+ else
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
+ unsigned int fences = 0, i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
+
+ if (fences)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+
+ adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ }
+}
+
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.name = "vcn_v1_0",
.early_init = vcn_v1_0_early_init,
@@ -2198,7 +1871,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB,
+ .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
.set_wptr = vcn_v1_0_dec_ring_set_wptr,
@@ -2219,7 +1892,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.insert_start = vcn_v1_0_dec_ring_insert_start,
.insert_end = vcn_v1_0_dec_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
+ .begin_use = vcn_v1_0_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
@@ -2232,7 +1905,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.nop = VCN_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB,
+ .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vcn_v1_0_enc_ring_get_rptr,
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
.set_wptr = vcn_v1_0_enc_ring_set_wptr,
@@ -2251,51 +1924,16 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
.insert_end = vcn_v1_0_enc_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
+ .begin_use = vcn_v1_0_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
-static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
- .type = AMDGPU_RING_TYPE_VCN_JPEG,
- .align_mask = 0xf,
- .nop = PACKET0(0x81ff, 0),
- .support_64bit_ptrs = false,
- .no_user_fence = true,
- .vmhub = AMDGPU_MMHUB,
- .extra_dw = 64,
- .get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
- .get_wptr = vcn_v1_0_jpeg_ring_get_wptr,
- .set_wptr = vcn_v1_0_jpeg_ring_set_wptr,
- .emit_frame_size =
- 6 + 6 + /* hdp invalidate / flush */
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
- 8 + /* vcn_v1_0_jpeg_ring_emit_vm_flush */
- 26 + 26 + /* vcn_v1_0_jpeg_ring_emit_fence x2 vm fence */
- 6,
- .emit_ib_size = 22, /* vcn_v1_0_jpeg_ring_emit_ib */
- .emit_ib = vcn_v1_0_jpeg_ring_emit_ib,
- .emit_fence = vcn_v1_0_jpeg_ring_emit_fence,
- .emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush,
- .test_ring = amdgpu_vcn_jpeg_ring_test_ring,
- .test_ib = amdgpu_vcn_jpeg_ring_test_ib,
- .insert_nop = vcn_v1_0_jpeg_ring_nop,
- .insert_start = vcn_v1_0_jpeg_ring_insert_start,
- .insert_end = vcn_v1_0_jpeg_ring_insert_end,
- .pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg,
- .emit_reg_wait = vcn_v1_0_jpeg_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
+ adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
DRM_INFO("VCN decode is enabled in VM mode\n");
}
@@ -2304,17 +1942,11 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
+ adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
DRM_INFO("VCN encode is enabled in VM mode\n");
}
-static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
-{
- adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs;
- DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
-}
-
static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
.set = vcn_v1_0_set_interrupt_state,
.process = vcn_v1_0_process_interrupt,
@@ -2322,8 +1954,8 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2;
- adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs;
+ adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2;
+ adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs;
}
const struct amdgpu_ip_block_version vcn_v1_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
index 2a497a7a4840..f67d7391fc21 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
@@ -24,6 +24,8 @@
#ifndef __VCN_V1_0_H__
#define __VCN_V1_0_H__
+void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+
extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index dfde886cc6bd..c387c81f8695 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -22,7 +22,7 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "soc15.h"
@@ -47,39 +47,13 @@
#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7
#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2
-#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
-#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029
-#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a
-#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b
-#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea
-#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb
-#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf
-#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1
-#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8
-#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9
-#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
-#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec
-#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed
-#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
-#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
-#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
-#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
-
-#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
-
-#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b
-#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1
-#define mmUVD_REG_XX_MASK 0x026c
-#define mmUVD_REG_XX_MASK_BASE_IDX 1
-
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
-static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v2_0_set_powergating_state(void *handle,
enum amd_powergating_state state);
static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state);
+ int inst_idx, struct dpg_pause_state *new_state);
/**
* vcn_v2_0_early_init - set function pointers
@@ -92,11 +66,11 @@ static int vcn_v2_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->vcn.num_vcn_inst = 1;
adev->vcn.num_enc_rings = 2;
vcn_v2_0_set_dec_ring_funcs(adev);
vcn_v2_0_set_enc_ring_funcs(adev);
- vcn_v2_0_set_jpeg_ring_funcs(adev);
vcn_v2_0_set_irq_funcs(adev);
return 0;
@@ -118,7 +92,7 @@ static int vcn_v2_0_sw_init(void *handle)
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT,
- &adev->vcn.irq);
+ &adev->vcn.inst->irq);
if (r)
return r;
@@ -126,18 +100,11 @@ static int vcn_v2_0_sw_init(void *handle)
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
- &adev->vcn.irq);
+ &adev->vcn.inst->irq);
if (r)
return r;
}
- /* VCN JPEG TRAP */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
- VCN_2_0__SRCID__JPEG_DECODE,
- &adev->vcn.irq);
- if (r)
- return r;
-
r = amdgpu_vcn_sw_init(adev);
if (r)
return r;
@@ -156,50 +123,46 @@ static int vcn_v2_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->vcn.ring_dec;
+ ring = &adev->vcn.inst->ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
sprintf(ring->name, "vcn_dec");
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
if (r)
return r;
+ adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+
adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
- adev->vcn.external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
+ adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
- adev->vcn.external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
+ adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
- adev->vcn.external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
+ adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
- adev->vcn.external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
+ adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
- adev->vcn.external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
+ adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- ring = &adev->vcn.ring_enc[i];
+ ring = &adev->vcn.inst->ring_enc[i];
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;
sprintf(ring->name, "vcn_enc%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
if (r)
return r;
}
- ring = &adev->vcn.ring_jpeg;
- ring->use_doorbell = true;
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
- sprintf(ring->name, "vcn_jpeg");
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
- if (r)
- return r;
-
adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
- adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->vcn.external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
-
return 0;
}
@@ -234,35 +197,21 @@ static int vcn_v2_0_sw_fini(void *handle)
static int vcn_v2_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
- adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
- ring->doorbell_index);
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ring->doorbell_index, 0);
- ring->sched.ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
goto done;
- }
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- ring = &adev->vcn.ring_enc[i];
- ring->sched.ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
+ ring = &adev->vcn.inst->ring_enc[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
goto done;
- }
- }
-
- ring = &adev->vcn.ring_jpeg;
- ring->sched.ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
- goto done;
}
done:
@@ -283,7 +232,7 @@ done:
static int vcn_v2_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i;
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
@@ -294,13 +243,10 @@ static int vcn_v2_0_hw_fini(void *handle)
ring->sched.ready = false;
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- ring = &adev->vcn.ring_enc[i];
+ ring = &adev->vcn.inst->ring_enc[i];
ring->sched.ready = false;
}
- ring = &adev->vcn.ring_jpeg;
- ring->sched.ready = false;
-
return 0;
}
@@ -368,9 +314,9 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
offset = 0;
} else {
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr));
+ lower_32_bits(adev->vcn.inst->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr));
+ upper_32_bits(adev->vcn.inst->gpu_addr));
offset = size;
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
@@ -380,22 +326,21 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
/* cache window 1: stack */
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr + offset));
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr + offset));
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset));
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
/* cache window 2: context */
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
- WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
}
static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
@@ -406,88 +351,88 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
if (!indirect) {
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
} else {
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
}
offset = 0;
} else {
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
- lower_32_bits(adev->vcn.gpu_addr), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
- upper_32_bits(adev->vcn.gpu_addr), 0, indirect);
+ upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
offset = size;
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
}
if (!indirect)
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
else
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
/* cache window 1: stack */
if (!indirect) {
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
- lower_32_bits(adev->vcn.gpu_addr + offset), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
- upper_32_bits(adev->vcn.gpu_addr + offset), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
} else {
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
}
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
/* cache window 2: context */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
- lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
- upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
/* non-cache window */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
/* VCN global tiling registers */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
@@ -633,146 +578,23 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
UVD_CGC_CTRL__WCB_MODE_MASK |
UVD_CGC_CTRL__VCPU_MODE_MASK |
UVD_CGC_CTRL__SCPU_MODE_MASK);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
/* turn off clock gating */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
/* turn on SUVD clock gating */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
/* turn on sw mode in UVD_SUVD_CGC_CTRL */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}
/**
- * jpeg_v2_0_start - start JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * Setup and start the JPEG block
- */
-static int jpeg_v2_0_start(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring = &adev->vcn.ring_jpeg;
- uint32_t tmp;
- int r = 0;
-
- /* disable power gating */
- tmp = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp);
-
- SOC15_WAIT_ON_RREG(VCN, 0,
- mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
- UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
-
- if (r) {
- DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
- return r;
- }
-
- /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */
- tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp);
-
- /* JPEG disable CGC */
- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
- tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp);
-
- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
- tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
- | JPEG_CGC_GATE__JPEG2_DEC_MASK
- | JPEG_CGC_GATE__JPEG_ENC_MASK
- | JPEG_CGC_GATE__JMCIF_MASK
- | JPEG_CGC_GATE__JRBBM_MASK);
- WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp);
-
- /* enable JMI channel */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- /* enable System Interrupt for JRBC */
- WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN),
- JPEG_SYS_INT_EN__DJRBC_MASK,
- ~JPEG_SYS_INT_EN__DJRBC_MASK);
-
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-
- return 0;
-}
-
-/**
- * jpeg_v2_0_stop - stop JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * stop the JPEG block
- */
-static int jpeg_v2_0_stop(struct amdgpu_device *adev)
-{
- uint32_t tmp;
- int r = 0;
-
- /* reset JMI */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL),
- UVD_JMI_CNTL__SOFT_RESET_MASK,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- /* enable JPEG CGC */
- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
- tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp);
-
-
- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
- tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK
- |JPEG_CGC_GATE__JPEG2_DEC_MASK
- |JPEG_CGC_GATE__JPEG_ENC_MASK
- |JPEG_CGC_GATE__JMCIF_MASK
- |JPEG_CGC_GATE__JRBBM_MASK);
- WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp);
-
- /* enable power gating */
- tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS));
- tmp &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
- tmp |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp);
-
- tmp = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp);
-
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS,
- (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
- UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
-
- if (r) {
- DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
- return r;
- }
-
- return r;
-}
-
-/**
* vcn_v2_0_enable_clock_gating - enable VCN clock gating
*
* @adev: amdgpu_device pointer
@@ -920,7 +742,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
{
- struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
vcn_v2_0_enable_static_power_gating(adev);
@@ -932,7 +754,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
if (indirect)
- adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr;
+ adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr;
/* enable clock gating */
vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
@@ -941,11 +763,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
/* disable master interupt */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
/* setup mmUVD_LMI_CTRL */
@@ -957,28 +779,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
0x00100000L);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MPC_CNTL),
0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MPC_SET_MUXA0),
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MPC_SET_MUXB0),
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MPC_SET_MUX),
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
@@ -986,29 +808,29 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
/* release VCPU reset to boot */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect);
/* enable LMI MC and UMC channels */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_CTRL2),
0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect);
/* enable master interrupt */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
if (indirect)
- psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr,
- (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr -
- (uintptr_t)adev->vcn.dpg_sram_cpu_addr));
+ psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr,
+ (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr));
/* force RBC into idle state */
rb_bufsz = order_base_2(ring->ring_size);
@@ -1046,7 +868,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
static int vcn_v2_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
int i, j, r;
@@ -1054,12 +876,8 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, true);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
- if (r)
- return r;
- goto jpeg;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
vcn_v2_0_disable_static_power_gating(adev);
@@ -1197,24 +1015,21 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr));
- ring = &adev->vcn.ring_enc[0];
+ ring = &adev->vcn.inst->ring_enc[0];
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
- ring = &adev->vcn.ring_enc[1];
+ ring = &adev->vcn.inst->ring_enc[1];
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
-jpeg:
- r = jpeg_v2_0_start(adev);
-
- return r;
+ return 0;
}
static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
@@ -1233,9 +1048,6 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
- tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
-
tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
@@ -1254,10 +1066,6 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
uint32_t tmp;
int r;
- r = jpeg_v2_0_stop(adev);
- if (r)
- return r;
-
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
r = vcn_v2_0_stop_dpg_mode(adev);
if (r)
@@ -1322,16 +1130,16 @@ power_off:
}
static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state)
+ int inst_idx, struct dpg_pause_state *new_state)
{
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code;
/* pause/unpause if state is changed */
- if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d -> %d",
- adev->vcn.pause_state.fw_based, new_state->fw_based);
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
@@ -1351,14 +1159,14 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
/* Restore */
- ring = &adev->vcn.ring_enc[0];
+ ring = &adev->vcn.inst->ring_enc[0];
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- ring = &adev->vcn.ring_enc[1];
+ ring = &adev->vcn.inst->ring_enc[1];
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
@@ -1377,7 +1185,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
- adev->vcn.pause_state.fw_based = new_state->fw_based;
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
return 0;
@@ -1405,7 +1213,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
@@ -1480,11 +1288,13 @@ static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
*
* Write a start command to the ring.
*/
-static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
+void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
{
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
}
@@ -1495,9 +1305,11 @@ static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
*
* Write a end command to the ring.
*/
-static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
+void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
{
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1));
}
@@ -1508,14 +1320,15 @@ static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
*
* Write a nop command to the ring.
*/
-static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
+ struct amdgpu_device *adev = ring->adev;
int i;
WARN_ON(ring->wptr % 2 || count % 2);
for (i = 0; i < count / 2; i++) {
- amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0));
amdgpu_ring_write(ring, 0);
}
}
@@ -1528,30 +1341,31 @@ static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun
*
* Write a fence and a trap command to the ring.
*/
-static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
- unsigned flags)
+void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
{
- WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+ struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID_INTERNAL_OFFSET, 0));
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0));
amdgpu_ring_write(ring, seq);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
amdgpu_ring_write(ring, addr & 0xffffffff);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1));
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1));
}
@@ -1564,44 +1378,46 @@ static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
*
* Write ring commands to execute the indirect buffer
*/
-static void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib,
- uint32_t flags)
+void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
{
+ struct amdgpu_device *adev = ring->adev;
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
- amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0));
amdgpu_ring_write(ring, vmid);
- amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0));
amdgpu_ring_write(ring, ib->length_dw);
}
-static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring,
- uint32_t reg, uint32_t val,
- uint32_t mask)
+void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
{
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
amdgpu_ring_write(ring, val);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0));
amdgpu_ring_write(ring, mask);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1));
}
-static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
{
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
uint32_t data0, data1, mask;
@@ -1615,16 +1431,18 @@ static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
}
-static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
- uint32_t reg, uint32_t val)
+void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
{
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
amdgpu_ring_write(ring, val);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1));
}
@@ -1640,7 +1458,7 @@ static uint64_t vcn_v2_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vcn.ring_enc[0])
+ if (ring == &adev->vcn.inst->ring_enc[0])
return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
else
return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
@@ -1657,7 +1475,7 @@ static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vcn.ring_enc[0]) {
+ if (ring == &adev->vcn.inst->ring_enc[0]) {
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
else
@@ -1681,7 +1499,7 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vcn.ring_enc[0]) {
+ if (ring == &adev->vcn.inst->ring_enc[0]) {
if (ring->use_doorbell) {
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
@@ -1706,8 +1524,8 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
*
* Write enc a fence and a trap command to the ring.
*/
-static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
- u64 seq, unsigned flags)
+void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
{
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
@@ -1718,7 +1536,7 @@ static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP);
}
-static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)
+void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, VCN_ENC_CMD_END);
}
@@ -1731,10 +1549,10 @@ static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)
*
* Write enc ring commands to execute the indirect buffer
*/
-static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib,
- uint32_t flags)
+void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
@@ -1745,9 +1563,8 @@ static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, ib->length_dw);
}
-static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
- uint32_t reg, uint32_t val,
- uint32_t mask)
+void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
{
amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
amdgpu_ring_write(ring, reg << 2);
@@ -1755,8 +1572,8 @@ static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned int vmid, uint64_t pd_addr)
+void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
{
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
@@ -1767,282 +1584,13 @@ static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
lower_32_bits(pd_addr), 0xffffffff);
}
-static void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
- uint32_t reg, uint32_t val)
+void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
{
amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring, val);
}
-/**
- * vcn_v2_0_jpeg_ring_get_rptr - get read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware read pointer
- */
-static uint64_t vcn_v2_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_get_wptr - get write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware write pointer
- */
-static uint64_t vcn_v2_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
- else
- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_set_wptr - set write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the write pointer to the hardware
- */
-static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
- } else {
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
- }
-}
-
-/**
- * vcn_v2_0_jpeg_ring_insert_start - insert a start command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a start command to the ring.
- */
-static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x68e04);
-
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x80010000);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_insert_end - insert a end command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a end command to the ring.
- */
-static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x68e04);
-
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x00010000);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_emit_fence - emit an fence & trap command
- *
- * @ring: amdgpu_ring pointer
- * @fence: fence to emit
- *
- * Write a fence and a trap command to the ring.
- */
-static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
- unsigned flags)
-{
- WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, seq);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, seq);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(addr));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(addr));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x8);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
- 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
- amdgpu_ring_write(ring, 0);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x3fbc);
-
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x1);
-
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
- amdgpu_ring_write(ring, 0);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_emit_ib - execute indirect buffer
- *
- * @ring: amdgpu_ring pointer
- * @ib: indirect buffer to execute
- *
- * Write ring commands to execute the indirect buffer.
- */
-static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib,
- uint32_t flags)
-{
- unsigned vmid = AMDGPU_JOB_GET_VMID(job);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, ib->length_dw);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
-
- amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
- amdgpu_ring_write(ring, 0);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x01400200);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x2);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET,
- 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
- amdgpu_ring_write(ring, 0x2);
-}
-
-static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring,
- uint32_t reg, uint32_t val,
- uint32_t mask)
-{
- uint32_t reg_offset = (reg << 2);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x01400200);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, val);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring,
- PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
- } else {
- amdgpu_ring_write(ring, reg_offset);
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE3));
- }
- amdgpu_ring_write(ring, mask);
-}
-
-static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
-{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t data0, data1, mask;
-
- pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
-
- /* wait for register write */
- data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
- data1 = lower_32_bits(pd_addr);
- mask = 0xffffffff;
- vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);
-}
-
-static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring,
- uint32_t reg, uint32_t val)
-{
- uint32_t reg_offset = (reg << 2);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring,
- PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
- } else {
- amdgpu_ring_write(ring, reg_offset);
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- }
- amdgpu_ring_write(ring, val);
-}
-
-static void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
-{
- int i;
-
- WARN_ON(ring->wptr % 2 || count % 2);
-
- for (i = 0; i < count / 2; i++) {
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
- amdgpu_ring_write(ring, 0);
- }
-}
-
static int vcn_v2_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -2059,16 +1607,13 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
- amdgpu_fence_process(&adev->vcn.ring_dec);
+ amdgpu_fence_process(&adev->vcn.inst->ring_dec);
break;
case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
- amdgpu_fence_process(&adev->vcn.ring_enc[0]);
+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]);
break;
case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
- amdgpu_fence_process(&adev->vcn.ring_enc[1]);
- break;
- case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->vcn.ring_jpeg);
+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -2079,27 +1624,27 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
+int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t tmp = 0;
unsigned i;
int r;
- WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD);
+ WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 4);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->vcn.external.scratch9);
+ tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -2158,7 +1703,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB,
+ .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vcn_v2_0_dec_ring_get_rptr,
.get_wptr = vcn_v2_0_dec_ring_get_wptr,
.set_wptr = vcn_v2_0_dec_ring_set_wptr,
@@ -2189,7 +1734,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
- .vmhub = AMDGPU_MMHUB,
+ .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vcn_v2_0_enc_ring_get_rptr,
.get_wptr = vcn_v2_0_enc_ring_get_wptr,
.set_wptr = vcn_v2_0_enc_ring_set_wptr,
@@ -2215,39 +1760,9 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
-static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = {
- .type = AMDGPU_RING_TYPE_VCN_JPEG,
- .align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB,
- .get_rptr = vcn_v2_0_jpeg_ring_get_rptr,
- .get_wptr = vcn_v2_0_jpeg_ring_get_wptr,
- .set_wptr = vcn_v2_0_jpeg_ring_set_wptr,
- .emit_frame_size =
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
- 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */
- 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */
- 8 + 16,
- .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */
- .emit_ib = vcn_v2_0_jpeg_ring_emit_ib,
- .emit_fence = vcn_v2_0_jpeg_ring_emit_fence,
- .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush,
- .test_ring = amdgpu_vcn_jpeg_ring_test_ring,
- .test_ib = amdgpu_vcn_jpeg_ring_test_ib,
- .insert_nop = vcn_v2_0_jpeg_ring_nop,
- .insert_start = vcn_v2_0_jpeg_ring_insert_start,
- .insert_end = vcn_v2_0_jpeg_ring_insert_end,
- .pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg,
- .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->vcn.ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
+ adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
DRM_INFO("VCN decode is enabled in VM mode\n");
}
@@ -2256,17 +1771,11 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- adev->vcn.ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;
+ adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;
DRM_INFO("VCN encode is enabled in VM mode\n");
}
-static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
-{
- adev->vcn.ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs;
- DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
-}
-
static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
.set = vcn_v2_0_set_interrupt_state,
.process = vcn_v2_0_process_interrupt,
@@ -2274,8 +1783,8 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2;
- adev->vcn.irq.funcs = &vcn_v2_0_irq_funcs;
+ adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs;
}
const struct amdgpu_ip_block_version vcn_v2_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
index a74227f4663b..6c9de1882428 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
@@ -24,6 +24,32 @@
#ifndef __VCN_V2_0_H__
#define __VCN_V2_0_H__
+extern void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
+extern void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
+extern void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+extern void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags);
+extern void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+extern void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr);
+extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val);
+extern int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring);
+
+extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring);
+extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags);
+extern void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+extern void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr);
+extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+
extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block;
#endif /* __VCN_V2_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
new file mode 100644
index 000000000000..2d64ba1adf99
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -0,0 +1,1799 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "vcn_v2_0.h"
+#include "mmsch_v1_0.h"
+
+#include "vcn/vcn_2_5_offset.h"
+#include "vcn/vcn_2_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
+#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
+#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10
+#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11
+#define mmUVD_NO_OP_INTERNAL_OFFSET 0x29
+#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66
+#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d
+
+#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5
+#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c
+
+#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2
+
+static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
+
+static int amdgpu_ih_clientid_vcns[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+/**
+ * vcn_v2_5_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int vcn_v2_5_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ u32 harvest;
+ int i;
+
+ adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING);
+ if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+ adev->vcn.harvest_config |= 1 << i;
+ }
+
+ if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
+ AMDGPU_VCN_HARVEST_VCN1))
+ /* both instances are harvested, disable the block */
+ return -ENOENT;
+ } else
+ adev->vcn.num_vcn_inst = 1;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.num_vcn_inst = 2;
+ adev->vcn.harvest_config = 0;
+ adev->vcn.num_enc_rings = 1;
+ } else {
+ adev->vcn.num_enc_rings = 2;
+ }
+
+ vcn_v2_5_set_dec_ring_funcs(adev);
+ vcn_v2_5_set_enc_ring_funcs(adev);
+ vcn_v2_5_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * vcn_v2_5_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v2_5_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, j, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+ /* VCN DEC TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
+ VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq);
+ if (r)
+ return r;
+
+ /* VCN ENC TRAP */
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
+ i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
+ if (r)
+ return r;
+ }
+ }
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ const struct common_firmware_header *hdr;
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+
+ if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) {
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+ DRM_INFO("PSP loading VCN firmware\n");
+ }
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+ adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+
+ adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9);
+ adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0);
+ adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1);
+ adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD);
+ adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP);
+
+ ring = &adev->vcn.inst[j].ring_dec;
+ ring->use_doorbell = true;
+
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ (amdgpu_sriov_vf(adev) ? 2*j : 8*j);
+ sprintf(ring->name, "vcn_dec_%d", j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ ring = &adev->vcn.inst[j].ring_enc[i];
+ ring->use_doorbell = true;
+
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ (amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
+
+ sprintf(ring->name, "vcn_enc_%d.%d", j, i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
+ if (r)
+ return r;
+ }
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
+
+ return 0;
+}
+
+/**
+ * vcn_v2_5_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v2_5_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_5_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v2_5_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ r = vcn_v2_5_sriov_start(adev);
+
+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.inst[j].ring_enc[0].sched.ready = true;
+ adev->vcn.inst[j].ring_enc[1].sched.ready = false;
+ adev->vcn.inst[j].ring_enc[2].sched.ready = false;
+ adev->vcn.inst[j].ring_dec.sched.ready = true;
+ } else {
+
+ ring = &adev->vcn.inst[j].ring_dec;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ring->doorbell_index, j);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ ring = &adev->vcn.inst[j].ring_enc[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+ }
+ }
+
+done:
+ if (!r)
+ DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
+
+ return r;
+}
+
+/**
+ * vcn_v2_5_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v2_5_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, j;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ ring = &adev->vcn.inst[i].ring_dec;
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, mmUVD_STATUS)))
+ vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ ring->sched.ready = false;
+
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ ring = &adev->vcn.inst[i].ring_enc[j];
+ ring->sched.ready = false;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v2_5_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v2_5_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = vcn_v2_5_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_5_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v2_5_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v2_5_hw_init(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_5_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the VCN memory controller know it's offsets
+ */
+static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = size;
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+ }
+}
+
+static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v2_5_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+ int ret = 0;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* UVD disable CGC */
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
+
+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ /* turn on */
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
+ }
+}
+
+static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
+ uint8_t sram_sel, int inst_idx, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ /* enable sw clock gating control */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v2_5_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* enable UVD CGC */
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
+ }
+}
+
+static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* enable clock gating */
+ vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interupt */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
+
+ /* unblock VCPU register access */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+ (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+
+ ring = &adev->vcn.inst[inst_idx].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ return 0;
+}
+
+static int vcn_v2_5_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+ int i, j, k, r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+ continue;
+ }
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* set uvd status busy */
+ tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp);
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return 0;
+
+ /*SW clock gating */
+ vcn_v2_5_disable_clock_gating(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL);
+ tmp &= ~0xff;
+ WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8|
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+ }
+
+ vcn_v2_5_mc_resume(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* VCN global tiling registers */
+ WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (k = 0; k < 10; ++k) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(UVD, i, mmUVD_STATUS);
+ if (status & 2)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(500);
+ else
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("VCN decode not responding, giving up!!!\n");
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0);
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->vcn.inst[i].ring_enc[1];
+ WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ }
+
+ return 0;
+}
+
+static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
+ struct amdgpu_mm_table *table)
+{
+ uint32_t data = 0, loop = 0, size = 0;
+ uint64_t addr = table->gpu_addr;
+ struct mmsch_v1_1_init_header *header = NULL;;
+
+ header = (struct mmsch_v1_1_init_header *)table->cpu_addr;
+ size = header->total_size;
+
+ /*
+ * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of
+ * memory descriptor location
+ */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
+
+ /* 2, update vmid of descriptor */
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID);
+ data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+
+ /*
+ * 5, kick off the initialization and wait until
+ * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
+
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+ loop = 10;
+ while ((data & 0x10000002) != 0x10000002) {
+ udelay(100);
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+ loop--;
+ if (!loop)
+ break;
+ }
+
+ if (!loop) {
+ dev_err(adev->dev,
+ "failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n",
+ data);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint32_t offset, size, tmp, i, rb_bufsz;
+ uint32_t table_size = 0;
+ struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
+ struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
+ struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
+ struct mmsch_v1_0_cmd_end end = { { 0 } };
+ uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+ struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;
+
+ direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ header->version = MMSCH_VERSION;
+ header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2;
+ init_table += header->total_size;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ header->eng[i].table_offset = header->total_size;
+ header->eng[i].init_status = 0;
+ header->eng[i].table_size = 0;
+
+ table_size = 0;
+
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ /* mc resume*/
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+ offset = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = size;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0),
+ size);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1),
+ 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1),
+ AMDGPU_VCN_STACK_SIZE);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2),
+ 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->wptr = 0;
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI),
+ upper_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE),
+ ring->ring_size / 4);
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ ring->wptr = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+ upper_32_bits(ring->gpu_addr));
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+ /* add end packet */
+ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+ table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+ init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+
+ /* refine header */
+ header->eng[i].table_size = table_size;
+ header->total_size += table_size;
+ }
+
+ return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
+}
+
+static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ int ret_code = 0;
+ uint32_t tmp;
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2);
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int vcn_v2_5_stop(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int i, r = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_5_stop_dpg_mode(adev, i);
+ continue;
+ }
+
+ /* wait for vcn idle */
+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
+ if (r)
+ return r;
+
+ /* block LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+
+ vcn_v2_5_enable_clock_gating(adev);
+
+ /* enable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state)
+{
+ struct amdgpu_ring *ring;
+ uint32_t reg_data = 0;
+ int ret_code;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d -> %d",
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+ reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+ /* Restore */
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+ } else {
+ /* unpause dpg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v2_5_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * vcn_v2_5_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * vcn_v2_5_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2,
+ lower_32_bits(ring->wptr) | 0x80000000);
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_DEC,
+ .align_mask = 0xf,
+ .vmhub = AMDGPU_MMHUB_1,
+ .get_rptr = vcn_v2_5_dec_ring_get_rptr,
+ .get_wptr = vcn_v2_5_dec_ring_get_wptr,
+ .set_wptr = vcn_v2_5_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
+ 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
+ .emit_ib = vcn_v2_0_dec_ring_emit_ib,
+ .emit_fence = vcn_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = vcn_v2_0_dec_ring_test_ring,
+ .test_ib = amdgpu_vcn_dec_ring_test_ib,
+ .insert_nop = vcn_v2_0_dec_ring_insert_nop,
+ .insert_start = vcn_v2_0_dec_ring_insert_start,
+ .insert_end = vcn_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v2_5_enc_ring_get_rptr - get enc read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc read pointer
+ */
+static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
+ else
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
+}
+
+/**
+ * vcn_v2_5_enc_ring_get_wptr - get enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc write pointer
+ */
+static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
+ } else {
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
+ }
+}
+
+/**
+ * vcn_v2_5_enc_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ } else {
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ }
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .vmhub = AMDGPU_MMHUB_1,
+ .get_rptr = vcn_v2_5_enc_ring_get_rptr,
+ .get_wptr = vcn_v2_5_enc_ring_get_wptr,
+ .set_wptr = vcn_v2_5_enc_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_enc_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
+ adev->vcn.inst[i].ring_dec.me = i;
+ DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
+ }
+}
+
+static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
+ adev->vcn.inst[j].ring_enc[i].me = j;
+ }
+ DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j);
+ }
+}
+
+static bool vcn_v2_5_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
+ }
+
+ return ret;
+}
+
+static int vcn_v2_5_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE, ret);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int vcn_v2_5_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (enable) {
+ if (vcn_v2_5_is_idle(handle))
+ return -EBUSY;
+ vcn_v2_5_enable_clock_gating(adev);
+ } else {
+ vcn_v2_5_disable_clock_gating(adev);
+ }
+
+ return 0;
+}
+
+static int vcn_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if(state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v2_5_stop(adev);
+ else
+ ret = vcn_v2_5_start(adev);
+
+ if(!ret)
+ adev->vcn.cur_state = state;
+
+ return ret;
+}
+
+static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
+ .set = vcn_v2_5_set_interrupt_state,
+ .process = vcn_v2_5_process_interrupt,
+};
+
+static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
+ }
+}
+
+static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
+ .name = "vcn_v2_5",
+ .early_init = vcn_v2_5_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v2_5_sw_init,
+ .sw_fini = vcn_v2_5_sw_fini,
+ .hw_init = vcn_v2_5_hw_init,
+ .hw_fini = vcn_v2_5_hw_fini,
+ .suspend = vcn_v2_5_suspend,
+ .resume = vcn_v2_5_resume,
+ .is_idle = vcn_v2_5_is_idle,
+ .wait_for_idle = vcn_v2_5_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v2_5_set_clockgating_state,
+ .set_powergating_state = vcn_v2_5_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v2_5_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 2,
+ .minor = 5,
+ .rev = 0,
+ .funcs = &vcn_v2_5_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
new file mode 100644
index 000000000000..8d9c0800b8e0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V2_5_H__
+#define __VCN_V2_5_H__
+
+extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block;
+
+#endif /* __VCN_V2_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 22260e6963b8..407c6093c2ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -50,7 +50,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (amdgpu_sriov_vf(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
return;
@@ -64,7 +64,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
RB_ENABLE, 1);
- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (amdgpu_sriov_vf(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
@@ -80,7 +80,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
RB_ENABLE, 1);
- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (amdgpu_sriov_vf(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
@@ -106,7 +106,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (amdgpu_sriov_vf(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
return;
@@ -125,7 +125,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
RB_ENABLE, 0);
- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (amdgpu_sriov_vf(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
@@ -145,7 +145,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
RB_ENABLE, 0);
- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (amdgpu_sriov_vf(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
@@ -219,14 +219,14 @@ static uint32_t vega10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih)
static int vega10_ih_irq_init(struct amdgpu_device *adev)
{
struct amdgpu_ih_ring *ih;
- u32 ih_rb_cntl;
+ u32 ih_rb_cntl, ih_chicken;
int ret = 0;
u32 tmp;
/* disable irqs */
vega10_ih_disable_interrupts(adev);
- adev->nbio_funcs->ih_control(adev);
+ adev->nbio.funcs->ih_control(adev);
ih = &adev->irq.ih;
/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
@@ -237,8 +237,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
!!adev->irq.msi_enabled);
-
- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (amdgpu_sriov_vf(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
return -ETIMEDOUT;
@@ -247,6 +246,20 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
}
+ if ((adev->asic_type == CHIP_ARCTURUS &&
+ adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ adev->asic_type == CHIP_RENOIR) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+ if (adev->irq.ih.use_bus_addr) {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_GPA_ENABLE, 1);
+ } else {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_FBPA_ENABLE, 1);
+ }
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
+ }
+
/* set the writeback address whether it's enabled or not */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
lower_32_bits(ih->wptr_addr));
@@ -272,7 +285,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
WPTR_OVERFLOW_ENABLE, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
RB_FULL_DRAIN_ENABLE, 1);
- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (amdgpu_sriov_vf(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
@@ -299,7 +312,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (amdgpu_sriov_vf(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
@@ -664,10 +677,49 @@ static int vega10_ih_soft_reset(void *handle)
return 0;
}
+static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ /**
+ * Vega10 does not have IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE
+ * and IH_BUFFER_MEM_CLK_SOFT_OVERRIDE field.
+ */
+ if (adev->asic_type > CHIP_VEGA10) {
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val);
+ }
+
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data);
+ }
+}
+
static int vega10_ih_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ vega10_ih_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
return 0;
+
}
static int vega10_ih_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index a8e92638a2e8..6b52a539d51b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -24,7 +24,6 @@
#include "soc15.h"
#include "soc15_common.h"
-#include "soc15_hw_ip.h"
#include "vega10_ip_offset.h"
int vega10_reg_base_init(struct amdgpu_device *adev)
@@ -81,6 +80,10 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3;
adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5;
adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_DOORBELL64_VCN6_7;
adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL64_FIRST_NON_CP;
adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 0db84386252a..556f854e3551 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -24,7 +24,6 @@
#include "soc15.h"
#include "soc15_common.h"
-#include "soc15_hw_ip.h"
#include "vega20_ip_offset.h"
int vega20_reg_base_init(struct amdgpu_device *adev)
@@ -50,6 +49,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+ adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i]));
}
return 0;
}
@@ -85,6 +86,10 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3;
adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5;
adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCN6_7;
adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP;
adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 6575ddcfcf00..78b35901643b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -690,15 +690,15 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
}
/**
- * vi_asic_reset - soft reset GPU
+ * vi_asic_pci_config_reset - soft reset GPU
*
* @adev: amdgpu_device pointer
*
- * Look up which blocks are hung and attempt
- * to reset them.
+ * Use PCI Config method to reset the GPU.
+ *
* Returns 0 for success.
*/
-static int vi_asic_reset(struct amdgpu_device *adev)
+static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
{
int r;
@@ -711,6 +711,70 @@ static int vi_asic_reset(struct amdgpu_device *adev)
return r;
}
+static bool vi_asic_supports_baco(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_TOPAZ:
+ return amdgpu_dpm_is_baco_supported(adev);
+ default:
+ return false;
+ }
+}
+
+static enum amd_reset_method
+vi_asic_reset_method(struct amdgpu_device *adev)
+{
+ bool baco_reset;
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_TOPAZ:
+ baco_reset = amdgpu_dpm_is_baco_supported(adev);
+ break;
+ default:
+ baco_reset = false;
+ break;
+ }
+
+ if (baco_reset)
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * vi_asic_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
+ */
+static int vi_asic_reset(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ if (!adev->in_suspend)
+ amdgpu_inc_vram_lost(adev);
+ r = amdgpu_dpm_baco_reset(adev);
+ } else {
+ r = vi_asic_pci_config_reset(adev);
+ }
+
+ return r;
+}
+
static u32 vi_get_config_memsize(struct amdgpu_device *adev)
{
return RREG32(mmCONFIG_MEMSIZE);
@@ -1023,6 +1087,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.read_bios_from_rom = &vi_read_bios_from_rom,
.read_register = &vi_read_register,
.reset = &vi_asic_reset,
+ .reset_method = &vi_asic_reset_method,
.set_vga_state = &vi_vga_set_state,
.get_xclk = &vi_get_xclk,
.set_uvd_clocks = &vi_set_uvd_clocks,
@@ -1035,6 +1100,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.get_pcie_usage = &vi_get_pcie_usage,
.need_reset_on_init = &vi_need_reset_on_init,
.get_pcie_replay_count = &vi_get_pcie_replay_count,
+ .supports_baco = &vi_asic_supports_baco,
};
#define CZ_REV_BRISTOL(rev) \
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h
index 8de0772f986c..defb4aaf929a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -31,4 +31,5 @@ void vi_srbm_select(struct amdgpu_device *adev,
int vi_set_ip_blocks(struct amdgpu_device *adev);
void legacy_doorbell_index_init(struct amdgpu_device *adev);
+
#endif
OpenPOWER on IntegriCloud