diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
204 files changed, 25669 insertions, 8772 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index f6e5c0282fc1..9375e7f12420 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: MIT config DRM_AMDGPU_SI bool "Enable amdgpu support for SI parts" depends on DRM_AMDGPU @@ -27,7 +27,9 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU - depends on HMM_MIRROR + depends on MMU + select HMM_MIRROR + select MMU_NOTIFIER help This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it isn't already selected to enabled full userptr support. diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 56e084367b93..c2bbcdd9c875 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -53,8 +53,9 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ - amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ - amdgpu_vm_sdma.o amdgpu_discovery.o + amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ + amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ + amdgpu_umc.o smu_v11_0_i2c.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o @@ -66,7 +67,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ - vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o + vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ + arct_reg_init.o navi12_reg_init.o mxgpu_nv.o # add DF block amdgpu-y += \ @@ -77,9 +79,13 @@ amdgpu-y += \ amdgpu-y += \ gmc_v7_0.o \ gmc_v8_0.o \ - gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \ + gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o +# add UMC block +amdgpu-y += \ + umc_v6_1.o umc_v6_0.o + # add IH block amdgpu-y += \ amdgpu_irq.o \ @@ -95,7 +101,8 @@ amdgpu-y += \ amdgpu_psp.o \ psp_v3_1.o \ psp_v10_0.o \ - psp_v11_0.o + psp_v11_0.o \ + psp_v12_0.o # add SMC block amdgpu-y += \ @@ -113,6 +120,7 @@ amdgpu-y += \ amdgpu_rlc.o \ gfx_v8_0.o \ gfx_v9_0.o \ + gfx_v9_4.o \ gfx_v10_0.o # add async DMA block @@ -140,14 +148,20 @@ amdgpu-y += \ vce_v3_0.o \ vce_v4_0.o -# add VCN block +# add VCN and JPEG block amdgpu-y += \ amdgpu_vcn.o \ vcn_v1_0.o \ - vcn_v2_0.o + vcn_v2_0.o \ + vcn_v2_5.o \ + amdgpu_jpeg.o \ + jpeg_v1_0.o \ + jpeg_v2_0.o \ + jpeg_v2_5.o # add ATHUB block amdgpu-y += \ + athub_v1_0.o \ athub_v2_0.o # add amdkfd interfaces @@ -162,6 +176,7 @@ amdgpu-y += \ amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gfx_v8.o \ amdgpu_amdkfd_gfx_v9.o \ + amdgpu_amdkfd_arcturus.o \ amdgpu_amdkfd_gfx_v10.o ifneq ($(CONFIG_DRM_AMDGPU_CIK),) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8199d201b43a..da3bcff61b97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -69,10 +69,12 @@ #include "amdgpu_uvd.h" #include "amdgpu_vce.h" #include "amdgpu_vcn.h" +#include "amdgpu_jpeg.h" #include "amdgpu_mn.h" #include "amdgpu_gmc.h" #include "amdgpu_gfx.h" #include "amdgpu_sdma.h" +#include "amdgpu_nbio.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" #include "amdgpu_csa.h" @@ -86,6 +88,9 @@ #include "amdgpu_smu.h" #include "amdgpu_discovery.h" #include "amdgpu_mes.h" +#include "amdgpu_umc.h" +#include "amdgpu_mmhub.h" +#include "amdgpu_df.h" #define MAX_GPU_INSTANCE 16 @@ -104,6 +109,8 @@ struct amdgpu_mgpu_info uint32_t num_apu; }; +#define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH 256 + /* * Modules parameters. */ @@ -120,6 +127,7 @@ extern int amdgpu_disp_priority; extern int amdgpu_hw_i2c; extern int amdgpu_pcie_gen2; extern int amdgpu_msi; +extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; extern int amdgpu_dpm; extern int amdgpu_fw_load_type; extern int amdgpu_aspm; @@ -133,6 +141,7 @@ extern int amdgpu_vm_fragment_size; extern int amdgpu_vm_fault_stop; extern int amdgpu_vm_debug; extern int amdgpu_vm_update_mode; +extern int amdgpu_exp_hw_support; extern int amdgpu_dc; extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; @@ -144,11 +153,7 @@ extern uint amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern uint amdgpu_pp_feature_mask; -extern int amdgpu_ngg; -extern int amdgpu_prim_buf_per_se; -extern int amdgpu_pos_buf_per_se; -extern int amdgpu_cntl_sb_buf_per_se; -extern int amdgpu_param_buf_per_se; +extern uint amdgpu_force_long_training; extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; @@ -165,6 +170,12 @@ extern int amdgpu_mcbp; extern int amdgpu_discovery; extern int amdgpu_mes; extern int amdgpu_noretry; +extern int amdgpu_force_asic_type; +#ifdef CONFIG_HSA_AMD +extern int sched_policy; +#else +static const int sched_policy = KFD_SCHED_POLICY_HWS; +#endif #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -281,6 +292,9 @@ struct amdgpu_ip_block_version { const struct amd_ip_funcs *funcs; }; +#define HW_REV(_Major, _Minor, _Rev) \ + ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev))) + struct amdgpu_ip_block { struct amdgpu_ip_block_status status; const struct amdgpu_ip_block_version *version; @@ -423,7 +437,6 @@ struct amdgpu_fpriv { }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); -int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev); int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); @@ -475,7 +488,6 @@ struct amdgpu_cs_parser { uint64_t bytes_moved_vis_threshold; uint64_t bytes_moved; uint64_t bytes_moved_vis; - struct amdgpu_bo_list_entry *evictable; /* user fence */ struct amdgpu_bo_list_entry uf_entry; @@ -532,6 +544,14 @@ struct amdgpu_allowed_register_entry { bool grbm_indexed; }; +enum amd_reset_method { + AMD_RESET_METHOD_LEGACY = 0, + AMD_RESET_METHOD_MODE0, + AMD_RESET_METHOD_MODE1, + AMD_RESET_METHOD_MODE2, + AMD_RESET_METHOD_BACO +}; + /* * ASIC specific functions. */ @@ -543,6 +563,7 @@ struct amdgpu_asic_funcs { u32 sh_num, u32 reg_offset, u32 *value); void (*set_vga_state)(struct amdgpu_device *adev, bool state); int (*reset)(struct amdgpu_device *adev); + enum amd_reset_method (*reset_method)(struct amdgpu_device *adev); /* get the reference clock */ u32 (*get_xclk)(struct amdgpu_device *adev); /* MM block clocks */ @@ -569,6 +590,8 @@ struct amdgpu_asic_funcs { bool (*need_reset_on_init)(struct amdgpu_device *adev); /* PCIe replay counter */ uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); + /* device supports BACO */ + bool (*supports_baco)(struct amdgpu_device *adev); }; /* @@ -613,6 +636,10 @@ struct amdgpu_fw_vram_usage { u64 size; struct amdgpu_bo *reserved_bo; void *va; + + /* GDDR6 training support flag. + */ + bool mem_train_support; }; /* @@ -627,91 +654,29 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device); typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t); typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); +typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t); +typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); + typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); - -/* - * amdgpu nbio functions - * - */ -struct nbio_hdp_flush_reg { - u32 ref_and_mask_cp0; - u32 ref_and_mask_cp1; - u32 ref_and_mask_cp2; - u32 ref_and_mask_cp3; - u32 ref_and_mask_cp4; - u32 ref_and_mask_cp5; - u32 ref_and_mask_cp6; - u32 ref_and_mask_cp7; - u32 ref_and_mask_cp8; - u32 ref_and_mask_cp9; - u32 ref_and_mask_sdma0; - u32 ref_and_mask_sdma1; -}; - struct amdgpu_mmio_remap { u32 reg_offset; resource_size_t bus_addr; }; -struct amdgpu_nbio_funcs { - const struct nbio_hdp_flush_reg *hdp_flush_reg; - u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); - u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); - u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); - u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); - u32 (*get_rev_id)(struct amdgpu_device *adev); - void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); - void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); - u32 (*get_memsize)(struct amdgpu_device *adev); - void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index, int doorbell_size); - void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, - int doorbell_index); - void (*enable_doorbell_aperture)(struct amdgpu_device *adev, - bool enable); - void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, - bool enable); - void (*ih_doorbell_range)(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index); - void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, - bool enable); - void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, - bool enable); - void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); - void (*ih_control)(struct amdgpu_device *adev); - void (*init_registers)(struct amdgpu_device *adev); - void (*detect_hw_virt)(struct amdgpu_device *adev); - void (*remap_hdp_registers)(struct amdgpu_device *adev); -}; - -struct amdgpu_df_funcs { - void (*sw_init)(struct amdgpu_device *adev); - void (*enable_broadcast_mode)(struct amdgpu_device *adev, - bool enable); - u32 (*get_fb_channel_number)(struct amdgpu_device *adev); - u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); - void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, - bool enable); - void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); - void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, - bool enable); - int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, - int is_enable); - int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, - int is_disable); - void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, - uint64_t *count); -}; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { GC_HWIP = 1, HDP_HWIP, SDMA0_HWIP, SDMA1_HWIP, + SDMA2_HWIP, + SDMA3_HWIP, + SDMA4_HWIP, + SDMA5_HWIP, + SDMA6_HWIP, + SDMA7_HWIP, MMHUB_HWIP, ATHUB_HWIP, NBIO_HWIP, @@ -719,6 +684,7 @@ enum amd_hw_ip_block_type { MP1_HWIP, UVD_HWIP, VCN_HWIP = UVD_HWIP, + JPEG_HWIP = VCN_HWIP, VCE_HWIP, DF_HWIP, DCE_HWIP, @@ -728,10 +694,12 @@ enum amd_hw_ip_block_type { NBIF_HWIP, THM_HWIP, CLK_HWIP, + UMC_HWIP, + RSMU_HWIP, MAX_HWIP }; -#define HWIP_MAX_INSTANCE 6 +#define HWIP_MAX_INSTANCE 8 struct amd_powerplay { void *pp_handle; @@ -758,7 +726,6 @@ struct amdgpu_device { int usec_timeout; const struct amdgpu_asic_funcs *asic_funcs; bool shutdown; - bool need_dma32; bool need_swiotlb; bool accel_working; struct notifier_block acpi_nb; @@ -783,6 +750,7 @@ struct amdgpu_device { uint8_t *bios; uint32_t bios_size; struct amdgpu_bo *stolen_vga_memory; + struct amdgpu_bo *discovery_memory; uint32_t bios_scratch_reg_offset; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; @@ -803,6 +771,8 @@ struct amdgpu_device { amdgpu_wreg_t pcie_wreg; amdgpu_rreg_t pciep_rreg; amdgpu_wreg_t pciep_wreg; + amdgpu_rreg64_t pcie_rreg64; + amdgpu_wreg64_t pcie_wreg64; /* protects concurrent UVD register access */ spinlock_t uvd_ctx_idx_lock; amdgpu_rreg_t uvd_ctx_rreg; @@ -836,6 +806,7 @@ struct amdgpu_device { dma_addr_t dummy_page_addr; struct amdgpu_vm_manager vm_manager; struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; + unsigned num_vmhubs; /* memory management */ struct amdgpu_mman mman; @@ -888,6 +859,12 @@ struct amdgpu_device { u32 cg_flags; u32 pg_flags; + /* nbio */ + struct amdgpu_nbio nbio; + + /* mmhub */ + struct amdgpu_mmhub mmhub; + /* gfx */ struct amdgpu_gfx gfx; @@ -903,6 +880,9 @@ struct amdgpu_device { /* vcn */ struct amdgpu_vcn vcn; + /* jpeg */ + struct amdgpu_jpeg jpeg; + /* firmwares */ struct amdgpu_firmware firmware; @@ -915,6 +895,9 @@ struct amdgpu_device { /* KFD */ struct amdgpu_kfd_dev kfd; + /* UMC */ + struct amdgpu_umc umc; + /* display related functionality */ struct amdgpu_display_manager dm; @@ -925,6 +908,9 @@ struct amdgpu_device { bool enable_mes; struct amdgpu_mes mes; + /* df */ + struct amdgpu_df df; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; int num_ip_blocks; struct mutex mn_lock; @@ -938,9 +924,6 @@ struct amdgpu_device { /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; - const struct amdgpu_nbio_funcs *nbio_funcs; - const struct amdgpu_df_funcs *df_funcs; - /* delayed work_func for deferring clockgating during resume */ struct delayed_work delayed_init_work; @@ -965,14 +948,15 @@ struct amdgpu_device { /* record last mm index being written through WREG32*/ unsigned long last_mm_index; bool in_gpu_reset; + enum pp_mp1_state mp1_state; struct mutex lock_reset; struct amdgpu_doorbell_index doorbell_index; + struct mutex notifier_lock; + int asic_reset_res; struct work_struct xgmi_reset_work; - bool in_baco_reset; - long gfx_timeout; long sdma_timeout; long video_timeout; @@ -980,6 +964,14 @@ struct amdgpu_device { uint64_t unique_id; uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; + + /* device pstate */ + int pstate; + /* enable runtime pm on the device */ + bool runpm; + + bool pm_sysfs_en; + bool ucode_sysfs_en; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -994,6 +986,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, void amdgpu_device_fini(struct amdgpu_device *adev); int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); +void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, + uint32_t *buf, size_t size, bool write); uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, @@ -1015,10 +1009,14 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define AMDGPU_REGS_IDX (1<<0) #define AMDGPU_REGS_NO_KIQ (1<<1) +#define AMDGPU_REGS_KIQ (1<<2) #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG32_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_KIQ) +#define WREG32_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_KIQ) + #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) @@ -1033,6 +1031,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) #define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v)) +#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg)) +#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v)) #define RREG32_SMC(reg) adev->smc_rreg(adev, (reg)) #define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v)) #define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg)) @@ -1093,6 +1093,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); */ #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state)) #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev)) +#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev)) #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev)) #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d)) #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec)) @@ -1110,6 +1111,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) +#define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev)) + +#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); @@ -1125,9 +1129,12 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 *registers, const u32 array_size); -bool amdgpu_device_is_px(struct drm_device *dev); +bool amdgpu_device_supports_boco(struct drm_device *dev); +bool amdgpu_device_supports_baco(struct drm_device *dev); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); +int amdgpu_device_baco_enter(struct drm_device *dev); +int amdgpu_device_baco_exit(struct drm_device *dev); /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) @@ -1165,8 +1172,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv); void amdgpu_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); -int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon); -int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon); +int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); +int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe); int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe); void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index eba42c752bca..12247a32f9ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -189,7 +189,7 @@ static int acp_hw_init(void *handle) u32 val = 0; u32 count = 0; struct device *dev; - struct i2s_platform_data *i2s_pdata; + struct i2s_platform_data *i2s_pdata = NULL; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -231,20 +231,21 @@ static int acp_hw_init(void *handle) adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL); - if (adev->acp.acp_cell == NULL) - return -ENOMEM; + if (adev->acp.acp_cell == NULL) { + r = -ENOMEM; + goto failure; + } adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); if (adev->acp.acp_res == NULL) { - kfree(adev->acp.acp_cell); - return -ENOMEM; + r = -ENOMEM; + goto failure; } i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); if (i2s_pdata == NULL) { - kfree(adev->acp.acp_res); - kfree(adev->acp.acp_cell); - return -ENOMEM; + r = -ENOMEM; + goto failure; } switch (adev->asic_type) { @@ -341,14 +342,14 @@ static int acp_hw_init(void *handle) r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); if (r) - return r; + goto failure; for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); if (r) { dev_err(dev, "Failed to add dev to genpd\n"); - return r; + goto failure; } } @@ -367,7 +368,8 @@ static int acp_hw_init(void *handle) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto failure; } udelay(100); } @@ -384,7 +386,8 @@ static int acp_hw_init(void *handle) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto failure; } udelay(100); } @@ -393,6 +396,13 @@ static int acp_hw_init(void *handle) val &= ~ACP_SOFT_RESET__SoftResetAud_MASK; cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); return 0; + +failure: + kfree(i2s_pdata); + kfree(adev->acp.acp_res); + kfree(adev->acp.acp_cell); + kfree(adev->acp.acp_genpd); + return r; } /** @@ -517,7 +527,7 @@ static int acp_set_powergating_state(void *handle, enum amd_powergating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = state == AMD_PG_STATE_GATE ? true : false; + bool enable = (state == AMD_PG_STATE_GATE); if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_powergating_by_smu) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 9fa4f25a3745..8609287620ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -63,40 +63,10 @@ void amdgpu_amdkfd_fini(void) void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { - const struct kfd2kgd_calls *kfd2kgd; - - switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_KAVERI: - case CHIP_HAWAII: - kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); - break; -#endif - case CHIP_CARRIZO: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); - break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); - break; - case CHIP_NAVI10: - kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions(); - break; - default: - dev_info(adev->dev, "kfd not supported on this ASIC\n"); - return; - } + bool vf = amdgpu_sriov_vf(adev); adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + adev->pdev, adev->asic_type, vf); if (adev->kfd.dev) amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -160,14 +130,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->gfx.mec.queue_bitmap, KGD_MAX_QUEUES); - /* remove the KIQ bit as well */ - if (adev->gfx.kiq.ring.sched.ready) - clear_bit(amdgpu_gfx_mec_queue_to_bit(adev, - adev->gfx.kiq.ring.me - 1, - adev->gfx.kiq.ring.pipe, - adev->gfx.kiq.ring.queue), - gpu_resources.queue_bitmap); - /* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant */ @@ -197,7 +159,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->doorbell_index.last_non_cp; } - kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); + kgd2kfd_device_init(adev->kfd.dev, adev->ddev, &gpu_resources); } } @@ -651,11 +613,9 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->switch_power_profile) - amdgpu_dpm_switch_power_profile(adev, - PP_SMC_POWER_PROFILE_COMPUTE, - !idle); + amdgpu_dpm_switch_power_profile(adev, + PP_SMC_POWER_PROFILE_COMPUTE, + !idle); } bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) @@ -668,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + if (adev->family == AMDGPU_FAMILY_AI) { + int i; + + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + } else { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + } + + return 0; +} + +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint32_t flush_type = 0; + bool all_hub = false; + + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; + + if (adev->family == AMDGPU_FAMILY_AI) + all_hub = true; + + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); +} + bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -700,33 +692,14 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) return 0; } -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) -{ - return NULL; -} - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) -{ - return NULL; -} - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) -{ - return NULL; -} - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void) -{ - return NULL; -} - struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - const struct kfd2kgd_calls *f2g) + unsigned int asic_type, bool vf) { return NULL; } bool kgd2kfd_device_init(struct kfd_dev *kfd, + struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) { return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b6076d19e442..47b0f2957d1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -57,7 +57,7 @@ struct kgd_mem { unsigned int mapped_to_gpu_memory; uint64_t va; - uint32_t mapping_flags; + uint32_t alloc_flags; atomic_t invalid; struct amdkfd_process_info *process_info; @@ -136,11 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void); +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid); +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); @@ -178,10 +175,17 @@ uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); +/* Read user wptr from a specified user address space with page fault + * disabled. The memory must be pinned and mapped to the hardware when + * this is called in hqd_load functions, so it should never fault in + * the first place. This resolves a circular lock dependency involving + * four locks, including the DQM lock and mmap_sem. + */ #define read_user_wptr(mmptr, wptr, dst) \ ({ \ bool valid = false; \ if ((mmptr) && (wptr)) { \ + pagefault_disable(); \ if ((mmptr) == current->mm) { \ valid = !get_user((dst), (wptr)); \ } else if (current->mm == NULL) { \ @@ -189,6 +193,7 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s valid = !get_user((dst), (wptr)); \ unuse_mm(mmptr); \ } \ + pagefault_enable(); \ } \ valid; \ }) @@ -239,8 +244,9 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); int kgd2kfd_init(void); void kgd2kfd_exit(void); struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - const struct kfd2kgd_calls *f2g); + unsigned int asic_type, bool vf); bool kgd2kfd_device_init(struct kfd_dev *kfd, + struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c new file mode 100644 index 000000000000..4bcc175a149d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -0,0 +1,325 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/mmu_context.h> +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "sdma0/sdma0_4_2_2_offset.h" +#include "sdma0/sdma0_4_2_2_sh_mask.h" +#include "sdma1/sdma1_4_2_2_offset.h" +#include "sdma1/sdma1_4_2_2_sh_mask.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma2/sdma2_4_2_2_sh_mask.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_sh_mask.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_sh_mask.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_sh_mask.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_sh_mask.h" +#include "sdma7/sdma7_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_sh_mask.h" +#include "v9_structs.h" +#include "soc15.h" +#include "soc15d.h" +#include "amdgpu_amdkfd_gfx_v9.h" +#include "gfxhub_v1_0.h" +#include "mmhub_v9_4.h" + +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t sdma_engine_reg_base = 0; + uint32_t sdma_rlc_reg_offset; + + switch (engine_id) { + default: + dev_warn(adev->dev, + "Invalid sdma engine id (%d), using engine id 0\n", + engine_id); + /* fall through */ + case 0: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + case 1: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL; + break; + case 2: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + break; + case 3: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL; + break; + case 4: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0, + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL; + break; + case 5: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0, + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL; + break; + case 6: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0, + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL; + break; + case 7: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0, + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL; + break; + } + + sdma_rlc_reg_offset = sdma_engine_reg_base + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); + + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, sdma_rlc_reg_offset); + + return sdma_rlc_reg_offset; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); + return -ETIME; + } + usleep_range(500, 1000); + } + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); + return -ETIME; + } + usleep_range(500, 1000); + } + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + +static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base); + + gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); +} + +const struct kfd2kgd_calls arcturus_kfd2kgd = { + .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, + .init_interrupts = kgd_gfx_v9_init_interrupts, + .hqd_load = kgd_gfx_v9_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_gfx_v9_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_gfx_v9_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_gfx_v9_address_watch_disable, + .address_watch_execute = kgd_gfx_v9_address_watch_execute, + .wave_control_execute = kgd_gfx_v9_wave_control_execute, + .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, + .get_tile_config = kgd_gfx_v9_get_tile_config, + .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 0723f800e815..a7b17c8deb00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -19,19 +19,9 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#undef pr_fmt -#define pr_fmt(fmt) "kfd2kgd: " fmt - -#include <linux/module.h> -#include <linux/fdtable.h> -#include <linux/uaccess.h> -#include <linux/firmware.h> #include <linux/mmu_context.h> -#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" -#include "amdgpu_ucode.h" -#include "soc15_hw_ip.h" #include "gc/gc_10_1_0_offset.h" #include "gc/gc_10_1_0_sh_mask.h" #include "navi10_enum.h" @@ -43,6 +33,7 @@ #include "v10_structs.h" #include "nv.h" #include "nvd.h" +#include "gfxhub_v2_0.h" enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -51,63 +42,6 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -#if 0 -static uint32_t get_watch_base_addr(struct amdgpu_device *adev); -#endif -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); - /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. */ @@ -140,37 +74,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .get_tile_config = amdgpu_amdkfd_get_tile_config, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions() -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -204,13 +107,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, lock_srbm(kgd, mec, pipe, queue_id, 0); } -static uint32_t get_queue_mask(struct amdgpu_device *adev, +static uint64_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + - queue_id) & 31; + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id; - return ((uint32_t)1) << bit; + return 1ull << bit; } static void release_queue(struct kgd_dev *kgd) @@ -251,11 +154,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, ATC_VMID0_PASID_MAPPING__VALID_MASK; pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping); - /* - * need to do this twice, once for gfx and once for mmhub - * for ATC add 16 to VMID for mmhub, for IH different registers. - * ATC_VMID0..15 registers are separate from ATC_VMID16..31. - */ pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, @@ -307,11 +205,11 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t base[2] = { + uint32_t sdma_engine_reg_base[2] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, /* On gfx10, mmSDMA1_xxx registers are defined NOT based @@ -323,12 +221,12 @@ static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL }; - uint32_t retval; - retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - - mmSDMA0_RLC0_RB_CNTL); + uint32_t retval = sdma_engine_reg_base[engine_id] + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); - pr_debug("sdma base address: 0x%x\n", retval); + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, retval); return retval; } @@ -370,21 +268,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); acquire_queue(kgd, pipe_id, queue_id); - /* HIQ is set during driver init period with vmid set to 0*/ - if (m->cp_hqd_vmid == 0) { - uint32_t value, mec, pipe; - - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", - mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); - } - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); @@ -434,9 +317,10 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, lower_32_bits((uint64_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uint64_t)wptr)); - pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id)); + pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -452,6 +336,59 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } +static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v10_compute_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + static int kgd_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) @@ -489,72 +426,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; - uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; uint32_t data; uint64_t data64; uint64_t __user *wptr64 = (uint64_t __user *)wptr; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - pr_debug("sdma load base addr %x for engine %d, queue %d\n", sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id); - sdmax_gfx_context_cntl = m->sdma_engine_id ? - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - data = RREG32(sdmax_gfx_context_cntl); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(sdmax_gfx_context_cntl, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, m->sdmax_rlcx_doorbell_offset); data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); if (read_user_wptr(mm, wptr64, data64)) { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, lower_32_bits(data64)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, upper_32_bits(data64)); } else { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, m->sdmax_rlcx_rb_rptr_hi); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -564,28 +496,26 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, uint32_t (**dump)[2], uint32_t *n_regs) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - pr_debug("sdma dump engine id %d queue_id %d\n", engine_id, queue_id); - pr_debug("sdma base addr %x\n", sdma_base_addr); - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -619,14 +549,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -747,157 +677,52 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); m->sdmax_rlcx_rb_rptr_hi = - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); return 0; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) - + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; -} - -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - uint32_t req = (1 << vmid) | - (0 << GCVM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT) |/* legacy */ - GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PTES_MASK | - GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE0_MASK | - GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE1_MASK | - GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE2_MASK | - GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L1_PTES_MASK; - - mutex_lock(&adev->srbm_mutex); - - /* Use light weight invalidation. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate only GCHUB as - * SDMA is now moved to GCHUB - * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32), - 0xffffffff); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32), - 0x0000001f); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ), req); - - while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK)) & - (1 << vmid))) - cpu_relax(); - - mutex_unlock(&adev->srbm_mutex); -} - -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) -{ - signed long r; - uint32_t seq; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); - amdgpu_ring_write(ring, - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | - PACKET3_INVALIDATE_TLBS_PASID(pasid)); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); - - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; -} + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - if (amdgpu_emu_mode == 0 && ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid); - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { - if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) - == pasid) { - write_vmid_invalidate_request(kgd, vmid); - break; - } - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return 0; - } - - write_vmid_invalidate_request(kgd, vmid); - return 0; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static int kgd_address_watch_disable(struct kgd_dev *kgd) @@ -950,7 +775,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint64_t base = page_table_base | AMDGPU_PTE_VALID; if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", @@ -958,18 +782,30 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, return; } - /* TODO: take advantage of per-process address space size. For - * now, all processes share the same address space size, like - * on GFX8 and older. - */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), - upper_32_bits(adev->vm_manager.max_pfn - 1)); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); + /* SDMA is on gfxhub as well for Navi1* series */ + gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base); } + +const struct kfd2kgd_calls gfx_v10_kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hiq_mqd_load = kgd_hiq_mqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = + get_atc_vmid_pasid_mapping_info, + .get_tile_config = amdgpu_amdkfd_get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 5f459bf5f622..8f052e98a3c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/fdtable.h> -#include <linux/uaccess.h> #include <linux/mmu_context.h> #include "amdgpu.h" @@ -86,65 +84,6 @@ union TCP_WATCH_CNTL_BITS { float f32All; }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, - uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); - -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); - -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); - -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); - -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); -static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd); - /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. */ @@ -170,37 +109,6 @@ static int get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -303,14 +211,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) +static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m) { uint32_t retval; retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; - pr_debug("sdma base address: 0x%x\n", retval); + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", + m->sdma_engine_id, m->sdma_queue_id, retval); return retval; } @@ -413,60 +322,52 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; unsigned long end_jiffies; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t data; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - if (m->sdma_engine_id) { - data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); - } else { - data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); - } data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdma_rlc_rb_rptr); if (read_user_wptr(mm, wptr, data)) - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data); else - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdma_rlc_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdma_rlc_virtual_addr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdma_rlc_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdma_rlc_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -524,13 +425,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -645,32 +546,34 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdma_rlc_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); return 0; } @@ -758,24 +661,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static void set_scratch_backing_va(struct kgd_dev *kgd, @@ -801,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, lower_32_bits(page_table_base)); } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - unsigned int tmp; - - if (adev->in_gpu_reset) - return -EIO; - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid\n"); - return 0; - } - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - return 0; -} - /** * read_vmid_from_vmfault_reg - read vmid from register * @@ -855,3 +711,26 @@ static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd) return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); } + +const struct kfd2kgd_calls gfx_v7_kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 6d2f61449606..19a10db93d68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,9 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/module.h> -#include <linux/fdtable.h> -#include <linux/uaccess.h> #include <linux/mmu_context.h> #include "amdgpu.h" @@ -44,62 +41,6 @@ enum hqd_dequeue_request_type { RESET_WAVES }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); - /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. */ @@ -125,38 +66,6 @@ static int get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -260,13 +169,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m) +static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m) { uint32_t retval; retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; - pr_debug("sdma base address: 0x%x\n", retval); + + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", + m->sdma_engine_id, m->sdma_queue_id, retval); return retval; } @@ -398,59 +309,51 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; unsigned long end_jiffies; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t data; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - if (m->sdma_engine_id) { - data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); - } else { - data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); - } data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); if (read_user_wptr(mm, wptr, data)) - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data); else - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdmax_rlcx_virtual_addr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -517,13 +420,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -641,54 +544,48 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); return 0; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static int kgd_address_watch_disable(struct kgd_dev *kgd) @@ -760,41 +657,25 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, lower_32_bits(page_table_base)); } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - unsigned int tmp; - - if (adev->in_gpu_reset) - return -EIO; - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return -EINVAL; - } - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - return 0; -} +const struct kfd2kgd_calls gfx_v8_kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = + get_atc_vmid_pasid_mapping_info, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 85395f2d83a6..8562afe5b761 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -19,17 +19,10 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - -#define pr_fmt(fmt) "kfd2kgd: " fmt - -#include <linux/module.h> -#include <linux/fdtable.h> -#include <linux/uaccess.h> #include <linux/mmu_context.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" -#include "soc15_hw_ip.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" #include "vega10_enum.h" @@ -49,75 +42,17 @@ #include "gfxhub_v1_0.h" -#define V9_PIPE_PER_MEC (4) -#define V9_QUEUES_PER_PIPE_MEC (8) - enum hqd_dequeue_request_type { NO_ACTION = 0, DRAIN_PIPE, RESET_WAVES }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. */ -static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, +int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, struct tile_config *config) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -135,39 +70,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = amdgpu_amdkfd_get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .get_hive_id = amdgpu_amdkfd_get_hive_id, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -201,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, lock_srbm(kgd, mec, pipe, queue_id, 0); } -static uint32_t get_queue_mask(struct amdgpu_device *adev, +static uint64_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + - queue_id) & 31; + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id; - return ((uint32_t)1) << bit; + return 1ull << bit; } static void release_queue(struct kgd_dev *kgd) @@ -215,7 +117,7 @@ static void release_queue(struct kgd_dev *kgd) unlock_srbm(kgd); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, @@ -232,7 +134,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, unlock_srbm(kgd); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -293,7 +195,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, * but still works */ -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; @@ -313,22 +215,21 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t base[2] = { + uint32_t sdma_engine_reg_base[2] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL }; - uint32_t retval; - - retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - - mmSDMA0_RLC0_RB_CNTL); + uint32_t retval = sdma_engine_reg_base[engine_id] + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); - pr_debug("sdma base address: 0x%x\n", retval); + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, retval); return retval; } @@ -343,7 +244,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm) @@ -357,21 +258,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, acquire_queue(kgd, pipe_id, queue_id); - /* HIQ is set during driver init period with vmid set to 0*/ - if (m->cp_hqd_vmid == 0) { - uint32_t value, mec, pipe; - - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", - mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); - } - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); @@ -422,7 +308,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -438,7 +324,60 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v9_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { @@ -475,71 +414,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; uint32_t data; uint64_t data64; uint64_t __user *wptr64 = (uint64_t __user *)wptr; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - sdmax_gfx_context_cntl = m->sdma_engine_id ? - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - data = RREG32(sdmax_gfx_context_cntl); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(sdmax_gfx_context_cntl, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, m->sdmax_rlcx_doorbell_offset); data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); if (read_user_wptr(mm, wptr64, data64)) { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, lower_32_bits(data64)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, upper_32_bits(data64)); } else { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, m->sdmax_rlcx_rb_rptr_hi); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -549,7 +484,8 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, uint32_t (**dump)[2], uint32_t *n_regs) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) @@ -559,15 +495,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return -ENOMEM; for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -575,7 +511,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, +bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -601,14 +537,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -616,7 +552,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) @@ -671,155 +607,60 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); m->sdmax_rlcx_rb_rptr_hi = - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); return 0; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) - + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, - uint32_t flush_type) +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) { - signed long r; - uint32_t seq; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); - amdgpu_ring_write(ring, - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | - PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | - PACKET3_INVALIDATE_TLBS_PASID(pasid) | - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); - - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; -} - -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - uint32_t flush_type = 0; - - if (adev->in_gpu_reset) - return -EIO; - if (adev->gmc.xgmi.num_physical_nodes && - adev->asic_type == CHIP_VEGA20) - flush_type = 2; - - if (ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid, flush_type); - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { - if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) - == pasid) { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - flush_type); - break; - } - } - } - return 0; } -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return 0; - } - - /* Use legacy mode tlb invalidation. - * - * Currently on Raven the code below is broken for anything but - * legacy mode due to a MMHUB power gating problem. A workaround - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack - * bit. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate both GC and - * MMHUB. - * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); - return 0; -} - -static int kgd_address_watch_disable(struct kgd_dev *kgd) -{ - return 0; -} - -static int kgd_address_watch_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -828,7 +669,7 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, return 0; } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, uint32_t gfx_index_val, uint32_t sq_cmd) { @@ -853,24 +694,15 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid) -{ - /* No longer needed on GFXv9. The scratch base address is - * passed to the shader by the CP. It's the user mode driver's - * responsibility. - */ -} - -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, + uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -880,11 +712,31 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, return; } - /* TODO: take advantage of per-process address space size. For - * now, all processes share the same address space size, like - * on GFX8 and older. - */ mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } + +const struct kfd2kgd_calls gfx_v9_kfd2kgd = { + .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, + .init_interrupts = kgd_gfx_v9_init_interrupts, + .hqd_load = kgd_gfx_v9_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_gfx_v9_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_gfx_v9_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_gfx_v9_address_watch_disable, + .address_watch_execute = kgd_gfx_v9_address_watch_execute, + .wave_control_execute = kgd_gfx_v9_wave_control_execute, + .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, + .get_tile_config = kgd_gfx_v9_get_tile_config, + .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h new file mode 100644 index 000000000000..63d3e6683dfe --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -0,0 +1,64 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + + + +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off); +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd); +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid); +int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6a5c96e519b1..fa8ac9d19a7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -19,9 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - -#define pr_fmt(fmt) "kfd2kgd: " fmt - #include <linux/dma-buf.h> #include <linux/list.h> #include <linux/pagemap.h> @@ -33,11 +30,6 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" -/* Special VM and GART address alignment needed for VI pre-Fiji due to - * a HW bug. - */ -#define VI_BO_SIZE_ALIGN (0x8000) - /* BO flag to indicate a KFD userptr BO */ #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) @@ -93,7 +85,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, } /* Set memory usage limits. Current, limits are - * System (TTM + userptr) memory - 3/4th System RAM + * System (TTM + userptr) memory - 15/16th System RAM * TTM memory - 3/8th System RAM */ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) @@ -106,18 +98,31 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); - kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2); + kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), (kfd_mem_limit.max_ttm_mem_limit >> 20)); } +/* Estimate page table size needed to represent a given memory size + * + * With 4KB pages, we need one 8 byte PTE for each 4KB of memory + * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB + * of memory (factor 256K, >> 18). ROCm user mode tries to optimize + * for 2MB pages for TLB efficiency. However, small allocations and + * fragmented system memory still need some 4KB pages. We choose a + * compromise that should work in most cases without reserving too + * much memory for page tables unnecessarily (factor 16K, >> 14). + */ +#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) + static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; - uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, @@ -218,14 +223,14 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, struct amdgpu_amdkfd_fence *ef) { - struct reservation_object *resv = bo->tbo.resv; - struct reservation_object_list *old, *new; + struct dma_resv *resv = bo->tbo.base.resv; + struct dma_resv_list *old, *new; unsigned int i, j, k; if (!ef) return -EINVAL; - old = reservation_object_get_list(resv); + old = dma_resv_get_list(resv); if (!old) return 0; @@ -241,7 +246,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, struct dma_fence *f; f = rcu_dereference_protected(old->shared[i], - reservation_object_held(resv)); + dma_resv_held(resv)); if (f->context == ef->base.context) RCU_INIT_POINTER(new->shared[--j], f); @@ -263,7 +268,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, struct dma_fence *f; f = rcu_dereference_protected(new->shared[i], - reservation_object_held(resv)); + dma_resv_held(resv)); dma_fence_put(f); } kfree_rcu(old, rcu); @@ -349,11 +354,44 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); int ret; - ret = amdgpu_vm_update_directories(adev, vm); + ret = amdgpu_vm_update_pdes(adev, vm, false); if (ret) return ret; - return amdgpu_sync_fence(NULL, sync, vm->last_update, false); + return amdgpu_sync_fence(sync, vm->last_update, false); +} + +static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) +{ + struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); + bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT; + uint32_t mapping_flags; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) { + if (bo_adev == adev) + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + else + mapping_flags |= AMDGPU_VM_MTYPE_UC; + } else { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + break; + default: + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + + return amdgpu_gem_va_map_flags(adev, mapping_flags); } /* add_bo_to_vm - Add a BO to a VM @@ -404,8 +442,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, } bo_va_entry->va = va; - bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, - mem->mapping_flags); + bo_va_entry->pte_flags = get_pte_flags(adev, mem); bo_va_entry->kgd_dev = (void *)adev; list_add(&bo_va_entry->bo_list, list_bo_va); @@ -481,8 +518,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, * * Returns 0 for success, negative errno for errors. */ -static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, - uint64_t user_addr) +static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) { struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; @@ -586,7 +622,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, - false, &ctx->duplicates, true); + false, &ctx->duplicates); if (!ret) ctx->reserved = true; else { @@ -659,7 +695,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, } ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, - false, &ctx->duplicates, true); + false, &ctx->duplicates); if (!ret) ctx->reserved = true; else @@ -714,7 +750,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + amdgpu_sync_fence(sync, bo_va->last_pt_update, false); return 0; } @@ -733,7 +769,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, return ret; } - return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + return amdgpu_sync_fence(sync, bo_va->last_pt_update, false); } static int map_bo_to_gpuvm(struct amdgpu_device *adev, @@ -812,7 +848,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info, struct amdgpu_bo *pd = peer_vm->root.base.bo; ret = amdgpu_sync_resv(NULL, - sync, pd->tbo.resv, + sync, pd->tbo.base.resv, AMDGPU_FENCE_OWNER_KFD, false); if (ret) return ret; @@ -887,7 +923,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; - ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); + ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; amdgpu_bo_fence(vm->root.base.bo, @@ -1079,10 +1115,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; - int byte_align; u32 domain, alloc_domain; u64 alloc_flags; - uint32_t mapping_flags; int ret; /* @@ -1090,7 +1124,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( */ if (flags & ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; + alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; @@ -1103,7 +1137,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( alloc_flags = 0; if (!offset || !*offset) return -EINVAL; - user_addr = *offset; + user_addr = untagged_addr(*offset); } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL | ALLOC_MEM_FLAGS_MMIO_REMAP)) { domain = AMDGPU_GEM_DOMAIN_GTT; @@ -1135,25 +1169,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if ((*mem)->aql_queue) size = size >> 1; - /* Workaround for TLB bug on older VI chips */ - byte_align = (adev->family == AMDGPU_FAMILY_VI && - adev->asic_type != CHIP_FIJI && - adev->asic_type != CHIP_POLARIS10 && - adev->asic_type != CHIP_POLARIS11 && - adev->asic_type != CHIP_POLARIS12 && - adev->asic_type != CHIP_VEGAM) ? - VI_BO_SIZE_ALIGN : 1; - - mapping_flags = AMDGPU_VM_PAGE_READABLE; - if (flags & ALLOC_MEM_FLAGS_WRITABLE) - mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; - if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) - mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - if (flags & ALLOC_MEM_FLAGS_COHERENT) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else - mapping_flags |= AMDGPU_VM_MTYPE_NC; - (*mem)->mapping_flags = mapping_flags; + (*mem)->alloc_flags = flags; amdgpu_sync_create(&(*mem)->sync); @@ -1168,7 +1184,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( memset(&bp, 0, sizeof(bp)); bp.size = size; - bp.byte_align = byte_align; + bp.byte_align = 1; bp.domain = alloc_domain; bp.flags = alloc_flags; bp.type = bo_type; @@ -1195,7 +1211,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { - ret = init_user_pages(*mem, current->mm, user_addr); + ret = init_user_pages(*mem, user_addr); if (ret) goto allocate_init_user_pages_failed; } @@ -1626,9 +1642,10 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); - (*mem)->mapping_flags = - AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; + (*mem)->alloc_flags = + ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT) | + ALLOC_MEM_FLAGS_WRITABLE | ALLOC_MEM_FLAGS_EXECUTABLE; (*mem)->bo = amdgpu_bo_ref(bo); (*mem)->va = va; @@ -1657,10 +1674,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) { struct amdkfd_process_info *process_info = mem->process_info; - int invalid, evicted_bos; + int evicted_bos; int r = 0; - invalid = atomic_inc_return(&mem->invalid); + atomic_inc(&mem->invalid); evicted_bos = atomic_inc_return(&process_info->evicted_bos); if (evicted_bos == 1) { /* First eviction, stop the queues */ @@ -1739,6 +1756,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, return ret; } + /* + * FIXME: Cannot ignore the return code, must hold + * notifier_lock + */ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); /* Mark the BO as valid unless it was invalidated @@ -1797,8 +1818,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } /* Reserve all BOs and page tables for validation */ - ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates, - true); + ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); WARN(!list_empty(&duplicates), "Duplicates should be empty"); if (ret) goto out_free; @@ -1996,7 +2016,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, - false, &duplicate_save, true); + false, &duplicate_save); if (ret) { pr_debug("Memory eviction: TTM Reserve Failed. Try again\n"); goto ttm_reserve_fail; @@ -2028,7 +2048,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Validate BOs failed. Try again\n"); goto validate_map_fail; } - ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false); + ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false); if (ret) { pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); goto validate_map_fail; @@ -2109,6 +2129,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem return -ENOMEM; mutex_init(&(*mem)->lock); + INIT_LIST_HEAD(&(*mem)->bo_va_list); (*mem)->bo = amdgpu_bo_ref(gws_bo); (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; (*mem)->process_info = process_info; @@ -2133,7 +2154,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem * Add process eviction fence to bo so they can * evict each other. */ - ret = reservation_object_reserve_shared(gws_bo->tbo.resv, 1); + ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 1c9d40f97a9b..fdd52d86a4d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -338,17 +338,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * path_size += le16_to_cpu(path->usSize); if (device_support & le16_to_cpu(path->usDeviceTag)) { - uint8_t con_obj_id, con_obj_num, con_obj_type; - - con_obj_id = + uint8_t con_obj_id = (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - con_obj_num = - (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK) - >> ENUM_ID_SHIFT; - con_obj_type = - (le16_to_cpu(path->usConnObjectId) & - OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; /* Skip TV/CV support */ if ((le16_to_cpu(path->usDeviceTag) == @@ -373,15 +365,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * router.ddc_valid = false; router.cd_valid = false; for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) { - uint8_t grph_obj_id, grph_obj_num, grph_obj_type; - - grph_obj_id = - (le16_to_cpu(path->usGraphicObjIds[j]) & - OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - grph_obj_num = - (le16_to_cpu(path->usGraphicObjIds[j]) & - ENUM_ID_MASK) >> ENUM_ID_SHIFT; - grph_obj_type = + uint8_t grph_obj_type = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; @@ -2038,6 +2022,11 @@ int amdgpu_atombios_init(struct amdgpu_device *adev) if (adev->is_atom_fw) { amdgpu_atomfirmware_scratch_regs_init(adev); amdgpu_atomfirmware_allocate_fb_scratch(adev); + ret = amdgpu_atomfirmware_get_mem_train_info(adev); + if (ret) { + DRM_ERROR("Failed to get mem train fb location.\n"); + return ret; + } } else { amdgpu_atombios_scratch_regs_init(adev); amdgpu_atombios_allocate_fb_scratch(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index daf687428cdb..58f9d8c3a17a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -27,6 +27,7 @@ #include "amdgpu_atomfirmware.h" #include "atom.h" #include "atombios.h" +#include "soc15_hw_ip.h" bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev) { @@ -120,65 +121,14 @@ union vram_info { struct atom_vram_info_header_v2_3 v23; struct atom_vram_info_header_v2_4 v24; }; -/* - * Return vram width from integrated system info table, if available, - * or 0 if not. - */ -int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev) -{ - struct amdgpu_mode_info *mode_info = &adev->mode_info; - int index; - u16 data_offset, size; - union igp_info *igp_info; - union vram_info *vram_info; - u32 mem_channel_number; - u32 mem_channel_width; - u8 frev, crev; - if (adev->flags & AMD_IS_APU) - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - integratedsysteminfo); - else - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - vram_info); - - /* get any igp specific overrides */ - if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, - &frev, &crev, &data_offset)) { - if (adev->flags & AMD_IS_APU) { - igp_info = (union igp_info *) - (mode_info->atom_context->bios + data_offset); - switch (crev) { - case 11: - mem_channel_number = igp_info->v11.umachannelnumber; - /* channel width is 64 */ - return mem_channel_number * 64; - default: - return 0; - } - } else { - vram_info = (union vram_info *) - (mode_info->atom_context->bios + data_offset); - switch (crev) { - case 3: - mem_channel_number = vram_info->v23.vram_module[0].channel_num; - mem_channel_width = vram_info->v23.vram_module[0].channel_width; - return mem_channel_number * (1 << mem_channel_width); - case 4: - mem_channel_number = vram_info->v24.vram_module[0].channel_num; - mem_channel_width = vram_info->v24.vram_module[0].channel_width; - return mem_channel_number * (1 << mem_channel_width); - default: - return 0; - } - } - } - - return 0; -} +union vram_module { + struct atom_vram_module_v9 v9; + struct atom_vram_module_v10 v10; +}; -static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, - int atom_mem_type) +static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, + int atom_mem_type) { int vram_type; @@ -219,19 +169,25 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, return vram_type; } -/* - * Return vram type from either integrated system info table - * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not - */ -int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) + + +int +amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, + int *vram_width, int *vram_type, + int *vram_vendor) { struct amdgpu_mode_info *mode_info = &adev->mode_info; - int index; + int index, i = 0; u16 data_offset, size; union igp_info *igp_info; union vram_info *vram_info; + union vram_module *vram_module; u8 frev, crev; u8 mem_type; + u8 mem_vendor; + u32 mem_channel_number; + u32 mem_channel_width; + u32 module_id; if (adev->flags & AMD_IS_APU) index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, @@ -239,6 +195,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) else index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { @@ -247,25 +204,67 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) (mode_info->atom_context->bios + data_offset); switch (crev) { case 11: + mem_channel_number = igp_info->v11.umachannelnumber; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; mem_type = igp_info->v11.memorytype; - return convert_atom_mem_type_to_vram_type(adev, mem_type); + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; default: - return 0; + return -EINVAL; } } else { vram_info = (union vram_info *) (mode_info->atom_context->bios + data_offset); + module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16; switch (crev) { case 3: - mem_type = vram_info->v23.vram_module[0].memory_type; - return convert_atom_mem_type_to_vram_type(adev, mem_type); + if (module_id > vram_info->v23.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v23.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v9.vram_module_size); + i++; + } + mem_type = vram_module->v9.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v9.channel_num; + mem_channel_width = vram_module->v9.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; case 4: - mem_type = vram_info->v24.vram_module[0].memory_type; - return convert_atom_mem_type_to_vram_type(adev, mem_type); + if (module_id > vram_info->v24.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v24.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v10.vram_module_size); + i++; + } + mem_type = vram_module->v10.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v10.channel_num; + mem_channel_width = vram_module->v10.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v10.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; default: - return 0; + return -EINVAL; } } + } return 0; @@ -464,3 +463,108 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) } return -EINVAL; } + +/* + * Check if VBIOS supports GDDR6 training data save/restore + */ +static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev) +{ + uint16_t data_offset; + int index; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL, + NULL, NULL, &data_offset)) { + struct atom_firmware_info_v3_1 *firmware_info = + (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios + + data_offset); + + DRM_DEBUG("atom firmware capability:0x%08x.\n", + le32_to_cpu(firmware_info->firmware_capability)); + + if (le32_to_cpu(firmware_info->firmware_capability) & + ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING) + return true; + } + + return false; +} + +static int gddr6_mem_train_support(struct amdgpu_device *adev) +{ + int ret; + uint32_t major, minor, revision, hw_v; + + if (gddr6_mem_train_vbios_support(adev)) { + amdgpu_discovery_get_ip_version(adev, MP0_HWID, &major, &minor, &revision); + hw_v = HW_REV(major, minor, revision); + /* + * treat 0 revision as a special case since register for MP0 and MMHUB is missing + * for some Navi10 A0, preventing driver from discovering the hwip information since + * none of the functions will be initialized, it should not cause any problems + */ + switch (hw_v) { + case HW_REV(11, 0, 0): + case HW_REV(11, 0, 5): + ret = 1; + break; + default: + DRM_ERROR("memory training vbios supports but psp hw(%08x)" + " doesn't support!\n", hw_v); + ret = -1; + break; + } + } else { + ret = 0; + hw_v = -1; + } + + + DRM_DEBUG("mp0 hw_v %08x, ret:%d.\n", hw_v, ret); + return ret; +} + +int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev) +{ + struct atom_context *ctx = adev->mode_info.atom_context; + int index; + uint8_t frev, crev; + uint16_t data_offset, size; + int ret; + + adev->fw_vram_usage.mem_train_support = false; + + if (adev->asic_type != CHIP_NAVI10 && + adev->asic_type != CHIP_NAVI14) + return 0; + + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = gddr6_mem_train_support(adev); + if (ret == -1) + return -EINVAL; + else if (ret == 0) + return 0; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + vram_usagebyfirmware); + ret = amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, + &data_offset); + if (ret == 0) { + DRM_ERROR("parse data header failed.\n"); + return -EINVAL; + } + + DRM_DEBUG("atom firmware common table header size:0x%04x, frev:0x%02x," + " crev:0x%02x, data_offset:0x%04x.\n", size, frev, crev, data_offset); + /* only support 2.1+ */ + if (((uint16_t)frev << 8 | crev) < 0x0201) { + DRM_ERROR("frev:0x%02x, crev:0x%02x < 2.1 !\n", frev, crev); + return -EINVAL; + } + + adev->fw_vram_usage.mem_train_support = true; + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 5ec6f92f353c..434fe2fa0089 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -29,8 +29,9 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev); void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); -int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); -int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, + int *vram_width, int *vram_type, int *vram_vendor); +int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 9b384a94d2f3..3e35a8f2c5e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -574,6 +574,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x699f, 0x1028, 0x0814, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 649e68c4479b..d1495e1c9289 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, { unsigned long start_jiffies; unsigned long end_jiffies; - struct dma_fence *fence = NULL; + struct dma_fence *fence; int i, r; start_jiffies = jiffies; @@ -44,16 +44,14 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, if (r) goto exit_do_move; r = dma_fence_wait(fence, false); + dma_fence_put(fence); if (r) goto exit_do_move; - dma_fence_put(fence); } end_jiffies = jiffies; r = jiffies_to_msecs(end_jiffies - start_jiffies); exit_do_move: - if (fence) - dma_fence_put(fence); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 7bcf86c61999..85b0515c0fdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -140,7 +140,12 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, return 0; error_free: - while (i--) { + for (i = 0; i < last_entry; ++i) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); + + amdgpu_bo_unref(&bo); + } + for (i = first_userptr; i < num_entries; ++i) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); amdgpu_bo_unref(&bo); @@ -270,7 +275,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_create_list_entry_array(&args->in, &info); if (r) - goto error_free; + return r; switch (args->in.operation) { case AMDGPU_BO_LIST_OP_CREATE: @@ -283,8 +288,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); mutex_unlock(&fpriv->bo_list_lock); if (r < 0) { - amdgpu_bo_list_put(list); - return r; + goto error_put_list; } handle = r; @@ -306,9 +310,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, mutex_unlock(&fpriv->bo_list_lock); if (IS_ERR(old)) { - amdgpu_bo_list_put(list); r = PTR_ERR(old); - goto error_free; + goto error_put_list; } amdgpu_bo_list_put(old); @@ -325,8 +328,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, return 0; +error_put_list: + amdgpu_bo_list_put(list); + error_free: - if (info) - kvfree(info); + kvfree(info); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 73b2ede773d3..a62cbc8199de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -217,11 +217,10 @@ amdgpu_connector_update_scratch_regs(struct drm_connector *connector, struct drm_encoder *encoder; const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; bool connected; - int i; best_encoder = connector_funcs->best_encoder(connector); - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { if ((encoder == best_encoder) && (status == connector_status_connected)) connected = true; else @@ -236,9 +235,8 @@ amdgpu_connector_find_encoder(struct drm_connector *connector, int encoder_type) { struct drm_encoder *encoder; - int i; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { if (encoder->encoder_type == encoder_type) return encoder; } @@ -347,10 +345,9 @@ static struct drm_encoder * amdgpu_connector_best_single_encoder(struct drm_connector *connector) { struct drm_encoder *encoder; - int i; /* pick the first one */ - drm_connector_for_each_possible_encoder(connector, encoder, i) + drm_connector_for_each_possible_encoder(connector, encoder) return encoder; return NULL; @@ -1022,8 +1019,12 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) */ if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) { struct drm_connector *list_connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *list_amdgpu_connector; - list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) { + + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(list_connector, + &iter) { if (connector == list_connector) continue; list_amdgpu_connector = to_amdgpu_connector(list_connector); @@ -1040,6 +1041,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) } } } + drm_connector_list_iter_end(&iter); } } } @@ -1065,9 +1067,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) /* find analog encoder */ if (amdgpu_connector->dac_load_detect) { struct drm_encoder *encoder; - int i; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { if (encoder->encoder_type != DRM_MODE_ENCODER_DAC && encoder->encoder_type != DRM_MODE_ENCODER_TVDAC) continue; @@ -1117,9 +1118,8 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct drm_encoder *encoder; - int i; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { if (amdgpu_connector->use_digital == true) { if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS) return encoder; @@ -1134,7 +1134,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector) /* then check use digitial */ /* pick the first one */ - drm_connector_for_each_possible_encoder(connector, encoder, i) + drm_connector_for_each_possible_encoder(connector, encoder) return encoder; return NULL; @@ -1271,9 +1271,8 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn { struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; - int i; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { amdgpu_encoder = to_amdgpu_encoder(encoder); switch (amdgpu_encoder->encoder_id) { @@ -1292,10 +1291,9 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector) { struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; - int i; bool found = false; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { amdgpu_encoder = to_amdgpu_encoder(encoder); if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2) found = true; @@ -1501,10 +1499,12 @@ amdgpu_connector_add(struct amdgpu_device *adev, { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; struct amdgpu_connector_atom_dig *amdgpu_dig_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; + struct i2c_adapter *ddc = NULL; uint32_t subpixel_order = SubPixelNone; bool shared_ddc = false; bool is_dp_bridge = false; @@ -1514,10 +1514,12 @@ amdgpu_connector_add(struct amdgpu_device *adev, return; /* see if we already added it */ - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->connector_id == connector_id) { amdgpu_connector->devices |= supported_device; + drm_connector_list_iter_end(&iter); return; } if (amdgpu_connector->ddc_bus && i2c_bus->valid) { @@ -1532,6 +1534,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, } } } + drm_connector_list_iter_end(&iter); /* check if it's a dp bridge */ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { @@ -1574,17 +1577,21 @@ amdgpu_connector_add(struct amdgpu_device *adev, amdgpu_connector->con_priv = amdgpu_dig_connector; if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); - if (amdgpu_connector->ddc_bus) + if (amdgpu_connector->ddc_bus) { has_aux = true; - else + ddc = &amdgpu_connector->ddc_bus->adapter; + } else { DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + } } switch (connector_type) { case DRM_MODE_CONNECTOR_VGA: case DRM_MODE_CONNECTOR_DVIA: default: - drm_connector_init(dev, &amdgpu_connector->base, - &amdgpu_connector_dp_funcs, connector_type); + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dp_funcs, + connector_type, + ddc); drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); connector->interlace_allowed = true; @@ -1602,8 +1609,10 @@ amdgpu_connector_add(struct amdgpu_device *adev, case DRM_MODE_CONNECTOR_HDMIA: case DRM_MODE_CONNECTOR_HDMIB: case DRM_MODE_CONNECTOR_DisplayPort: - drm_connector_init(dev, &amdgpu_connector->base, - &amdgpu_connector_dp_funcs, connector_type); + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dp_funcs, + connector_type, + ddc); drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, @@ -1644,8 +1653,10 @@ amdgpu_connector_add(struct amdgpu_device *adev, break; case DRM_MODE_CONNECTOR_LVDS: case DRM_MODE_CONNECTOR_eDP: - drm_connector_init(dev, &amdgpu_connector->base, - &amdgpu_connector_edp_funcs, connector_type); + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_edp_funcs, + connector_type, + ddc); drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, @@ -1659,13 +1670,18 @@ amdgpu_connector_add(struct amdgpu_device *adev, } else { switch (connector_type) { case DRM_MODE_CONNECTOR_VGA: - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_vga_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); amdgpu_connector->dac_load_detect = true; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.load_detect_property, @@ -1679,13 +1695,18 @@ amdgpu_connector_add(struct amdgpu_device *adev, connector->doublescan_allowed = true; break; case DRM_MODE_CONNECTOR_DVIA: - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_vga_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); amdgpu_connector->dac_load_detect = true; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.load_detect_property, @@ -1704,13 +1725,18 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dvi_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); subpixel_order = SubPixelHorizontalRGB; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.coherent_mode_property, @@ -1754,13 +1780,18 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dvi_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.coherent_mode_property, 1); @@ -1796,15 +1827,20 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dp_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); - if (amdgpu_connector->ddc_bus) + if (amdgpu_connector->ddc_bus) { has_aux = true; - else + ddc = &amdgpu_connector->ddc_bus->adapter; + } else { DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + } } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dp_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); subpixel_order = SubPixelHorizontalRGB; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.coherent_mode_property, @@ -1838,15 +1874,20 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_edp_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); - if (amdgpu_connector->ddc_bus) + if (amdgpu_connector->ddc_bus) { has_aux = true; - else + ddc = &amdgpu_connector->ddc_bus->adapter; + } else { DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + } } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_edp_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); @@ -1859,13 +1900,18 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_lvds_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_lvds_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4e4094f842e7..a52a084158b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -35,6 +35,7 @@ #include "amdgpu_trace.h" #include "amdgpu_gmc.h" #include "amdgpu_gem.h" +#include "amdgpu_ras.h" static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, @@ -402,7 +403,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, - .resv = bo->tbo.resv, + .resv = bo->tbo.base.resv, .flags = 0 }; uint32_t domain; @@ -449,75 +450,12 @@ retry: return r; } -/* Last resort, try to evict something from the current working set */ -static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, - struct amdgpu_bo *validated) -{ - uint32_t domain = validated->allowed_domains; - struct ttm_operation_ctx ctx = { true, false }; - int r; - - if (!p->evictable) - return false; - - for (;&p->evictable->tv.head != &p->validated; - p->evictable = list_prev_entry(p->evictable, tv.head)) { - - struct amdgpu_bo_list_entry *candidate = p->evictable; - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - bool update_bytes_moved_vis; - uint32_t other; - - /* If we reached our current BO we can forget it */ - if (bo == validated) - break; - - /* We can't move pinned BOs here */ - if (bo->pin_count) - continue; - - other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); - - /* Check if this BO is in one of the domains we need space for */ - if (!(other & domain)) - continue; - - /* Check if we can move this BO somewhere else */ - other = bo->allowed_domains & ~domain; - if (!other) - continue; - - /* Good we can try to move this BO somewhere else */ - update_bytes_moved_vis = - !amdgpu_gmc_vram_full_visible(&adev->gmc) && - amdgpu_bo_in_cpu_visible_vram(bo); - amdgpu_bo_placement_from_domain(bo, other); - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - p->bytes_moved += ctx.bytes_moved; - if (update_bytes_moved_vis) - p->bytes_moved_vis += ctx.bytes_moved; - - if (unlikely(r)) - break; - - p->evictable = list_prev_entry(p->evictable, tv.head); - list_move(&candidate->tv.head, &p->validated); - - return true; - } - - return false; -} - static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo) { struct amdgpu_cs_parser *p = param; int r; - do { - r = amdgpu_cs_bo_validate(p, bo); - } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo)); + r = amdgpu_cs_bo_validate(p, bo); if (r) return r; @@ -536,7 +474,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, list_for_each_entry(lobj, validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo); - bool binding_userptr = false; struct mm_struct *usermm; usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); @@ -553,20 +490,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); - binding_userptr = true; } - if (p->evictable == lobj) - p->evictable = NULL; - r = amdgpu_cs_validate(p, bo); if (r) return r; - if (binding_userptr) { - kvfree(lobj->user_pages); - lobj->user_pages = NULL; - } + kvfree(lobj->user_pages); + lobj->user_pages = NULL; } return 0; } @@ -607,8 +538,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->tv.num_shared = 2; amdgpu_bo_list_get_list(p->bo_list, &p->validated); - if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); @@ -650,7 +579,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates, false); + &duplicates); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); @@ -661,9 +590,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, &p->bytes_moved_vis_threshold); p->bytes_moved = 0; p->bytes_moved_vis = 0; - p->evictable = list_last_entry(&p->validated, - struct amdgpu_bo_list_entry, - tv.head); r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm, amdgpu_cs_validate, p); @@ -730,7 +656,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, &p->validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - struct reservation_object *resv = bo->tbo.resv; + struct dma_resv *resv = bo->tbo.base.resv; r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, amdgpu_bo_explicit_sync(bo)); @@ -869,29 +795,23 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(adev, &p->job->sync, - fpriv->prt_va->last_pt_update, false); + r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update); if (r) return r; if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { - struct dma_fence *f; - bo_va = fpriv->csa_va; BUG_ON(!bo_va); r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - f = bo_va->last_pt_update; - r = amdgpu_sync_fence(adev, &p->job->sync, f, false); + r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct dma_fence *f; - /* ignore duplicates */ bo = ttm_to_amdgpu_bo(e->tv.bo); if (!bo) @@ -905,8 +825,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - f = bo_va->last_pt_update; - r = amdgpu_sync_fence(adev, &p->job->sync, f, false); + r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } @@ -915,11 +834,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_update_directories(adev, vm); + r = amdgpu_vm_update_pdes(adev, vm, false); if (r) return r; - r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false); + r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update); if (r) return r; @@ -990,6 +909,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (parser->entity && parser->entity != entity) return -EINVAL; + /* Return if there is no run queue associated with this entity. + * Possibly because of disabled HW IP*/ + if (entity->rq == NULL) + return -EINVAL; + parser->entity = entity; ring = to_amdgpu_ring(entity->rq->sched); @@ -1061,7 +985,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, dma_fence_put(old); } - r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence, true); dma_fence_put(fence); if (r) return r; @@ -1083,7 +1007,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, return r; } - r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence, true); dma_fence_put(fence); return r; @@ -1143,6 +1067,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); + if (p->post_deps) + return -EINVAL; + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), GFP_KERNEL); p->num_post_deps = 0; @@ -1166,8 +1093,7 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk - *chunk) + struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; unsigned num_deps; @@ -1177,6 +1103,9 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_syncobj); + if (p->post_deps) + return -EINVAL; + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), GFP_KERNEL); p->num_post_deps = 0; @@ -1286,11 +1215,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock. - * p->mn is hold until amdgpu_cs_submit is finished and fence is added - * to BOs. + /* No memory allocation is allowed while holding the notifier lock. + * The lock is held until amdgpu_cs_submit is finished and fence is + * added to BOs. */ - amdgpu_mn_lock(p->mn); + mutex_lock(&p->adev->notifier_lock); /* If userptr are invalidated after amdgpu_cs_parser_bos(), return * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. @@ -1305,7 +1234,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, goto error_abort; } - job->owner = p->filp; p->fence = dma_fence_get(&job->base.s_fence->finished); amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); @@ -1333,13 +1261,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); return 0; error_abort: drm_sched_job_cleanup(&job->base); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); error_unlock: amdgpu_job_free(job); @@ -1354,6 +1282,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) bool reserved_buffers = false; int i, r; + if (amdgpu_ras_intr_triggered()) + return -EHWPOISON; + if (!adev->accel_working) return -EBUSY; @@ -1727,7 +1658,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, *map = mapping; /* Double check that the BO is reserved by this CS */ - if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) + if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket) return -EINVAL; if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 35a8d3c96fc9..08047bc4d588 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -80,7 +80,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, list_add(&csa_tv.head, &list); amdgpu_vm_get_pd_bo(vm, &list, &pd); - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, false); + r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); if (r) { DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index f539a2a92774..94a6c42f29ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = { [AMDGPU_HW_IP_VCN_JPEG] = 1, }; -static int amdgput_ctx_total_num_entities(void) -{ - unsigned i, num_entities = 0; - - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) - num_entities += amdgpu_ctx_num_entities[i]; - - return num_entities; -} - static int amdgpu_ctx_priority_permit(struct drm_file *filp, enum drm_sched_priority priority) { + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) + return -EINVAL; + /* NORMAL and below are accessible by everyone */ if (priority <= DRM_SCHED_PRIORITY_NORMAL) return 0; @@ -68,47 +61,94 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp, return -EACCES; } +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring) +{ + struct amdgpu_device *adev = ctx->adev; + struct amdgpu_ctx_entity *entity; + struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; + unsigned num_scheds = 0; + enum drm_sched_priority priority; + int r; + + entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), + GFP_KERNEL); + if (!entity) + return -ENOMEM; + + entity->sequence = 1; + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? + ctx->init_priority : ctx->override_priority; + switch (hw_ip) { + case AMDGPU_HW_IP_GFX: + sched = &adev->gfx.gfx_ring[0].sched; + scheds = &sched; + num_scheds = 1; + break; + case AMDGPU_HW_IP_COMPUTE: + scheds = adev->gfx.compute_sched; + num_scheds = adev->gfx.num_compute_sched; + break; + case AMDGPU_HW_IP_DMA: + scheds = adev->sdma.sdma_sched; + num_scheds = adev->sdma.num_sdma_sched; + break; + case AMDGPU_HW_IP_UVD: + sched = &adev->uvd.inst[0].ring.sched; + scheds = &sched; + num_scheds = 1; + break; + case AMDGPU_HW_IP_VCE: + sched = &adev->vce.ring[0].sched; + scheds = &sched; + num_scheds = 1; + break; + case AMDGPU_HW_IP_UVD_ENC: + sched = &adev->uvd.inst[0].ring_enc[0].sched; + scheds = &sched; + num_scheds = 1; + break; + case AMDGPU_HW_IP_VCN_DEC: + scheds = adev->vcn.vcn_dec_sched; + num_scheds = adev->vcn.num_vcn_dec_sched; + break; + case AMDGPU_HW_IP_VCN_ENC: + scheds = adev->vcn.vcn_enc_sched; + num_scheds = adev->vcn.num_vcn_enc_sched; + break; + case AMDGPU_HW_IP_VCN_JPEG: + scheds = adev->jpeg.jpeg_sched; + num_scheds = adev->jpeg.num_jpeg_sched; + break; + } + + r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds, + &ctx->guilty); + if (r) + goto error_free_entity; + + ctx->entities[hw_ip][ring] = entity; + return 0; + +error_free_entity: + kfree(entity); + + return r; +} + static int amdgpu_ctx_init(struct amdgpu_device *adev, enum drm_sched_priority priority, struct drm_file *filp, struct amdgpu_ctx *ctx) { - unsigned num_entities = amdgput_ctx_total_num_entities(); - unsigned i, j; int r; - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) - return -EINVAL; - r = amdgpu_ctx_priority_permit(filp, priority); if (r) return r; memset(ctx, 0, sizeof(*ctx)); - ctx->adev = adev; - - ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities, - sizeof(struct dma_fence*), GFP_KERNEL); - if (!ctx->fences) - return -ENOMEM; - - ctx->entities[0] = kcalloc(num_entities, - sizeof(struct amdgpu_ctx_entity), - GFP_KERNEL); - if (!ctx->entities[0]) { - r = -ENOMEM; - goto error_free_fences; - } - - for (i = 0; i < num_entities; ++i) { - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; - entity->sequence = 1; - entity->fences = &ctx->fences[amdgpu_sched_jobs * i]; - } - for (i = 1; i < AMDGPU_HW_IP_NUM; ++i) - ctx->entities[i] = ctx->entities[i - 1] + - amdgpu_ctx_num_entities[i - 1]; + ctx->adev = adev; kref_init(&ctx->refcount); spin_lock_init(&ctx->ring_lock); @@ -120,104 +160,49 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, ctx->init_priority = priority; ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { - struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; - struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS]; - unsigned num_rings; - unsigned num_rqs = 0; - - switch (i) { - case AMDGPU_HW_IP_GFX: - rings[0] = &adev->gfx.gfx_ring[0]; - num_rings = 1; - break; - case AMDGPU_HW_IP_COMPUTE: - for (j = 0; j < adev->gfx.num_compute_rings; ++j) - rings[j] = &adev->gfx.compute_ring[j]; - num_rings = adev->gfx.num_compute_rings; - break; - case AMDGPU_HW_IP_DMA: - for (j = 0; j < adev->sdma.num_instances; ++j) - rings[j] = &adev->sdma.instance[j].ring; - num_rings = adev->sdma.num_instances; - break; - case AMDGPU_HW_IP_UVD: - rings[0] = &adev->uvd.inst[0].ring; - num_rings = 1; - break; - case AMDGPU_HW_IP_VCE: - rings[0] = &adev->vce.ring[0]; - num_rings = 1; - break; - case AMDGPU_HW_IP_UVD_ENC: - rings[0] = &adev->uvd.inst[0].ring_enc[0]; - num_rings = 1; - break; - case AMDGPU_HW_IP_VCN_DEC: - rings[0] = &adev->vcn.ring_dec; - num_rings = 1; - break; - case AMDGPU_HW_IP_VCN_ENC: - rings[0] = &adev->vcn.ring_enc[0]; - num_rings = 1; - break; - case AMDGPU_HW_IP_VCN_JPEG: - rings[0] = &adev->vcn.ring_jpeg; - num_rings = 1; - break; - } + return 0; - for (j = 0; j < num_rings; ++j) { - if (!rings[j]->adev) - continue; +} - rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority]; - } +static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) +{ - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) - r = drm_sched_entity_init(&ctx->entities[i][j].entity, - rqs, num_rqs, &ctx->guilty); - if (r) - goto error_cleanup_entities; - } + int i; - return 0; + if (!entity) + return; -error_cleanup_entities: - for (i = 0; i < num_entities; ++i) - drm_sched_entity_destroy(&ctx->entities[0][i].entity); - kfree(ctx->entities[0]); + for (i = 0; i < amdgpu_sched_jobs; ++i) + dma_fence_put(entity->fences[i]); -error_free_fences: - kfree(ctx->fences); - ctx->fences = NULL; - return r; + kfree(entity); } static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); - unsigned num_entities = amdgput_ctx_total_num_entities(); struct amdgpu_device *adev = ctx->adev; unsigned i, j; if (!adev) return; - for (i = 0; i < num_entities; ++i) - for (j = 0; j < amdgpu_sched_jobs; ++j) - dma_fence_put(ctx->entities[0][i].fences[j]); - kfree(ctx->fences); - kfree(ctx->entities[0]); + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) { + amdgpu_ctx_fini_entity(ctx->entities[i][j]); + ctx->entities[i][j] = NULL; + } + } mutex_destroy(&ctx->lock); - kfree(ctx); } int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, u32 ring, struct drm_sched_entity **entity) { + int r; + if (hw_ip >= AMDGPU_HW_IP_NUM) { DRM_ERROR("unknown HW IP type: %d\n", hw_ip); return -EINVAL; @@ -234,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, return -EINVAL; } - *entity = &ctx->entities[hw_ip][ring].entity; + if (ctx->entities[hw_ip][ring] == NULL) { + r = amdgpu_ctx_init_entity(ctx, hw_ip, ring); + if (r) + return r; + } + + *entity = &ctx->entities[hw_ip][ring]->entity; return 0; } @@ -274,17 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; - unsigned num_entities; - u32 i; + u32 i, j; ctx = container_of(ref, struct amdgpu_ctx, refcount); + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { + if (!ctx->entities[i][j]) + continue; - num_entities = 0; - for (i = 0; i < AMDGPU_HW_IP_NUM; i++) - num_entities += amdgpu_ctx_num_entities[i]; - - for (i = 0; i < num_entities; i++) - drm_sched_entity_destroy(&ctx->entities[0][i].entity); + drm_sched_entity_destroy(&ctx->entities[i][j]->entity); + } + } amdgpu_ctx_fini(ref); } @@ -344,7 +335,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, { struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr; - uint32_t ras_counter; + unsigned long ras_counter; if (!fpriv) return -EINVAL; @@ -514,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, enum drm_sched_priority priority) { - unsigned num_entities = amdgput_ctx_total_num_entities(); enum drm_sched_priority ctx_prio; - unsigned i; + unsigned i, j; ctx->override_priority = priority; ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? ctx->init_priority : ctx->override_priority; + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { + struct drm_sched_entity *entity; - for (i = 0; i < num_entities; i++) { - struct drm_sched_entity *entity = &ctx->entities[0][i].entity; + if (!ctx->entities[i][j]) + continue; - drm_sched_entity_set_priority(entity, ctx_prio); + entity = &ctx->entities[i][j]->entity; + drm_sched_entity_set_priority(entity, ctx_prio); + } } } @@ -534,21 +529,24 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); - unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1); - struct dma_fence *other = centity->fences[idx]; + struct dma_fence *other; + unsigned idx; + long r; - if (other) { - signed long r; - r = dma_fence_wait(other, true); - if (r < 0) { - if (r != -ERESTARTSYS) - DRM_ERROR("Error (%ld) waiting for fence!\n", r); + spin_lock(&ctx->ring_lock); + idx = centity->sequence & (amdgpu_sched_jobs - 1); + other = dma_fence_get(centity->fences[idx]); + spin_unlock(&ctx->ring_lock); - return r; - } - } + if (!other) + return 0; - return 0; + r = dma_fence_wait(other, true); + if (r < 0 && r != -ERESTARTSYS) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + + dma_fence_put(other); + return r; } void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) @@ -559,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) { - unsigned num_entities = amdgput_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; - uint32_t id, i; + uint32_t id, i, j; idp = &mgr->ctx_handles; mutex_lock(&mgr->lock); idr_for_each_entry(idp, ctx, id) { - for (i = 0; i < num_entities; i++) { - struct drm_sched_entity *entity; + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { + struct drm_sched_entity *entity; + + if (!ctx->entities[i][j]) + continue; - entity = &ctx->entities[0][i].entity; - timeout = drm_sched_entity_flush(entity, timeout); + entity = &ctx->entities[i][j]->entity; + timeout = drm_sched_entity_flush(entity, timeout); + } } } mutex_unlock(&mgr->lock); @@ -581,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) { - unsigned num_entities = amdgput_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; - uint32_t id, i; + uint32_t id, i, j; idp = &mgr->ctx_handles; @@ -594,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) continue; } - for (i = 0; i < num_entities; i++) - drm_sched_entity_fini(&ctx->entities[0][i].entity); + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { + struct drm_sched_entity *entity; + + if (!ctx->entities[i][j]) + continue; + + entity = &ctx->entities[i][j]->entity; + drm_sched_entity_fini(entity); + } + } } } @@ -617,3 +627,45 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) idr_destroy(&mgr->ctx_handles); mutex_destroy(&mgr->lock); } + +void amdgpu_ctx_init_sched(struct amdgpu_device *adev) +{ + int i, j; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched; + adev->gfx.num_gfx_sched++; + } + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched; + adev->gfx.num_compute_sched++; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched; + adev->sdma.num_sdma_sched++; + } + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] = + &adev->vcn.inst[i].ring_dec.sched; + } + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + for (j = 0; j < adev->vcn.num_enc_rings; ++j) + adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] = + &adev->vcn.inst[i].ring_enc[j].sched; + } + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] = + &adev->jpeg.inst[i].ring_dec.sched; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 5f1b54c9bcdb..de490f183af2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -29,10 +29,12 @@ struct drm_device; struct drm_file; struct amdgpu_fpriv; +#define AMDGPU_MAX_ENTITY_NUM 4 + struct amdgpu_ctx_entity { uint64_t sequence; - struct dma_fence **fences; struct drm_sched_entity entity; + struct dma_fence *fences[]; }; struct amdgpu_ctx { @@ -42,15 +44,14 @@ struct amdgpu_ctx { unsigned reset_counter_query; uint32_t vram_lost_counter; spinlock_t ring_lock; - struct dma_fence **fences; - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM]; + struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM]; bool preamble_presented; enum drm_sched_priority init_priority; enum drm_sched_priority override_priority; struct mutex lock; atomic_t guilty; - uint32_t ras_counter_ce; - uint32_t ras_counter_ue; + unsigned long ras_counter_ce; + unsigned long ras_counter_ue; }; struct amdgpu_ctx_mgr { @@ -87,4 +88,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_init_sched(struct amdgpu_device *adev); + + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 5652cc72ed3a..f24ed9a1a3e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -26,6 +26,7 @@ #include <linux/kthread.h> #include <linux/pci.h> #include <linux/uaccess.h> +#include <linux/pm_runtime.h> #include <drm/drm_debugfs.h> @@ -129,7 +130,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, sh_bank = 0xFFFFFFFF; if (instance_bank == 0x3FF) instance_bank = 0xFFFFFFFF; - use_bank = 1; + use_bank = true; } else if (*pos & (1ULL << 61)) { me = (*pos & GENMASK_ULL(33, 24)) >> 24; @@ -137,17 +138,24 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, queue = (*pos & GENMASK_ULL(53, 44)) >> 44; vmid = (*pos & GENMASK_ULL(58, 54)) >> 54; - use_ring = 1; + use_ring = true; } else { - use_bank = use_ring = 0; + use_bank = use_ring = false; } *pos &= (1UL << 22) - 1; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || - (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) + (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se_bank, sh_bank, instance_bank); @@ -193,6 +201,9 @@ end: if (pm_pg_lock) mutex_unlock(&adev->pm.mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -237,13 +248,20 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_PCIE(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -251,6 +269,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -276,12 +297,19 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_PCIE(*pos >> 2, value); @@ -291,6 +319,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -316,13 +347,20 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_DIDT(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -330,6 +368,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -355,12 +396,19 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_DIDT(*pos >> 2, value); @@ -370,6 +418,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -395,13 +446,20 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_SMC(*pos); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -409,6 +467,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -434,12 +495,19 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_SMC(*pos, value); @@ -449,6 +517,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -572,7 +643,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, idx = *pos >> 2; valuesize = sizeof(values); + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -633,6 +713,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, wave = (*pos & GENMASK_ULL(36, 31)) >> 31; simd = (*pos & GENMASK_ULL(44, 37)) >> 37; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -644,6 +728,9 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); mutex_unlock(&adev->grbm_idx_mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (!x) return -EINVAL; @@ -711,6 +798,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, if (!data) return -ENOMEM; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -726,6 +817,9 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); mutex_unlock(&adev->grbm_idx_mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + while (size) { uint32_t value; @@ -859,6 +953,13 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; int r = 0, i; + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; + + /* Avoid accidently unparking the sched thread during GPU reset */ + mutex_lock(&adev->lock_reset); + /* hold on the scheduler */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -884,6 +985,11 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) kthread_unpark(ring->sched.thread); } + mutex_unlock(&adev->lock_reset); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -902,8 +1008,17 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev)); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -912,8 +1027,17 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -1036,6 +1160,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) if (!fences) return -ENOMEM; + /* Avoid accidently unparking the sched thread during GPU reset */ + mutex_lock(&adev->lock_reset); + /* stop the scheduler */ kthread_park(ring->sched.thread); @@ -1075,10 +1202,11 @@ failure: /* restart the scheduler */ kthread_unpark(ring->sched.thread); + mutex_unlock(&adev->lock_reset); + ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); - if (fences) - kfree(fences); + kfree(fences); return 0; } @@ -1090,8 +1218,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) { adev->debugfs_preempt = debugfs_create_file("amdgpu_preempt_ib", 0600, - adev->ddev->primary->debugfs_root, - (void *)adev, &fops_ib_preempt); + adev->ddev->primary->debugfs_root, adev, + &fops_ib_preempt); if (!(adev->debugfs_preempt)) { DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n"); return -EIO; @@ -1103,8 +1231,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) { - if (adev->debugfs_preempt) - debugfs_remove(adev->debugfs_preempt); + debugfs_remove(adev->debugfs_preempt); } #else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5a7f893cf724..39cd545976b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -65,16 +65,23 @@ #include "amdgpu_ras.h" #include "amdgpu_pmu.h" +#include <linux/suspend.h> +#include <drm/task_barrier.h> + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 -static const char *amdgpu_asic_name[] = { +const char *amdgpu_asic_name[] = { "TAHITI", "PITCAIRN", "VERDE", @@ -98,7 +105,11 @@ static const char *amdgpu_asic_name[] = { "VEGA12", "VEGA20", "RAVEN", + "ARCTURUS", + "RENOIR", "NAVI10", + "NAVI14", + "NAVI12", "LAST", }; @@ -127,14 +138,14 @@ static DEVICE_ATTR(pcie_replay_count, S_IRUGO, static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); /** - * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control + * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control * * @dev: drm_device pointer * * Returns true if the device is a dGPU with HG/PX power control, * otherwise return false. */ -bool amdgpu_device_is_px(struct drm_device *dev) +bool amdgpu_device_supports_boco(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; @@ -143,6 +154,51 @@ bool amdgpu_device_is_px(struct drm_device *dev) return false; } +/** + * amdgpu_device_supports_baco - Does the device support BACO + * + * @dev: drm_device pointer + * + * Returns true if the device supporte BACO, + * otherwise return false. + */ +bool amdgpu_device_supports_baco(struct drm_device *dev) +{ + struct amdgpu_device *adev = dev->dev_private; + + return amdgpu_asic_supports_baco(adev); +} + +/** + * VRAM access helper functions. + * + * amdgpu_device_vram_access - read/write a buffer in vram + * + * @adev: amdgpu_device pointer + * @pos: offset of the buffer in vram + * @buf: virtual address of the buffer in system memory + * @size: read/write size, sizeof(@buf) must > @size + * @write: true - write to vram, otherwise - read from vram + */ +void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, + uint32_t *buf, size_t size, bool write) +{ + uint64_t last; + unsigned long flags; + + last = size - 4; + for (last += pos; pos <= last; pos += 4) { + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31); + if (write) + WREG32_NO_KIQ(mmMM_DATA, *buf++); + else + *buf++ = RREG32_NO_KIQ(mmMM_DATA); + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + } +} + /* * MMIO register access helper functions. */ @@ -160,8 +216,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, { uint32_t ret; - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) - return amdgpu_virt_kiq_rreg(adev, reg); + if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))) + return amdgpu_kiq_rreg(adev, reg); if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -238,8 +294,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, adev->last_mm_index = v; } - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) - return amdgpu_virt_kiq_wreg(adev, reg, v); + if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))) + return amdgpu_kiq_wreg(adev, reg, v); if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); @@ -413,6 +469,40 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32 } /** + * amdgpu_invalid_rreg64 - dummy 64 bit reg read function + * + * @adev: amdgpu device pointer + * @reg: offset of register + * + * Dummy register read function. Used for register blocks + * that certain asics don't have (all asics). + * Returns the value in the register. + */ +static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) +{ + DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg); + BUG(); + return 0; +} + +/** + * amdgpu_invalid_wreg64 - dummy reg write function + * + * @adev: amdgpu device pointer + * @reg: offset of register + * @v: value to write to the register + * + * Dummy register read function. Used for register blocks + * that certain asics don't have (all asics). + */ +static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) +{ + DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", + reg, v); + BUG(); +} + +/** * amdgpu_block_invalid_rreg - dummy reg read function * * @adev: amdgpu device pointer @@ -895,7 +985,7 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) { struct sysinfo si; - bool is_os_64 = (sizeof(void *) == 8) ? true : false; + bool is_os_64 = (sizeof(void *) == 8); uint64_t total_memory; uint64_t dram_size_seven_GB = 0x1B8000000; uint64_t dram_size_three_GB = 0xB8000000; @@ -942,8 +1032,6 @@ def_value: */ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { - int ret = 0; - if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -981,15 +1069,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_device_check_block_size(adev); - ret = amdgpu_device_get_job_timeout_settings(adev); - if (ret) { - dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - return ret; - } - adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - return ret; + return 0; } /** @@ -1004,8 +1086,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) { struct drm_device *dev = pci_get_drvdata(pdev); + int r; - if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF) + if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF) return; if (state == VGA_SWITCHEROO_ON) { @@ -1013,7 +1096,12 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero /* don't suspend or resume card normally */ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; - amdgpu_device_resume(dev, true, true); + pci_set_power_state(dev->pdev, PCI_D0); + pci_restore_state(dev->pdev); + r = pci_enable_device(dev->pdev); + if (r) + DRM_WARN("pci_enable_device failed (%d)\n", r); + amdgpu_device_resume(dev, true); dev->switch_power_state = DRM_SWITCH_POWER_ON; drm_kms_helper_poll_enable(dev); @@ -1021,7 +1109,11 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero pr_info("amdgpu: switched off\n"); drm_kms_helper_poll_disable(dev); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; - amdgpu_device_suspend(dev, true, true); + amdgpu_device_suspend(dev, true); + pci_save_state(dev->pdev); + /* Shut down the device */ + pci_disable_device(dev->pdev); + pci_set_power_state(dev->pdev, PCI_D3cold); dev->switch_power_state = DRM_SWITCH_POWER_OFF; } } @@ -1384,9 +1476,21 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) else chip_name = "raven"; break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; + case CHIP_RENOIR: + chip_name = "renoir"; + break; case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -1415,6 +1519,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) + goto parse_soc_bounding_box; + adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); @@ -1442,14 +1549,18 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->gfx.config.num_packer_per_sc = le32_to_cpu(gpu_info_fw->num_packer_per_sc); } -#ifdef CONFIG_DRM_AMD_DC_DCN2_0 + +parse_soc_bounding_box: + /* + * soc bounding box info is not integrated in disocovery table, + * we always need to parse it from gpu info firmware. + */ if (hdr->version_minor == 2) { const struct gpu_info_firmware_v1_2 *gpu_info_fw = (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; } -#endif break; } default: @@ -1529,7 +1640,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: - if (adev->asic_type == CHIP_RAVEN) + case CHIP_ARCTURUS: + case CHIP_RENOIR: + if (adev->asic_type == CHIP_RAVEN || + adev->asic_type == CHIP_RENOIR) adev->family = AMDGPU_FAMILY_RV; else adev->family = AMDGPU_FAMILY_AI; @@ -1539,6 +1653,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return r; break; case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->family = AMDGPU_FAMILY_NV; r = nv_set_ip_blocks(adev); @@ -1554,19 +1670,19 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) + amdgpu_discovery_get_gfx_info(adev); + amdgpu_amdkfd_device_probe(adev); if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_request_full_gpu(adev, true); if (r) return -EAGAIN; - - /* query the reg access mode at the very beginning */ - amdgpu_virt_init_reg_access_mode(adev); } adev->pm.pp_feature = amdgpu_pp_feature_mask; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1665,29 +1781,36 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_VEGA10) { for (i = 0; i < adev->num_ip_blocks; i++) { - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { - if (adev->in_gpu_reset || adev->in_suspend) { - if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) - break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */ - r = adev->ip_blocks[i].version->funcs->resume(adev); - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", + if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) + continue; + + /* no need to do the fw loading again if already done*/ + if (adev->ip_blocks[i].status.hw == true) + break; + + if (adev->in_gpu_reset || adev->in_suspend) { + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); - return r; - } - } else { - r = adev->ip_blocks[i].version->funcs->hw_init(adev); - if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } + return r; + } + } else { + r = adev->ip_blocks[i].version->funcs->hw_init(adev); + if (r) { + DRM_ERROR("hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; } - adev->ip_blocks[i].status.hw = true; } + + adev->ip_blocks[i].status.hw = true; + break; } } - r = amdgpu_pm_load_smu_firmware(adev, &smu_version); + + if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA) + r = amdgpu_pm_load_smu_firmware(adev, &smu_version); return r; } @@ -1754,6 +1877,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_init_data_exchange(adev); + r = amdgpu_ib_pool_init(adev); if (r) { dev_err(adev->dev, "IB initialization failed (%d).\n", r); @@ -1777,16 +1903,26 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; + /* + * retired pages will be loaded from eeprom and reserved here, + * it should be called after amdgpu_device_ip_hw_init_phase2 since + * for some ASICs the RAS EEPROM code relies on SMU fully functioning + * for I2C communication which only true at this point. + * recovery_init may fail, but it can free all resources allocated by + * itself and its failure should not stop amdgpu init process. + * + * Note: theoretically, this should be called before all vram allocations + * to protect retired page from abusing + */ + amdgpu_ras_recovery_init(adev); + if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev); init_failed: - if (amdgpu_sriov_vf(adev)) { - if (!r) - amdgpu_virt_init_data_exchange(adev); + if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, true); - } return r; } @@ -1825,6 +1961,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) * amdgpu_device_set_cg_state - set clockgating for amdgpu device * * @adev: amdgpu_device pointer + * @state: clockgating state (gate or ungate) * * The list of all the hardware IPs that make up the asic is walked and the * set_clockgating_state callbacks are run. @@ -1849,6 +1986,7 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev, if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, @@ -1879,6 +2017,7 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_powergating_state) { /* enable powergating to save power */ r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, @@ -1944,6 +2083,7 @@ out: */ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) { + struct amdgpu_gpu_instance *gpu_instance; int i = 0, r; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1969,8 +2109,39 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) if (r) DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); - /* set to low pstate by default */ - amdgpu_xgmi_set_pstate(adev, 0); + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + mutex_lock(&mgpu_info.mutex); + + /* + * Reset device p-state to low as this was booted with high. + * + * This should be performed only after all devices from the same + * hive get initialized. + * + * However, it's unknown how many device in the hive in advance. + * As this is counted one by one during devices initializations. + * + * So, we wait for all XGMI interlinked devices initialized. + * This may bring some delays as those devices may come from + * different hives. But that should be OK. + */ + if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { + for (i = 0; i < mgpu_info.num_gpu; i++) { + gpu_instance = &(mgpu_info.gpu_ins[i]); + if (gpu_instance->adev->flags & AMD_IS_APU) + continue; + + r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0); + if (r) { + DRM_ERROR("pstate setting failed (%d).\n", r); + break; + } + } + } + + mutex_unlock(&mgpu_info.mutex); + } return 0; } @@ -2128,7 +2299,9 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) if (r) { DRM_ERROR("suspend of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); + return r; } + adev->ip_blocks[i].status.hw = false; } } @@ -2156,6 +2329,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) /* displays are handled in phase1 */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) continue; + /* PSP lost connection when err_event_athub occurs */ + if (amdgpu_ras_intr_triggered() && + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { + adev->ip_blocks[i].status.hw = false; + continue; + } /* XXX handle errors */ r = adev->ip_blocks[i].version->funcs->suspend(adev); /* XXX handle errors */ @@ -2163,6 +2342,18 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) DRM_ERROR("suspend of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); } + adev->ip_blocks[i].status.hw = false; + /* handle putting the SMC in the appropriate state */ + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { + r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); + if (r) { + DRM_ERROR("SMC failed to set mp1 state %d, %d\n", + adev->mp1_state, r); + return r; + } + } + + adev->ip_blocks[i].status.hw = false; } return 0; @@ -2215,6 +2406,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) for (j = 0; j < adev->num_ip_blocks; j++) { block = &adev->ip_blocks[j]; + block->status.hw = false; if (block->version->type != ip_order[i] || !block->status.valid) continue; @@ -2223,6 +2415,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; + block->status.hw = true; } } @@ -2239,7 +2432,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_GFX, AMD_IP_BLOCK_TYPE_SDMA, AMD_IP_BLOCK_TYPE_UVD, - AMD_IP_BLOCK_TYPE_VCE + AMD_IP_BLOCK_TYPE_VCE, + AMD_IP_BLOCK_TYPE_VCN }; for (i = 0; i < ARRAY_SIZE(ip_order); i++) { @@ -2250,13 +2444,19 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) block = &adev->ip_blocks[j]; if (block->version->type != ip_order[i] || - !block->status.valid) + !block->status.valid || + block->status.hw) continue; - r = block->version->funcs->hw_init(adev); + if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) + r = block->version->funcs->resume(adev); + else + r = block->version->funcs->hw_init(adev); + DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; + block->status.hw = true; } } @@ -2280,17 +2480,19 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { + r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); return r; } + adev->ip_blocks[i].status.hw = true; } } @@ -2315,7 +2517,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || @@ -2328,6 +2530,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) adev->ip_blocks[i].version->funcs->name, r); return r; } + adev->ip_blocks[i].status.hw = true; } return 0; @@ -2421,15 +2624,19 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) +#if defined(CONFIG_DRM_AMD_DC_DCN) case CHIP_RAVEN: -#endif -#if defined(CONFIG_DRM_AMD_DC_DCN2_0) case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + case CHIP_RENOIR: #endif return amdgpu_dc != 0; #endif default: + if (amdgpu_dc > 0) + DRM_INFO("Display Core has been requested via kernel parameter " + "but isn't supported by ASIC, ignoring\n"); return false; } } @@ -2454,13 +2661,110 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) { struct amdgpu_device *adev = container_of(__work, struct amdgpu_device, xgmi_reset_work); + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + + /* It's a bug to not have a hive within this function */ + if (WARN_ON(!hive)) + return; + + /* + * Use task barrier to synchronize all xgmi reset works across the + * hive. task_barrier_enter and task_barrier_exit will block + * until all the threads running the xgmi reset works reach + * those points. task_barrier_full will do both blocks. + */ + if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + + task_barrier_enter(&hive->tb); + adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev); + + if (adev->asic_reset_res) + goto fail; - adev->asic_reset_res = amdgpu_asic_reset(adev); + task_barrier_exit(&hive->tb); + adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev); + + if (adev->asic_reset_res) + goto fail; + } else { + + task_barrier_full(&hive->tb); + adev->asic_reset_res = amdgpu_asic_reset(adev); + } + +fail: if (adev->asic_reset_res) DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", adev->asic_reset_res, adev->ddev->unique); } +static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) +{ + char *input = amdgpu_lockup_timeout; + char *timeout_setting = NULL; + int index = 0; + long timeout; + int ret = 0; + + /* + * By default timeout for non compute jobs is 10000. + * And there is no timeout enforced on compute jobs. + * In SR-IOV or passthrough mode, timeout for compute + * jobs are 10000 by default. + */ + adev->gfx_timeout = msecs_to_jiffies(10000); + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) + adev->compute_timeout = adev->gfx_timeout; + else + adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + + if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { + while ((timeout_setting = strsep(&input, ",")) && + strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { + ret = kstrtol(timeout_setting, 0, &timeout); + if (ret) + return ret; + + if (timeout == 0) { + index++; + continue; + } else if (timeout < 0) { + timeout = MAX_SCHEDULE_TIMEOUT; + } else { + timeout = msecs_to_jiffies(timeout); + } + + switch (index++) { + case 0: + adev->gfx_timeout = timeout; + break; + case 1: + adev->compute_timeout = timeout; + break; + case 2: + adev->sdma_timeout = timeout; + break; + case 3: + adev->video_timeout = timeout; + break; + default: + break; + } + } + /* + * There is only one value specified and + * it should apply to all non-compute jobs. + */ + if (index == 1) { + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) + adev->compute_timeout = adev->gfx_timeout; + } + } + + return ret; +} /** * amdgpu_device_init - initialize the driver @@ -2480,7 +2784,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, uint32_t flags) { int r, i; - bool runtime = false; + bool boco = false; u32 max_MBps; adev->shutdown = false; @@ -2488,7 +2792,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->ddev = ddev; adev->pdev = pdev; adev->flags = flags; - adev->asic_type = flags & AMD_ASIC_MASK; + + if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) + adev->asic_type = amdgpu_force_asic_type; + else + adev->asic_type = flags & AMD_ASIC_MASK; + adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; if (amdgpu_emu_mode == 1) adev->usec_timeout *= 2; @@ -2498,7 +2807,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs_ring = NULL; adev->vm_manager.vm_pte_funcs = NULL; - adev->vm_manager.vm_pte_num_rqs = 0; + adev->vm_manager.vm_pte_num_scheds = 0; adev->gmc.gmc_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); @@ -2509,6 +2818,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->pcie_wreg = &amdgpu_invalid_wreg; adev->pciep_rreg = &amdgpu_invalid_rreg; adev->pciep_wreg = &amdgpu_invalid_wreg; + adev->pcie_rreg64 = &amdgpu_invalid_rreg64; + adev->pcie_wreg64 = &amdgpu_invalid_wreg64; adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; adev->didt_rreg = &amdgpu_invalid_rreg; @@ -2536,8 +2847,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); - mutex_init(&adev->virt.dpm_mutex); mutex_init(&adev->psp.mutex); + mutex_init(&adev->notifier_lock); r = amdgpu_device_check_arguments(adev); if (r) @@ -2629,6 +2940,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + r = amdgpu_device_get_job_timeout_settings(adev); + if (r) { + dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + return r; + } + /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); @@ -2637,12 +2954,15 @@ int amdgpu_device_init(struct amdgpu_device *adev, * ignore it */ vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); - if (amdgpu_device_is_px(ddev)) - runtime = true; - if (!pci_is_thunderbolt_attached(adev->pdev)) + if (amdgpu_device_supports_boco(ddev)) + boco = true; + if (amdgpu_has_atpx() && + (amdgpu_is_atpx_hybrid() || + amdgpu_has_atpx_dgpu_power_cntl()) && + !pci_is_thunderbolt_attached(adev->pdev)) vga_switcheroo_register_client(adev->pdev, - &amdgpu_switcheroo_ops, runtime); - if (runtime) + &amdgpu_switcheroo_ops, boco); + if (boco) vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); if (amdgpu_emu_mode == 1) { @@ -2729,11 +3049,17 @@ fence_driver_init: } dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); - if (amdgpu_virt_request_full_gpu(adev, false)) - amdgpu_virt_release_full_gpu(adev, false); goto failed; } + DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se, + adev->gfx.config.max_cu_per_sh, + adev->gfx.cu_info.number); + + amdgpu_ctx_init_sched(adev); + adev->accel_working = true; amdgpu_vm_check_compute_bug(adev); @@ -2748,16 +3074,19 @@ fence_driver_init: amdgpu_fbdev_init(adev); - if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)) - amdgpu_pm_virt_sysfs_init(adev); - r = amdgpu_pm_sysfs_init(adev); - if (r) + if (r) { + adev->pm_sysfs_en = false; DRM_ERROR("registering pm debugfs failed (%d).\n", r); + } else + adev->pm_sysfs_en = true; r = amdgpu_ucode_sysfs_init(adev); - if (r) + if (r) { + adev->ucode_sysfs_en = false; DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); + } else + adev->ucode_sysfs_en = true; r = amdgpu_debugfs_gem_init(adev); if (r) @@ -2788,6 +3117,13 @@ fence_driver_init: DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); } + /* + * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. + * Otherwise the mgpu fan boost feature will be skipped due to the + * gpu instance is counted less. + */ + amdgpu_register_gpu_instance(adev); + /* enable clockgating, etc. after ib tests, etc. since some blocks require * explicit gating rather than handling it automatically. */ @@ -2819,7 +3155,7 @@ fence_driver_init: failed: amdgpu_vf_error_trans_all(adev); - if (runtime) + if (boco) vga_switcheroo_fini_domain_pm_ops(adev->dev); return r; @@ -2838,7 +3174,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev) int r; DRM_INFO("amdgpu: finishing device.\n"); + flush_delayed_work(&adev->delayed_init_work); adev->shutdown = true; + /* disable all interrupts */ amdgpu_irq_disable_all(adev); if (adev->mode_info.mode_config_initialized){ @@ -2848,7 +3186,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) drm_atomic_helper_shutdown(adev->ddev); } amdgpu_fence_driver_fini(adev); - amdgpu_pm_sysfs_fini(adev); + if (adev->pm_sysfs_en) + amdgpu_pm_sysfs_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_device_ip_fini(adev); if (adev->firmware.gpu_info_fw) { @@ -2856,7 +3195,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; } adev->accel_working = false; - cancel_delayed_work_sync(&adev->delayed_init_work); /* free i2c buses */ if (!amdgpu_device_has_dc_support(adev)) amdgpu_i2c_fini(adev); @@ -2866,9 +3204,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev) kfree(adev->bios); adev->bios = NULL; - if (!pci_is_thunderbolt_attached(adev->pdev)) + if (amdgpu_has_atpx() && + (amdgpu_is_atpx_hybrid() || + amdgpu_has_atpx_dgpu_power_cntl()) && + !pci_is_thunderbolt_attached(adev->pdev)) vga_switcheroo_unregister_client(adev->pdev); - if (adev->flags & AMD_IS_PX) + if (amdgpu_device_supports_boco(adev->ddev)) vga_switcheroo_fini_domain_pm_ops(adev->dev); vga_client_register(adev->pdev, NULL, NULL, NULL); if (adev->rio_mem) @@ -2877,12 +3218,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; amdgpu_device_doorbell_fini(adev); - if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)) - amdgpu_pm_virt_sysfs_fini(adev); amdgpu_debugfs_regs_cleanup(adev); device_remove_file(adev->dev, &dev_attr_pcie_replay_count); - amdgpu_ucode_sysfs_fini(adev); + if (adev->ucode_sysfs_en) + amdgpu_ucode_sysfs_fini(adev); if (IS_ENABLED(CONFIG_PERF_EVENTS)) amdgpu_pmu_fini(adev); amdgpu_debugfs_preempt_cleanup(adev); @@ -2905,11 +3245,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev) * Returns 0 for success or an error on failure. * Called at driver suspend. */ -int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) +int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) { struct amdgpu_device *adev; struct drm_crtc *crtc; struct drm_connector *connector; + struct drm_connector_list_iter iter; int r; if (dev == NULL || dev->dev_private == NULL) { @@ -2932,9 +3273,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) if (!amdgpu_device_has_dc_support(adev)) { /* turn off display hw */ drm_modeset_lock_all(dev); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - } + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) + drm_helper_connector_dpms(connector, + DRM_MODE_DPMS_OFF); + drm_connector_list_iter_end(&iter); drm_modeset_unlock_all(dev); /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -2985,17 +3328,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) */ amdgpu_bo_evict_vram(adev); - pci_save_state(dev->pdev); - if (suspend) { - /* Shut down the device */ - pci_disable_device(dev->pdev); - pci_set_power_state(dev->pdev, PCI_D3hot); - } else { - r = amdgpu_asic_reset(adev); - if (r) - DRM_ERROR("amdgpu asic reset failed\n"); - } - return 0; } @@ -3010,9 +3342,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) * Returns 0 for success or an error on failure. * Called at driver resume. */ -int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) +int amdgpu_device_resume(struct drm_device *dev, bool fbcon) { struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_device *adev = dev->dev_private; struct drm_crtc *crtc; int r = 0; @@ -3020,14 +3353,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - if (resume) { - pci_set_power_state(dev->pdev, PCI_D0); - pci_restore_state(dev->pdev); - r = pci_enable_device(dev->pdev); - if (r) - return r; - } - /* post card */ if (amdgpu_device_need_post(adev)) { r = amdgpu_atom_asic_init(adev->mode_info.atom_context); @@ -3083,9 +3408,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) /* turn on display hw */ drm_modeset_lock_all(dev); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - } + + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) + drm_helper_connector_dpms(connector, + DRM_MODE_DPMS_ON); + drm_connector_list_iter_end(&iter); + drm_modeset_unlock_all(dev); } amdgpu_fbdev_set_suspend(adev, 0); @@ -3362,13 +3691,12 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - amdgpu_amdkfd_pre_reset(adev); - /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) goto error; + amdgpu_virt_init_data_exchange(adev); /* we need recover gart prior to run SMC/CP/SDMA resume */ amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); @@ -3386,10 +3714,9 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, amdgpu_amdkfd_post_reset(adev); error: - amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { - atomic_inc(&adev->vram_lost_counter); + amdgpu_inc_vram_lost(adev); r = amdgpu_device_recover_vram(adev); } @@ -3431,6 +3758,12 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_VEGA20: case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: break; default: goto disabled; @@ -3507,7 +3840,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /* For XGMI run all resets in parallel to speed up the process */ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { - if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) + if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) r = -EALREADY; } else r = amdgpu_asic_reset(tmp_adev); @@ -3519,7 +3852,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, } } - /* For XGMI wait for all PSP resets to complete before proceed */ + /* For XGMI wait for all resets to complete before proceed */ if (!r) { list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { @@ -3530,14 +3863,11 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, break; } } - - list_for_each_entry(tmp_adev, device_list_handle, - gmc.xgmi.head) { - amdgpu_ras_reserve_bad_pages(tmp_adev); - } } } + if (!r && amdgpu_ras_intr_triggered()) + amdgpu_ras_intr_cleared(); list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { if (need_full_reset) { @@ -3554,7 +3884,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); - atomic_inc(&tmp_adev->vram_lost_counter); + amdgpu_inc_vram_lost(tmp_adev); } r = amdgpu_gtt_mgr_recover( @@ -3626,25 +3956,30 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) mutex_lock(&adev->lock_reset); atomic_inc(&adev->gpu_reset_counter); - adev->in_gpu_reset = 1; - /* Block kfd: SRIOV would do it separately */ - if (!amdgpu_sriov_vf(adev)) - amdgpu_amdkfd_pre_reset(adev); + adev->in_gpu_reset = true; + switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_MODE1: + adev->mp1_state = PP_MP1_STATE_SHUTDOWN; + break; + case AMD_RESET_METHOD_MODE2: + adev->mp1_state = PP_MP1_STATE_RESET; + break; + default: + adev->mp1_state = PP_MP1_STATE_NONE; + break; + } return true; } static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) { - /*unlock kfd: SRIOV would do it separately */ - if (!amdgpu_sriov_vf(adev)) - amdgpu_amdkfd_post_reset(adev); amdgpu_vf_error_trans_all(adev); - adev->in_gpu_reset = 0; + adev->mp1_state = PP_MP1_STATE_NONE; + adev->in_gpu_reset = false; mutex_unlock(&adev->lock_reset); } - /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * @@ -3664,11 +3999,28 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_hive_info *hive = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; + bool in_ras_intr = amdgpu_ras_intr_triggered(); + bool use_baco = + (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? + true : false; + + /* + * Flush RAM to disk so that after reboot + * the user can read log and see why the system rebooted. + */ + if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) { + + DRM_WARN("Emergency reboot."); + + ksys_sync_helper(); + emergency_restart(); + } need_full_reset = job_signaled = false; INIT_LIST_HEAD(&device_list); - dev_info(adev->dev, "GPU reset begin!\n"); + dev_info(adev->dev, "GPU %s begin!\n", + (in_ras_intr && !use_baco) ? "jobs stop":"reset"); cancel_delayed_work_sync(&adev->delayed_init_work); @@ -3684,20 +4036,27 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (hive && !mutex_trylock(&hive->reset_lock)) { DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", - job->base.id, hive->hive_id); + job ? job->base.id : -1, hive->hive_id); return 0; } /* Start with adev pre asic reset first for soft reset check.*/ if (!amdgpu_device_lock_adev(adev, !hive)) { DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", - job->base.id); + job ? job->base.id : -1); return 0; } + /* Block kfd: SRIOV would do it separately */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_amdkfd_pre_reset(adev); + /* Build list of devices to reset */ if (adev->gmc.xgmi.num_physical_nodes > 1) { if (!hive) { + /*unlock kfd: SRIOV would do it separately */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_amdkfd_post_reset(adev); amdgpu_device_unlock_adev(adev); return -ENODEV; } @@ -3713,17 +4072,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } - /* - * Mark these ASICs to be reseted as untracked first - * And add them back after reset completed - */ - list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) - amdgpu_unregister_gpu_instance(tmp_adev); - /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + if (tmp_adev != adev) { + amdgpu_device_lock_adev(tmp_adev, false); + if (!amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_pre_reset(tmp_adev); + } + + /* + * Mark these ASICs to be reseted as untracked first + * And add them back after reset completed + */ + amdgpu_unregister_gpu_instance(tmp_adev); + /* disable ras on ALL IPs */ - if (amdgpu_device_ip_need_full_reset(tmp_adev)) + if (!(in_ras_intr && !use_baco) && + amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -3732,11 +4097,17 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (!ring || !ring->sched.thread) continue; - drm_sched_stop(&ring->sched, &job->base); + drm_sched_stop(&ring->sched, job ? &job->base : NULL); + + if (in_ras_intr && !use_baco) + amdgpu_job_stop_all_jobs_on_sched(&ring->sched); } } + if (in_ras_intr && !use_baco) + goto skip_sched_resume; + /* * Must check guilty signal here since after this point all old * HW fences are force signaled. @@ -3747,9 +4118,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, dma_fence_is_signaled(job->base.s_fence->parent)) job_signaled = true; - if (!amdgpu_device_ip_need_full_reset(adev)) - device_list_handle = &device_list; - if (job_signaled) { dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; @@ -3757,9 +4125,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* Guilty job will be freed after this*/ - r = amdgpu_device_pre_asic_reset(adev, - job, - &need_full_reset); + r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset); if (r) { /*TODO Should we stop ?*/ DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", @@ -3773,7 +4139,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (tmp_adev == adev) continue; - amdgpu_device_lock_adev(tmp_adev, false); r = amdgpu_device_pre_asic_reset(tmp_adev, NULL, &need_full_reset); @@ -3801,6 +4166,7 @@ skip_hw_reset: /* Post ASIC reset for all devs .*/ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; @@ -3822,12 +4188,18 @@ skip_hw_reset: if (r) { /* bad news, how to tell it to userspace ? */ - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { - dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter)); + dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); } + } +skip_sched_resume: + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + /*unlock kfd: SRIOV would do it separately */ + if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_post_reset(tmp_adev); amdgpu_device_unlock_adev(tmp_adev); } @@ -3975,3 +4347,35 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) } } +int amdgpu_device_baco_enter(struct drm_device *dev) +{ + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (!amdgpu_device_supports_baco(adev->ddev)) + return -ENOTSUPP; + + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, false); + + return amdgpu_dpm_baco_enter(adev); +} + +int amdgpu_device_baco_exit(struct drm_device *dev) +{ + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; + + if (!amdgpu_device_supports_baco(adev->ddev)) + return -ENOTSUPP; + + ret = amdgpu_dpm_baco_exit(adev); + if (ret) + return ret; + + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, true); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h new file mode 100644 index 000000000000..057f6ea645d7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -0,0 +1,65 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_DF_H__ +#define __AMDGPU_DF_H__ + +struct amdgpu_df_hash_status { + bool hash_64k; + bool hash_2m; + bool hash_1g; +}; + +struct amdgpu_df_funcs { + void (*sw_init)(struct amdgpu_device *adev); + void (*sw_fini)(struct amdgpu_device *adev); + void (*enable_broadcast_mode)(struct amdgpu_device *adev, + bool enable); + u32 (*get_fb_channel_number)(struct amdgpu_device *adev); + u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, + bool enable); + int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, + int is_enable); + int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, + int is_disable); + void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, + uint64_t *count); + uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); + void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val); + uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev, + uint32_t df_inst); + uint32_t (*get_df_inst_id)(struct amdgpu_device *adev); +}; + +struct amdgpu_df { + struct amdgpu_df_hash_status hash_status; + const struct amdgpu_df_funcs *funcs; +}; + +#endif /* __AMDGPU_DF_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 1481899f86c1..f95092741c38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -134,20 +134,10 @@ static int hw_id_map[MAX_HWIP] = { static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) { - uint32_t *p = (uint32_t *)binary; uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - uint64_t pos = vram_size - BINARY_MAX_SIZE; - unsigned long flags; - - while (pos < vram_size) { - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); - WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31); - *p++ = RREG32_NO_KIQ(mmMM_DATA); - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); - pos += 4; - } + uint64_t pos = vram_size - DISCOVERY_TMR_SIZE; + amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, DISCOVERY_TMR_SIZE, false); return 0; } @@ -179,7 +169,7 @@ int amdgpu_discovery_init(struct amdgpu_device *adev) uint16_t checksum; int r; - adev->discovery = kzalloc(BINARY_MAX_SIZE, GFP_KERNEL); + adev->discovery = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL); if (!adev->discovery) return -ENOMEM; @@ -333,7 +323,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, - int *major, int *minor) + int *major, int *minor, int *revision) { struct binary_header *bhdr; struct ip_discovery_header *ihdr; @@ -369,6 +359,8 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, *major = ip->major; if (minor) *minor = ip->minor; + if (revision) + *revision = ip->revision; return 0; } ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 85b8c4d4d576..ba78e15d9b05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -24,11 +24,13 @@ #ifndef __AMDGPU_DISCOVERY__ #define __AMDGPU_DISCOVERY__ +#define DISCOVERY_TMR_SIZE (64 << 10) + int amdgpu_discovery_init(struct amdgpu_device *adev); void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, - int *major, int *minor); + int *major, int *minor, int *revision); int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev); #endif /* __AMDGPU_DISCOVERY__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 535650967b1a..6d520a3eec40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -191,7 +191,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, } if (!adev->enable_virtual_display) { - r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev)); + r = amdgpu_bo_pin(new_abo, + amdgpu_display_supported_domains(adev, new_abo->flags)); if (unlikely(r != 0)) { DRM_ERROR("failed to pin new abo buffer before flip\n"); goto unreserve; @@ -204,7 +205,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl, + r = dma_resv_get_fences_rcu(new_abo->tbo.base.resv, &work->excl, &work->shared_count, &work->shared); if (unlikely(r != 0)) { @@ -369,11 +370,13 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) struct amdgpu_connector *amdgpu_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; + struct drm_connector_list_iter iter; uint32_t devices; int i = 0; + drm_connector_list_iter_begin(dev, &iter); DRM_INFO("AMDGPU Display Connectors\n"); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); DRM_INFO("Connector %d:\n", i); DRM_INFO(" %s\n", connector->name); @@ -437,6 +440,7 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) } i++; } + drm_connector_list_iter_end(&iter); } /** @@ -495,15 +499,37 @@ static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { .create_handle = drm_gem_fb_create_handle, }; -uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev) +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, + uint64_t bo_flags) { uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; #if defined(CONFIG_DRM_AMD_DC) - if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN && - adev->flags & AMD_IS_APU && - amdgpu_device_asic_has_dc_support(adev->asic_type)) - domain |= AMDGPU_GEM_DOMAIN_GTT; + /* + * if amdgpu_bo_support_uswc returns false it means that USWC mappings + * is not supported for this board. But this mapping is required + * to avoid hang caused by placement of scanout BO in GTT on certain + * APUs. So force the BO placement to VRAM in case this architecture + * will not allow USWC mappings. + * Also, don't allow GTT domain if the BO doens't have USWC falg set. + */ + if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && + amdgpu_bo_support_uswc(bo_flags) && + amdgpu_device_asic_has_dc_support(adev->asic_type)) { + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + case CHIP_RAVEN: + /* enable S/G on PCO and RV2 */ + if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + default: + break; + } + } #endif return domain; @@ -674,7 +700,6 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct amdgpu_encoder *amdgpu_encoder; struct drm_connector *connector; - struct amdgpu_connector *amdgpu_connector; u32 src_v = 1, dst_v = 1; u32 src_h = 1, dst_h = 1; @@ -686,7 +711,6 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, continue; amdgpu_encoder = to_amdgpu_encoder(encoder); connector = amdgpu_get_connector_for_encoder(encoder); - amdgpu_connector = to_amdgpu_connector(connector); /* set scaling */ if (amdgpu_encoder->rmx_type == RMX_OFF) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 06b922fe0d42..3620b24785e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -38,7 +38,8 @@ int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); void amdgpu_display_update_priority(struct amdgpu_device *adev); -uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, + uint64_t bo_flags); struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 489041df1f45..a59cd47aa6c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -34,27 +34,12 @@ #include "amdgpu.h" #include "amdgpu_display.h" #include "amdgpu_gem.h" +#include "amdgpu_dma_buf.h" #include <drm/amdgpu_drm.h> #include <linux/dma-buf.h> #include <linux/dma-fence-array.h> /** - * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table - * implementation - * @obj: GEM buffer object (BO) - * - * Returns: - * A scatter/gather table for the pinned pages of the BO's memory. - */ -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) -{ - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - int npages = bo->tbo.num_pages; - - return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages); -} - -/** * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation * @obj: GEM BO * @@ -137,23 +122,23 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, } static int -__reservation_object_make_exclusive(struct reservation_object *obj) +__dma_resv_make_exclusive(struct dma_resv *obj) { struct dma_fence **fences; unsigned int count; int r; - if (!reservation_object_get_list(obj)) /* no shared fences to convert */ + if (!dma_resv_get_list(obj)) /* no shared fences to convert */ return 0; - r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences); + r = dma_resv_get_fences_rcu(obj, NULL, &count, &fences); if (r) return r; if (count == 0) { /* Now that was unexpected. */ } else if (count == 1) { - reservation_object_add_excl_fence(obj, fences[0]); + dma_resv_add_excl_fence(obj, fences[0]); dma_fence_put(fences[0]); kfree(fences); } else { @@ -165,7 +150,7 @@ __reservation_object_make_exclusive(struct reservation_object *obj) if (!array) goto err_fences_put; - reservation_object_add_excl_fence(obj, &array->base); + dma_resv_add_excl_fence(obj, &array->base); dma_fence_put(&array->base); } @@ -179,106 +164,126 @@ err_fences_put: } /** - * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation - * @dma_buf: Shared DMA buffer - * @attach: DMA-buf attachment + * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * - * Makes sure that the shared DMA buffer can be accessed by the target device. - * For now, simply pins it to the GTT domain, where it should be accessible by - * all DMA devices. + * @dmabuf: DMA-buf where we attach to + * @attach: attachment to add * - * Returns: - * 0 on success or a negative error code on failure. + * Add the attachment as user to the exported DMA-buf. */ -static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf, - struct dma_buf_attachment *attach) +static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) { - struct drm_gem_object *obj = dma_buf->priv; + struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - long r; + int r; - r = drm_gem_map_attach(dma_buf, attach); - if (r) - return r; + if (attach->dev->driver == adev->dev->driver) + return 0; r = amdgpu_bo_reserve(bo, false); if (unlikely(r != 0)) - goto error_detach; - - - if (attach->dev->driver != adev->dev->driver) { - /* - * We only create shared fences for internal use, but importers - * of the dmabuf rely on exclusive fences for implicitly - * tracking write hazards. As any of the current fences may - * correspond to a write, we need to convert all existing - * fences on the reservation object into a single exclusive - * fence. - */ - r = __reservation_object_make_exclusive(bo->tbo.resv); - if (r) - goto error_unreserve; - } + return r; - /* pin buffer into GTT */ - r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + /* + * We only create shared fences for internal use, but importers + * of the dmabuf rely on exclusive fences for implicitly + * tracking write hazards. As any of the current fences may + * correspond to a write, we need to convert all existing + * fences on the reservation object into a single exclusive + * fence. + */ + r = __dma_resv_make_exclusive(bo->tbo.base.resv); if (r) - goto error_unreserve; - - if (attach->dev->driver != adev->dev->driver) - bo->prime_shared_count++; + return r; -error_unreserve: + bo->prime_shared_count++; amdgpu_bo_unreserve(bo); + return 0; +} -error_detach: - if (r) - drm_gem_map_detach(dma_buf, attach); - return r; +/** + * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation + * + * @dmabuf: DMA-buf where we remove the attachment from + * @attach: the attachment to remove + * + * Called when an attachment is removed from the DMA-buf. + */ +static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = dmabuf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + + if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count) + bo->prime_shared_count--; } /** - * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation - * @dma_buf: Shared DMA buffer + * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation * @attach: DMA-buf attachment + * @dir: DMA direction * - * This is called when a shared DMA buffer no longer needs to be accessible by - * another device. For now, simply unpins the buffer from GTT. + * Makes sure that the shared DMA buffer can be accessed by the target device. + * For now, simply pins it to the GTT domain, where it should be accessible by + * all DMA devices. + * + * Returns: + * sg_table filled with the DMA addresses to use or ERR_PRT with negative error + * code. */ -static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf, - struct dma_buf_attachment *attach) +static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, + enum dma_data_direction dir) { + struct dma_buf *dma_buf = attach->dmabuf; struct drm_gem_object *obj = dma_buf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - int ret = 0; + struct sg_table *sgt; + long r; - ret = amdgpu_bo_reserve(bo, true); - if (unlikely(ret != 0)) - goto error; + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (r) + return ERR_PTR(r); - amdgpu_bo_unpin(bo); - if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count) - bo->prime_shared_count--; - amdgpu_bo_unreserve(bo); + sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages); + if (IS_ERR(sgt)) + return sgt; -error: - drm_gem_map_detach(dma_buf, attach); + if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir, + DMA_ATTR_SKIP_CPU_SYNC)) + goto error_free; + + return sgt; + +error_free: + sg_free_table(sgt); + kfree(sgt); + return ERR_PTR(-ENOMEM); } /** - * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation - * @obj: GEM BO + * amdgpu_dma_buf_unmap - &dma_buf_ops.unmap_dma_buf implementation + * @attach: DMA-buf attachment + * @sgt: sg_table to unmap + * @dir: DMA direction * - * Returns: - * The BO's reservation object. + * This is called when a shared DMA buffer no longer needs to be accessible by + * another device. For now, simply unpins the buffer from GTT. */ -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) +static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) { + struct drm_gem_object *obj = attach->dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - return bo->tbo.resv; + dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir); + sg_free_table(sgt); + kfree(sgt); + amdgpu_bo_unpin(bo); } /** @@ -299,7 +304,7 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { true, false }; - u32 domain = amdgpu_display_supported_domains(adev); + u32 domain = amdgpu_display_supported_domains(adev, bo->flags); int ret; bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE); @@ -322,10 +327,11 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, } const struct dma_buf_ops amdgpu_dmabuf_ops = { - .attach = amdgpu_dma_buf_map_attach, - .detach = amdgpu_dma_buf_map_detach, - .map_dma_buf = drm_gem_map_dma_buf, - .unmap_dma_buf = drm_gem_unmap_dma_buf, + .dynamic_mapping = true, + .attach = amdgpu_dma_buf_attach, + .detach = amdgpu_dma_buf_detach, + .map_dma_buf = amdgpu_dma_buf_map, + .unmap_dma_buf = amdgpu_dma_buf_unmap, .release = drm_gem_dmabuf_release, .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, @@ -335,18 +341,15 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = { /** * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation - * @dev: DRM device * @gobj: GEM BO * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR. * - * The main work is done by the &drm_gem_prime_export helper, which in turn - * uses &amdgpu_gem_prime_res_obj. + * The main work is done by the &drm_gem_prime_export helper. * * Returns: * Shared DMA buffer representing the GEM BO from the given device. */ -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gobj, +struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, int flags) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); @@ -356,63 +359,56 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) return ERR_PTR(-EPERM); - buf = drm_gem_prime_export(dev, gobj, flags); - if (!IS_ERR(buf)) { - buf->file->f_mapping = dev->anon_inode->i_mapping; + buf = drm_gem_prime_export(gobj, flags); + if (!IS_ERR(buf)) buf->ops = &amdgpu_dmabuf_ops; - } return buf; } /** - * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table - * implementation + * amdgpu_dma_buf_create_obj - create BO for DMA-buf import + * * @dev: DRM device - * @attach: DMA-buf attachment - * @sg: Scatter/gather table + * @dma_buf: DMA-buf * - * Imports shared DMA buffer memory exported by another device. + * Creates an empty SG BO for DMA-buf import. * * Returns: * A new GEM BO of the given DRM device, representing the memory * described by the given DMA-buf attachment and scatter/gather table. */ -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg) +static struct drm_gem_object * +amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) { - struct reservation_object *resv = attach->dmabuf->resv; + struct dma_resv *resv = dma_buf->resv; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; int ret; memset(&bp, 0, sizeof(bp)); - bp.size = attach->dmabuf->size; + bp.size = dma_buf->size; bp.byte_align = PAGE_SIZE; bp.domain = AMDGPU_GEM_DOMAIN_CPU; bp.flags = 0; bp.type = ttm_bo_type_sg; bp.resv = resv; - ww_mutex_lock(&resv->lock, NULL); + dma_resv_lock(resv, NULL); ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) goto error; - bo->tbo.sg = sg; - bo->tbo.ttm->sg = sg; bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; - if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) + if (dma_buf->ops != &amdgpu_dmabuf_ops) bo->prime_shared_count = 1; - ww_mutex_unlock(&resv->lock); - return &bo->gem_base; + dma_resv_unlock(resv); + return &bo->tbo.base; error: - ww_mutex_unlock(&resv->lock); + dma_resv_unlock(resv); return ERR_PTR(ret); } @@ -421,15 +417,15 @@ error: * @dev: DRM device * @dma_buf: Shared DMA buffer * - * The main work is done by the &drm_gem_prime_import helper, which in turn - * uses &amdgpu_gem_prime_import_sg_table. + * Import a dma_buf into a the driver and potentially create a new GEM object. * * Returns: * GEM BO representing the shared DMA buffer for the given device. */ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf) + struct dma_buf *dma_buf) { + struct dma_buf_attachment *attach; struct drm_gem_object *obj; if (dma_buf->ops == &amdgpu_dmabuf_ops) { @@ -444,5 +440,17 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, } } - return drm_gem_prime_import(dev, dma_buf); + obj = amdgpu_dma_buf_create_obj(dev, dma_buf); + if (IS_ERR(obj)) + return obj; + + attach = dma_buf_dynamic_attach(dma_buf, dev->dev, true); + if (IS_ERR(attach)) { + drm_gem_object_put(obj); + return ERR_CAST(attach); + } + + get_dma_buf(dma_buf); + obj->import_attach = attach; + return obj; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h index c7056cbe8685..ec447a7b6b28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -25,17 +25,10 @@ #include <drm/drm_gem.h> -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg); -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gobj, +struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, int flags); struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 790263dcc064..3fa18003d4d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -130,13 +130,18 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL_IH = 0x178, /* MMSCH: 392~407 * overlap the doorbell assignment with VCN as they are mutually exclusive - * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + * VCN engine's doorbell is 32 bit and two VCN ring share one QWORD */ - AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* VNC0 */ AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189, AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A, AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B, + AMDGPU_VEGA20_DOORBELL64_VCN8_9 = 0x18C, /* VNC1 */ + AMDGPU_VEGA20_DOORBELL64_VCNa_b = 0x18D, + AMDGPU_VEGA20_DOORBELL64_VCNc_d = 0x18E, + AMDGPU_VEGA20_DOORBELL64_VCNe_f = 0x18F, + AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188, AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189, AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 61bd10310604..a2e8c3dfb4f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -911,7 +911,8 @@ int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low) if (is_support_sw_smu(adev)) { ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK, low ? &clk_freq : NULL, - !low ? &clk_freq : NULL); + !low ? &clk_freq : NULL, + true); if (ret) return 0; return clk_freq * 100; @@ -928,7 +929,8 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low) if (is_support_sw_smu(adev)) { ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK, low ? &clk_freq : NULL, - !low ? &clk_freq : NULL); + !low ? &clk_freq : NULL, + true); if (ret) return 0; return clk_freq * 100; @@ -944,20 +946,63 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block bool swsmu = is_support_sw_smu(adev); switch (block_type) { - case AMD_IP_BLOCK_TYPE_GFX: case AMD_IP_BLOCK_TYPE_UVD: - case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_VCE: + if (swsmu) { + ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) { + /* + * TODO: need a better lock mechanism + * + * Here adev->pm.mutex lock protection is enforced on + * UVD and VCE cases only. Since for other cases, there + * may be already lock protection in amdgpu_pm.c. + * This is a quick fix for the deadlock issue below. + * NFO: task ocltst:2028 blocked for more than 120 seconds. + * Tainted: G OE 5.0.0-37-generic #40~18.04.1-Ubuntu + * echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. + * cltst D 0 2028 2026 0x00000000 + * all Trace: + * __schedule+0x2c0/0x870 + * schedule+0x2c/0x70 + * schedule_preempt_disabled+0xe/0x10 + * __mutex_lock.isra.9+0x26d/0x4e0 + * __mutex_lock_slowpath+0x13/0x20 + * ? __mutex_lock_slowpath+0x13/0x20 + * mutex_lock+0x2f/0x40 + * amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu] + * gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu] + * gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu] + * amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu] + * pp_dpm_force_performance_level+0xe7/0x100 [amdgpu] + * amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu] + */ + mutex_lock(&adev->pm.mutex); + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + mutex_unlock(&adev->pm.mutex); + } + break; + case AMD_IP_BLOCK_TYPE_GFX: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_SDMA: if (swsmu) ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); - else + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( (adev)->powerplay.pp_handle, block_type, gate)); break; + case AMD_IP_BLOCK_TYPE_JPEG: + if (swsmu) + ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); + break; case AMD_IP_BLOCK_TYPE_GMC: case AMD_IP_BLOCK_TYPE_ACP: - case AMD_IP_BLOCK_TYPE_SDMA: - ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( (adev)->powerplay.pp_handle, block_type, gate)); break; default: @@ -966,3 +1011,163 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block return ret; } + +int amdgpu_dpm_baco_enter(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_baco_enter(smu); + } else { + if (!pp_funcs || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 1); + } + + return ret; +} + +int amdgpu_dpm_baco_exit(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_baco_exit(smu); + } else { + if (!pp_funcs || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* exit BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 0); + } + + return ret; +} + +int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, + enum pp_mp1_state mp1_state) +{ + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_set_mp1_state(&adev->smu, mp1_state); + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_mp1_state) { + ret = adev->powerplay.pp_funcs->set_mp1_state( + adev->powerplay.pp_handle, + mp1_state); + } + + return ret; +} + +bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + bool baco_cap; + + if (is_support_sw_smu(adev)) { + return smu_baco_is_support(smu); + } else { + if (!pp_funcs || !pp_funcs->get_asic_baco_capability) + return false; + + if (pp_funcs->get_asic_baco_capability(pp_handle, &baco_cap)) + return false; + + return baco_cap ? true : false; + } +} + +int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) { + return smu_mode2_reset(smu); + } else { + if (!pp_funcs || !pp_funcs->asic_reset_mode_2) + return -ENOENT; + + return pp_funcs->asic_reset_mode_2(pp_handle); + } +} + +int amdgpu_dpm_baco_reset(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + dev_info(adev->dev, "GPU BACO reset\n"); + + if (is_support_sw_smu(adev)) { + ret = smu_baco_enter(smu); + if (ret) + return ret; + + ret = smu_baco_exit(smu); + if (ret) + return ret; + } else { + if (!pp_funcs + || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 1); + if (ret) + return ret; + + /* exit BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 0); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE type, + bool en) +{ + int ret = 0; + + if (is_support_sw_smu(adev)) + ret = smu_switch_power_profile(&adev->smu, type, en); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->switch_power_profile) + ret = adev->powerplay.pp_funcs->switch_power_profile( + adev->powerplay.pp_handle, type, en); + + return ret; +} + +int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, + uint32_t pstate) +{ + int ret = 0; + + if (is_support_sw_smu_xgmi(adev)) + ret = smu_set_xgmi_pstate(&adev->smu, pstate); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_xgmi_pstate) + ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, + pstate); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 1c5c0fd76dbf..902ca6c00cca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -298,12 +298,6 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_get_current_power_state(adev) \ ((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)) -#define amdgpu_smu_get_current_power_state(adev) \ - ((adev)->smu.ppt_funcs->get_current_power_state(&((adev)->smu))) - -#define amdgpu_smu_set_power_state(adev) \ - ((adev)->smu.ppt_funcs->set_power_state(&((adev)->smu))) - #define amdgpu_dpm_get_pp_num_states(adev, data) \ ((adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data)) @@ -347,10 +341,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ (adev)->powerplay.pp_handle, request)) -#define amdgpu_dpm_switch_power_profile(adev, type, en) \ - ((adev)->powerplay.pp_funcs->switch_power_profile(\ - (adev)->powerplay.pp_handle, type, en)) - #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \ ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) @@ -523,4 +513,24 @@ extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low); extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low); +int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, + uint32_t pstate); + +int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE type, + bool en); + +int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); + +int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); + +bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); + +int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, + enum pp_mp1_state mp1_state); + +int amdgpu_dpm_baco_exit(struct amdgpu_device *adev); + +int amdgpu_dpm_baco_enter(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5376328d3fd0..94e2fd758e01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -35,6 +35,7 @@ #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> #include <drm/drm_probe_helper.h> +#include <linux/mmu_notifier.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ -42,6 +43,8 @@ #include "amdgpu_amdkfd.h" +#include "amdgpu_ras.h" + /* * KMS wrapper. * - 3.0.0 - initial driver @@ -79,13 +82,14 @@ * - 3.31.0 - Add support for per-flip tiling attribute changes with DC * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. + * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches + * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask + * - 3.36.0 - Allow reading more status registers on si/cik */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 33 +#define KMS_DRIVER_MINOR 36 #define KMS_DRIVER_PATCHLEVEL 0 -#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 - int amdgpu_vram_limit = 0; int amdgpu_vis_vram_limit = 0; int amdgpu_gart_size = -1; /* auto */ @@ -98,7 +102,7 @@ int amdgpu_disp_priority = 0; int amdgpu_hw_i2c = 0; int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; -char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH]; +char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; int amdgpu_dpm = -1; int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; @@ -125,11 +129,7 @@ char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; /* OverDrive(bit 14) disabled by default*/ uint amdgpu_pp_feature_mask = 0xffffbfff; -int amdgpu_ngg = 0; -int amdgpu_prim_buf_per_se = 0; -int amdgpu_pos_buf_per_se = 0; -int amdgpu_cntl_sb_buf_per_se = 0; -int amdgpu_param_buf_per_se = 0; +uint amdgpu_force_long_training = 0; int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; @@ -143,12 +143,13 @@ int amdgpu_mcbp = 0; int amdgpu_discovery = -1; int amdgpu_mes = 0; int amdgpu_noretry; +int amdgpu_force_asic_type = -1; struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), }; int amdgpu_ras_enable = -1; -uint amdgpu_ras_mask = 0xfffffffb; +uint amdgpu_ras_mask = 0xffffffff; /** * DOC: vramlimit (int) @@ -241,16 +242,21 @@ module_param_named(msi, amdgpu_msi, int, 0444); * * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or * multiple values specified. 0 and negative values are invalidated. They will be adjusted - * to default timeout. - * - With one value specified, the setting will apply to all non-compute jobs. - * - With multiple values specified, the first one will be for GFX. The second one is for Compute. - * And the third and fourth ones are for SDMA and Video. + * to the default timeout. + * + * - With one value specified, the setting will apply to all non-compute jobs. + * - With multiple values specified, the first one will be for GFX. + * The second one is for Compute. The third and fourth ones are + * for SDMA and Video. + * * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) * jobs is 10000. And there is no timeout enforced on compute jobs. */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and infinity timeout for compute jobs." +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; " + "for passthrough or sriov, 10000 for all jobs." " 0: keep default value. negative: infinity timeout), " - "format is [Non-Compute] or [GFX,Compute,SDMA,Video]"); + "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " + "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); /** @@ -389,6 +395,14 @@ MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))"); module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444); /** + * DOC: forcelongtraining (uint) + * Force long memory training in resume. + * The default is zero, indicates short training in resume. + */ +MODULE_PARM_DESC(forcelongtraining, "force memory long training"); +module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444); + +/** * DOC: pcie_gen_cap (uint) * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h. * The default is 0 (automatic for each asic). @@ -446,42 +460,6 @@ MODULE_PARM_DESC(virtual_display, module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); /** - * DOC: ngg (int) - * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled). - */ -MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))"); -module_param_named(ngg, amdgpu_ngg, int, 0444); - -/** - * DOC: prim_buf_per_se (int) - * Override the size of Primitive Buffer per Shader Engine in Byte. The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)"); -module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444); - -/** - * DOC: pos_buf_per_se (int) - * Override the size of Position Buffer per Shader Engine in Byte. The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)"); -module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444); - -/** - * DOC: cntl_sb_buf_per_se (int) - * Override the size of Control Sideband per Shader Engine in Byte. The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)"); -module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); - -/** - * DOC: param_buf_per_se (int) - * Override the size of Off-Chip Parameter Cache per Shader Engine in Byte. - * The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)"); -module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); - -/** * DOC: job_hang_limit (int) * Set how much time allow a job hang and not drop it. The default is 0. */ @@ -613,6 +591,16 @@ MODULE_PARM_DESC(noretry, "Disable retry faults (0 = retry enabled (default), 1 = retry disabled)"); module_param_named(noretry, amdgpu_noretry, int, 0644); +/** + * DOC: force_asic_type (int) + * A non negative value used to specify the asic type for all supported GPUs. + */ +MODULE_PARM_DESC(force_asic_type, + "A non negative value used to specify the asic type for all supported GPUs"); +module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444); + + + #ifdef CONFIG_HSA_AMD /** * DOC: sched_policy (int) @@ -996,6 +984,11 @@ static const struct pci_device_id pciidlist[] = { /* Raven */ {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, + /* Arcturus */ + {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, /* Navi10 */ {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, @@ -1004,6 +997,18 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + /* Navi14 */ + {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + + /* Renoir */ + {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, + + /* Navi12 */ + {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, {0, 0, 0} }; @@ -1030,8 +1035,43 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENODEV; } +#ifdef CONFIG_DRM_AMDGPU_SI + if (!amdgpu_si_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(&pdev->dev, + "SI support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + if (!amdgpu_cik_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(&pdev->dev, + "CIK support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif + /* Get rid of things like offb */ - ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb"); + ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb"); if (ret) return ret; @@ -1074,7 +1114,10 @@ amdgpu_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - DRM_ERROR("Device removal is currently not supported outside of fbcon\n"); +#ifdef MODULE + if (THIS_MODULE->state != MODULE_STATE_GOING) +#endif + DRM_ERROR("Hotplug removal is not supported\n"); drm_dev_unplug(dev); drm_dev_put(dev); pci_disable_device(pdev); @@ -1087,92 +1130,117 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = dev->dev_private; + if (amdgpu_ras_intr_triggered()) + return; + /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown. * unfortunately we can't detect certain * hypervisors so just do this all the time. */ + adev->mp1_state = PP_MP1_STATE_UNLOAD; amdgpu_device_ip_suspend(adev); + adev->mp1_state = PP_MP1_STATE_NONE; } static int amdgpu_pmops_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); - return amdgpu_device_suspend(drm_dev, true, true); + return amdgpu_device_suspend(drm_dev, true); } static int amdgpu_pmops_resume(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct drm_device *drm_dev = dev_get_drvdata(dev); /* GPU comes up enabled by the bios on resume */ - if (amdgpu_device_is_px(drm_dev)) { + if (amdgpu_device_supports_boco(drm_dev) || + amdgpu_device_supports_baco(drm_dev)) { pm_runtime_disable(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); } - return amdgpu_device_resume(drm_dev, true, true); + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_freeze(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_dev->dev_private; + int r; - struct drm_device *drm_dev = pci_get_drvdata(pdev); - return amdgpu_device_suspend(drm_dev, false, true); + r = amdgpu_device_suspend(drm_dev, true); + if (r) + return r; + return amdgpu_asic_reset(adev); } static int amdgpu_pmops_thaw(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); - return amdgpu_device_resume(drm_dev, false, true); + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); - return amdgpu_device_suspend(drm_dev, true, true); + return amdgpu_device_suspend(drm_dev, true); } static int amdgpu_pmops_restore(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); - return amdgpu_device_resume(drm_dev, false, true); + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct drm_device *drm_dev = pci_get_drvdata(pdev); - int ret; + struct amdgpu_device *adev = drm_dev->dev_private; + int ret, i; - if (!amdgpu_device_is_px(drm_dev)) { + if (!adev->runpm) { pm_runtime_forbid(dev); return -EBUSY; } - drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + /* wait for all rings to drain before suspending */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + if (ring && ring->sched.ready) { + ret = amdgpu_fence_wait_empty(ring); + if (ret) + return -EBUSY; + } + } + + if (amdgpu_device_supports_boco(drm_dev)) + drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; drm_kms_helper_poll_disable(drm_dev); - ret = amdgpu_device_suspend(drm_dev, false, false); - pci_save_state(pdev); - pci_disable_device(pdev); - pci_ignore_hotplug(pdev); - if (amdgpu_is_atpx_hybrid()) - pci_set_power_state(pdev, PCI_D3cold); - else if (!amdgpu_has_atpx_dgpu_power_cntl()) - pci_set_power_state(pdev, PCI_D3hot); - drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; + ret = amdgpu_device_suspend(drm_dev, false); + if (amdgpu_device_supports_boco(drm_dev)) { + /* Only need to handle PCI state in the driver for ATPX + * PCI core handles it for _PR3. + */ + if (amdgpu_is_atpx_hybrid()) { + pci_ignore_hotplug(pdev); + } else { + pci_save_state(pdev); + pci_disable_device(pdev); + pci_ignore_hotplug(pdev); + pci_set_power_state(pdev, PCI_D3cold); + } + drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; + } else if (amdgpu_device_supports_baco(drm_dev)) { + amdgpu_device_baco_enter(drm_dev); + } return 0; } @@ -1181,35 +1249,45 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_dev->dev_private; int ret; - if (!amdgpu_device_is_px(drm_dev)) + if (!adev->runpm) return -EINVAL; - drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + if (amdgpu_device_supports_boco(drm_dev)) { + drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; - if (amdgpu_is_atpx_hybrid() || - !amdgpu_has_atpx_dgpu_power_cntl()) - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - ret = pci_enable_device(pdev); - if (ret) - return ret; - pci_set_master(pdev); - - ret = amdgpu_device_resume(drm_dev, false, false); + /* Only need to handle PCI state in the driver for ATPX + * PCI core handles it for _PR3. + */ + if (amdgpu_is_atpx_hybrid()) { + pci_set_master(pdev); + } else { + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + ret = pci_enable_device(pdev); + if (ret) + return ret; + pci_set_master(pdev); + } + } else if (amdgpu_device_supports_baco(drm_dev)) { + amdgpu_device_baco_exit(drm_dev); + } + ret = amdgpu_device_resume(drm_dev, false); drm_kms_helper_poll_enable(drm_dev); - drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; + if (amdgpu_device_supports_boco(drm_dev)) + drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; return 0; } static int amdgpu_pmops_runtime_idle(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_dev->dev_private; struct drm_crtc *crtc; - if (!amdgpu_device_is_px(drm_dev)) { + if (!adev->runpm) { pm_runtime_forbid(dev); return -EBUSY; } @@ -1299,66 +1377,6 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) return 0; } -int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) -{ - char *input = amdgpu_lockup_timeout; - char *timeout_setting = NULL; - int index = 0; - long timeout; - int ret = 0; - - /* - * By default timeout for non compute jobs is 10000. - * And there is no timeout enforced on compute jobs. - */ - adev->gfx_timeout = msecs_to_jiffies(10000); - adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; - adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; - - if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { - while ((timeout_setting = strsep(&input, ",")) && - strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { - ret = kstrtol(timeout_setting, 0, &timeout); - if (ret) - return ret; - - if (timeout == 0) { - index++; - continue; - } else if (timeout < 0) { - timeout = MAX_SCHEDULE_TIMEOUT; - } else { - timeout = msecs_to_jiffies(timeout); - } - - switch (index++) { - case 0: - adev->gfx_timeout = timeout; - break; - case 1: - adev->compute_timeout = timeout; - break; - case 2: - adev->sdma_timeout = timeout; - break; - case 3: - adev->video_timeout = timeout; - break; - default: - break; - } - } - /* - * There is only one value specified and - * it should apply to all non-compute jobs. - */ - if (index == 1) - adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; - } - - return ret; -} - static bool amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, bool in_vblank_irq, int *vpos, int *hpos, @@ -1373,7 +1391,8 @@ static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | DRIVER_ATOMIC | DRIVER_GEM | - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, @@ -1397,9 +1416,6 @@ static struct drm_driver kms_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = amdgpu_gem_prime_export, .gem_prime_import = amdgpu_gem_prime_import, - .gem_prime_res_obj = amdgpu_gem_prime_res_obj, - .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table, - .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, .gem_prime_vmap = amdgpu_gem_prime_vmap, .gem_prime_vunmap = amdgpu_gem_prime_vunmap, .gem_prime_mmap = amdgpu_gem_prime_mmap, @@ -1464,6 +1480,7 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); + mmu_notifier_synchronize(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index 571a6dfb473e..61fcf247a638 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -37,12 +37,14 @@ amdgpu_link_encoder_connector(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; + drm_connector_list_iter_begin(dev, &iter); /* walk the list and link encoders to connectors */ - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { amdgpu_encoder = to_amdgpu_encoder(encoder); @@ -55,6 +57,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev) } } } + drm_connector_list_iter_end(&iter); } void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) @@ -62,8 +65,10 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct drm_connector *connector; + struct drm_connector_list_iter iter; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices; @@ -72,6 +77,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) amdgpu_connector->devices, encoder->encoder_type); } } + drm_connector_list_iter_end(&iter); } struct drm_connector * @@ -79,15 +85,20 @@ amdgpu_get_connector_for_encoder(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct drm_connector *connector; + struct drm_connector *connector, *found = NULL; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); - if (amdgpu_encoder->active_device & amdgpu_connector->devices) - return connector; + if (amdgpu_encoder->active_device & amdgpu_connector->devices) { + found = connector; + break; + } } - return NULL; + drm_connector_list_iter_end(&iter); + return found; } struct drm_connector * @@ -95,15 +106,20 @@ amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct drm_connector *connector; + struct drm_connector *connector, *found = NULL; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); - if (amdgpu_encoder->devices & amdgpu_connector->devices) - return connector; + if (amdgpu_encoder->devices & amdgpu_connector->devices) { + found = connector; + break; + } } - return NULL; + drm_connector_list_iter_end(&iter); + return found; } struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index eb3569b46c1e..2672dc64a310 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -69,7 +69,7 @@ amdgpufb_release(struct fb_info *info, int user) return 0; } -static struct fb_ops amdgpufb_ops = { +static const struct fb_ops amdgpufb_ops = { .owner = THIS_MODULE, DRM_FB_HELPER_DEFAULT_OPS, .fb_open = amdgpufb_open, @@ -131,6 +131,10 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, int aligned_size, size; int height = mode_cmd->height; u32 cpp; + u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; info = drm_get_format_info(adev->ddev, mode_cmd); cpp = info->cpp[0]; @@ -138,15 +142,11 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, /* need to align pitch with crtc limits */ mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, fb_tiled); - domain = amdgpu_display_supported_domains(adev); - + domain = amdgpu_display_supported_domains(adev, flags); height = ALIGN(mode_cmd->height, 8); size = mode_cmd->pitches[0] * height; aligned_size = ALIGN(size, PAGE_SIZE); - ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, + ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags, ttm_bo_type_kernel, NULL, &gobj); if (ret) { pr_err("failed to allocate framebuffer (%d)\n", aligned_size); @@ -168,7 +168,6 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, dev_err(adev->dev, "FB failed to set tiling flags\n"); } - ret = amdgpu_bo_pin(abo, domain); if (ret) { amdgpu_bo_unreserve(abo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 23085b352cf2..3c01252b1e0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -34,6 +34,7 @@ #include <linux/kref.h> #include <linux/slab.h> #include <linux/firmware.h> +#include <linux/pm_runtime.h> #include <drm/drm_debugfs.h> @@ -154,7 +155,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, seq); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); - + pm_runtime_get_noresume(adev->ddev->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { struct dma_fence *old; @@ -234,6 +235,7 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring) bool amdgpu_fence_process(struct amdgpu_ring *ring) { struct amdgpu_fence_driver *drv = &ring->fence_drv; + struct amdgpu_device *adev = ring->adev; uint32_t seq, last_seq; int r; @@ -274,6 +276,8 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) BUG(); dma_fence_put(fence); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); } while (last_seq != seq); return true; @@ -462,18 +466,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, timeout = adev->gfx_timeout; break; case AMDGPU_RING_TYPE_COMPUTE: - /* - * For non-sriov case, no timeout enforce - * on compute ring by default. Unless user - * specifies a timeout for compute ring. - * - * For sriov case, always use the timeout - * as gfx ring - */ - if (!amdgpu_sriov_vf(ring->adev)) - timeout = adev->compute_timeout; - else - timeout = adev->gfx_timeout; + timeout = adev->compute_timeout; break; case AMDGPU_RING_TYPE_SDMA: timeout = adev->sdma_timeout; @@ -748,10 +741,18 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return 0; seq_printf(m, "gpu recover\n"); amdgpu_device_gpu_recover(adev, NULL); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index d79ab1da9e07..e01e681d2a60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -71,7 +71,7 @@ */ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) { - struct page *dummy_page = adev->mman.bdev.glob->dummy_read_page; + struct page *dummy_page = ttm_bo_glob.dummy_read_page; if (adev->dummy_page_addr) return 0; @@ -251,7 +251,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); + return 0; } @@ -300,6 +302,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, * @pages: number of pages to bind * @pagelist: pages to bind * @dma_addr: DMA addresses of pages + * @flags: page table entry flags * * Binds the requested pages to the gart page table * (all asics). @@ -310,9 +313,9 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, uint64_t flags) { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS - unsigned i,t,p; + unsigned t,p; #endif - int r; + int r, i; if (!adev->gart.ready) { WARN(1, "trying to bind memory to uninitialized GART !\n"); @@ -336,7 +339,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 939f8305511b..4277125a79ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -50,7 +50,7 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj) int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, + struct dma_resv *resv, struct drm_gem_object **obj) { struct amdgpu_bo *bo; @@ -85,7 +85,7 @@ retry: } return r; } - *obj = &bo->gem_base; + *obj = &bo->tbo.base; return 0; } @@ -134,7 +134,7 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, return -EPERM; if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID && - abo->tbo.resv != vm->root.base.bo->tbo.resv) + abo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) return -EPERM; r = amdgpu_bo_reserve(abo, false); @@ -175,7 +175,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); - r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, false); + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); if (r) { dev_err(adev->dev, "leaking bo va because " "we fail to reserve bo (%d)\n", r); @@ -215,7 +215,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, union drm_amdgpu_gem_create *args = data; uint64_t flags = args->in.domain_flags; uint64_t size = args->in.bo_size; - struct reservation_object *resv = NULL; + struct dma_resv *resv = NULL; struct drm_gem_object *gobj; uint32_t handle; int r; @@ -252,7 +252,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (r) return r; - resv = vm->root.base.bo->tbo.resv; + resv = vm->root.base.bo->tbo.base.resv; } r = amdgpu_gem_object_create(adev, size, args->in.alignment, @@ -291,6 +291,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, uint32_t handle; int r; + args->addr = untagged_addr(args->addr); + if (offset_in_page(args->addr | args->size)) return -EINVAL; @@ -433,7 +435,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, + ret = dma_resv_wait_timeout_rcu(robj->tbo.base.resv, true, true, timeout); /* ret == 0 means not signaled, @@ -525,13 +527,41 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, goto error; } - r = amdgpu_vm_update_directories(adev, vm); + r = amdgpu_vm_update_pdes(adev, vm, false); error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); } +/** + * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags + * + * @adev: amdgpu_device pointer + * @flags: GEM UAPI flags + * + * Returns the GEM UAPI flags mapped into hardware for the ASIC. + */ +uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + if (adev->gmc.gmc_funcs->map_mtype) + pte_flag |= amdgpu_gmc_map_mtype(adev, + flags & AMDGPU_VM_MTYPE_MASK); + + return pte_flag; +} + int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -611,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); - r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, false); + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); if (r) goto error_unref; @@ -629,7 +659,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); + va_flags = amdgpu_gem_va_map_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -644,7 +674,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); + va_flags = amdgpu_gem_va_map_flags(adev, args->flags); r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -689,7 +719,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct drm_amdgpu_gem_create_in info; void __user *out = u64_to_user_ptr(args->value); - info.bo_size = robj->gem_base.size; + info.bo_size = robj->tbo.base.size; info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; info.domains = robj->preferred_domains; info.domain_flags = robj->flags; @@ -747,7 +777,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct amdgpu_device *adev = dev->dev_private; struct drm_gem_object *gobj; uint32_t handle; - u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; u32 domain; int r; @@ -764,7 +795,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); domain = amdgpu_bo_get_preferred_pin_domain(adev, - amdgpu_display_supported_domains(adev)); + amdgpu_display_supported_domains(adev, flags)); r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags, ttm_bo_type_device, NULL, &gobj); if (r) @@ -819,8 +850,8 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) if (pin_count) seq_printf(m, " pin count %d", pin_count); - dma_buf = READ_ONCE(bo->gem_base.dma_buf); - attachment = READ_ONCE(bo->gem_base.import_attach); + dma_buf = READ_ONCE(bo->tbo.base.dma_buf); + attachment = READ_ONCE(bo->tbo.base.import_attach); if (attachment) seq_printf(m, " imported from %p", dma_buf); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index b8ba6e27c61f..e0f025dd1b14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -31,7 +31,7 @@ */ #define AMDGPU_GEM_DOMAIN_MAX 0x3 -#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base) +#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base) void amdgpu_gem_object_free(struct drm_gem_object *obj); int amdgpu_gem_object_open(struct drm_gem_object *obj, @@ -47,7 +47,7 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev); int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, + struct dma_resv *resv, struct drm_gem_object **obj); int amdgpu_mode_dumb_create(struct drm_file *file_priv, @@ -67,6 +67,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags); int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 74066e1466f7..0f960b498792 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" +#include "amdgpu_ras.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -231,12 +232,10 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) { - int i, queue, pipe, me; + int i, queue, me; for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) { queue = i % adev->gfx.me.num_queue_per_pipe; - pipe = (i / adev->gfx.me.num_queue_per_pipe) - % adev->gfx.me.num_pipe_per_me; me = (i / adev->gfx.me.num_queue_per_pipe) / adev->gfx.me.num_pipe_per_me; @@ -297,7 +296,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, spin_lock_init(&kiq->ring_lock); - r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs); + r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); if (r) return r; @@ -320,10 +319,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, return r; } -void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { - amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); amdgpu_ring_fini(ring); } @@ -389,7 +387,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } - if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { /* create MQD for each KGQ */ for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; @@ -437,7 +435,7 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) struct amdgpu_ring *ring = NULL; int i; - if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; kfree(adev->gfx.me.mqd_backup[i]); @@ -456,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) } ring = &adev->gfx.kiq.ring; - if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) - kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]); kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, @@ -547,12 +543,6 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return; - if (!is_support_sw_smu(adev) && - (!adev->powerplay.pp_funcs || - !adev->powerplay.pp_funcs->set_powergating_by_smu)) - return; - - mutex_lock(&adev->gfx.gfx_off_mutex); if (!enable) @@ -569,3 +559,194 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) mutex_unlock(&adev->gfx.gfx_off_mutex); } + +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_fs_if fs_info = { + .sysfs_name = "gfx_err_count", + .debugfs_name = "gfx_err_inject", + }; + struct ras_ih_if ih_info = { + .cb = amdgpu_gfx_process_ras_data_cb, + }; + + if (!adev->gfx.ras_if) { + adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->gfx.ras_if) + return -ENOMEM; + adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; + adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gfx.ras_if->sub_block_index = 0; + strcpy(adev->gfx.ras_if->name, "gfx"); + } + fs_info.head = ih_info.head = *adev->gfx.ras_if; + + r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, + &fs_info, &ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (r) + goto late_fini; + } else { + /* free gfx ras_if if ras is not supported */ + r = 0; + goto free; + } + + return 0; +late_fini: + amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info); +free: + kfree(adev->gfx.ras_if); + adev->gfx.ras_if = NULL; + return r; +} + +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && + adev->gfx.ras_if) { + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + .cb = amdgpu_gfx_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + +int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry) +{ + /* TODO ue will trigger an interrupt. + * + * When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, err_data); + amdgpu_ras_reset_gpu(adev); + } + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + DRM_ERROR("CP ECC ERROR IRQ\n"); + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} + +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + + BUG_ON(!ring->funcs->emit_rreg); + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_rreg(ring, reg); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_read; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_read; + + return adev->wb.wb[kiq->reg_val_offs]; + +failed_kiq_read: + pr_err("failed to read reg:%x\n", reg); + return ~0; +} + +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + + BUG_ON(!ring->funcs->emit_wreg); + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_wreg(ring, reg, v); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_write; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_write; + + return; + +failed_kiq_write: + pr_err("failed to write reg:%x\n", reg); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 1199b5828b90..ca17ffb01301 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { struct amdgpu_ring *ring, u64 addr, u64 seq); + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub); /* Packet sizes */ int set_resources_size; int map_queues_size; int unmap_queues_size; int query_status_size; + int invalidate_tlbs_size; }; struct amdgpu_kiq { @@ -90,6 +94,7 @@ struct amdgpu_kiq { struct amdgpu_ring ring; struct amdgpu_irq_src irq; const struct kiq_pm4_funcs *pmf; + uint32_t reg_val_offs; }; /* @@ -165,6 +170,7 @@ struct amdgpu_gfx_config { uint32_t num_sc_per_sh; uint32_t num_packer_per_sc; uint32_t pa_sc_tile_steering_override; + uint64_t tcc_disabled_mask; }; struct amdgpu_cu_info { @@ -196,28 +202,8 @@ struct amdgpu_gfx_funcs { uint32_t *dst); void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); -}; - -struct amdgpu_ngg_buf { - struct amdgpu_bo *bo; - uint64_t gpu_addr; - uint32_t size; - uint32_t bo_size; -}; - -enum { - NGG_PRIM = 0, - NGG_POS, - NGG_CNTL, - NGG_PARAM, - NGG_BUF_MAX -}; - -struct amdgpu_ngg { - struct amdgpu_ngg_buf buf[NGG_BUF_MAX]; - uint32_t gds_reserve_addr; - uint32_t gds_reserve_size; - bool init; + int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); + int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status); }; struct sq_work { @@ -244,7 +230,7 @@ struct amdgpu_me { uint32_t num_me; uint32_t num_pipe_per_me; uint32_t num_queue_per_pipe; - void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1]; + void *mqd_backup[AMDGPU_MAX_GFX_RINGS]; /* These are the resources for which amdgpu takes ownership */ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); @@ -286,9 +272,14 @@ struct amdgpu_gfx { uint32_t mec2_feature_version; bool mec_fw_write_wait; bool me_fw_write_wait; + bool cp_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; + struct drm_gpu_scheduler *gfx_sched[AMDGPU_MAX_GFX_RINGS]; + uint32_t num_gfx_sched; unsigned num_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; + struct drm_gpu_scheduler *compute_sched[AMDGPU_MAX_COMPUTE_RINGS]; + uint32_t num_compute_sched; unsigned num_compute_rings; struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; @@ -308,9 +299,6 @@ struct amdgpu_gfx { uint32_t grbm_soft_reset; uint32_t srbm_soft_reset; - /* NGG */ - struct amdgpu_ngg ngg; - /* gfx off */ bool gfx_off_state; /* true: enabled, false: disabled */ struct mutex gfx_off_mutex; @@ -352,8 +340,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_irq_src *irq); -void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq); +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring); void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, @@ -381,5 +368,14 @@ void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); - +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev); +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); +int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry); +int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 924d83e711ef..5884ab590486 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -27,6 +27,8 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include "amdgpu.h" +#include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" /** * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO @@ -220,6 +222,14 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1); u64 size_af, size_bf; + if (amdgpu_sriov_vf(adev)) { + mc->agp_start = 0xffffffffffff; + mc->agp_end = 0x0; + mc->agp_size = 0; + + return; + } + if (mc->fb_start > mc->gart_start) { size_bf = (mc->fb_start & sixteen_gb_mask) - ALIGN(mc->gart_end + 1, sixteen_gb); @@ -297,3 +307,69 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, gmc->fault_hash[hash].idx = gmc->last_fault++; return false; } + +int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->umc.funcs && adev->umc.funcs->ras_late_init) { + r = adev->umc.funcs->ras_late_init(adev); + if (r) + return r; + } + + if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) { + r = adev->mmhub.funcs->ras_late_init(adev); + if (r) + return r; + } + + return amdgpu_xgmi_ras_late_init(adev); +} + +void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_umc_ras_fini(adev); + amdgpu_mmhub_ras_fini(adev); + amdgpu_xgmi_ras_fini(adev); +} + + /* + * The latest engine allocation on gfx9/10 is: + * Engine 2, 3: firmware + * Engine 0, 1, 4~16: amdgpu ring, + * subject to change when ring number changes + * Engine 17: Gart flushes + */ +#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 +#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 + +int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = + {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP, + GFXHUB_FREE_VM_INV_ENGS_BITMAP}; + unsigned i; + unsigned vmhub, inv_eng; + + for (i = 0; i < adev->num_rings; ++i) { + ring = adev->rings[i]; + vmhub = ring->funcs->vmhub; + + inv_eng = ffs(vm_inv_engs[vmhub]); + if (!inv_eng) { + dev_err(adev->dev, "no VM inv eng for ring %s\n", + ring->name); + return -EINVAL; + } + + ring->vm_inv_eng = inv_eng - 1; + vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng); + + dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", + ring->name, ring->vm_inv_eng, ring->funcs->vmhub); + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 071145ac67b5..d3c27a3c43f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -77,6 +77,7 @@ struct amdgpu_gmc_fault { struct amdgpu_vmhub { uint32_t ctx0_ptb_addr_lo32; uint32_t ctx0_ptb_addr_hi32; + uint32_t vm_inv_eng0_sem; uint32_t vm_inv_eng0_req; uint32_t vm_inv_eng0_ack; uint32_t vm_context0_cntl; @@ -89,8 +90,11 @@ struct amdgpu_vmhub { */ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ - void (*flush_gpu_tlb)(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type); + void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type); + /* flush the vm tlb via pasid */ + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type, bool all_hub); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -99,12 +103,15 @@ struct amdgpu_gmc_funcs { unsigned pasid); /* enable/disable PRT support */ void (*set_prt)(struct amdgpu_device *adev, bool enable); - /* set pte flags based per asic */ - uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, - uint32_t flags); + /* map mtype to hardware flags */ + uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags); /* get the pde for a given mc addr */ void (*get_vm_pde)(struct amdgpu_device *adev, int level, u64 *dst, u64 *flags); + /* get the pte flags to use for a BO VA mapping */ + void (*get_vm_pte)(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags); }; struct amdgpu_xgmi { @@ -120,21 +127,52 @@ struct amdgpu_xgmi { /* gpu list in the same hive */ struct list_head head; bool supported; + struct ras_common_if *ras_if; }; struct amdgpu_gmc { + /* FB's physical address in MMIO space (for CPU to + * map FB). This is different compared to the agp/ + * gart/vram_start/end field as the later is from + * GPU's view and aper_base is from CPU's view. + */ resource_size_t aper_size; resource_size_t aper_base; /* for some chips with <= 32MB we need to lie * about vram size near mc fb location */ u64 mc_vram_size; u64 visible_vram_size; + /* AGP aperture start and end in MC address space + * Driver find a hole in the MC address space + * to place AGP by setting MC_VM_AGP_BOT/TOP registers + * Under VMID0, logical address == MC address. AGP + * aperture maps to physical bus or IOVA addressed. + * AGP aperture is used to simulate FB in ZFB case. + * AGP aperture is also used for page table in system + * memory (mainly for APU). + * + */ u64 agp_size; u64 agp_start; u64 agp_end; + /* GART aperture start and end in MC address space + * Driver find a hole in the MC address space + * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR + * registers + * Under VMID0, logical address inside GART aperture will + * be translated through gpuvm gart page table to access + * paged system memory + */ u64 gart_size; u64 gart_start; u64 gart_end; + /* Frame buffer aperture of this GPU device. Different from + * fb_start (see below), this only covers the local GPU device. + * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios) + * and calculate vram_start of this local device by adding an + * offset inside the XGMI hive. + * Under VMID0, logical address == MC address + */ u64 vram_start; u64 vram_end; /* FB region , it's same as local vram region in single GPU, in XGMI @@ -153,6 +191,7 @@ struct amdgpu_gmc { uint32_t fw_version; struct amdgpu_irq_src vm_fault; uint32_t vram_type; + uint8_t vram_vendor; uint32_t srbm_soft_reset; bool prt_warning; uint64_t stolen_size; @@ -177,14 +216,17 @@ struct amdgpu_gmc { struct amdgpu_xgmi xgmi; struct amdgpu_irq_src ecc_irq; - struct ras_common_if *ras_if; }; -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ + ((adev), (pasid), (type), (allhub))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) +#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) -#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) +#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags)) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR @@ -229,5 +271,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid, uint64_t timestamp); +int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); +void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); +int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 7850084a05e3..60655834d649 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -143,7 +143,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* ring tests don't use a job */ if (job) { vm = job->vm; - fence_ctx = job->base.s_fence->scheduled.context; + fence_ctx = job->base.s_fence ? + job->base.s_fence->scheduled.context : 0; } else { vm = NULL; fence_ctx = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 57b3d8a9bef3..3a67f6c046d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -104,7 +104,7 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence, * * Free the pasid only after all the fences in resv are signaled. */ -void amdgpu_pasid_free_delayed(struct reservation_object *resv, +void amdgpu_pasid_free_delayed(struct dma_resv *resv, unsigned int pasid) { struct dma_fence *fence, **fences; @@ -112,7 +112,7 @@ void amdgpu_pasid_free_delayed(struct reservation_object *resv, unsigned count; int r; - r = reservation_object_get_fences_rcu(resv, NULL, &count, &fences); + r = dma_resv_get_fences_rcu(resv, NULL, &count, &fences); if (r) goto fallback; @@ -156,7 +156,7 @@ fallback: /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. */ - reservation_object_wait_timeout_rcu(resv, true, false, + dma_resv_wait_timeout_rcu(resv, true, false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } @@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, int r; if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) - return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false); + return amdgpu_sync_fence(sync, ring->vmid_wait, false); fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); if (!fences) @@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_sync_fence(adev, sync, &array->base, false); + r = amdgpu_sync_fence(sync, &array->base, false); dma_fence_put(ring->vmid_wait); ring->vmid_wait = &array->base; return r; @@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, !dma_fence_is_later(updates, (*id)->flushed_updates)) updates = NULL; - if ((*id)->owner != vm->entity.fence_context || + if ((*id)->owner != vm->direct.fence_context || job->vm_pd_addr != (*id)->pd_gpu_addr || updates || !(*id)->last_flush || ((*id)->last_flush->context != fence_context && @@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - r = amdgpu_sync_fence(adev, sync, tmp, false); + r = amdgpu_sync_fence(sync, tmp, false); return r; } needs_flush = true; @@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, /* Good we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false); + r = amdgpu_sync_fence(&(*id)->active, fence, false); if (r) return r; @@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ - if ((*id)->owner != vm->entity.fence_context) + if ((*id)->owner != vm->direct.fence_context) continue; if ((*id)->pd_gpu_addr != job->vm_pd_addr) @@ -368,13 +368,14 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, * are broken on Navi10 and Navi14. */ if (needs_flush && (adev->asic_type < CHIP_VEGA10 || - adev->asic_type == CHIP_NAVI10)) + adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14)) continue; /* Good, we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false); + r = amdgpu_sync_fence(&(*id)->active, fence, false); if (r) return r; @@ -434,8 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id = idle; /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(ring->adev, &id->active, - fence, false); + r = amdgpu_sync_fence(&id->active, fence, false); if (r) goto error; @@ -448,7 +448,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } id->pd_gpu_addr = job->vm_pd_addr; - id->owner = vm->entity.fence_context; + id->owner = vm->direct.fence_context; if (job->vm_needs_flush) { dma_fence_put(id->last_flush); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 7625419f0fc2..8e58325bbca2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -72,7 +72,7 @@ struct amdgpu_vmid_mgr { int amdgpu_pasid_alloc(unsigned int bits); void amdgpu_pasid_free(unsigned int pasid); -void amdgpu_pasid_free_delayed(struct reservation_object *resv, +void amdgpu_pasid_free_delayed(struct dma_resv *resv, unsigned int pasid); bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 6d8f05511aba..111a301ce878 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -66,7 +66,6 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, if (ih->ring == NULL) return -ENOMEM; - memset((void *)ih->ring, 0, ih->ring_size + 8); ih->gpu_addr = dma_addr; ih->wptr_addr = dma_addr + ih->ring_size; ih->wptr_cpu = &ih->ring[ih->ring_size / 4]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 2a3f5ec298db..5ed4227f304b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -55,6 +55,7 @@ #include "amdgpu_connectors.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_ras.h" #include <linux/pm_runtime.h> @@ -87,10 +88,13 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) struct drm_device *dev = adev->ddev; struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; + struct drm_connector_list_iter iter; mutex_lock(&mode_config->mutex); - list_for_each_entry(connector, &mode_config->connector_list, head) + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) amdgpu_connector_hotplug(connector); + drm_connector_list_iter_end(&iter); mutex_unlock(&mode_config->mutex); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(dev); @@ -153,6 +157,22 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) ret = amdgpu_ih_process(adev, &adev->irq.ih); if (ret == IRQ_HANDLED) pm_runtime_mark_last_busy(dev->dev); + + /* For the hardware that cannot enable bif ring for both ras_controller_irq + * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status + * register to check whether the interrupt is triggered or not, and properly + * ack the interrupt if it is there + */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) { + if (adev->nbio.funcs && + adev->nbio.funcs->handle_ras_controller_intr_no_bifring) + adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev); + + if (adev->nbio.funcs && + adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring) + adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev); + } + return ret; } @@ -228,10 +248,19 @@ int amdgpu_irq_init(struct amdgpu_device *adev) adev->irq.msi_enabled = false; if (amdgpu_msi_ok(adev)) { - int ret = pci_enable_msi(adev->pdev); - if (!ret) { + int nvec = pci_msix_vec_count(adev->pdev); + unsigned int flags; + + if (nvec <= 0) { + flags = PCI_IRQ_MSI; + } else { + flags = PCI_IRQ_MSI | PCI_IRQ_MSIX; + } + /* we only need one vector */ + nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags); + if (nvec > 0) { adev->irq.msi_enabled = true; - dev_dbg(adev->dev, "amdgpu: using MSI.\n"); + dev_dbg(adev->dev, "amdgpu: using MSI/MSI-X.\n"); } } @@ -254,7 +283,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev) INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); adev->irq.installed = true; - r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); + /* Use vector 0 for MSI-X */ + r = drm_irq_install(adev->ddev, pci_irq_vector(adev->pdev, 0)); if (r) { adev->irq.installed = false; if (!amdgpu_device_has_dc_support(adev)) @@ -284,7 +314,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) drm_irq_uninstall(adev->ddev); adev->irq.installed = false; if (adev->irq.msi_enabled) - pci_disable_msi(adev->pdev); + pci_free_irq_vectors(adev->pdev); if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); } @@ -369,7 +399,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, * amdgpu_irq_dispatch - dispatch IRQ to IP blocks * * @adev: amdgpu device pointer - * @entry: interrupt vector pointer + * @ih: interrupt ring instance * * Dispatches IRQ to IP blocks. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 9d76e0923a5a..d42be880a236 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -153,7 +153,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; - job->owner = owner; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); priority = job->base.s_priority; @@ -193,8 +192,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, fence = amdgpu_sync_get_fence(&job->sync, &explicit); if (fence && explicit) { if (drm_sched_dependency_optimized(fence, s_entity)) { - r = amdgpu_sync_fence(ring->adev, &job->sched_sync, - fence, false); + r = amdgpu_sync_fence(&job->sched_sync, fence, false); if (r) DRM_ERROR("Error adding fence (%d)\n", r); } @@ -218,7 +216,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched); struct dma_fence *fence = NULL, *finished; struct amdgpu_job *job; - int r; + int r = 0; job = to_amdgpu_job(sched_job); finished = &job->base.s_fence->finished; @@ -243,9 +241,49 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) job->fence = dma_fence_get(fence); amdgpu_job_free_resources(job); + + fence = r ? ERR_PTR(r) : fence; return fence; } +#define to_drm_sched_job(sched_job) \ + container_of((sched_job), struct drm_sched_job, queue_node) + +void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *s_job; + struct drm_sched_entity *s_entity = NULL; + int i; + + /* Signal all jobs not yet scheduled */ + for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + struct drm_sched_rq *rq = &sched->sched_rq[i]; + + if (!rq) + continue; + + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) { + while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { + struct drm_sched_fence *s_fence = s_job->s_fence; + + dma_fence_signal(&s_fence->scheduled); + dma_fence_set_error(&s_fence->finished, -EHWPOISON); + dma_fence_signal(&s_fence->finished); + } + } + spin_unlock(&rq->lock); + } + + /* Signal all jobs already scheduled to HW */ + list_for_each_entry(s_job, &sched->ring_mirror_list, node) { + struct drm_sched_fence *s_fence = s_job->s_fence; + + dma_fence_set_error(&s_fence->finished, -EHWPOISON); + dma_fence_signal(&s_fence->finished); + } +} + const struct drm_sched_backend_ops amdgpu_sched_ops = { .dependency = amdgpu_job_dependency, .run_job = amdgpu_job_run, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 51e62504c279..3f7b8433d179 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -49,7 +49,6 @@ struct amdgpu_job { uint32_t preamble_status; uint32_t preemption_status; uint32_t num_ibs; - void *owner; bool vm_needs_flush; uint64_t vm_pd_addr; unsigned vmid; @@ -76,4 +75,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f); int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, struct dma_fence **fence); + +void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c new file mode 100644 index 000000000000..5727f00afc8e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -0,0 +1,211 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15d.h" +#include "soc15_common.h" + +#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000) + +static void amdgpu_jpeg_idle_work_handler(struct work_struct *work); + +int amdgpu_jpeg_sw_init(struct amdgpu_device *adev) +{ + INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler); + + return 0; +} + +int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) +{ + int i; + + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec); + } + + return 0; +} + +int amdgpu_jpeg_suspend(struct amdgpu_device *adev) +{ + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + return 0; +} + +int amdgpu_jpeg_resume(struct amdgpu_device *adev) +{ + return 0; +} + +static void amdgpu_jpeg_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, jpeg.idle_work.work); + unsigned int fences = 0; + unsigned int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec); + } + + if (fences == 0) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_GATE); + else + schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); +} + +void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work); + + if (set_clocks) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_UNGATE); +} + +void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring) +{ + schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); +} + +int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) + return r; + + amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); + if (tmp == 0xDEADBEEF) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + return r; +} + +static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + const unsigned ib_size_dw = 16; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + + ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0); + ib->ptr[1] = 0xDEADBEEF; + for (i = 2; i < 16; i += 2) { + ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); + ib->ptr[i+1] = 0; + } + ib->length_dw = 16; + + r = amdgpu_job_submit_direct(job, ring, &f); + if (r) + goto err; + + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + struct dma_fence *fence = NULL; + long r = 0; + + r = amdgpu_jpeg_dec_set_reg(ring, 1, &fence); + if (r) + goto error; + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + r = -ETIMEDOUT; + goto error; + } else if (r < 0) { + goto error; + } else { + r = 0; + } + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); + if (tmp == 0xDEADBEEF) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + dma_fence_put(fence); +error: + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h new file mode 100644 index 000000000000..bd9ef9cc86de --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -0,0 +1,64 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_JPEG_H__ +#define __AMDGPU_JPEG_H__ + +#define AMDGPU_MAX_JPEG_INSTANCES 2 + +#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) +#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) + +struct amdgpu_jpeg_reg{ + unsigned jpeg_pitch; +}; + +struct amdgpu_jpeg_inst { + struct amdgpu_ring ring_dec; + struct amdgpu_irq_src irq; + struct amdgpu_jpeg_reg external; +}; + +struct amdgpu_jpeg { + uint8_t num_jpeg_inst; + struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES]; + struct amdgpu_jpeg_reg internal; + struct drm_gpu_scheduler *jpeg_sched[AMDGPU_MAX_JPEG_INSTANCES]; + uint32_t num_jpeg_sched; + unsigned harvest_config; + struct delayed_work idle_work; + enum amd_powergating_state cur_state; +}; + +int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); +int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev); +int amdgpu_jpeg_suspend(struct amdgpu_device *adev); +int amdgpu_jpeg_resume(struct amdgpu_device *adev); + +void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring); + +int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); + +#endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 0cf7e8606fd3..60591dbc2097 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -91,7 +91,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) if (amdgpu_sriov_vf(adev)) amdgpu_virt_request_full_gpu(adev, false); - if (amdgpu_device_is_px(dev)) { + if (adev->runpm) { pm_runtime_get_sync(dev->dev); pm_runtime_forbid(dev->dev); } @@ -144,49 +144,13 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) struct amdgpu_device *adev; int r, acpi_status; -#ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - dev_info(dev->dev, - "SI support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: - dev_info(dev->dev, - "CIK support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif - adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); if (adev == NULL) { return -ENOMEM; } dev->dev_private = (void *)adev; - if ((amdgpu_runtime_pm != 0) && - amdgpu_has_atpx() && + if (amdgpu_has_atpx() && (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && ((flags & AMD_IS_APU) == 0) && @@ -205,6 +169,13 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) goto out; } + if (amdgpu_device_supports_boco(dev) && + (amdgpu_runtime_pm != 0)) /* enable runpm by default */ + adev->runpm = true; + else if (amdgpu_device_supports_baco(dev) && + (amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 */ + adev->runpm = true; + /* Call ACPI methods: require modeset init * but failure is not fatal */ @@ -215,7 +186,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - if (amdgpu_device_is_px(dev)) { + if (adev->runpm) { dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP); pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); @@ -225,11 +196,10 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) pm_runtime_put_autosuspend(dev->dev); } - amdgpu_register_gpu_instance(adev); out: if (r) { /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ - if (adev->rmmio && amdgpu_device_is_px(dev)) + if (adev->rmmio && adev->runpm) pm_runtime_put_noidle(dev->dev); amdgpu_driver_unload_kms(dev); } @@ -329,6 +299,10 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->dm.dmcu_fw_version; fw_info->feature = 0; break; + case AMDGPU_INFO_FW_DMCUB: + fw_info->ver = adev->dm.dmcub_fw_version; + fw_info->feature = 0; + break; default: return -EINVAL; } @@ -408,23 +382,40 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.ring_dec.sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->uvd.harvest_config & (1 << i)) + continue; + + if (adev->vcn.inst[i].ring_dec.sched.ready) + ++num_rings; + } ib_start_alignment = 16; ib_size_alignment = 16; break; case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; - for (i = 0; i < adev->vcn.num_enc_rings; i++) - if (adev->vcn.ring_enc[i].sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->uvd.harvest_config & (1 << i)) + continue; + + for (j = 0; j < adev->vcn.num_enc_rings; j++) + if (adev->vcn.inst[i].ring_enc[j].sched.ready) + ++num_rings; + } ib_start_alignment = 64; ib_size_alignment = 1; break; case AMDGPU_HW_IP_VCN_JPEG: - type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.ring_jpeg.sched.ready) - ++num_rings; + type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? + AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (adev->jpeg.inst[i].ring_dec.sched.ready) + ++num_rings; + } ib_start_alignment = 16; ib_size_alignment = 16; break; @@ -538,9 +529,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file break; case AMDGPU_HW_IP_VCN_DEC: case AMDGPU_HW_IP_VCN_ENC: - case AMDGPU_HW_IP_VCN_JPEG: type = AMD_IP_BLOCK_TYPE_VCN; break; + case AMDGPU_HW_IP_VCN_JPEG: + type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? + AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN; + break; default: return -EINVAL; } @@ -604,9 +598,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_info_vram_gtt vram_gtt; vram_gtt.vram_size = adev->gmc.real_vram_size - - atomic64_read(&adev->vram_pin_size); - vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size - - atomic64_read(&adev->visible_pin_size); + atomic64_read(&adev->vram_pin_size) - + AMDGPU_VM_RESERVED_VRAM; + vram_gtt.vram_cpu_accessible_size = + min(adev->gmc.visible_vram_size - + atomic64_read(&adev->visible_pin_size), + vram_gtt.vram_size); vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size; vram_gtt.gtt_size *= PAGE_SIZE; vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size); @@ -619,15 +616,18 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file memset(&mem, 0, sizeof(mem)); mem.vram.total_heap_size = adev->gmc.real_vram_size; mem.vram.usable_heap_size = adev->gmc.real_vram_size - - atomic64_read(&adev->vram_pin_size); + atomic64_read(&adev->vram_pin_size) - + AMDGPU_VM_RESERVED_VRAM; mem.vram.heap_usage = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = adev->gmc.visible_vram_size; - mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size - - atomic64_read(&adev->visible_pin_size); + mem.cpu_accessible_vram.usable_heap_size = + min(adev->gmc.visible_vram_size - + atomic64_read(&adev->visible_pin_size), + mem.vram.usable_heap_size); mem.cpu_accessible_vram.heap_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.cpu_accessible_vram.max_allocation = @@ -662,20 +662,27 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) sh_num = 0xffffffff; + if (info->read_mmr_reg.count > 128) + return -EINVAL; + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); if (!regs) return -ENOMEM; alloc_size = info->read_mmr_reg.count * sizeof(*regs); - for (i = 0; i < info->read_mmr_reg.count; i++) + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < info->read_mmr_reg.count; i++) { if (amdgpu_asic_read_register(adev, se_num, sh_num, info->read_mmr_reg.dword_offset + i, ®s[i])) { DRM_DEBUG_KMS("unallowed offset %#x\n", info->read_mmr_reg.dword_offset + i); kfree(regs); + amdgpu_gfx_off_ctrl(adev, true); return -EFAULT; } + } + amdgpu_gfx_off_ctrl(adev, true); n = copy_to_user(out, regs, min(size, alloc_size)); kfree(regs); return n ? -EFAULT : 0; @@ -696,10 +703,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (adev->pm.dpm_enabled) { dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; - } else if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && - adev->virt.ops->get_pp_clk) { - dev_info.max_engine_clock = amdgpu_virt_get_sclk(adev, false) * 10; - dev_info.max_memory_clock = amdgpu_virt_get_mclk(adev, false) * 10; } else { dev_info.max_engine_clock = adev->clock.default_sclk * 10; dev_info.max_memory_clock = adev->clock.default_mclk * 10; @@ -746,17 +749,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.vce_harvest_config = adev->vce.harvest_config; dev_info.gc_double_offchip_lds_buf = adev->gfx.config.double_offchip_lds_buf; - - if (amdgpu_ngg) { - dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr; - dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size; - dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr; - dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size; - dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr; - dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size; - dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr; - dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size; - } dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; @@ -769,6 +761,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.pa_sc_tile_steering_override = adev->gfx.config.pa_sc_tile_steering_override; + dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; + return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; } @@ -983,6 +977,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) /* Ensure IB tests are run on ring */ flush_delayed_work(&adev->delayed_init_work); + + if (amdgpu_ras_intr_triggered()) { + DRM_ERROR("RAS Intr triggered, device disabled!!"); + return -EHWPOISON; + } + file_priv->driver_priv = NULL; r = pm_runtime_get_sync(dev->dev); @@ -1088,7 +1088,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_vm_fini(adev, &fpriv->vm); if (pasid) - amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); + amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid); amdgpu_bo_unref(&pd); idr_for_each_entry(&fpriv->bo_list_handles, list, handle) @@ -1405,6 +1405,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* DMCUB */ + query_fw.fw_type = AMDGPU_INFO_FW_DMCUB; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "DMCUB feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c new file mode 100644 index 000000000000..676c48c02d77 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c @@ -0,0 +1,70 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_ras.h" + +int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "mmhub_err_count", + .debugfs_name = "mmhub_err_inject", + }; + + if (!adev->mmhub.ras_if) { + adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->mmhub.ras_if) + return -ENOMEM; + adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB; + adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->mmhub.ras_if->sub_block_index = 0; + strcpy(adev->mmhub.ras_if->name, "mmhub"); + } + ih_info.head = fs_info.head = *adev->mmhub.ras_if; + r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) { + kfree(adev->mmhub.ras_if); + adev->mmhub.ras_if = NULL; + } + + return r; +} + +void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && + adev->mmhub.ras_if) { + struct ras_common_if *ras_if = adev->mmhub.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h new file mode 100644 index 000000000000..1cd78940cf82 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_MMHUB_H__ +#define __AMDGPU_MMHUB_H__ + +struct amdgpu_mmhub_funcs { + void (*ras_init)(struct amdgpu_device *adev); + int (*ras_late_init)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); +}; + +struct amdgpu_mmhub { + struct ras_common_if *ras_if; + const struct amdgpu_mmhub_funcs *funcs; +}; + +int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev); +void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev); +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 3971c201f320..828b5167ff12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -51,437 +51,107 @@ #include "amdgpu_amdkfd.h" /** - * struct amdgpu_mn_node + * amdgpu_mn_invalidate_gfx - callback to notify about mm change * - * @it: interval node defining start-last of the affected address range - * @bos: list of all BOs in the affected address range - * - * Manages all BOs which are affected of a certain range of address space. - */ -struct amdgpu_mn_node { - struct interval_tree_node it; - struct list_head bos; -}; - -/** - * amdgpu_mn_destroy - destroy the HMM mirror - * - * @work: previously sheduled work item - * - * Lazy destroys the notifier from a work item - */ -static void amdgpu_mn_destroy(struct work_struct *work) -{ - struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); - struct amdgpu_device *adev = amn->adev; - struct amdgpu_mn_node *node, *next_node; - struct amdgpu_bo *bo, *next_bo; - - mutex_lock(&adev->mn_lock); - down_write(&amn->lock); - hash_del(&amn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, - &amn->objects.rb_root, it.rb) { - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { - bo->mn = NULL; - list_del_init(&bo->mn_list); - } - kfree(node); - } - up_write(&amn->lock); - mutex_unlock(&adev->mn_lock); - - hmm_mirror_unregister(&amn->mirror); - kfree(amn); -} - -/** - * amdgpu_hmm_mirror_release - callback to notify about mm destruction - * - * @mirror: the HMM mirror (mm) this callback is about - * - * Shedule a work item to lazy destroy HMM mirror. - */ -static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - - INIT_WORK(&amn->work, amdgpu_mn_destroy); - schedule_work(&amn->work); -} - -/** - * amdgpu_mn_lock - take the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_lock(struct amdgpu_mn *mn) -{ - if (mn) - down_write(&mn->lock); -} - -/** - * amdgpu_mn_unlock - drop the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_unlock(struct amdgpu_mn *mn) -{ - if (mn) - up_write(&mn->lock); -} - -/** - * amdgpu_mn_read_lock - take the read side lock for this notifier - * - * @amn: our notifier - */ -static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) -{ - if (blockable) - down_read(&amn->lock); - else if (!down_read_trylock(&amn->lock)) - return -EAGAIN; - - return 0; -} - -/** - * amdgpu_mn_read_unlock - drop the read side lock for this notifier - * - * @amn: our notifier - */ -static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) -{ - up_read(&amn->lock); -} - -/** - * amdgpu_mn_invalidate_node - unmap all BOs of a node - * - * @node: the node with the BOs to unmap - * @start: start of address range affected - * @end: end of address range affected + * @mni: the range (mm) is about to update + * @range: details on the invalidation + * @cur_seq: Value to pass to mmu_interval_set_seq() * * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, - unsigned long start, - unsigned long end) +static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); long r; - list_for_each_entry(bo, &node->bos, mn_list) { - - if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) - continue; - - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, - true, false, MAX_SCHEDULE_TIMEOUT); - if (r <= 0) - DRM_ERROR("(%ld) failed to wait for user bo\n", r); - } -} - -/** - * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change - * - * @mirror: the hmm_mirror (mm) is about to update - * @update: the update start, end address - * - * Block for operations on BOs to finish and mark pages as accessed and - * potentially dirty. - */ -static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, - const struct hmm_update *update) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - unsigned long start = update->start; - unsigned long end = update->end; - bool blockable = update->blockable; - struct interval_tree_node *it; + if (!mmu_notifier_range_blockable(range)) + return false; - /* notification is exclusive, but interval is inclusive */ - end -= 1; + mutex_lock(&adev->notifier_lock); - /* TODO we should be able to split locking for interval tree and - * amdgpu_mn_invalidate_node - */ - if (amdgpu_mn_read_lock(amn, blockable)) - return -EAGAIN; + mmu_interval_set_seq(mni, cur_seq); - it = interval_tree_iter_first(&amn->objects, start, end); - while (it) { - struct amdgpu_mn_node *node; - - if (!blockable) { - amdgpu_mn_read_unlock(amn); - return -EAGAIN; - } - - node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, start, end); - - amdgpu_mn_invalidate_node(node, start, end); - } - - amdgpu_mn_read_unlock(amn); - - return 0; -} - -/** - * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change - * - * @mirror: the hmm_mirror (mm) is about to update - * @update: the update start, end address - * - * We temporarily evict all BOs between start and end. This - * necessitates evicting all user-mode queues of the process. The BOs - * are restorted in amdgpu_mn_invalidate_range_end_hsa. - */ -static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, - const struct hmm_update *update) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - unsigned long start = update->start; - unsigned long end = update->end; - bool blockable = update->blockable; - struct interval_tree_node *it; - - /* notification is exclusive, but interval is inclusive */ - end -= 1; - - if (amdgpu_mn_read_lock(amn, blockable)) - return -EAGAIN; - - it = interval_tree_iter_first(&amn->objects, start, end); - while (it) { - struct amdgpu_mn_node *node; - struct amdgpu_bo *bo; - - if (!blockable) { - amdgpu_mn_read_unlock(amn); - return -EAGAIN; - } - - node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, start, end); - - list_for_each_entry(bo, &node->bos, mn_list) { - struct kgd_mem *mem = bo->kfd_bo; - - if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - start, end)) - amdgpu_amdkfd_evict_userptr(mem, amn->mm); - } - } - - amdgpu_mn_read_unlock(amn); - - return 0; + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + mutex_unlock(&adev->notifier_lock); + if (r <= 0) + DRM_ERROR("(%ld) failed to wait for user bo\n", r); + return true; } -/* Low bits of any reasonable mm pointer will be unused due to struct - * alignment. Use these bits to make a unique key from the mm pointer - * and notifier type. - */ -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) - -static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { - [AMDGPU_MN_TYPE_GFX] = { - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, - .release = amdgpu_hmm_mirror_release - }, - [AMDGPU_MN_TYPE_HSA] = { - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, - .release = amdgpu_hmm_mirror_release - }, +static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = { + .invalidate = amdgpu_mn_invalidate_gfx, }; /** - * amdgpu_mn_get - create HMM mirror context + * amdgpu_mn_invalidate_hsa - callback to notify about mm change * - * @adev: amdgpu device pointer - * @type: type of MMU notifier context + * @mni: the range (mm) is about to update + * @range: details on the invalidation + * @cur_seq: Value to pass to mmu_interval_set_seq() * - * Creates a HMM mirror context for current->mm. + * We temporarily evict the BO attached to this range. This necessitates + * evicting all user-mode queues of the process. */ -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type) +static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct mm_struct *mm = current->mm; - struct amdgpu_mn *amn; - unsigned long key = AMDGPU_MN_KEY(mm, type); - int r; - - mutex_lock(&adev->mn_lock); - if (down_write_killable(&mm->mmap_sem)) { - mutex_unlock(&adev->mn_lock); - return ERR_PTR(-EINTR); - } - - hash_for_each_possible(adev->mn_hash, amn, node, key) - if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) - goto release_locks; - - amn = kzalloc(sizeof(*amn), GFP_KERNEL); - if (!amn) { - amn = ERR_PTR(-ENOMEM); - goto release_locks; - } - - amn->adev = adev; - amn->mm = mm; - init_rwsem(&amn->lock); - amn->type = type; - amn->objects = RB_ROOT_CACHED; - - amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; - r = hmm_mirror_register(&amn->mirror, mm); - if (r) - goto free_amn; + struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type)); + if (!mmu_notifier_range_blockable(range)) + return false; -release_locks: - up_write(&mm->mmap_sem); - mutex_unlock(&adev->mn_lock); + mutex_lock(&adev->notifier_lock); - return amn; + mmu_interval_set_seq(mni, cur_seq); -free_amn: - up_write(&mm->mmap_sem); - mutex_unlock(&adev->mn_lock); - kfree(amn); + amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm); + mutex_unlock(&adev->notifier_lock); - return ERR_PTR(r); + return true; } +static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = { + .invalidate = amdgpu_mn_invalidate_hsa, +}; + /** * amdgpu_mn_register - register a BO for notifier updates * * @bo: amdgpu buffer object * @addr: userptr addr we should monitor * - * Registers an HMM mirror for the given BO at the specified address. + * Registers a mmu_notifier for the given BO at the specified address. * Returns 0 on success, -ERRNO if anything goes wrong. */ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { - unsigned long end = addr + amdgpu_bo_size(bo) - 1; - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - enum amdgpu_mn_type type = - bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; - struct amdgpu_mn *amn; - struct amdgpu_mn_node *node = NULL, *new_node; - struct list_head bos; - struct interval_tree_node *it; - - amn = amdgpu_mn_get(adev, type); - if (IS_ERR(amn)) - return PTR_ERR(amn); - - new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); - if (!new_node) - return -ENOMEM; - - INIT_LIST_HEAD(&bos); - - down_write(&amn->lock); - - while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { - kfree(node); - node = container_of(it, struct amdgpu_mn_node, it); - interval_tree_remove(&node->it, &amn->objects); - addr = min(it->start, addr); - end = max(it->last, end); - list_splice(&node->bos, &bos); - } - - if (!node) - node = new_node; - else - kfree(new_node); - - bo->mn = amn; - - node->it.start = addr; - node->it.last = end; - INIT_LIST_HEAD(&node->bos); - list_splice(&bos, &node->bos); - list_add(&bo->mn_list, &node->bos); - - interval_tree_insert(&node->it, &amn->objects); - - up_write(&amn->lock); - - return 0; + if (bo->kfd_bo) + return mmu_interval_notifier_insert(&bo->notifier, current->mm, + addr, amdgpu_bo_size(bo), + &amdgpu_mn_hsa_ops); + return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, + amdgpu_bo_size(bo), + &amdgpu_mn_gfx_ops); } /** - * amdgpu_mn_unregister - unregister a BO for HMM mirror updates + * amdgpu_mn_unregister - unregister a BO for notifier updates * * @bo: amdgpu buffer object * - * Remove any registration of HMM mirror updates from the buffer object. + * Remove any registration of mmu notifier updates from the buffer object. */ void amdgpu_mn_unregister(struct amdgpu_bo *bo) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_mn *amn; - struct list_head *head; - - mutex_lock(&adev->mn_lock); - - amn = bo->mn; - if (amn == NULL) { - mutex_unlock(&adev->mn_lock); + if (!bo->notifier.mm) return; - } - - down_write(&amn->lock); - - /* save the next list entry for later */ - head = bo->mn_list.next; - - bo->mn = NULL; - list_del_init(&bo->mn_list); - - if (list_empty(head)) { - struct amdgpu_mn_node *node; - - node = container_of(head, struct amdgpu_mn_node, bos); - interval_tree_remove(&node->it, &amn->objects); - kfree(node); - } - - up_write(&amn->lock); - mutex_unlock(&adev->mn_lock); -} - -/* flags used by HMM internal, not related to CPU/GPU PTE flags */ -static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { - (1 << 0), /* HMM_PFN_VALID */ - (1 << 1), /* HMM_PFN_WRITE */ - 0 /* HMM_PFN_DEVICE_PRIVATE */ -}; - -static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { - 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ - 0, /* HMM_PFN_NONE */ - 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ -}; - -void amdgpu_hmm_init_range(struct hmm_range *range) -{ - if (range) { - range->flags = hmm_range_flags; - range->values = hmm_range_values; - range->pfn_shift = PAGE_SHIFT; - INIT_LIST_HEAD(&range->list); - } + mmu_interval_notifier_remove(&bo->notifier); + bo->notifier.mm = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index b8ed68943625..a292238f75eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -30,63 +30,10 @@ #include <linux/workqueue.h> #include <linux/interval_tree.h> -enum amdgpu_mn_type { - AMDGPU_MN_TYPE_GFX, - AMDGPU_MN_TYPE_HSA, -}; - -/** - * struct amdgpu_mn - * - * @adev: amdgpu device pointer - * @mm: process address space - * @type: type of MMU notifier - * @work: destruction work item - * @node: hash table node to find structure by adev and mn - * @lock: rw semaphore protecting the notifier nodes - * @objects: interval tree containing amdgpu_mn_nodes - * @mirror: HMM mirror function support - * - * Data for each amdgpu device and process address space. - */ -struct amdgpu_mn { - /* constant after initialisation */ - struct amdgpu_device *adev; - struct mm_struct *mm; - enum amdgpu_mn_type type; - - /* only used on destruction */ - struct work_struct work; - - /* protected by adev->mn_lock */ - struct hlist_node node; - - /* objects protected by lock */ - struct rw_semaphore lock; - struct rb_root_cached objects; - -#ifdef CONFIG_HMM_MIRROR - /* HMM mirror */ - struct hmm_mirror mirror; -#endif -}; - #if defined(CONFIG_HMM_MIRROR) -void amdgpu_mn_lock(struct amdgpu_mn *mn); -void amdgpu_mn_unlock(struct amdgpu_mn *mn); -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); -void amdgpu_hmm_init_range(struct hmm_range *range); #else -static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} -static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type) -{ - return NULL; -} static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c new file mode 100644 index 000000000000..7d5c3a9de9ea --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu.h" +#include "amdgpu_ras.h" + +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "pcie_bif_err_count", + .debugfs_name = "pcie_bif_err_inject", + }; + + if (!adev->nbio.ras_if) { + adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->nbio.ras_if) + return -ENOMEM; + adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF; + adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->nbio.ras_if->sub_block_index = 0; + strcpy(adev->nbio.ras_if->name, "pcie_bif"); + } + ih_info.head = fs_info.head = *adev->nbio.ras_if; + r = amdgpu_ras_late_init(adev, adev->nbio.ras_if, + &fs_info, &ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0); + if (r) + goto late_fini; + r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); + if (r) + goto late_fini; + } else { + r = 0; + goto free; + } + + return 0; +late_fini: + amdgpu_ras_late_fini(adev, adev->nbio.ras_if, &ih_info); +free: + kfree(adev->nbio.ras_if); + adev->nbio.ras_if = NULL; + return r; +} + +void amdgpu_nbio_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) && + adev->nbio.ras_if) { + struct ras_common_if *ras_if = adev->nbio.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h new file mode 100644 index 000000000000..919bd566ba3c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -0,0 +1,101 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_NBIO_H__ +#define __AMDGPU_NBIO_H__ + +/* + * amdgpu nbio functions + */ +struct nbio_hdp_flush_reg { + u32 ref_and_mask_cp0; + u32 ref_and_mask_cp1; + u32 ref_and_mask_cp2; + u32 ref_and_mask_cp3; + u32 ref_and_mask_cp4; + u32 ref_and_mask_cp5; + u32 ref_and_mask_cp6; + u32 ref_and_mask_cp7; + u32 ref_and_mask_cp8; + u32 ref_and_mask_cp9; + u32 ref_and_mask_sdma0; + u32 ref_and_mask_sdma1; + u32 ref_and_mask_sdma2; + u32 ref_and_mask_sdma3; + u32 ref_and_mask_sdma4; + u32 ref_and_mask_sdma5; + u32 ref_and_mask_sdma6; + u32 ref_and_mask_sdma7; +}; + +struct amdgpu_nbio_funcs { + const struct nbio_hdp_flush_reg *hdp_flush_reg; + u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); + u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); + u32 (*get_rev_id)(struct amdgpu_device *adev); + void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); + void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + u32 (*get_memsize)(struct amdgpu_device *adev); + void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, int doorbell_size); + void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance); + void (*enable_doorbell_aperture)(struct amdgpu_device *adev, + bool enable); + void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, + bool enable); + void (*ih_doorbell_range)(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index); + void (*enable_doorbell_interrupt)(struct amdgpu_device *adev, + bool enable); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*ih_control)(struct amdgpu_device *adev); + void (*init_registers)(struct amdgpu_device *adev); + void (*detect_hw_virt)(struct amdgpu_device *adev); + void (*remap_hdp_registers)(struct amdgpu_device *adev); + void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev); + void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev); + int (*init_ras_controller_interrupt)(struct amdgpu_device *adev); + int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + int (*ras_late_init)(struct amdgpu_device *adev); +}; + +struct amdgpu_nbio { + const struct nbio_hdp_flush_reg *hdp_flush_reg; + struct amdgpu_irq_src ras_controller_irq; + struct amdgpu_irq_src ras_err_event_athub_irq; + struct ras_common_if *ras_if; + const struct amdgpu_nbio_funcs *funcs; +}; + +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev); +void amdgpu_nbio_ras_fini(struct amdgpu_device *adev); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index bea6f298dfdc..e3f16b49e970 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -80,14 +80,11 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) if (bo->pin_count > 0) amdgpu_bo_subtract_pin_size(bo); - if (bo->kfd_bo) - amdgpu_amdkfd_unreserve_memory_limit(bo); - amdgpu_bo_kunmap(bo); - if (bo->gem_base.import_attach) - drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); - drm_gem_object_release(&bo->gem_base); + if (bo->tbo.base.import_attach) + drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg); + drm_gem_object_release(&bo->tbo.base); /* in case amdgpu_device_recover_vram got NULL of bo->parent */ if (!list_empty(&bo->shadow_list)) { mutex_lock(&adev->shadow_list_lock); @@ -249,8 +246,9 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, bp.size = size; bp.byte_align = align; bp.domain = domain; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.flags = cpu_addr ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED + : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; bp.type = ttm_bo_type_kernel; bp.resv = NULL; @@ -345,6 +343,70 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, } /** + * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location + * + * @adev: amdgpu device object + * @offset: offset of the BO + * @size: size of the BO + * @domain: where to place it + * @bo_ptr: used to initialize BOs in structures + * @cpu_addr: optional CPU address mapping + * + * Creates a kernel BO at a specific offset in the address space of the domain. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, uint32_t domain, + struct amdgpu_bo **bo_ptr, void **cpu_addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + unsigned int i; + int r; + + offset &= PAGE_MASK; + size = ALIGN(size, PAGE_SIZE); + + r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr, + NULL, cpu_addr); + if (r) + return r; + + /* + * Remove the original mem node and create a new one at the request + * position. + */ + if (cpu_addr) + amdgpu_bo_kunmap(*bo_ptr); + + ttm_bo_mem_put(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.mem); + + for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) { + (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT; + (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; + } + r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement, + &(*bo_ptr)->tbo.mem, &ctx); + if (r) + goto error; + + if (cpu_addr) { + r = amdgpu_bo_kmap(*bo_ptr, cpu_addr); + if (r) + goto error; + } + + amdgpu_bo_unreserve(*bo_ptr); + return 0; + +error: + amdgpu_bo_unreserve(*bo_ptr); + amdgpu_bo_unref(bo_ptr); + return r; +} + +/** * amdgpu_bo_free_kernel - free BO for kernel use * * @bo: amdgpu BO to free @@ -413,15 +475,50 @@ fail: return false; } +bool amdgpu_bo_support_uswc(u64 bo_flags) +{ + +#ifdef CONFIG_X86_32 + /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit + * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 + */ + return false; +#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) + /* Don't try to enable write-combining when it can't work, or things + * may be slow + * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 + */ + +#ifndef CONFIG_COMPILE_TEST +#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ + thanks to write-combining +#endif + + if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) + DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " + "better performance thanks to write-combining\n"); + return false; +#else + /* For architectures that don't support WC memory, + * mask out the WC flag from the BO + */ + if (!drm_arch_can_wc_memory()) + return false; + + return true; +#endif +} + static int amdgpu_bo_do_create(struct amdgpu_device *adev, struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { struct ttm_operation_ctx ctx = { .interruptible = (bp->type != ttm_bo_type_kernel), - .no_wait_gpu = false, + .no_wait_gpu = bp->no_wait_gpu, .resv = bp->resv, - .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT + .flags = bp->type != ttm_bo_type_kernel ? + TTM_OPT_FLAG_ALLOW_RES_EVICT : 0 }; struct amdgpu_bo *bo; unsigned long page_align, size = bp->size; @@ -454,7 +551,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL); if (bo == NULL) return -ENOMEM; - drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); + drm_gem_private_object_init(adev->ddev, &bo->tbo.base, size); INIT_LIST_HEAD(&bo->shadow_list); bo->vm_bo = NULL; bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : @@ -466,33 +563,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo->flags = bp->flags; -#ifdef CONFIG_X86_32 - /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit - * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 - */ - bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) - /* Don't try to enable write-combining when it can't work, or things - * may be slow - * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 - */ - -#ifndef CONFIG_COMPILE_TEST -#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ - thanks to write-combining -#endif - - if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) - DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " - "better performance thanks to write-combining\n"); - bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#else - /* For architectures that don't support WC memory, - * mask out the WC flag from the BO - */ - if (!drm_arch_can_wc_memory()) + if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#endif bo->tbo.bdev = &adev->mman.bdev; if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | @@ -521,7 +593,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); + r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -544,7 +616,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, fail_unreserve: if (!bp->resv) - ww_mutex_unlock(&bo->tbo.resv->lock); + dma_resv_unlock(bo->tbo.base.resv); amdgpu_bo_unref(&bo); return r; } @@ -565,7 +637,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_SHADOW; bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.resv; + bp.resv = bo->tbo.base.resv; r = amdgpu_bo_do_create(adev, &bp, &bo->shadow); if (!r) { @@ -606,13 +678,13 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) { if (!bp->resv) - WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, + WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv, NULL)); r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr); if (!bp->resv) - reservation_object_unlock((*bo_ptr)->tbo.resv); + dma_resv_unlock((*bo_ptr)->tbo.base.resv); if (r) amdgpu_bo_unref(bo_ptr); @@ -709,7 +781,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false, + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, false, false, MAX_SCHEDULE_TIMEOUT); if (r < 0) return r; @@ -1051,7 +1123,10 @@ void amdgpu_bo_fini(struct amdgpu_device *adev) int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, struct vm_area_struct *vma) { - return ttm_fbdev_mmap(vma, &bo->tbo); + if (vma->vm_pgoff != 0) + return -EACCES; + + return ttm_bo_mmap_obj(vma, &bo->tbo); } /** @@ -1087,7 +1162,7 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags) */ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) { - lockdep_assert_held(&bo->tbo.resv->lock.base); + dma_resv_assert_held(bo->tbo.base.resv); if (tiling_flags) *tiling_flags = bo->tiling_flags; @@ -1212,6 +1287,42 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, } /** + * amdgpu_bo_move_notify - notification about a BO being released + * @bo: pointer to a buffer object + * + * Wipes VRAM buffers whose contents should not be leaked before the + * memory is released. + */ +void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) +{ + struct dma_fence *fence = NULL; + struct amdgpu_bo *abo; + int r; + + if (!amdgpu_bo_is_amdgpu_bo(bo)) + return; + + abo = ttm_to_amdgpu_bo(bo); + + if (abo->kfd_bo) + amdgpu_amdkfd_unreserve_memory_limit(abo); + + if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node || + !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) + return; + + dma_resv_lock(bo->base.resv, NULL); + + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); + if (!WARN_ON(r)) { + amdgpu_bo_fence(abo, fence, false); + dma_fence_put(fence); + } + + dma_resv_unlock(bo->base.resv); +} + +/** * amdgpu_bo_fault_reserve_notify - notification about a memory fault * @bo: pointer to a buffer object * @@ -1283,12 +1394,12 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared) { - struct reservation_object *resv = bo->tbo.resv; + struct dma_resv *resv = bo->tbo.base.resv; if (shared) - reservation_object_add_shared_fence(resv, fence); + dma_resv_add_shared_fence(resv, fence); else - reservation_object_add_excl_fence(resv, fence); + dma_resv_add_excl_fence(resv, fence); } /** @@ -1308,7 +1419,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr) int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, bo->tbo.resv, owner, false); + amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, owner, false); r = amdgpu_sync_wait(&sync, intr); amdgpu_sync_free(&sync); @@ -1328,7 +1439,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr) u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); - WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) && + WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) && !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel); WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index d60593cc436e..36dec51d1ef1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -30,6 +30,9 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" +#ifdef CONFIG_MMU_NOTIFIER +#include <linux/mmu_notifier.h> +#endif #define AMDGPU_BO_INVALID_OFFSET LONG_MAX #define AMDGPU_BO_MAX_PLACEMENTS 3 @@ -41,7 +44,8 @@ struct amdgpu_bo_param { u32 preferred_domain; u64 flags; enum ttm_bo_type type; - struct reservation_object *resv; + bool no_wait_gpu; + struct dma_resv *resv; }; /* bo virtual addresses in a vm */ @@ -94,17 +98,18 @@ struct amdgpu_bo { /* per VM structure for page tables and with virtual addresses */ struct amdgpu_vm_bo_base *vm_bo; /* Constant after initialization */ - struct drm_gem_object gem_base; struct amdgpu_bo *parent; struct amdgpu_bo *shadow; struct ttm_bo_kmap_obj dma_buf_vmap; struct amdgpu_mn *mn; - union { - struct list_head mn_list; - struct list_head shadow_list; - }; + +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_interval_notifier notifier; +#endif + + struct list_head shadow_list; struct kgd_mem *kfd_bo; }; @@ -192,7 +197,7 @@ static inline unsigned amdgpu_bo_gpu_page_alignment(struct amdgpu_bo *bo) */ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) { - return drm_vma_node_offset_addr(&bo->tbo.vma_node); + return drm_vma_node_offset_addr(&bo->tbo.base.vma_node); } /** @@ -238,6 +243,9 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr); +int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, uint32_t domain, + struct amdgpu_bo **bo_ptr, void **cpu_addr); void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); @@ -265,6 +273,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict, struct ttm_mem_reg *new_mem); +void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); @@ -308,5 +317,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, struct seq_file *m); #endif +bool amdgpu_bo_support_uswc(u64 bo_flags); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 2b546567853b..b03b1eb7ba04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -37,6 +37,7 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/nospec.h> +#include <linux/pm_runtime.h> #include "hwmgr.h" #define WIDTH_4K 3840 @@ -158,10 +159,18 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { if (adev->smu.ppt_funcs->get_current_power_state) - pm = amdgpu_smu_get_current_power_state(adev); + pm = smu_get_current_power_state(&adev->smu); else pm = adev->pm.dpm.user_state; } else if (adev->powerplay.pp_funcs->get_current_power_state) { @@ -170,6 +179,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, pm = adev->pm.dpm.user_state; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%s\n", (pm == POWER_STATE_TYPE_BATTERY) ? "battery" : (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance"); @@ -183,6 +195,10 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type state; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; if (strncmp("battery", buf, strlen("battery")) == 0) state = POWER_STATE_TYPE_BATTERY; @@ -190,10 +206,12 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, state = POWER_STATE_TYPE_BALANCED; else if (strncmp("performance", buf, strlen("performance")) == 0) state = POWER_STATE_TYPE_PERFORMANCE; - else { - count = -EINVAL; - goto fail; - } + else + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { mutex_lock(&adev->pm.mutex); @@ -206,12 +224,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, adev->pm.dpm.user_state = state; mutex_unlock(&adev->pm.mutex); - /* Can't set dpm state when the card is off */ - if (!(adev->flags & AMD_IS_PX) || - (ddev->switch_power_state == DRM_SWITCH_POWER_ON)) - amdgpu_pm_compute_clocks(adev); + amdgpu_pm_compute_clocks(adev); } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -282,13 +299,14 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_dpm_forced_level level = 0xff; + int ret; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return snprintf(buf, PAGE_SIZE, "off\n"); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) level = smu_get_performance_level(&adev->smu); @@ -297,6 +315,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, else level = adev->pm.dpm.forced_level; + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%s\n", (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : @@ -320,18 +341,9 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, enum amd_dpm_forced_level current_level = 0xff; int ret = 0; - /* Can't force performance level when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; - if (!amdgpu_sriov_vf(adev)) { - if (is_support_sw_smu(adev)) - current_level = smu_get_performance_level(&adev->smu); - else if (adev->powerplay.pp_funcs->get_performance_level) - current_level = amdgpu_dpm_get_performance_level(adev); - } - if (strncmp("low", buf, strlen("low")) == 0) { level = AMD_DPM_FORCED_LEVEL_LOW; } else if (strncmp("high", buf, strlen("high")) == 0) { @@ -351,24 +363,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, } else if (strncmp("profile_peak", buf, strlen("profile_peak")) == 0) { level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; } else { - count = -EINVAL; - goto fail; - } - - if (amdgpu_sriov_vf(adev)) { - if (amdgim_is_hwperf(adev) && - adev->virt.ops->force_dpm_level) { - mutex_lock(&adev->pm.mutex); - adev->virt.ops->force_dpm_level(adev, level); - mutex_unlock(&adev->pm.mutex); - return count; - } else { - return -EINVAL; - } - } + return -EINVAL; + } - if (current_level == level) + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + + if (is_support_sw_smu(adev)) + current_level = smu_get_performance_level(&adev->smu); + else if (adev->powerplay.pp_funcs->get_performance_level) + current_level = amdgpu_dpm_get_performance_level(adev); + + if (current_level == level) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; + } /* profile_exit setting is valid only when current mode is in profile mode */ if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | @@ -377,29 +388,40 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) && (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) { pr_err("Currently not in any profile mode!\n"); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; } if (is_support_sw_smu(adev)) { ret = smu_force_performance_level(&adev->smu, level); - if (ret) - count = -EINVAL; + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { - count = -EINVAL; mutex_unlock(&adev->pm.mutex); - goto fail; + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; } ret = amdgpu_dpm_force_performance_level(adev, level); - if (ret) - count = -EINVAL; - else + if (ret) { + mutex_unlock(&adev->pm.mutex); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } else { adev->pm.dpm.forced_level = level; + } mutex_unlock(&adev->pm.mutex); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); -fail: return count; } @@ -412,6 +434,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, struct pp_states_info data; int i, buf_len, ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_get_power_num_states(&adev->smu, &data); if (ret) @@ -419,6 +445,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, } else if (adev->powerplay.pp_funcs->get_pp_num_states) amdgpu_dpm_get_pp_num_states(adev, &data); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); for (i = 0; i < data.nums; i++) buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i, @@ -441,6 +470,13 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, enum amd_pm_state_type pm = 0; int i = 0, ret = 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { pm = smu_get_current_power_state(smu); ret = smu_get_power_num_states(smu, &data); @@ -452,6 +488,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, amdgpu_dpm_get_pp_num_states(adev, &data); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + for (i = 0; i < data.nums; i++) { if (pm == data.states[i]) break; @@ -470,6 +509,9 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + if (adev->pp_force_state_enabled) return amdgpu_get_pp_cur_state(dev, attr, buf); else @@ -487,6 +529,9 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, unsigned long idx; int ret; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; + if (strlen(buf) == 1) adev->pp_force_state_enabled = false; else if (is_support_sw_smu(adev)) @@ -496,14 +541,18 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, struct pp_states_info data; ret = kstrtoul(buf, 0, &idx); - if (ret || idx >= ARRAY_SIZE(data.states)) { - count = -EINVAL; - goto fail; - } + if (ret || idx >= ARRAY_SIZE(data.states)) + return -EINVAL; + idx = array_index_nospec(idx, ARRAY_SIZE(data.states)); amdgpu_dpm_get_pp_num_states(adev, &data); state = data.states[idx]; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + /* only set user selected power states */ if (state != POWER_STATE_TYPE_INTERNAL_BOOT && state != POWER_STATE_TYPE_DEFAULT) { @@ -511,8 +560,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, AMD_PP_TASK_ENABLE_USER_STATE, &state); adev->pp_force_state_enabled = true; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); } -fail: + return count; } @@ -534,17 +585,32 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; char *table = NULL; - int size; + int size, ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { size = smu_sys_get_pp_table(&adev->smu, (void **)&table); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (size < 0) return size; - } - else if (adev->powerplay.pp_funcs->get_pp_table) + } else if (adev->powerplay.pp_funcs->get_pp_table) { size = amdgpu_dpm_get_pp_table(adev, &table); - else + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (size < 0) + return size; + } else { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return 0; + } if (size >= PAGE_SIZE) size = PAGE_SIZE - 1; @@ -563,13 +629,26 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret = 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return ret; + } } else if (adev->powerplay.pp_funcs->set_pp_table) amdgpu_dpm_set_pp_table(adev, buf, count); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -655,6 +734,9 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, const char delimiter[3] = {' ', '\n', '\0'}; uint32_t type; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + if (count > 127) return -EINVAL; @@ -690,18 +772,28 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, tmp_str++; } + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_od_edit_dpm_table(&adev->smu, type, parameter, parameter_size); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } else { if (adev->powerplay.pp_funcs->odn_edit_dpm_table) { ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, parameter_size); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } if (type == PP_OD_COMMIT_DPM_TABLE) { @@ -709,12 +801,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } else { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; } } } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } @@ -725,31 +823,40 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - uint32_t size = 0; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf); size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size); - return size; } else if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); - return size; } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return size; } /** - * DOC: ppfeatures + * DOC: pp_features * * The amdgpu driver provides a sysfs API for adjusting what powerplay - * features to be enabled. The file ppfeatures is used for this. And + * features to be enabled. The file pp_features is used for this. And * this is only available for Vega10 and later dGPUs. * * Reading back the file will show you the followings: @@ -761,7 +868,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, * the corresponding bit from original ppfeature masks and input the * new ppfeature masks. */ -static ssize_t amdgpu_set_ppfeature_status(struct device *dev, +static ssize_t amdgpu_set_pp_feature_status(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -771,43 +878,71 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev, uint64_t featuremask; int ret; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + ret = kstrtou64(buf, 0, &featuremask); if (ret) return -EINVAL; pr_debug("featuremask = 0x%llx\n", featuremask); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { - ret = smu_set_ppfeature_status(&adev->smu, featuremask); - if (ret) + ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } -static ssize_t amdgpu_get_ppfeature_status(struct device *dev, +static ssize_t amdgpu_get_pp_feature_status(struct device *dev, struct device_attribute *attr, char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; - if (is_support_sw_smu(adev)) { - return smu_get_ppfeature_status(&adev->smu, buf); - } else if (adev->powerplay.pp_funcs->get_ppfeature_status) - return amdgpu_dpm_get_ppfeature_status(adev, buf); + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; - return snprintf(buf, PAGE_SIZE, "\n"); + if (is_support_sw_smu(adev)) + size = smu_sys_get_pp_feature_mask(&adev->smu, buf); + else if (adev->powerplay.pp_funcs->get_ppfeature_status) + size = amdgpu_dpm_get_ppfeature_status(adev, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } /** - * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk - * pp_dpm_pcie + * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk pp_dpm_pcie * * The amdgpu driver provides a sysfs API for adjusting what power levels * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk, @@ -823,9 +958,15 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, * * To manually adjust these states, first select manual using * power_dpm_force_performance_level. - * Secondly,Enter a new value for each level by inputing a string that + * Secondly, enter a new value for each level by inputing a string that * contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie" - * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6. + * E.g., + * + * .. code-block:: bash + * + * echo "4 5 6" > pp_dpm_sclk + * + * will enable sclk levels 4, 5, and 6. * * NOTE: change to the dcefclk max dpm level is not supported now */ @@ -836,17 +977,27 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; - if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && - adev->virt.ops->get_pp_clk) - return adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } /* @@ -895,18 +1046,25 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, int ret; uint32_t mask = 0; - if (amdgpu_sriov_vf(adev)) - return 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (ret) return -EINVAL; @@ -919,17 +1077,27 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; - if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && - adev->virt.ops->get_pp_clk) - return adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf); + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, @@ -939,21 +1107,28 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - int ret; uint32_t mask = 0; + int ret; - if (amdgpu_sriov_vf(adev)) - return 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (ret) return -EINVAL; @@ -966,13 +1141,27 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, @@ -985,14 +1174,26 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1006,13 +1207,27 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, @@ -1025,14 +1240,26 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1046,13 +1273,27 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, @@ -1065,14 +1306,26 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1086,13 +1339,27 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); + size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, @@ -1105,14 +1372,26 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1127,12 +1406,23 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; + int ret; + + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK); else if (adev->powerplay.pp_funcs->get_sclk_od) value = amdgpu_dpm_get_sclk_od(adev); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%d\n", value); } @@ -1146,12 +1436,17 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, int ret; long int value; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + ret = kstrtol(buf, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value); @@ -1167,7 +1462,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, } } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -1178,12 +1475,23 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; + int ret; + + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK); else if (adev->powerplay.pp_funcs->get_mclk_od) value = amdgpu_dpm_get_mclk_od(adev); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%d\n", value); } @@ -1197,12 +1505,17 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, int ret; long int value; + if (amdgpu_sriov_vf(adev)) + return 0; + ret = kstrtol(buf, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value); @@ -1218,7 +1531,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, } } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -1248,13 +1563,27 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_get_power_profile_mode(&adev->smu, buf); + size = smu_get_power_profile_mode(&adev->smu, buf); else if (adev->powerplay.pp_funcs->get_power_profile_mode) - return amdgpu_dpm_get_power_profile_mode(adev, buf); + size = amdgpu_dpm_get_power_profile_mode(adev, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); - return snprintf(buf, PAGE_SIZE, "\n"); + return size; } @@ -1279,7 +1608,10 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, tmp[1] = '\0'; ret = kstrtol(tmp, 0, &profile_mode); if (ret) - goto fail; + return -EINVAL; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { if (count < 2 || count > 127) @@ -1291,23 +1623,30 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, while (tmp_str[0]) { sub_str = strsep(&tmp_str, delimiter); ret = kstrtol(sub_str, 0, ¶meter[parameter_size]); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; parameter_size++; while (isspace(*tmp_str)) tmp_str++; } } parameter[parameter_size] = profile_mode; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size); + ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true); else if (adev->powerplay.pp_funcs->set_power_profile_mode) ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (!ret) return count; -fail: + return -EINVAL; } @@ -1327,10 +1666,20 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + r = pm_runtime_get_sync(ddev->dev); + if (r < 0) + return r; + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (r) return r; @@ -1353,10 +1702,20 @@ static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + r = pm_runtime_get_sync(ddev->dev); + if (r < 0) + return r; + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (r) return r; @@ -1382,8 +1741,20 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint64_t count0, count1; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; amdgpu_asic_get_pcie_usage(adev, &count0, &count1); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n", count0, count1, pcie_get_mps(adev->pdev)); } @@ -1405,6 +1776,9 @@ static ssize_t amdgpu_get_unique_id(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + if (adev->unique_id) return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id); @@ -1458,9 +1832,9 @@ static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, static DEVICE_ATTR(mem_busy_percent, S_IRUGO, amdgpu_get_memory_busy_percent, NULL); static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); -static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, - amdgpu_get_ppfeature_status, - amdgpu_set_ppfeature_status); +static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR, + amdgpu_get_pp_feature_status, + amdgpu_set_pp_feature_status); static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, @@ -1468,42 +1842,43 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; int channel = to_sensor_dev_attr(attr)->index; int r, temp = 0, size = sizeof(temp); - /* Can't get temperature when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - if (channel >= PP_TEMP_MAX) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + switch (channel) { case PP_TEMP_JUNCTION: /* get current junction temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, (void *)&temp, &size); - if (r) - return r; break; case PP_TEMP_EDGE: /* get current edge temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, (void *)&temp, &size); - if (r) - return r; break; case PP_TEMP_MEM: /* get current memory temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, (void *)&temp, &size); - if (r) - return r; + break; + default: + r = -EINVAL; break; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (r) + return r; + return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -1599,15 +1974,27 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + int ret; + + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { - if (!adev->powerplay.pp_funcs->get_fan_control_mode) + if (!adev->powerplay.pp_funcs->get_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return sprintf(buf, "%i\n", pwm_mode); } @@ -1617,31 +2004,32 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, size_t count) { struct amdgpu_device *adev = dev_get_drvdata(dev); - int err; + int err, ret; int value; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = kstrtoint(buf, 10, &value); + if (err) + return err; - if (is_support_sw_smu(adev)) { - err = kstrtoint(buf, 10, &value); - if (err) - return err; + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, value); } else { - if (!adev->powerplay.pp_funcs->set_fan_control_mode) + if (!adev->powerplay.pp_funcs->set_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; - - err = kstrtoint(buf, 10, &value); - if (err) - return err; + } amdgpu_dpm_set_fan_control_mode(adev, value); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return count; } @@ -1668,34 +2056,43 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, u32 value; u32 pwm_mode; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pr_info("manual fan speed control should be enabled first\n"); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; } err = kstrtou32(buf, 10, &value); - if (err) + if (err) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } value = (value * 100) / 255; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_set_fan_speed_percent(&adev->smu, value); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->set_fan_speed_percent) { + else if (adev->powerplay.pp_funcs->set_fan_speed_percent) err = amdgpu_dpm_set_fan_speed_percent(adev, value); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return count; } @@ -1708,20 +2105,22 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, int err; u32 speed = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_percent(&adev->smu, &speed); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_percent) { + else if (adev->powerplay.pp_funcs->get_fan_speed_percent) err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; speed = (speed * 255) / 100; @@ -1736,20 +2135,22 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, int err; u32 speed = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &speed); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return sprintf(buf, "%i\n", speed); } @@ -1763,8 +2164,16 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 size = sizeof(min_rpm); int r; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, (void *)&min_rpm, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1780,8 +2189,16 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 size = sizeof(max_rpm); int r; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, (void *)&max_rpm, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1796,20 +2213,22 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, int err; u32 rpm = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &rpm); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return sprintf(buf, "%i\n", rpm); } @@ -1823,32 +2242,40 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, u32 value; u32 pwm_mode; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); - if (pwm_mode != AMD_FAN_CTRL_MANUAL) + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -ENODATA; - - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + } err = kstrtou32(buf, 10, &value); - if (err) + if (err) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_set_fan_speed_rpm(&adev->smu, value); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) err = amdgpu_dpm_set_fan_speed_rpm(adev, value); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return count; } @@ -1859,15 +2286,27 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + int ret; + + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { - if (!adev->powerplay.pp_funcs->get_fan_control_mode) + if (!adev->powerplay.pp_funcs->get_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1); } @@ -1881,12 +2320,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, int value; u32 pwm_mode; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - - err = kstrtoint(buf, 10, &value); if (err) return err; @@ -1898,14 +2331,24 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, else return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, pwm_mode); } else { - if (!adev->powerplay.pp_funcs->set_fan_control_mode) + if (!adev->powerplay.pp_funcs->set_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return count; } @@ -1914,18 +2357,20 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 vddgfx; int r, size = sizeof(vddgfx); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&vddgfx, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1944,7 +2389,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 vddnb; int r, size = sizeof(vddnb); @@ -1952,14 +2396,17 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, if (!(adev->flags & AMD_IS_APU)) return -EINVAL; - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&vddnb, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1978,19 +2425,21 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 query = 0; int r, size = sizeof(u32); unsigned uw; - /* Can't get power when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2013,16 +2462,27 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; + ssize_t size; + int r; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; if (is_support_sw_smu(adev)) { - smu_get_power_limit(&adev->smu, &limit, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + smu_get_power_limit(&adev->smu, &limit, true, true); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return size; } static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, @@ -2031,16 +2491,27 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; + ssize_t size; + int r; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; if (is_support_sw_smu(adev)) { - smu_get_power_limit(&adev->smu, &limit, false); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + smu_get_power_limit(&adev->smu, &limit, false, true); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return size; } @@ -2053,20 +2524,32 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, int err; u32 value; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + err = kstrtou32(buf, 10, &value); if (err) return err; value = value / 1000000; /* convert to Watt */ - if (is_support_sw_smu(adev)) { - adev->smu.funcs->set_power_limit(&adev->smu, value); - } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { + + + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + + if (is_support_sw_smu(adev)) + err = smu_set_power_limit(&adev->smu, value); + else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); - if (err) - return err; - } else { - return -EINVAL; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return count; } @@ -2076,18 +2559,20 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; uint32_t sclk; int r, size = sizeof(sclk); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&sclk, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2106,18 +2591,20 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; uint32_t mclk; int r, size = sizeof(mclk); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&mclk, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2199,9 +2686,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * - fan1_input: fan speed in RPM * - * - fan[1-*]_target: Desired fan speed Unit: revolution/min (RPM) + * - fan[1-\*]_target: Desired fan speed Unit: revolution/min (RPM) * - * - fan[1-*]_enable: Enable or disable the sensors.1: Enable 0: Disable + * - fan[1-\*]_enable: Enable or disable the sensors.1: Enable 0: Disable * * hwmon interfaces for GPU clocks: * @@ -2297,6 +2784,23 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct amdgpu_device *adev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; + /* under multi-vf mode, the hwmon attributes are all not supported */ + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + /* there is no fan under pp one vf mode */ + if (amdgpu_sriov_is_pp_one_vf(adev) && + (attr == &sensor_dev_attr_pwm1.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_input.dev_attr.attr || + attr == &sensor_dev_attr_fan1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_max.dev_attr.attr || + attr == &sensor_dev_attr_fan1_target.dev_attr.attr || + attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) + return 0; + /* Skip fan attributes if fan is not present */ if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr || attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || @@ -2352,7 +2856,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR; } - if ((adev->flags & AMD_IS_APU) && + if (((adev->flags & AMD_IS_APU) || + adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ + adev->family == AMDGPU_FAMILY_KV) && /* not implemented yet */ (attr == &sensor_dev_attr_power1_average.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| @@ -2376,6 +2882,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, return 0; } + if ((adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ + adev->family == AMDGPU_FAMILY_KV) && /* not implemented yet */ + (attr == &sensor_dev_attr_in0_input.dev_attr.attr || + attr == &sensor_dev_attr_in0_label.dev_attr.attr)) + return 0; + /* only APUs have vddnb */ if (!(adev->flags & AMD_IS_APU) && (attr == &sensor_dev_attr_in1_input.dev_attr.attr || @@ -2656,17 +3168,12 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { int ret = 0; - if (is_support_sw_smu(adev)) { - ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_UVD, enable); - if (ret) - DRM_ERROR("[SW SMU]: dpm enable uvd failed, state = %s, ret = %d. \n", - enable ? "true" : "false", ret); - } else if (adev->powerplay.pp_funcs->set_powergating_by_smu) { - /* enable/disable UVD */ - mutex_lock(&adev->pm.mutex); - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); - mutex_unlock(&adev->pm.mutex); - } + + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); + if (ret) + DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", + enable ? "enable" : "disable", ret); + /* enable/disable Low Memory PState for UVD (4k videos) */ if (adev->asic_type == CHIP_STONEY && adev->uvd.decode_image_width >= WIDTH_4K) { @@ -2683,17 +3190,11 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { int ret = 0; - if (is_support_sw_smu(adev)) { - ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_VCE, enable); - if (ret) - DRM_ERROR("[SW SMU]: dpm enable vce failed, state = %s, ret = %d. \n", - enable ? "true" : "false", ret); - } else if (adev->powerplay.pp_funcs->set_powergating_by_smu) { - /* enable/disable VCE */ - mutex_lock(&adev->pm.mutex); - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); - mutex_unlock(&adev->pm.mutex); - } + + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); + if (ret) + DRM_ERROR("Dpm %s vce failed, ret = %d. \n", + enable ? "enable" : "disable", ret); } void amdgpu_pm_print_power_states(struct amdgpu_device *adev) @@ -2708,42 +3209,14 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev) } -int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev) +void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable) { int ret = 0; - if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))) - return ret; - - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_sclk\n"); - return ret; - } - - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_mclk\n"); - return ret; - } - - ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level); - if (ret) { - DRM_ERROR("failed to create device file for dpm state\n"); - return ret; - } - - return ret; -} - -void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev) -{ - if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))) - return; - - device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level); - device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk); - device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable); + if (ret) + DRM_ERROR("Dpm %s jpeg failed, ret = %d. \n", + enable ? "enable" : "disable", ret); } int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version) @@ -2820,6 +3293,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) DRM_ERROR("failed to create device file pp_dpm_sclk\n"); return ret; } + + /* Arcturus does not support standalone mclk/socclk/fclk level setting */ + if (adev->asic_type == CHIP_ARCTURUS) { + dev_attr_pp_dpm_mclk.attr.mode &= ~S_IWUGO; + dev_attr_pp_dpm_mclk.store = NULL; + + dev_attr_pp_dpm_socclk.attr.mode &= ~S_IWUGO; + dev_attr_pp_dpm_socclk.store = NULL; + + dev_attr_pp_dpm_fclk.attr.mode &= ~S_IWUGO; + dev_attr_pp_dpm_fclk.store = NULL; + } + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk); if (ret) { DRM_ERROR("failed to create device file pp_dpm_mclk\n"); @@ -2831,10 +3317,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) DRM_ERROR("failed to create device file pp_dpm_socclk\n"); return ret; } - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); - return ret; + if (adev->asic_type != CHIP_ARCTURUS) { + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); + return ret; + } } } if (adev->asic_type >= CHIP_VEGA20) { @@ -2844,10 +3332,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } } - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_pcie\n"); - return ret; + if (adev->asic_type != CHIP_ARCTURUS) { + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_pcie\n"); + return ret; + } } ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od); if (ret) { @@ -2917,10 +3407,10 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) { ret = device_create_file(adev->dev, - &dev_attr_ppfeatures); + &dev_attr_pp_features); if (ret) { DRM_ERROR("failed to create device file " - "ppfeatures\n"); + "pp_features\n"); return ret; } } @@ -2951,9 +3441,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); if (adev->asic_type >= CHIP_VEGA10) { device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk); - device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); + if (adev->asic_type != CHIP_ARCTURUS) + device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); } - device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); + if (adev->asic_type != CHIP_ARCTURUS) + device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); if (adev->asic_type >= CHIP_VEGA20) device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk); device_remove_file(adev->dev, &dev_attr_pp_sclk_od); @@ -2974,7 +3466,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_unique_id); if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) - device_remove_file(adev->dev, &dev_attr_ppfeatures); + device_remove_file(adev->dev, &dev_attr_pp_features); } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) @@ -2997,7 +3489,8 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) struct smu_dpm_context *smu_dpm = &adev->smu.smu_dpm; smu_handle_task(&adev->smu, smu_dpm->dpm_level, - AMD_PP_TASK_DISPLAY_CONFIG_CHANGE); + AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, + true); } else { if (adev->powerplay.pp_funcs->dispatch_tasks) { if (!amdgpu_device_has_dc_support(adev)) { @@ -3133,8 +3626,12 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - struct drm_device *ddev = adev->ddev; u32 flags = 0; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; amdgpu_device_ip_get_clockgating_state(adev, &flags); seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); @@ -3143,23 +3640,28 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) if (!adev->pm.dpm_enabled) { seq_printf(m, "dpm not enabled\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); return 0; } - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { - seq_printf(m, "PX asic powered off\n"); - } else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { + + if (!is_support_sw_smu(adev) && + adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m); else seq_printf(m, "Debugfs support not implemented for this asic\n"); mutex_unlock(&adev->pm.mutex); + r = 0; } else { - return amdgpu_debugfs_pm_info_pp(m, adev); + r = amdgpu_debugfs_pm_info_pp(m, adev); } - return 0; + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + return r; } static const struct drm_info_list amdgpu_pm_info_list[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h index ef31448ee8d8..3da1da277805 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h @@ -41,5 +41,6 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev); void amdgpu_dpm_thermal_work_handler(struct work_struct *work); void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable); void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable); +void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index 0e6dba9f60f0..1311d6aec5d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -52,7 +52,7 @@ static int amdgpu_perf_event_init(struct perf_event *event) return -ENOENT; /* update the hw_perf_event struct with config data */ - hwc->conf = event->attr.config; + hwc->config = event->attr.config; return 0; } @@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: if (!(flags & PERF_EF_RELOAD)) - pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 1); - pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 0); break; default: break; @@ -101,13 +101,13 @@ static void amdgpu_perf_read(struct perf_event *event) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf, + pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->config, &count); break; default: count = 0; break; - }; + } } while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev); local64_add(count - prev, &event->count); @@ -126,11 +126,11 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 0); break; default: break; - }; + } WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; @@ -156,11 +156,12 @@ static int amdgpu_perf_add(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + retval = pe->adev->df.funcs->pmc_start(pe->adev, + hwc->config, 1); break; default: return 0; - }; + } if (retval) return retval; @@ -184,11 +185,11 @@ static void amdgpu_perf_del(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 1); break; default: break; - }; + } perf_event_update_userpage(event); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index c027e5e7713e..3a1570dafe34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -32,6 +32,9 @@ #include "psp_v3_1.h" #include "psp_v10_0.h" #include "psp_v11_0.h" +#include "psp_v12_0.h" + +#include "amdgpu_ras.h" static void psp_set_funcs(struct amdgpu_device *adev); @@ -53,13 +56,19 @@ static int psp_early_init(void *handle) psp->autoload_supported = false; break; case CHIP_VEGA20: + case CHIP_ARCTURUS: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = false; break; case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = true; break; + case CHIP_RENOIR: + psp_v12_0_set_psp_funcs(psp); + break; default: return -EINVAL; } @@ -81,6 +90,17 @@ static int psp_sw_init(void *handle) return ret; } + ret = psp_mem_training_init(psp); + if (ret) { + DRM_ERROR("Failed to initialize memory training!\n"); + return ret; + } + ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT); + if (ret) { + DRM_ERROR("Failed to process memory training!\n"); + return ret; + } + return 0; } @@ -88,6 +108,7 @@ static int psp_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + psp_mem_training_fini(&adev->psp); release_firmware(adev->psp.sos_fw); adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); @@ -137,18 +158,26 @@ psp_cmd_submit_buf(struct psp_context *psp, memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); index = atomic_inc_return(&psp->fence_value); - ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, - fence_mc_addr, index); + ret = psp_ring_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index); if (ret) { atomic_dec(&psp->fence_value); mutex_unlock(&psp->mutex); return ret; } + amdgpu_asic_invalidate_hdp(psp->adev, NULL); while (*((unsigned int *)psp->fence_buf) != index) { if (--timeout == 0) break; + /* + * Shouldn't wait for timeout when err_event_athub occurs, + * because gpu reset thread triggered and lock resource should + * be released for psp resume sequence. + */ + if (amdgpu_ras_intr_triggered()) + break; msleep(1); + amdgpu_asic_invalidate_hdp(psp->adev, NULL); } /* In some cases, psp response status is not 0 even there is no @@ -162,8 +191,9 @@ psp_cmd_submit_buf(struct psp_context *psp, if (ucode) DRM_WARN("failed to load ucode id (%d) ", ucode->ucode_id); - DRM_WARN("psp command failed and response status is (%d)\n", - psp->cmd_buf_mem->resp.status); + DRM_WARN("psp command (0x%X) failed and response status is (0x%X)\n", + psp->cmd_buf_mem->cmd_id, + psp->cmd_buf_mem->resp.status); if (!timeout) { mutex_unlock(&psp->mutex); return -EINVAL; @@ -233,6 +263,8 @@ static int psp_tmr_init(struct psp_context *psp) { int ret; int tmr_size; + void *tmr_buf; + void **pptr; /* * According to HW engineer, they prefer the TMR address be "naturally @@ -245,7 +277,8 @@ static int psp_tmr_init(struct psp_context *psp) /* For ASICs support RLC autoload, psp will parse the toc * and calculate the total size of TMR needed */ - if (psp->toc_start_addr && + if (!amdgpu_sriov_vf(psp->adev) && + psp->toc_start_addr && psp->toc_bin_size && psp->fw_pri_buf) { ret = psp_load_toc(psp, &tmr_size); @@ -255,9 +288,10 @@ static int psp_tmr_init(struct psp_context *psp) } } + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + &psp->tmr_bo, &psp->tmr_mc_addr, pptr); return ret; } @@ -278,47 +312,23 @@ static int psp_tmr_load(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - if (ret) - goto failed; kfree(cmd); - return 0; - -failed: - kfree(cmd); return ret; } -static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint64_t asd_mc, uint64_t asd_mc_shared, - uint32_t size, uint32_t shared_size) +static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t asd_mc, uint32_t size) { cmd->cmd_id = GFX_CMD_ID_LOAD_ASD; cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(asd_mc); cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(asd_mc); cmd->cmd.cmd_load_ta.app_len = size; - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(asd_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(asd_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; -} - -static int psp_asd_init(struct psp_context *psp) -{ - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for shared ASD <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->asd_shared_bo, - &psp->asd_shared_mc_addr, - &psp->asd_shared_buf); - - return ret; + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = 0; + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = 0; + cmd->cmd.cmd_load_ta.cmd_buf_len = 0; } static int psp_asd_load(struct psp_context *psp) @@ -340,11 +350,49 @@ static int psp_asd_load(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size); - psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->asd_shared_mc_addr, - psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); + psp_prep_asd_load_cmd_buf(cmd, psp->fw_pri_mc_addr, + psp->asd_ucode_size); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + if (!ret) { + psp->asd_context.asd_initialized = true; + psp->asd_context.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} + +static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t session_id) +{ + cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; + cmd->cmd.cmd_unload_ta.session_id = session_id; +} + +static int psp_asd_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->asd_context.asd_initialized) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_ta_unload_cmd_buf(cmd, psp->asd_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + if (!ret) + psp->asd_context.asd_initialized = false; kfree(cmd); @@ -379,18 +427,20 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, return ret; } -static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared, - uint32_t xgmi_ta_size, uint32_t shared_size) +static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t ta_bin_mc, + uint32_t ta_bin_size, + uint64_t ta_shared_mc, + uint32_t ta_shared_size) { - cmd->cmd_id = GFX_CMD_ID_LOAD_TA; - cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc); - cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc); - cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size; - - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc); + cmd->cmd.cmd_load_ta.app_len = ta_bin_size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ta_shared_mc); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ta_shared_mc); + cmd->cmd.cmd_load_ta.cmd_buf_len = ta_shared_size; } static int psp_xgmi_init_shared_buf(struct psp_context *psp) @@ -410,6 +460,36 @@ static int psp_xgmi_init_shared_buf(struct psp_context *psp) return ret; } +static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + uint32_t session_id) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; +} + +int psp_ta_invoke(struct psp_context *psp, + uint32_t ta_cmd_id, + uint32_t session_id) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + static int psp_xgmi_load(struct psp_context *psp) { int ret; @@ -418,8 +498,6 @@ static int psp_xgmi_load(struct psp_context *psp) /* * TODO: bypass the loading in sriov for now */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) @@ -428,9 +506,11 @@ static int psp_xgmi_load(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size); - psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->xgmi_context.xgmi_shared_mc_addr, - psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE); + psp_prep_ta_load_cmd_buf(cmd, + psp->fw_pri_mc_addr, + psp->ta_xgmi_ucode_size, + psp->xgmi_context.xgmi_shared_mc_addr, + PSP_XGMI_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -445,29 +525,25 @@ static int psp_xgmi_load(struct psp_context *psp) return ret; } -static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t xgmi_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; - cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id; -} - static int psp_xgmi_unload(struct psp_context *psp) { int ret; struct psp_gfx_cmd_resp *cmd; + struct amdgpu_device *adev = psp->adev; + + /* XGMI TA unload currently is not supported on Arcturus */ + if (adev->asic_type == CHIP_ARCTURUS) + return 0; /* * TODO: bypass the unloading in sriov for now */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; - psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -477,40 +553,9 @@ static int psp_xgmi_unload(struct psp_context *psp) return ret; } -static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ta_cmd_id, - uint32_t xgmi_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; - /* Note: cmd_invoke_cmd.buf is not used for now */ -} - int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { - int ret; - struct psp_gfx_cmd_resp *cmd; - - /* - * TODO: bypass the loading in sriov for now - */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id, - psp->xgmi_context.session_id); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, - psp->fence_buf_mc_addr); - - kfree(cmd); - - return ret; + return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.session_id); } static int psp_xgmi_terminate(struct psp_context *psp) @@ -539,7 +584,9 @@ static int psp_xgmi_initialize(struct psp_context *psp) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - if (!psp->adev->psp.ta_fw) + if (!psp->adev->psp.ta_fw || + !psp->adev->psp.ta_xgmi_ucode_size || + !psp->adev->psp.ta_xgmi_start_addr) return -ENOENT; if (!psp->xgmi_context.initialized) { @@ -564,20 +611,6 @@ static int psp_xgmi_initialize(struct psp_context *psp) } // ras begin -static void psp_prep_ras_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint64_t ras_ta_mc, uint64_t ras_mc_shared, - uint32_t ras_ta_size, uint32_t shared_size) -{ - cmd->cmd_id = GFX_CMD_ID_LOAD_TA; - cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ras_ta_mc); - cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ras_ta_mc); - cmd->cmd.cmd_load_ta.app_len = ras_ta_size; - - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ras_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ras_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; -} - static int psp_ras_init_shared_buf(struct psp_context *psp) { int ret; @@ -613,15 +646,17 @@ static int psp_ras_load(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size); - psp_prep_ras_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ras.ras_shared_mc_addr, - psp->ta_ras_ucode_size, PSP_RAS_SHARED_MEM_SIZE); + psp_prep_ta_load_cmd_buf(cmd, + psp->fw_pri_mc_addr, + psp->ta_ras_ucode_size, + psp->ras.ras_shared_mc_addr, + PSP_RAS_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->ras.ras_initialized = 1; + psp->ras.ras_initialized = true; psp->ras.session_id = cmd->resp.session_id; } @@ -630,13 +665,6 @@ static int psp_ras_load(struct psp_context *psp) return ret; } -static void psp_prep_ras_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ras_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; - cmd->cmd.cmd_unload_ta.session_id = ras_session_id; -} - static int psp_ras_unload(struct psp_context *psp) { int ret; @@ -652,7 +680,7 @@ static int psp_ras_unload(struct psp_context *psp) if (!cmd) return -ENOMEM; - psp_prep_ras_ta_unload_cmd_buf(cmd, psp->ras.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->ras.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -662,40 +690,15 @@ static int psp_ras_unload(struct psp_context *psp) return ret; } -static void psp_prep_ras_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ta_cmd_id, - uint32_t ras_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = ras_session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; - /* Note: cmd_invoke_cmd.buf is not used for now */ -} - int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { - int ret; - struct psp_gfx_cmd_resp *cmd; - /* * TODO: bypass the loading in sriov for now */ if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_ras_ta_invoke_cmd_buf(cmd, ta_cmd_id, - psp->ras.session_id); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, - psp->fence_buf_mc_addr); - - kfree(cmd); - - return ret; + return psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id); } int psp_ras_enable_features(struct psp_context *psp, @@ -728,6 +731,12 @@ static int psp_ras_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->ras.ras_initialized) return 0; @@ -735,7 +744,7 @@ static int psp_ras_terminate(struct psp_context *psp) if (ret) return ret; - psp->ras.ras_initialized = 0; + psp->ras.ras_initialized = false; /* free ras shared memory */ amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo, @@ -749,6 +758,18 @@ static int psp_ras_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_ras_ucode_size || + !psp->adev->psp.ta_ras_start_addr) { + dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n"); + return 0; + } + if (!psp->ras.ras_initialized) { ret = psp_ras_init_shared_buf(psp); if (ret) @@ -763,6 +784,274 @@ static int psp_ras_initialize(struct psp_context *psp) } // ras end +// HDCP start +static int psp_hdcp_init_shared_buf(struct psp_context *psp) +{ + int ret; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for hdcp ta <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_HDCP_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->hdcp_context.hdcp_shared_bo, + &psp->hdcp_context.hdcp_shared_mc_addr, + &psp->hdcp_context.hdcp_shared_buf); + + return ret; +} + +static int psp_hdcp_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->ta_hdcp_start_addr, + psp->ta_hdcp_ucode_size); + + psp_prep_ta_load_cmd_buf(cmd, + psp->fw_pri_mc_addr, + psp->ta_hdcp_ucode_size, + psp->hdcp_context.hdcp_shared_mc_addr, + PSP_HDCP_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + if (!ret) { + psp->hdcp_context.hdcp_initialized = true; + psp->hdcp_context.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} +static int psp_hdcp_initialize(struct psp_context *psp) +{ + int ret; + + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_hdcp_ucode_size || + !psp->adev->psp.ta_hdcp_start_addr) { + dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n"); + return 0; + } + + if (!psp->hdcp_context.hdcp_initialized) { + ret = psp_hdcp_init_shared_buf(psp); + if (ret) + return ret; + } + + ret = psp_hdcp_load(psp); + if (ret) + return ret; + + return 0; +} + +static int psp_hdcp_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the unloading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id) +{ + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + return psp_ta_invoke(psp, ta_cmd_id, psp->hdcp_context.session_id); +} + +static int psp_hdcp_terminate(struct psp_context *psp) +{ + int ret; + + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->hdcp_context.hdcp_initialized) + return 0; + + ret = psp_hdcp_unload(psp); + if (ret) + return ret; + + psp->hdcp_context.hdcp_initialized = false; + + /* free hdcp shared memory */ + amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, + &psp->hdcp_context.hdcp_shared_mc_addr, + &psp->hdcp_context.hdcp_shared_buf); + + return 0; +} +// HDCP end + +// DTM start +static int psp_dtm_init_shared_buf(struct psp_context *psp) +{ + int ret; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for dtm ta <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_DTM_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->dtm_context.dtm_shared_bo, + &psp->dtm_context.dtm_shared_mc_addr, + &psp->dtm_context.dtm_shared_buf); + + return ret; +} + +static int psp_dtm_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size); + + psp_prep_ta_load_cmd_buf(cmd, + psp->fw_pri_mc_addr, + psp->ta_dtm_ucode_size, + psp->dtm_context.dtm_shared_mc_addr, + PSP_DTM_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + if (!ret) { + psp->dtm_context.dtm_initialized = true; + psp->dtm_context.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} + +static int psp_dtm_initialize(struct psp_context *psp) +{ + int ret; + + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_dtm_ucode_size || + !psp->adev->psp.ta_dtm_start_addr) { + dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n"); + return 0; + } + + if (!psp->dtm_context.dtm_initialized) { + ret = psp_dtm_init_shared_buf(psp); + if (ret) + return ret; + } + + ret = psp_dtm_load(psp); + if (ret) + return ret; + + return 0; +} + +int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id) +{ + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + return psp_ta_invoke(psp, ta_cmd_id, psp->dtm_context.session_id); +} + +static int psp_dtm_terminate(struct psp_context *psp) +{ + int ret; + + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->dtm_context.dtm_initialized) + return 0; + + ret = psp_hdcp_unload(psp); + if (ret) + return ret; + + psp->dtm_context.dtm_initialized = false; + + /* free hdcp shared memory */ + amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, + &psp->dtm_context.dtm_shared_mc_addr, + &psp->dtm_context.dtm_shared_buf); + + return 0; +} +// DTM end + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -809,36 +1098,6 @@ static int psp_hw_start(struct psp_context *psp) return ret; } - ret = psp_asd_init(psp); - if (ret) { - DRM_ERROR("PSP asd init failed!\n"); - return ret; - } - - ret = psp_asd_load(psp); - if (ret) { - DRM_ERROR("PSP load asd failed!\n"); - return ret; - } - - if (adev->gmc.xgmi.num_physical_nodes > 1) { - ret = psp_xgmi_initialize(psp); - /* Warning the XGMI seesion initialize failure - * Instead of stop driver initialization - */ - if (ret) - dev_err(psp->adev->dev, - "XGMI: Failed to initialize XGMI session\n"); - } - - - if (psp->adev->psp.ta_fw) { - ret = psp_ras_initialize(psp); - if (ret) - dev_err(psp->adev->dev, - "RAS: Failed to initialize RAS\n"); - } - return 0; } @@ -852,6 +1111,24 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_SDMA1: *type = GFX_FW_TYPE_SDMA1; break; + case AMDGPU_UCODE_ID_SDMA2: + *type = GFX_FW_TYPE_SDMA2; + break; + case AMDGPU_UCODE_ID_SDMA3: + *type = GFX_FW_TYPE_SDMA3; + break; + case AMDGPU_UCODE_ID_SDMA4: + *type = GFX_FW_TYPE_SDMA4; + break; + case AMDGPU_UCODE_ID_SDMA5: + *type = GFX_FW_TYPE_SDMA5; + break; + case AMDGPU_UCODE_ID_SDMA6: + *type = GFX_FW_TYPE_SDMA6; + break; + case AMDGPU_UCODE_ID_SDMA7: + *type = GFX_FW_TYPE_SDMA7; + break; case AMDGPU_UCODE_ID_CP_CE: *type = GFX_FW_TYPE_CP_CE; break; @@ -900,6 +1177,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_VCN: *type = GFX_FW_TYPE_VCN; break; + case AMDGPU_UCODE_ID_VCN1: + *type = GFX_FW_TYPE_VCN1; + break; case AMDGPU_UCODE_ID_DMCU_ERAM: *type = GFX_FW_TYPE_DMCU_ERAM; break; @@ -912,6 +1192,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_VCN1_RAM: *type = GFX_FW_TYPE_VCN1_RAM; break; + case AMDGPU_UCODE_ID_DMCUB: + *type = GFX_FW_TYPE_DMUB; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; @@ -920,6 +1203,54 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, return 0; } +static void psp_print_fw_hdr(struct psp_context *psp, + struct amdgpu_firmware_info *ucode) +{ + struct amdgpu_device *adev = psp->adev; + struct common_firmware_header *hdr; + + switch (ucode->ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + case AMDGPU_UCODE_ID_SDMA1: + case AMDGPU_UCODE_ID_SDMA2: + case AMDGPU_UCODE_ID_SDMA3: + case AMDGPU_UCODE_ID_SDMA4: + case AMDGPU_UCODE_ID_SDMA5: + case AMDGPU_UCODE_ID_SDMA6: + case AMDGPU_UCODE_ID_SDMA7: + hdr = (struct common_firmware_header *) + adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; + amdgpu_ucode_print_sdma_hdr(hdr); + break; + case AMDGPU_UCODE_ID_CP_CE: + hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); + break; + case AMDGPU_UCODE_ID_CP_PFP: + hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); + break; + case AMDGPU_UCODE_ID_CP_ME: + hdr = (struct common_firmware_header *)adev->gfx.me_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); + break; + case AMDGPU_UCODE_ID_CP_MEC1: + hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); + break; + case AMDGPU_UCODE_ID_RLC_G: + hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(hdr); + break; + case AMDGPU_UCODE_ID_SMC: + hdr = (struct common_firmware_header *)adev->pm.fw->data; + amdgpu_ucode_print_smc_hdr(hdr); + break; + default: + break; + } +} + static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) { @@ -980,24 +1311,39 @@ out: if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && (psp_smu_reload_quirk(psp) || psp->autoload_supported)) continue; + if (amdgpu_sriov_vf(adev) && (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7 + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM + || ucode->ucode_id == AMDGPU_UCODE_ID_SMC)) /*skip ucode loading in SRIOV VF */ continue; + if (psp->autoload_supported && (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT || ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT)) /* skip mec JT when autoload is enabled */ continue; + psp_print_fw_hdr(psp, ucode); + ret = psp_execute_np_fw_load(psp, ucode); if (ret) return ret; /* Start rlc autoload after psp recieved all the gfx firmware */ - if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? + AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) { ret = psp_rlc_autoload(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); @@ -1028,16 +1374,13 @@ static int psp_load_fw(struct amdgpu_device *adev) if (!psp->cmd) return -ENOMEM; - /* this fw pri bo is not used under SRIOV */ - if (!amdgpu_sriov_vf(psp->adev)) { - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_GTT, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); - if (ret) - goto failed; - } + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, + AMDGPU_GEM_DOMAIN_GTT, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); + if (ret) + goto failed; ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, @@ -1071,6 +1414,39 @@ skip_memalloc: if (ret) goto failed; + ret = psp_asd_load(psp); + if (ret) { + DRM_ERROR("PSP load asd failed!\n"); + return ret; + } + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + ret = psp_xgmi_initialize(psp); + /* Warning the XGMI seesion initialize failure + * Instead of stop driver initialization + */ + if (ret) + dev_err(psp->adev->dev, + "XGMI: Failed to initialize XGMI session\n"); + } + + if (psp->adev->psp.ta_fw) { + ret = psp_ras_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "RAS: Failed to initialize RAS\n"); + + ret = psp_hdcp_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "HDCP: Failed to initialize HDCP\n"); + + ret = psp_dtm_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "DTM: Failed to initialize DTM\n"); + } + return 0; failed: @@ -1115,23 +1491,29 @@ static int psp_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; + void *tmr_buf; + void **pptr; if (adev->gmc.xgmi.num_physical_nodes > 1 && psp->xgmi_context.initialized == 1) psp_xgmi_terminate(psp); - if (psp->adev->psp.ta_fw) + if (psp->adev->psp.ta_fw) { psp_ras_terminate(psp); + psp_dtm_terminate(psp); + psp_hdcp_terminate(psp); + } + + psp_asd_unload(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, &psp->fence_buf_mc_addr, &psp->fence_buf); - amdgpu_bo_free_kernel(&psp->asd_shared_bo, &psp->asd_shared_mc_addr, - &psp->asd_shared_buf); amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, (void **)&psp->cmd_buf_mem); @@ -1162,6 +1544,16 @@ static int psp_suspend(void *handle) DRM_ERROR("Failed to terminate ras ta\n"); return ret; } + ret = psp_hdcp_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate hdcp ta\n"); + return ret; + } + ret = psp_dtm_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate dtm ta\n"); + return ret; + } } ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); @@ -1181,6 +1573,12 @@ static int psp_resume(void *handle) DRM_INFO("PSP is resuming...\n"); + ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME); + if (ret) { + DRM_ERROR("Failed to process memory training!\n"); + return ret; + } + mutex_lock(&adev->firmware.mutex); ret = psp_hw_start(psp); @@ -1191,6 +1589,39 @@ static int psp_resume(void *handle) if (ret) goto failed; + ret = psp_asd_load(psp); + if (ret) { + DRM_ERROR("PSP load asd failed!\n"); + goto failed; + } + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + ret = psp_xgmi_initialize(psp); + /* Warning the XGMI seesion initialize failure + * Instead of stop driver initialization + */ + if (ret) + dev_err(psp->adev->dev, + "XGMI: Failed to initialize XGMI session\n"); + } + + if (psp->adev->psp.ta_fw) { + ret = psp_ras_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "RAS: Failed to initialize RAS\n"); + + ret = psp_hdcp_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "HDCP: Failed to initialize HDCP\n"); + + ret = psp_dtm_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "DTM: Failed to initialize DTM\n"); + } + mutex_unlock(&adev->firmware.mutex); return 0; @@ -1220,9 +1651,6 @@ int psp_rlc_autoload_start(struct psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; - if (amdgpu_sriov_vf(psp->adev)) - return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -1248,6 +1676,56 @@ int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, return psp_execute_np_fw_load(&adev->psp, &ucode); } +int psp_ring_cmd_submit(struct psp_context *psp, + uint64_t cmd_buf_mc_addr, + uint64_t fence_mc_addr, + int index) +{ + unsigned int psp_write_ptr_reg = 0; + struct psp_gfx_rb_frame *write_frame; + struct psp_ring *ring = &psp->km_ring; + struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; + struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + + ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; + struct amdgpu_device *adev = psp->adev; + uint32_t ring_size_dw = ring->ring_size / 4; + uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; + + /* KM (GPCOM) prepare write pointer */ + psp_write_ptr_reg = psp_ring_get_wptr(psp); + + /* Update KM RB frame pointer to new frame */ + /* write_frame ptr increments by size of rb_frame in bytes */ + /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ + if ((psp_write_ptr_reg % ring_size_dw) == 0) + write_frame = ring_buffer_start; + else + write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); + /* Check invalid write_frame ptr address */ + if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { + DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + DRM_ERROR("write_frame is pointing to address out of bounds\n"); + return -EINVAL; + } + + /* Initialize KM RB frame */ + memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); + + /* Update KM RB frame */ + write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); + write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); + write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); + write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); + write_frame->fence_value = index; + amdgpu_asic_flush_hdp(adev, NULL); + + /* Update the write Pointer in DWORDs */ + psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; + psp_ring_set_wptr(psp, psp_write_ptr_reg); + return 0; +} + static bool psp_check_fw_loading_status(struct amdgpu_device *adev, enum AMDGPU_UCODE_ID ucode_type) { @@ -1329,3 +1807,12 @@ const struct amdgpu_ip_block_version psp_v11_0_ip_block = .rev = 0, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v12_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 12, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index e0fc2a790e53..611021514c52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -32,11 +32,13 @@ #define PSP_FENCE_BUFFER_SIZE 0x1000 #define PSP_CMD_BUFFER_SIZE 0x1000 -#define PSP_ASD_SHARED_MEM_SIZE 0x4000 #define PSP_XGMI_SHARED_MEM_SIZE 0x4000 #define PSP_RAS_SHARED_MEM_SIZE 0x4000 #define PSP_1_MEG 0x100000 #define PSP_TMR_SIZE 0x400000 +#define PSP_HDCP_SHARED_MEM_SIZE 0x4000 +#define PSP_DTM_SHARED_MEM_SIZE 0x4000 +#define PSP_SHARED_MEM_SIZE 0x4000 struct psp_context; struct psp_xgmi_node_info; @@ -46,6 +48,8 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_SYSDRV = 0x10000, PSP_BL__LOAD_SOSDRV = 0x20000, PSP_BL__LOAD_KEY_DATABASE = 0x80000, + PSP_BL__DRAM_LONG_TRAIN = 0x100000, + PSP_BL__DRAM_SHORT_TRAIN = 0x200000, }; enum psp_ring_type @@ -89,10 +93,6 @@ struct psp_funcs enum psp_ring_type ring_type); int (*ring_destroy)(struct psp_context *psp, enum psp_ring_type ring_type); - int (*cmd_submit)(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index); bool (*compare_sram_data)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, enum AMDGPU_UCODE_ID ucode_type); @@ -109,6 +109,11 @@ struct psp_funcs struct ta_ras_trigger_error_input *info); int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); int (*rlc_autoload_start)(struct psp_context *psp); + int (*mem_training_init)(struct psp_context *psp); + void (*mem_training_fini)(struct psp_context *psp); + int (*mem_training)(struct psp_context *psp, uint32_t ops); + uint32_t (*ring_get_wptr)(struct psp_context *psp); + void (*ring_set_wptr)(struct psp_context *psp, uint32_t value); }; #define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 @@ -124,6 +129,11 @@ struct psp_xgmi_topology_info { struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; }; +struct psp_asd_context { + bool asd_initialized; + uint32_t session_id; +}; + struct psp_xgmi_context { uint8_t initialized; uint32_t session_id; @@ -143,6 +153,66 @@ struct psp_ras_context { struct amdgpu_ras *ras; }; +struct psp_hdcp_context { + bool hdcp_initialized; + uint32_t session_id; + struct amdgpu_bo *hdcp_shared_bo; + uint64_t hdcp_shared_mc_addr; + void *hdcp_shared_buf; +}; + +struct psp_dtm_context { + bool dtm_initialized; + uint32_t session_id; + struct amdgpu_bo *dtm_shared_bo; + uint64_t dtm_shared_mc_addr; + void *dtm_shared_buf; +}; + +#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 +#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 +#define GDDR6_MEM_TRAINING_OFFSET 0x8000 +/*Define the VRAM size that will be encroached by BIST training.*/ +#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000 + +enum psp_memory_training_init_flag { + PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, + PSP_MEM_TRAIN_SUPPORT = 0x1, + PSP_MEM_TRAIN_INIT_FAILED = 0x2, + PSP_MEM_TRAIN_RESERVE_SUCCESS = 0x4, + PSP_MEM_TRAIN_INIT_SUCCESS = 0x8, +}; + +enum psp_memory_training_ops { + PSP_MEM_TRAIN_SEND_LONG_MSG = 0x1, + PSP_MEM_TRAIN_SAVE = 0x2, + PSP_MEM_TRAIN_RESTORE = 0x4, + PSP_MEM_TRAIN_SEND_SHORT_MSG = 0x8, + PSP_MEM_TRAIN_COLD_BOOT = PSP_MEM_TRAIN_SEND_LONG_MSG, + PSP_MEM_TRAIN_RESUME = PSP_MEM_TRAIN_SEND_SHORT_MSG, +}; + +struct psp_memory_training_context { + /*training data size*/ + u64 train_data_size; + /* + * sys_cache + * cpu virtual address + * system memory buffer that used to store the training data. + */ + void *sys_cache; + + /*vram offset of the p2c training data*/ + u64 p2c_train_data_offset; + + /*vram offset of the c2p training data*/ + u64 c2p_train_data_offset; + struct amdgpu_bo *c2p_bo; + + enum psp_memory_training_init_flag init; + u32 training_cnt; +}; + struct psp_context { struct amdgpu_device *adev; @@ -172,17 +242,13 @@ struct psp_context /* tmr buffer */ struct amdgpu_bo *tmr_bo; uint64_t tmr_mc_addr; - void *tmr_buf; - /* asd firmware and buffer */ + /* asd firmware */ const struct firmware *asd_fw; uint32_t asd_fw_version; uint32_t asd_feature_version; uint32_t asd_ucode_size; uint8_t *asd_start_addr; - struct amdgpu_bo *asd_shared_bo; - uint64_t asd_shared_mc_addr; - void *asd_shared_buf; /* fence buffer */ struct amdgpu_bo *fence_buf_bo; @@ -208,9 +274,22 @@ struct psp_context uint32_t ta_ras_ucode_version; uint32_t ta_ras_ucode_size; uint8_t *ta_ras_start_addr; + + uint32_t ta_hdcp_ucode_version; + uint32_t ta_hdcp_ucode_size; + uint8_t *ta_hdcp_start_addr; + + uint32_t ta_dtm_ucode_version; + uint32_t ta_dtm_ucode_size; + uint8_t *ta_dtm_start_addr; + + struct psp_asd_context asd_context; struct psp_xgmi_context xgmi_context; struct psp_ras_context ras; + struct psp_hdcp_context hdcp_context; + struct psp_dtm_context dtm_context; struct mutex mutex; + struct psp_memory_training_context mem_train_ctx; }; struct amdgpu_psp_funcs { @@ -223,8 +302,6 @@ struct amdgpu_psp_funcs { #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type)) #define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type))) -#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ - (psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) #define psp_compare_sram_data(psp, ucode, type) \ (psp)->funcs->compare_sram_data((psp), (ucode), (type)) #define psp_init_microcode(psp) \ @@ -253,6 +330,12 @@ struct amdgpu_psp_funcs { (psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL) #define psp_rlc_autoload(psp) \ ((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0) +#define psp_mem_training_init(psp) \ + ((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0) +#define psp_mem_training_fini(psp) \ + ((psp)->funcs->mem_training_fini ? (psp)->funcs->mem_training_fini((psp)) : 0) +#define psp_mem_training(psp, ops) \ + ((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0) #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) @@ -263,6 +346,9 @@ struct amdgpu_psp_funcs { ((psp)->funcs->ras_cure_posion ? \ (psp)->funcs->ras_cure_posion(psp, (addr)) : -EINVAL) +#define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp)) +#define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value)) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -270,6 +356,7 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, bool check_changed); extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; +extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; int psp_gpu_reset(struct amdgpu_device *adev); int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, @@ -280,10 +367,16 @@ int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); +int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); +int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_rlc_autoload_start(struct psp_context *psp); extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, uint32_t value); +int psp_ring_cmd_submit(struct psp_context *psp, + uint64_t cmd_buf_mc_addr, + uint64_t fence_mc_addr, + int index); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index fac7aa2c244f..cef94e2169fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -25,78 +25,13 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/uaccess.h> +#include <linux/reboot.h> +#include <linux/syscalls.h> #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_atomfirmware.h" - -struct ras_ih_data { - /* interrupt bottom half */ - struct work_struct ih_work; - int inuse; - /* IP callback */ - ras_ih_cb cb; - /* full of entries */ - unsigned char *ring; - unsigned int ring_size; - unsigned int element_size; - unsigned int aligned_element_size; - unsigned int rptr; - unsigned int wptr; -}; - -struct ras_fs_data { - char sysfs_name[32]; - char debugfs_name[32]; -}; - -struct ras_err_data { - unsigned long ue_count; - unsigned long ce_count; -}; - -struct ras_err_handler_data { - /* point to bad pages array */ - struct { - unsigned long bp; - struct amdgpu_bo *bo; - } *bps; - /* the count of entries */ - int count; - /* the space can place new entries */ - int space_left; - /* last reserved entry's index + 1 */ - int last_reserved; -}; - -struct ras_manager { - struct ras_common_if head; - /* reference count */ - int use; - /* ras block link */ - struct list_head node; - /* the device */ - struct amdgpu_device *adev; - /* debugfs */ - struct dentry *ent; - /* sysfs */ - struct device_attribute sysfs_attr; - int attr_inuse; - - /* fs node name */ - struct ras_fs_data fs_data; - - /* IH data */ - struct ras_ih_data ih_data; - - struct ras_err_data err_data; -}; - -struct ras_badpage { - unsigned int bp; - unsigned int size; - unsigned int flags; -}; +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" const char *ras_error_string[] = { "none", @@ -130,11 +65,19 @@ const char *ras_block_string[] = { #define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) -static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, - uint64_t offset, uint64_t size, - struct amdgpu_bo **bo_ptr); -static int amdgpu_ras_release_vram(struct amdgpu_device *adev, - struct amdgpu_bo **bo_ptr); +/* inject address is 52 bits */ +#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) + +enum amdgpu_ras_retire_page_reservation { + AMDGPU_RAS_RETIRE_PAGE_RESERVED, + AMDGPU_RAS_RETIRE_PAGE_PENDING, + AMDGPU_RAS_RETIRE_PAGE_FAULT, +}; + +atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); + +static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, + uint64_t addr); static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) @@ -196,6 +139,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, char err[9] = "ue"; int op = -1; int block_id; + uint32_t sub_block; u64 address, value; if (*pos) @@ -223,17 +167,23 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return -EINVAL; data->head.block = block_id; - data->head.type = memcmp("ue", err, 2) == 0 ? - AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE : - AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + /* only ue and ce errors are supported */ + if (!memcmp("ue", err, 2)) + data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + else if (!memcmp("ce", err, 2)) + data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + else + return -EINVAL; + data->op = op; if (op == 2) { - if (sscanf(str, "%*s %*s %*s %llu %llu", - &address, &value) != 2) - if (sscanf(str, "%*s %*s %*s 0x%llx 0x%llx", - &address, &value) != 2) + if (sscanf(str, "%*s %*s %*s %u %llu %llu", + &sub_block, &address, &value) != 3) + if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", + &sub_block, &address, &value) != 3) return -EINVAL; + data->head.sub_block_index = sub_block; data->inject.address = address; data->inject.value = value; } @@ -247,6 +197,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } + /** * DOC: AMDGPU RAS debugfs control interface * @@ -266,32 +217,46 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * As their names indicate, inject operation will write the * value to the address. * - * Second member: struct ras_debug_if::op. + * The second member: struct ras_debug_if::op. * It has three kinds of operations. - * 0: disable RAS on the block. Take ::head as its data. - * 1: enable RAS on the block. Take ::head as its data. - * 2: inject errors on the block. Take ::inject as its data. + * + * - 0: disable RAS on the block. Take ::head as its data. + * - 1: enable RAS on the block. Take ::head as its data. + * - 2: inject errors on the block. Take ::inject as its data. * * How to use the interface? - * programs: - * copy the struct ras_debug_if in your codes and initialize it. - * write the struct to the control node. * - * bash: - * echo op block [error [address value]] > .../ras/ras_ctrl - * op: disable, enable, inject - * disable: only block is needed - * enable: block and error are needed - * inject: error, address, value are needed - * block: umc, smda, gfx, ......... - * see ras_block_string[] for details - * error: ue, ce - * ue: multi_uncorrectable - * ce: single_correctable + * Programs + * + * Copy the struct ras_debug_if in your codes and initialize it. + * Write the struct to the control node. + * + * Shells + * + * .. code-block:: bash + * + * echo op block [error [sub_block address value]] > .../ras/ras_ctrl * - * here are some examples for bash commands, - * echo inject umc ue 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl - * echo inject umc ce 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + * Parameters: + * + * op: disable, enable, inject + * disable: only block is needed + * enable: block and error are needed + * inject: error, address, value are needed + * block: umc, sdma, gfx, ......... + * see ras_block_string[] for details + * error: ue, ce + * ue: multi_uncorrectable + * ce: single_correctable + * sub_block: + * sub block index, pass 0 if there is no sub block + * + * here are some examples for bash commands: + * + * .. code-block:: bash + * + * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl * * How to check the result? @@ -302,15 +267,17 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * For inject, please check corresponding err count at * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count * - * NOTE: operation is only allowed on blocks which are supported. - * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + * .. note:: + * Operations are only allowed on blocks which are supported. + * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + * to see which blocks support RAS on a particular asic. + * */ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; struct ras_debug_if data; - struct amdgpu_bo *bo; int ret = 0; ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data); @@ -328,22 +295,27 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * ret = amdgpu_ras_feature_enable(adev, &data.head, 1); break; case 2: - ret = amdgpu_ras_reserve_vram(adev, - data.inject.address, PAGE_SIZE, &bo); - if (ret) { - /* address was offset, now it is absolute.*/ - data.inject.address += adev->gmc.vram_start; - if (data.inject.address > adev->gmc.vram_end) - break; - } else - data.inject.address = amdgpu_bo_gpu_offset(bo); + if ((data.inject.address >= adev->gmc.mc_vram_size) || + (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) { + ret = -EINVAL; + break; + } + + /* umc ce/ue error injection for a bad page is not allowed */ + if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) && + amdgpu_ras_check_bad_page(adev, data.inject.address)) { + DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n", + data.inject.address); + break; + } + + /* data.inject.address is offset instead of absolute gpu address */ ret = amdgpu_ras_error_inject(adev, &data.inject); - amdgpu_ras_release_vram(adev, &bo); break; default: ret = -EINVAL; break; - }; + } if (ret) return -EINVAL; @@ -351,6 +323,33 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * return size; } +/** + * DOC: AMDGPU RAS debugfs EEPROM table reset interface + * + * Some boards contain an EEPROM which is used to persistently store a list of + * bad pages which experiences ECC errors in vram. This interface provides + * a way to reset the EEPROM, e.g., after testing error injection. + * + * Usage: + * + * .. code-block:: bash + * + * echo 1 > ../ras/ras_eeprom_reset + * + * will reset EEPROM table to 0 entries. + * + */ +static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + int ret; + + ret = amdgpu_ras_eeprom_reset_table(&adev->psp.ras.ras->eeprom_control); + + return ret == 1 ? size : -EIO; +} + static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = { .owner = THIS_MODULE, .read = NULL, @@ -358,6 +357,34 @@ static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = { + .owner = THIS_MODULE, + .read = NULL, + .write = amdgpu_ras_debugfs_eeprom_write, + .llseek = default_llseek +}; + +/** + * DOC: AMDGPU RAS sysfs Error Count Interface + * + * It allows the user to read the error count for each IP block on the gpu through + * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count + * + * It outputs the multiple lines which report the uncorrected (ue) and corrected + * (ce) error counts. + * + * The format of one line is below, + * + * [ce|ue]: count + * + * Example: + * + * .. code-block:: bash + * + * ue: 0 + * ce: 1 + * + */ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, struct device_attribute *attr, char *buf) { @@ -415,7 +442,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev, } /* return an obj equal to head, or the first when head is NULL */ -static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, +struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, struct ras_common_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -536,15 +563,17 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) return 0; - ret = psp_ras_enable_features(&adev->psp, &info, enable); - if (ret) { - DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n", - enable ? "enable":"disable", - ras_block_str(head->block), - ret); - if (ret == TA_RAS_STATUS__RESET_NEEDED) - return -EAGAIN; - return -EINVAL; + if (!amdgpu_ras_intr_triggered()) { + ret = psp_ras_enable_features(&adev->psp, &info, enable); + if (ret) { + DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n", + enable ? "enable":"disable", + ras_block_str(head->block), + ret); + if (ret == TA_RAS_STATUS__RESET_NEEDED) + return -EAGAIN; + return -EINVAL; + } } /* setup the obj */ @@ -656,17 +685,77 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, struct ras_query_if *info) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_err_data err_data = {0, 0, 0, NULL}; + int i; if (!obj) return -EINVAL; - /* TODO might read the register to read the count */ + + switch (info->head.block) { + case AMDGPU_RAS_BLOCK__UMC: + if (adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, &err_data); + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + if (adev->umc.funcs->query_ras_error_address) + adev->umc.funcs->query_ras_error_address(adev, &err_data); + break; + case AMDGPU_RAS_BLOCK__SDMA: + if (adev->sdma.funcs->query_ras_error_count) { + for (i = 0; i < adev->sdma.num_instances; i++) + adev->sdma.funcs->query_ras_error_count(adev, i, + &err_data); + } + break; + case AMDGPU_RAS_BLOCK__GFX: + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, &err_data); + break; + case AMDGPU_RAS_BLOCK__MMHUB: + if (adev->mmhub.funcs->query_ras_error_count) + adev->mmhub.funcs->query_ras_error_count(adev, &err_data); + break; + case AMDGPU_RAS_BLOCK__PCIE_BIF: + if (adev->nbio.funcs->query_ras_error_count) + adev->nbio.funcs->query_ras_error_count(adev, &err_data); + break; + default: + break; + } + + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; info->ue_count = obj->err_data.ue_count; info->ce_count = obj->err_data.ce_count; + if (err_data.ce_count) { + dev_info(adev->dev, "%ld correctable errors detected in %s block\n", + obj->err_data.ce_count, ras_block_str(info->head.block)); + } + if (err_data.ue_count) { + dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n", + obj->err_data.ue_count, ras_block_str(info->head.block)); + } + return 0; } +uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr) +{ + uint32_t df_inst_id; + + if ((!adev->df.funcs) || + (!adev->df.funcs->get_df_inst_id) || + (!adev->df.funcs->get_dram_base_addr)) + return addr; + + df_inst_id = adev->df.funcs->get_df_inst_id(adev); + + return addr + adev->df.funcs->get_dram_base_addr(adev, df_inst_id); +} + /* wrapper of psp_ras_trigger_error */ int amdgpu_ras_error_inject(struct amdgpu_device *adev, struct ras_inject_if *info) @@ -684,13 +773,31 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, if (!obj) return -EINVAL; - if (block_info.block_id != TA_RAS_BLOCK__UMC) { + /* Calculate XGMI relative offset */ + if (adev->gmc.xgmi.num_physical_nodes > 1) { + block_info.address = get_xgmi_relative_phy_addr(adev, + block_info.address); + } + + switch (info->head.block) { + case AMDGPU_RAS_BLOCK__GFX: + if (adev->gfx.funcs->ras_error_inject) + ret = adev->gfx.funcs->ras_error_inject(adev, info); + else + ret = -EINVAL; + break; + case AMDGPU_RAS_BLOCK__UMC: + case AMDGPU_RAS_BLOCK__MMHUB: + case AMDGPU_RAS_BLOCK__XGMI_WAFL: + case AMDGPU_RAS_BLOCK__PCIE_BIF: + ret = psp_ras_trigger_error(&adev->psp, &block_info); + break; + default: DRM_INFO("%s error injection is not supported yet\n", ras_block_str(info->head.block)); - return -EINVAL; + ret = -EINVAL; } - ret = psp_ras_trigger_error(&adev->psp, &block_info); if (ret) DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n", ras_block_str(info->head.block), @@ -707,7 +814,7 @@ int amdgpu_ras_error_cure(struct amdgpu_device *adev, } /* get the total error counts on all IPs */ -int amdgpu_ras_query_error_count(struct amdgpu_device *adev, +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -715,7 +822,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, struct ras_err_data data = {0, 0}; if (!con) - return -EINVAL; + return 0; list_for_each_entry(obj, &con->head, node) { struct ras_query_if info = { @@ -723,7 +830,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, }; if (amdgpu_ras_error_query(adev, &info)) - return -EINVAL; + return 0; data.ce_count += info.ce_count; data.ue_count += info.ue_count; @@ -742,18 +849,18 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, static char *amdgpu_ras_badpage_flags_str(unsigned int flags) { switch (flags) { - case 0: + case AMDGPU_RAS_RETIRE_PAGE_RESERVED: return "R"; - case 1: + case AMDGPU_RAS_RETIRE_PAGE_PENDING: return "P"; - case 2: + case AMDGPU_RAS_RETIRE_PAGE_FAULT: default: return "F"; }; } -/* - * DOC: ras sysfs gpu_vram_bad_pages interface +/** + * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface * * It allows user to read the bad pages of vram on the gpu through * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages @@ -765,14 +872,21 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags) * * gpu pfn and gpu page size are printed in hex format. * flags can be one of below character, + * * R: reserved, this gpu page is reserved and not able to use. + * * P: pending for reserve, this gpu page is marked as bad, will be reserved - * in next window of page_reserve. + * in next window of page_reserve. + * * F: unable to reserve. this gpu page can't be reserved due to some reasons. * - * examples: - * 0x00000001 : 0x00001000 : R - * 0x00000002 : 0x00001000 : P + * Examples: + * + * .. code-block:: bash + * + * 0x00000001 : 0x00001000 : R + * 0x00000002 : 0x00001000 : P + * */ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, @@ -812,32 +926,8 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, { struct amdgpu_ras *con = container_of(attr, struct amdgpu_ras, features_attr); - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = ddev->dev_private; - struct ras_common_if head; - int ras_block_count = AMDGPU_RAS_BLOCK_COUNT; - int i; - ssize_t s; - struct ras_manager *obj; - - s = scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); - for (i = 0; i < ras_block_count; i++) { - head.block = i; - - if (amdgpu_ras_is_feature_enabled(adev, &head)) { - obj = amdgpu_ras_find_obj(adev, &head); - s += scnprintf(&buf[s], PAGE_SIZE - s, - "%s: %s\n", - ras_block_str(i), - ras_err_str(obj->head.type)); - } else - s += scnprintf(&buf[s], PAGE_SIZE - s, - "%s: disabled\n", - ras_block_str(i)); - } - - return s; + return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); } static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) @@ -970,6 +1060,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) } /* sysfs end */ +/** + * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors + * + * Normally when there is an uncorrectable error, the driver will reset + * the GPU to recover. However, in the event of an unrecoverable error, + * the driver provides an interface to reboot the system automatically + * in that event. + * + * The following file in debugfs provides that interface: + * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot + * + * Usage: + * + * .. code-block:: bash + * + * echo true > .../ras/auto_reboot + * + */ /* debugfs begin */ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) { @@ -977,8 +1085,21 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) struct drm_minor *minor = adev->ddev->primary; con->dir = debugfs_create_dir("ras", minor->debugfs_root); - con->ent = debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir, - adev, &amdgpu_ras_debugfs_ctrl_ops); + debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir, + adev, &amdgpu_ras_debugfs_ctrl_ops); + debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir, + adev, &amdgpu_ras_debugfs_eeprom_ops); + + /* + * After one uncorrectable error happens, usually GPU recovery will + * be scheduled. But due to the known problem in GPU recovery failing + * to bring GPU back, below interface provides one direct way to + * user to reboot system automatically in such case within + * ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine + * will never be called. + */ + debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir, + &con->reboot); } void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, @@ -1023,10 +1144,8 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) amdgpu_ras_debugfs_remove(adev, &obj->head); } - debugfs_remove(con->ent); - debugfs_remove(con->dir); + debugfs_remove_recursive(con->dir); con->dir = NULL; - con->ent = NULL; } /* debugfs end */ @@ -1054,6 +1173,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) struct ras_ih_data *data = &obj->ih_data; struct amdgpu_iv_entry entry; int ret; + struct ras_err_data err_data = {0, 0, 0, NULL}; while (data->rptr != data->wptr) { rmb(); @@ -1068,19 +1188,19 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) * from the callback to udpate the error type/count, etc */ if (data->cb) { - ret = data->cb(obj->adev, &entry); + ret = data->cb(obj->adev, &err_data, &entry); /* ue will trigger an interrupt, and in that case * we need do a reset to recovery the whole system. * But leave IP do that recovery, here we just dispatch * the error. */ - if (ret == AMDGPU_RAS_UE) { - obj->err_data.ue_count++; + if (ret == AMDGPU_RAS_SUCCESS) { + /* these counts could be left as 0 if + * some blocks do not count error number + */ + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; } - /* Might need get ce count by register, but not all IP - * saves ce count, some IP just use one bit or two bits - * to indicate ce happened. - */ } } } @@ -1219,6 +1339,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, data = con->eh_data; if (!data || data->count == 0) { *bps = NULL; + ret = -EINVAL; goto out; } @@ -1230,15 +1351,15 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, for (; i < data->count; i++) { (*bps)[i] = (struct ras_badpage){ - .bp = data->bps[i].bp, + .bp = data->bps[i].retired_page, .size = AMDGPU_GPU_PAGE_SIZE, - .flags = 0, + .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; if (data->last_reserved <= i) - (*bps)[i].flags = 1; - else if (data->bps[i].bo == NULL) - (*bps)[i].flags = 2; + (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; + else if (data->bps_bo[i] == NULL) + (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; } *count = data->count; @@ -1252,109 +1373,51 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_ras *ras = container_of(work, struct amdgpu_ras, recovery_work); - amdgpu_device_gpu_recover(ras->adev, 0); + if (amdgpu_device_should_recover_gpu(ras->adev)) + amdgpu_device_gpu_recover(ras->adev, 0); atomic_set(&ras->in_recovery, 0); } -static int amdgpu_ras_release_vram(struct amdgpu_device *adev, - struct amdgpu_bo **bo_ptr) -{ - /* no need to free it actually. */ - amdgpu_bo_free_kernel(bo_ptr, NULL, NULL); - return 0; -} - -/* reserve vram with size@offset */ -static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, - uint64_t offset, uint64_t size, - struct amdgpu_bo **bo_ptr) -{ - struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_bo_param bp; - int r = 0; - int i; - struct amdgpu_bo *bo; - - if (bo_ptr) - *bo_ptr = NULL; - memset(&bp, 0, sizeof(bp)); - bp.size = size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; - - r = amdgpu_bo_create(adev, &bp, &bo); - if (r) - return -EINVAL; - - r = amdgpu_bo_reserve(bo, false); - if (r) - goto error_reserve; - - offset = ALIGN(offset, PAGE_SIZE); - for (i = 0; i < bo->placement.num_placement; ++i) { - bo->placements[i].fpfn = offset >> PAGE_SHIFT; - bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; - } - - ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); - r = ttm_bo_mem_space(&bo->tbo, &bo->placement, &bo->tbo.mem, &ctx); - if (r) - goto error_pin; - - r = amdgpu_bo_pin_restricted(bo, - AMDGPU_GEM_DOMAIN_VRAM, - offset, - offset + size); - if (r) - goto error_pin; - - if (bo_ptr) - *bo_ptr = bo; - - amdgpu_bo_unreserve(bo); - return r; - -error_pin: - amdgpu_bo_unreserve(bo); -error_reserve: - amdgpu_bo_unref(&bo); - return r; -} - /* alloc/realloc bps array */ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, struct ras_err_handler_data *data, int pages) { unsigned int old_space = data->count + data->space_left; unsigned int new_space = old_space + pages; - unsigned int align_space = ALIGN(new_space, 1024); - void *tmp = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); - - if (!tmp) + unsigned int align_space = ALIGN(new_space, 512); + void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); + struct amdgpu_bo **bps_bo = + kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL); + + if (!bps || !bps_bo) { + kfree(bps); + kfree(bps_bo); return -ENOMEM; + } if (data->bps) { - memcpy(tmp, data->bps, + memcpy(bps, data->bps, data->count * sizeof(*data->bps)); kfree(data->bps); } + if (data->bps_bo) { + memcpy(bps_bo, data->bps_bo, + data->count * sizeof(*data->bps_bo)); + kfree(data->bps_bo); + } - data->bps = tmp; + data->bps = bps; + data->bps_bo = bps_bo; data->space_left += align_space - old_space; return 0; } /* it deal with vram only. */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - unsigned long *bps, int pages) + struct eeprom_table_record *bps, int pages) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; - int i = pages; int ret = 0; if (!con || !con->eh_data || !bps || pages <= 0) @@ -1371,24 +1434,120 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, goto out; } - while (i--) - data->bps[data->count++].bp = bps[i]; - + memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps)); + data->count += pages; data->space_left -= pages; + out: mutex_unlock(&con->recovery_lock); return ret; } +/* + * write error record array to eeprom, the function should be + * protected by recovery_lock + */ +static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + struct amdgpu_ras_eeprom_control *control; + int save_count; + + if (!con || !con->eh_data) + return 0; + + control = &con->eeprom_control; + data = con->eh_data; + save_count = data->count - control->num_recs; + /* only new entries are saved */ + if (save_count > 0) + if (amdgpu_ras_eeprom_process_recods(control, + &data->bps[control->num_recs], + true, + save_count)) { + DRM_ERROR("Failed to save EEPROM table data!"); + return -EIO; + } + + return 0; +} + +/* + * read error record array in eeprom and reserve enough space for + * storing new bad pages + */ +static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras_eeprom_control *control = + &adev->psp.ras.ras->eeprom_control; + struct eeprom_table_record *bps = NULL; + int ret = 0; + + /* no bad page record, skip eeprom access */ + if (!control->num_recs) + return ret; + + bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL); + if (!bps) + return -ENOMEM; + + if (amdgpu_ras_eeprom_process_recods(control, bps, false, + control->num_recs)) { + DRM_ERROR("Failed to load EEPROM table records!"); + ret = -EIO; + goto out; + } + + ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs); + +out: + kfree(bps); + return ret; +} + +/* + * check if an address belongs to bad page + * + * Note: this check is only for umc block + */ +static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, + uint64_t addr) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + int i; + bool ret = false; + + if (!con || !con->eh_data) + return ret; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data) + goto out; + + addr >>= AMDGPU_GPU_PAGE_SHIFT; + for (i = 0; i < data->count; i++) + if (addr == data->bps[i].retired_page) { + ret = true; + goto out; + } + +out: + mutex_unlock(&con->recovery_lock); + return ret; +} + /* called in gpu recovery/init */ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; uint64_t bp; - struct amdgpu_bo *bo; - int i; + struct amdgpu_bo *bo = NULL; + int i, ret = 0; if (!con || !con->eh_data) return 0; @@ -1399,18 +1558,29 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) goto out; /* reserve vram at driver post stage. */ for (i = data->last_reserved; i < data->count; i++) { - bp = data->bps[i].bp; + bp = data->bps[i].retired_page; - if (amdgpu_ras_reserve_vram(adev, bp << PAGE_SHIFT, - PAGE_SIZE, &bo)) - DRM_ERROR("RAS ERROR: reserve vram %llx fail\n", bp); + /* There are two cases of reserve error should be ignored: + * 1) a ras bad page has been allocated (used by someone); + * 2) a ras bad page has been reserved (duplicate error injection + * for one page); + */ + if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL)) + DRM_WARN("RAS WARN: reserve vram for retired page %llx fail\n", bp); - data->bps[i].bo = bo; + data->bps_bo[i] = bo; data->last_reserved = i + 1; + bo = NULL; } + + /* continue to save bad pages to eeprom even reesrve_vram fails */ + ret = amdgpu_ras_save_bad_pages(adev); out: mutex_unlock(&con->recovery_lock); - return 0; + return ret; } /* called when driver unload */ @@ -1430,11 +1600,11 @@ static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev) goto out; for (i = data->last_reserved - 1; i >= 0; i--) { - bo = data->bps[i].bo; + bo = data->bps_bo[i]; - amdgpu_ras_release_vram(adev, &bo); + amdgpu_bo_free_kernel(&bo, NULL, NULL); - data->bps[i].bo = bo; + data->bps_bo[i] = bo; data->last_reserved = i; } out: @@ -1442,41 +1612,54 @@ out: return 0; } -static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) -{ - /* TODO - * write the array to eeprom when SMU disabled. - */ - return 0; -} - -static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) -{ - /* TODO - * read the array to eeprom when SMU disabled. - */ - return 0; -} - -static int amdgpu_ras_recovery_init(struct amdgpu_device *adev) +int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct ras_err_handler_data **data = &con->eh_data; + struct ras_err_handler_data **data; + int ret; - *data = kmalloc(sizeof(**data), - GFP_KERNEL|__GFP_ZERO); - if (!*data) - return -ENOMEM; + if (con) + data = &con->eh_data; + else + return 0; + + *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO); + if (!*data) { + ret = -ENOMEM; + goto out; + } mutex_init(&con->recovery_lock); INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); atomic_set(&con->in_recovery, 0); con->adev = adev; - amdgpu_ras_load_bad_pages(adev); - amdgpu_ras_reserve_bad_pages(adev); + ret = amdgpu_ras_eeprom_init(&con->eeprom_control); + if (ret) + goto free; + + if (con->eeprom_control.num_recs) { + ret = amdgpu_ras_load_bad_pages(adev); + if (ret) + goto free; + ret = amdgpu_ras_reserve_bad_pages(adev); + if (ret) + goto release; + } return 0; + +release: + amdgpu_ras_release_bad_pages(adev); +free: + kfree((*data)->bps); + kfree((*data)->bps_bo); + kfree(*data); + con->eh_data = NULL; +out: + DRM_WARN("Failed to initialize ras recovery!\n"); + + return ret; } static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) @@ -1484,13 +1667,17 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data = con->eh_data; + /* recovery_init failed to init it, fini is useless */ + if (!data) + return 0; + cancel_work_sync(&con->recovery_work); - amdgpu_ras_save_bad_pages(adev); amdgpu_ras_release_bad_pages(adev); mutex_lock(&con->recovery_lock); con->eh_data = NULL; kfree(data->bps); + kfree(data->bps_bo); kfree(data); mutex_unlock(&con->recovery_lock); @@ -1527,7 +1714,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, *supported = 0; if (amdgpu_sriov_vf(adev) || - adev->asic_type != CHIP_VEGA20) + (adev->asic_type != CHIP_VEGA20 && + adev->asic_type != CHIP_ARCTURUS)) return; if (adev->is_atom_fw && @@ -1542,6 +1730,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, int amdgpu_ras_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int r; if (con) return 0; @@ -1569,8 +1758,17 @@ int amdgpu_ras_init(struct amdgpu_device *adev) /* Might need get this flag from vbios. */ con->flags = RAS_DEFAULT_FLAGS; - if (amdgpu_ras_recovery_init(adev)) - goto recovery_out; + if (adev->nbio.funcs->init_ras_controller_interrupt) { + r = adev->nbio.funcs->init_ras_controller_interrupt(adev); + if (r) + return r; + } + + if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) { + r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev); + if (r) + return r; + } amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK; @@ -1582,14 +1780,84 @@ int amdgpu_ras_init(struct amdgpu_device *adev) con->hw_supported, con->supported); return 0; fs_out: - amdgpu_ras_recovery_fini(adev); -recovery_out: amdgpu_ras_set_context(adev, NULL); kfree(con); return -EINVAL; } +/* helper function to handle common stuff in ip late init phase */ +int amdgpu_ras_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_fs_if *fs_info, + struct ras_ih_if *ih_info) +{ + int r; + + /* disable RAS feature per IP block if it is not supported */ + if (!amdgpu_ras_is_supported(adev, ras_block->block)) { + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); + return 0; + } + + r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1); + if (r) { + if (r == -EAGAIN) { + /* request gpu reset. will run again */ + amdgpu_ras_request_reset_on_boot(adev, + ras_block->block); + return 0; + } else if (adev->in_suspend || adev->in_gpu_reset) { + /* in resume phase, if fail to enable ras, + * clean up all ras fs nodes, and disable ras */ + goto cleanup; + } else + return r; + } + + /* in resume phase, no need to create ras fs node */ + if (adev->in_suspend || adev->in_gpu_reset) + return 0; + + if (ih_info->cb) { + r = amdgpu_ras_interrupt_add_handler(adev, ih_info); + if (r) + goto interrupt; + } + + amdgpu_ras_debugfs_create(adev, fs_info); + + r = amdgpu_ras_sysfs_create(adev, fs_info); + if (r) + goto sysfs; + + return 0; +cleanup: + amdgpu_ras_sysfs_remove(adev, ras_block); +sysfs: + amdgpu_ras_debugfs_remove(adev, ras_block); + if (ih_info->cb) + amdgpu_ras_interrupt_remove_handler(adev, ih_info); +interrupt: + amdgpu_ras_feature_enable(adev, ras_block, 0); + return r; +} + +/* helper function to remove ras fs node and interrupt handler */ +void amdgpu_ras_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_ih_if *ih_info) +{ + if (!ras_block || !ih_info) + return; + + amdgpu_ras_sysfs_remove(adev, ras_block); + amdgpu_ras_debugfs_remove(adev, ras_block); + if (ih_info->cb) + amdgpu_ras_interrupt_remove_handler(adev, ih_info); + amdgpu_ras_feature_enable(adev, ras_block, 0); +} + /* do some init work after IP late init as dependence. * and it runs in resume/gpu reset/booting up cases. */ @@ -1632,7 +1900,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev) * See feature_enable_on_boot */ amdgpu_ras_disable_all_features(adev, 1); - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_ras_reset_gpu(adev); } } @@ -1683,3 +1951,18 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) return 0; } + +void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) +{ + uint32_t hw_supported, supported; + + amdgpu_ras_check_supported(adev, &hw_supported, &supported); + if (!hw_supported) + return; + + if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { + DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n"); + + amdgpu_ras_reset_gpu(adev); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index b2841195bd3b..a5fe29a9373e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "amdgpu_psp.h" #include "ta_ras_if.h" +#include "amdgpu_ras_eeprom.h" enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, @@ -52,6 +53,236 @@ enum amdgpu_ras_block { #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST #define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) +enum amdgpu_ras_gfx_subblock { + /* CPC */ + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, + AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, + AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, + AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, + /* CPF */ + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, + AMDGPU_RAS_BLOCK__GFX_CPF_TAG, + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, + /* CPG */ + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, + AMDGPU_RAS_BLOCK__GFX_CPG_TAG, + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, + /* GDS */ + AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + /* SPI */ + AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, + /* SQ */ + AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, + AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, + AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, + AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, + /* SQC (3 ranges) */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, + /* SQC range 0 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, + /* TA */ + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, + /* TCA */ + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges) */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, + /* TCC range 0 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, + AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, + AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, + AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, + AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, + AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, + AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, + AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = + AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, + /* TCI */ + AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, + /* TCP */ + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, + AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + /* TD */ + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, + AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges) */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, + /* EA range 0 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, + AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + /* EA range 1 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, + /* UTC VM L2 bank */ + AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, + /* UTC VM walker */ + AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache */ + AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache */ + AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, + AMDGPU_RAS_BLOCK__GFX_MAX +}; + enum amdgpu_ras_error_type { AMDGPU_RAS_ERROR__NONE = 0, AMDGPU_RAS_ERROR__PARITY = 1, @@ -76,9 +307,6 @@ struct ras_common_if { char name[32]; }; -typedef int (*ras_ih_cb)(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry); - struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -89,8 +317,6 @@ struct amdgpu_ras { struct list_head head; /* debugfs */ struct dentry *dir; - /* debugfs ctrl */ - struct dentry *ent; /* sysfs */ struct device_attribute features_attr; struct bin_attribute badpages_attr; @@ -106,10 +332,84 @@ struct amdgpu_ras { struct mutex recovery_lock; uint32_t flags; + bool reboot; + struct amdgpu_ras_eeprom_control eeprom_control; }; -/* interfaces for IP */ +struct ras_fs_data { + char sysfs_name[32]; + char debugfs_name[32]; +}; + +struct ras_err_data { + unsigned long ue_count; + unsigned long ce_count; + unsigned long err_addr_cnt; + struct eeprom_table_record *err_addr; +}; + +struct ras_err_handler_data { + /* point to bad page records array */ + struct eeprom_table_record *bps; + /* point to reserved bo array */ + struct amdgpu_bo **bps_bo; + /* the count of entries */ + int count; + /* the space can place new entries */ + int space_left; + /* last reserved entry's index + 1 */ + int last_reserved; +}; + +typedef int (*ras_ih_cb)(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry); + +struct ras_ih_data { + /* interrupt bottom half */ + struct work_struct ih_work; + int inuse; + /* IP callback */ + ras_ih_cb cb; + /* full of entries */ + unsigned char *ring; + unsigned int ring_size; + unsigned int element_size; + unsigned int aligned_element_size; + unsigned int rptr; + unsigned int wptr; +}; +struct ras_manager { + struct ras_common_if head; + /* reference count */ + int use; + /* ras block link */ + struct list_head node; + /* the device */ + struct amdgpu_device *adev; + /* debugfs */ + struct dentry *ent; + /* sysfs */ + struct device_attribute sysfs_attr; + int attr_inuse; + + /* fs node name */ + struct ras_fs_data fs_data; + + /* IH data */ + struct ras_ih_data ih_data; + + struct ras_err_data err_data; +}; + +struct ras_badpage { + unsigned int bp; + unsigned int size; + unsigned int flags; +}; + +/* interfaces for IP */ struct ras_fs_if { struct ras_common_if head; char sysfs_name[32]; @@ -178,26 +478,32 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, return ras && (ras->supported & (1 << block)); } +int amdgpu_ras_recovery_init(struct amdgpu_device *adev); int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, unsigned int block); void amdgpu_ras_resume(struct amdgpu_device *adev); void amdgpu_ras_suspend(struct amdgpu_device *adev); -int amdgpu_ras_query_error_count(struct amdgpu_device *adev, +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - unsigned long *bps, int pages); + struct eeprom_table_record *bps, int pages); int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); -static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, - bool is_baco) +static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + /* save bad page to eeprom before gpu reset, + * i2c may be unstable in gpu reset + */ + if (in_task()) + amdgpu_ras_reserve_bad_pages(adev); + if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) schedule_work(&ras->recovery_work); return 0; @@ -263,6 +569,13 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { int amdgpu_ras_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); +int amdgpu_ras_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_fs_if *fs_info, + struct ras_ih_if *ih_info); +void amdgpu_ras_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_ih_if *ih_info); int amdgpu_ras_feature_enable(struct amdgpu_device *adev, struct ras_common_if *head, bool enable); @@ -296,4 +609,22 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_dispatch_if *info); + +struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, + struct ras_common_if *head); + +extern atomic_t amdgpu_ras_in_intr; + +static inline bool amdgpu_ras_intr_triggered(void) +{ + return !!atomic_read(&amdgpu_ras_in_intr); +} + +static inline void amdgpu_ras_intr_cleared(void) +{ + atomic_set(&amdgpu_ras_in_intr, 0); +} + +void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c new file mode 100644 index 000000000000..2a8e04895595 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -0,0 +1,525 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu_ras_eeprom.h" +#include "amdgpu.h" +#include "amdgpu_ras.h" +#include <linux/bits.h> +#include "smu_v11_0_i2c.h" + +#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 +#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 + +/* + * The 2 macros bellow represent the actual size in bytes that + * those entities occupy in the EEPROM memory. + * EEPROM_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which + * uses uint64 to store 6b fields such as retired_page. + */ +#define EEPROM_TABLE_HEADER_SIZE 20 +#define EEPROM_TABLE_RECORD_SIZE 24 + +#define EEPROM_ADDRESS_SIZE 0x2 + +/* Table hdr is 'AMDR' */ +#define EEPROM_TABLE_HDR_VAL 0x414d4452 +#define EEPROM_TABLE_VER 0x00010000 + +/* Assume 2 Mbit size */ +#define EEPROM_SIZE_BYTES 256000 +#define EEPROM_PAGE__SIZE_BYTES 256 +#define EEPROM_HDR_START 0 +#define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE) +#define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE) +#define EEPROM_ADDR_MSB_MASK GENMASK(17, 8) + +#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev + +static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr, + unsigned char *buff) +{ + uint32_t *pp = (uint32_t *) buff; + + pp[0] = cpu_to_le32(hdr->header); + pp[1] = cpu_to_le32(hdr->version); + pp[2] = cpu_to_le32(hdr->first_rec_offset); + pp[3] = cpu_to_le32(hdr->tbl_size); + pp[4] = cpu_to_le32(hdr->checksum); +} + +static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr, + unsigned char *buff) +{ + uint32_t *pp = (uint32_t *)buff; + + hdr->header = le32_to_cpu(pp[0]); + hdr->version = le32_to_cpu(pp[1]); + hdr->first_rec_offset = le32_to_cpu(pp[2]); + hdr->tbl_size = le32_to_cpu(pp[3]); + hdr->checksum = le32_to_cpu(pp[4]); +} + +static int __update_table_header(struct amdgpu_ras_eeprom_control *control, + unsigned char *buff) +{ + int ret = 0; + struct i2c_msg msg = { + .addr = 0, + .flags = 0, + .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, + .buf = buff, + }; + + + *(uint16_t *)buff = EEPROM_HDR_START; + __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); + + msg.addr = control->i2c_address; + + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); + if (ret < 1) + DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); + + return ret; +} + + + +static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control) +{ + int i; + uint32_t tbl_sum = 0; + + /* Header checksum, skip checksum field in the calculation */ + for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++) + tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i); + + return tbl_sum; +} + +static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records, + int num) +{ + int i, j; + uint32_t tbl_sum = 0; + + /* Records checksum */ + for (i = 0; i < num; i++) { + struct eeprom_table_record *record = &records[i]; + + for (j = 0; j < sizeof(*record); j++) { + tbl_sum += *(((unsigned char *)record) + j); + } + } + + return tbl_sum; +} + +static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, int num) +{ + return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num); +} + +/* Checksum = 256 -((sum of all table entries) mod 256) */ +static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, int num, + uint32_t old_hdr_byte_sum) +{ + /* + * This will update the table sum with new records. + * + * TODO: What happens when the EEPROM table is to be wrapped around + * and old records from start will get overridden. + */ + + /* need to recalculate updated header byte sum */ + control->tbl_byte_sum -= old_hdr_byte_sum; + control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num); + + control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256); +} + +/* table sum mod 256 + checksum must equals 256 */ +static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, int num) +{ + control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num); + + if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) { + DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum); + return false; + } + + return true; +} + +int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) +{ + unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + int ret = 0; + + mutex_lock(&control->tbl_mutex); + + hdr->header = EEPROM_TABLE_HDR_VAL; + hdr->version = EEPROM_TABLE_VER; + hdr->first_rec_offset = EEPROM_RECORD_START; + hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE; + + control->tbl_byte_sum = 0; + __update_tbl_checksum(control, NULL, 0, 0); + control->next_addr = EEPROM_RECORD_START; + + ret = __update_table_header(control, buff); + + mutex_unlock(&control->tbl_mutex); + + return ret; + +} + +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) +{ + int ret = 0; + struct amdgpu_device *adev = to_amdgpu_device(control); + unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct i2c_msg msg = { + .addr = 0, + .flags = I2C_M_RD, + .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, + .buf = buff, + }; + + mutex_init(&control->tbl_mutex); + + switch (adev->asic_type) { + case CHIP_VEGA20: + control->i2c_address = EEPROM_I2C_TARGET_ADDR_VEGA20; + ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor); + break; + + case CHIP_ARCTURUS: + control->i2c_address = EEPROM_I2C_TARGET_ADDR_ARCTURUS; + ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor); + break; + + default: + return 0; + } + + if (ret) { + DRM_ERROR("Failed to init I2C controller, ret:%d", ret); + return ret; + } + + msg.addr = control->i2c_address; + + /* Read/Create table header from EEPROM address 0 */ + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); + if (ret < 1) { + DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret); + return ret; + } + + __decode_table_header_from_buff(hdr, &buff[2]); + + if (hdr->header == EEPROM_TABLE_HDR_VAL) { + control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) / + EEPROM_TABLE_RECORD_SIZE; + control->tbl_byte_sum = __calc_hdr_byte_sum(control); + control->next_addr = EEPROM_RECORD_START; + + DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", + control->num_recs); + + } else { + DRM_INFO("Creating new EEPROM table"); + + ret = amdgpu_ras_eeprom_reset_table(control); + } + + return ret == 1 ? 0 : -EIO; +} + +void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + switch (adev->asic_type) { + case CHIP_VEGA20: + smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor); + break; + case CHIP_ARCTURUS: + smu_i2c_eeprom_fini(&adev->smu, &control->eeprom_accessor); + break; + + default: + return; + } +} + +static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + unsigned char *buff) +{ + __le64 tmp = 0; + int i = 0; + + /* Next are all record fields according to EEPROM page spec in LE foramt */ + buff[i++] = record->err_type; + + buff[i++] = record->bank; + + tmp = cpu_to_le64(record->ts); + memcpy(buff + i, &tmp, 8); + i += 8; + + tmp = cpu_to_le64((record->offset & 0xffffffffffff)); + memcpy(buff + i, &tmp, 6); + i += 6; + + buff[i++] = record->mem_channel; + buff[i++] = record->mcumc_id; + + tmp = cpu_to_le64((record->retired_page & 0xffffffffffff)); + memcpy(buff + i, &tmp, 6); +} + +static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + unsigned char *buff) +{ + __le64 tmp = 0; + int i = 0; + + /* Next are all record fields according to EEPROM page spec in LE foramt */ + record->err_type = buff[i++]; + + record->bank = buff[i++]; + + memcpy(&tmp, buff + i, 8); + record->ts = le64_to_cpu(tmp); + i += 8; + + memcpy(&tmp, buff + i, 6); + record->offset = (le64_to_cpu(tmp) & 0xffffffffffff); + i += 6; + + record->mem_channel = buff[i++]; + record->mcumc_id = buff[i++]; + + memcpy(&tmp, buff + i, 6); + record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff); +} + +/* + * When reaching end of EEPROM memory jump back to 0 record address + * When next record access will go beyond EEPROM page boundary modify bits A17/A8 + * in I2C selector to go to next page + */ +static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) +{ + uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE; + + /* When all EEPROM memory used jump back to 0 address */ + if (next_address > EEPROM_SIZE_BYTES) { + DRM_INFO("Reached end of EEPROM memory, jumping to 0 " + "and overriding old record"); + return EEPROM_RECORD_START; + } + + /* + * To check if we overflow page boundary compare next address with + * current and see if bits 17/8 of the EEPROM address will change + * If they do start from the next 256b page + * + * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 5.1.2 + */ + if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) { + DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx", + (next_address & EEPROM_ADDR_MSB_MASK)); + + return (next_address & EEPROM_ADDR_MSB_MASK); + } + + return curr_address; +} + +int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, + bool write, + int num) +{ + int i, ret = 0; + struct i2c_msg *msgs, *msg; + unsigned char *buffs, *buff; + struct eeprom_table_record *record; + struct amdgpu_device *adev = to_amdgpu_device(control); + + if (adev->asic_type != CHIP_VEGA20 && adev->asic_type != CHIP_ARCTURUS) + return 0; + + buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE, + GFP_KERNEL); + if (!buffs) + return -ENOMEM; + + mutex_lock(&control->tbl_mutex); + + msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL); + if (!msgs) { + ret = -ENOMEM; + goto free_buff; + } + + /* In case of overflow just start from beginning to not lose newest records */ + if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES)) + control->next_addr = EEPROM_RECORD_START; + + + /* + * TODO Currently makes EEPROM writes for each record, this creates + * internal fragmentation. Optimized the code to do full page write of + * 256b + */ + for (i = 0; i < num; i++) { + buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; + record = &records[i]; + msg = &msgs[i]; + + control->next_addr = __correct_eeprom_dest_address(control->next_addr); + + /* + * Update bits 16,17 of EEPROM address in I2C address by setting them + * to bits 1,2 of Device address byte + */ + msg->addr = control->i2c_address | + ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); + msg->flags = write ? 0 : I2C_M_RD; + msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE; + msg->buf = buff; + + /* Insert the EEPROM dest addess, bits 0-15 */ + buff[0] = ((control->next_addr >> 8) & 0xff); + buff[1] = (control->next_addr & 0xff); + + /* EEPROM table content is stored in LE format */ + if (write) + __encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE); + + /* + * The destination EEPROM address might need to be corrected to account + * for page or entire memory wrapping + */ + control->next_addr += EEPROM_TABLE_RECORD_SIZE; + } + + ret = i2c_transfer(&control->eeprom_accessor, msgs, num); + if (ret < 1) { + DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret); + + /* TODO Restore prev next EEPROM address ? */ + goto free_msgs; + } + + + if (!write) { + for (i = 0; i < num; i++) { + buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; + record = &records[i]; + + __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE); + } + } + + if (write) { + uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control); + + /* + * Update table header with size and CRC and account for table + * wrap around where the assumption is that we treat it as empty + * table + * + * TODO - Check the assumption is correct + */ + control->num_recs += num; + control->num_recs %= EEPROM_MAX_RECORD_NUM; + control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num; + if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES) + control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE + + control->num_recs * EEPROM_TABLE_RECORD_SIZE; + + __update_tbl_checksum(control, records, num, old_hdr_byte_sum); + + __update_table_header(control, buffs); + } else if (!__validate_tbl_checksum(control, records, num)) { + DRM_WARN("EEPROM Table checksum mismatch!"); + /* TODO Uncomment when EEPROM read/write is relliable */ + /* ret = -EIO; */ + } + +free_msgs: + kfree(msgs); + +free_buff: + kfree(buffs); + + mutex_unlock(&control->tbl_mutex); + + return ret == num ? 0 : -EIO; +} + +/* Used for testing if bugs encountered */ +#if 0 +void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control) +{ + int i; + struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL); + + if (!recs) + return; + + for (i = 0; i < 1 ; i++) { + recs[i].address = 0xdeadbeef; + recs[i].retired_page = i; + } + + if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) { + + memset(recs, 0, sizeof(*recs) * 1); + + control->next_addr = EEPROM_RECORD_START; + + if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) { + for (i = 0; i < 1; i++) + DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu", + recs[i].address, recs[i].retired_page); + } else + DRM_ERROR("Failed in reading from table"); + + } else + DRM_ERROR("Failed in writing to table"); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h new file mode 100644 index 000000000000..ca78f812d436 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -0,0 +1,92 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _AMDGPU_RAS_EEPROM_H +#define _AMDGPU_RAS_EEPROM_H + +#include <linux/i2c.h> + +struct amdgpu_device; + +enum amdgpu_ras_eeprom_err_type{ + AMDGPU_RAS_EEPROM_ERR_PLACE_HOLDER, + AMDGPU_RAS_EEPROM_ERR_RECOVERABLE, + AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE +}; + +struct amdgpu_ras_eeprom_table_header { + uint32_t header; + uint32_t version; + uint32_t first_rec_offset; + uint32_t tbl_size; + uint32_t checksum; +}__attribute__((__packed__)); + +struct amdgpu_ras_eeprom_control { + struct amdgpu_ras_eeprom_table_header tbl_hdr; + struct i2c_adapter eeprom_accessor; + uint32_t next_addr; + unsigned int num_recs; + struct mutex tbl_mutex; + bool bus_locked; + uint32_t tbl_byte_sum; + uint16_t i2c_address; // 8-bit represented address +}; + +/* + * Represents single table record. Packed to be easily serialized into byte + * stream. + */ +struct eeprom_table_record { + + union { + uint64_t address; + uint64_t offset; + }; + + uint64_t retired_page; + uint64_t ts; + + enum amdgpu_ras_eeprom_err_type err_type; + + union { + unsigned char bank; + unsigned char cu; + }; + + unsigned char mem_channel; + unsigned char mcumc_id; +}__attribute__((__packed__)); + +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control); +void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control); +int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control); + +int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, + bool write, + int num); + +void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control); + +#endif // _AMDGPU_RAS_EEPROM_H diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4410c97ac9b7..930316e60155 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -29,7 +29,7 @@ #include <drm/drm_print.h> /* max number of rings */ -#define AMDGPU_MAX_RINGS 24 +#define AMDGPU_MAX_RINGS 28 #define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index c8793e6cc3c5..6373bfb47d55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws) */ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) { - volatile u32 *dst_ptr; u32 dws; int r; /* allocate clear state block */ adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev); - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, @@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) return r; } - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 5c13c503e61f..a2ee30b16212 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_sdma.h" +#include "amdgpu_ras.h" #define AMDGPU_CSA_SDMA_SIZE 64 /* SDMA CSA reside in the 3rd page of CSA */ @@ -83,3 +84,101 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, return csa_mc_addr; } + +int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, + void *ras_ih_info) +{ + int r, i; + struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info; + struct ras_fs_if fs_info = { + .sysfs_name = "sdma_err_count", + .debugfs_name = "sdma_err_inject", + }; + + if (!ih_info) + return -EINVAL; + + if (!adev->sdma.ras_if) { + adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->sdma.ras_if) + return -ENOMEM; + adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA; + adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->sdma.ras_if->sub_block_index = 0; + strcpy(adev->sdma.ras_if->name, "sdma"); + } + fs_info.head = ih_info->head = *adev->sdma.ras_if; + + r = amdgpu_ras_late_init(adev, adev->sdma.ras_if, + &fs_info, ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + if (r) + goto late_fini; + } + } else { + r = 0; + goto free; + } + + return 0; + +late_fini: + amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info); +free: + kfree(adev->sdma.ras_if); + adev->sdma.ras_if = NULL; + return r; +} + +void amdgpu_sdma_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && + adev->sdma.ras_if) { + struct ras_common_if *ras_if = adev->sdma.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + /* the cb member will not be used by + * amdgpu_ras_interrupt_remove_handler, init it only + * to cheat the check in ras_late_fini + */ + .cb = amdgpu_sdma_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + +int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry) +{ + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_ras_reset_gpu(adev); + + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->sdma.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 35dd152f9d5c..485335267d78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -25,11 +25,17 @@ #define __AMDGPU_SDMA_H__ /* max number of IP instances */ -#define AMDGPU_MAX_SDMA_INSTANCES 2 +#define AMDGPU_MAX_SDMA_INSTANCES 8 enum amdgpu_sdma_irq { AMDGPU_SDMA_IRQ_INSTANCE0 = 0, AMDGPU_SDMA_IRQ_INSTANCE1, + AMDGPU_SDMA_IRQ_INSTANCE2, + AMDGPU_SDMA_IRQ_INSTANCE3, + AMDGPU_SDMA_IRQ_INSTANCE4, + AMDGPU_SDMA_IRQ_INSTANCE5, + AMDGPU_SDMA_IRQ_INSTANCE6, + AMDGPU_SDMA_IRQ_INSTANCE7, AMDGPU_SDMA_IRQ_LAST }; @@ -44,8 +50,18 @@ struct amdgpu_sdma_instance { bool burst_nop; }; +struct amdgpu_sdma_ras_funcs { + int (*ras_late_init)(struct amdgpu_device *adev, + void *ras_ih_info); + void (*ras_fini)(struct amdgpu_device *adev); + int (*query_ras_error_count)(struct amdgpu_device *adev, + uint32_t instance, void *ras_error_status); +}; + struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; + struct drm_gpu_scheduler *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES]; + uint32_t num_sdma_sched; struct amdgpu_irq_src trap_irq; struct amdgpu_irq_src illegal_inst_irq; struct amdgpu_irq_src ecc_irq; @@ -53,6 +69,7 @@ struct amdgpu_sdma { uint32_t srbm_soft_reset; bool has_page_queue; struct ras_common_if *ras_if; + const struct amdgpu_sdma_ras_funcs *funcs; }; /* @@ -98,4 +115,13 @@ struct amdgpu_sdma_instance * amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid); +int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, + void *ras_ih_info); +void amdgpu_sdma_ras_fini(struct amdgpu_device *adev); +int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry); +int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 9828f3c7c655..a09b6b9c27d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -129,7 +129,8 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep, * Tries to add the fence to an existing hash entry. Returns true when an entry * was found, false otherwise. */ -static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit) +static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, + bool explicit) { struct amdgpu_sync_entry *e; @@ -151,19 +152,18 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, * amdgpu_sync_fence - remember to sync to this fence * * @sync: sync object to add fence to - * @fence: fence to sync to + * @f: fence to sync to + * @explicit: if this is an explicit dependency * + * Add the fence to the sync object. */ -int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct dma_fence *f, bool explicit) +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, + bool explicit) { struct amdgpu_sync_entry *e; if (!f) return 0; - if (amdgpu_sync_same_dev(adev, f) && - amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM) - amdgpu_sync_keep_later(&sync->last_vm_update, f); if (amdgpu_sync_add_later(sync, f, explicit)) return 0; @@ -180,6 +180,24 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, } /** + * amdgpu_sync_vm_fence - remember to sync to this VM fence + * + * @adev: amdgpu device + * @sync: sync object to add fence to + * @fence: the VM fence to add + * + * Add the fence to the sync object and remember it as VM update. + */ +int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) +{ + if (!fence) + return 0; + + amdgpu_sync_keep_later(&sync->last_vm_update, fence); + return amdgpu_sync_fence(sync, fence, false); +} + +/** * amdgpu_sync_resv - sync to a reservation object * * @sync: sync object to add fences from reservation object to @@ -190,10 +208,10 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct reservation_object *resv, + struct dma_resv *resv, void *owner, bool explicit_sync) { - struct reservation_object_list *flist; + struct dma_resv_list *flist; struct dma_fence *f; void *fence_owner; unsigned i; @@ -203,16 +221,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, return -EINVAL; /* always sync to the exclusive fence */ - f = reservation_object_get_excl(resv); - r = amdgpu_sync_fence(adev, sync, f, false); + f = dma_resv_get_excl(resv); + r = amdgpu_sync_fence(sync, f, false); - flist = reservation_object_get_list(resv); + flist = dma_resv_get_list(resv); if (!flist || r) return r; for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], - reservation_object_held(resv)); + dma_resv_held(resv)); /* We only want to trigger KFD eviction fences on * evict or move jobs. Skip KFD fences otherwise. */ @@ -222,13 +240,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, continue; if (amdgpu_sync_same_dev(adev, f)) { - /* VM updates are only interesting - * for other VM updates and moves. + /* VM updates only sync with moves but not with user + * command submissions or KFD evictions fences */ - if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && - (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && - ((owner == AMDGPU_FENCE_OWNER_VM) != - (fence_owner == AMDGPU_FENCE_OWNER_VM))) + if (owner == AMDGPU_FENCE_OWNER_VM && + fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) continue; /* Ignore fence from the same owner and explicit one as @@ -239,7 +255,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, continue; } - r = amdgpu_sync_fence(adev, sync, f, false); + r = amdgpu_sync_fence(sync, f, false); if (r) break; } @@ -340,7 +356,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) hash_for_each_safe(source->fences, i, tmp, e, node) { f = e->fence; if (!dma_fence_is_signaled(f)) { - r = amdgpu_sync_fence(NULL, clone, f, e->explicit); + r = amdgpu_sync_fence(clone, f, e->explicit); if (r) return r; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 10cf23a57f17..d62c2b81d92b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -27,7 +27,7 @@ #include <linux/hashtable.h> struct dma_fence; -struct reservation_object; +struct dma_resv; struct amdgpu_device; struct amdgpu_ring; @@ -40,16 +40,18 @@ struct amdgpu_sync { }; void amdgpu_sync_create(struct amdgpu_sync *sync); -int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct dma_fence *f, bool explicit); +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, + bool explicit); +int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct reservation_object *resv, + struct dma_resv *resv, void *owner, bool explicit_sync); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, + bool *explicit); int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index b66d29d5ffa2..b158230af8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -138,6 +138,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(vram_obj, &vram_map); if (r) { @@ -183,6 +184,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(gtt_obj[i], >t_map); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 77674a7b9616..63e734a125fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -170,7 +170,7 @@ TRACE_EVENT(amdgpu_cs_ioctl, __field(unsigned int, context) __field(unsigned int, seqno) __field(struct dma_fence *, fence) - __field(char *, ring_name) + __string(ring, to_amdgpu_ring(job->base.sched)->name) __field(u32, num_ibs) ), @@ -179,12 +179,12 @@ TRACE_EVENT(amdgpu_cs_ioctl, __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __entry->ring_name = to_amdgpu_ring(job->base.sched)->name; + __assign_str(ring, to_amdgpu_ring(job->base.sched)->name) __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", __entry->sched_job_id, __get_str(timeline), __entry->context, - __entry->seqno, __entry->ring_name, __entry->num_ibs) + __entry->seqno, __get_str(ring), __entry->num_ibs) ); TRACE_EVENT(amdgpu_sched_run_job, @@ -195,7 +195,7 @@ TRACE_EVENT(amdgpu_sched_run_job, __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __field(unsigned int, context) __field(unsigned int, seqno) - __field(char *, ring_name) + __string(ring, to_amdgpu_ring(job->base.sched)->name) __field(u32, num_ibs) ), @@ -204,12 +204,12 @@ TRACE_EVENT(amdgpu_sched_run_job, __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __entry->ring_name = to_amdgpu_ring(job->base.sched)->name; + __assign_str(ring, to_amdgpu_ring(job->base.sched)->name) __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", __entry->sched_job_id, __get_str(timeline), __entry->context, - __entry->seqno, __entry->ring_name, __entry->num_ibs) + __entry->seqno, __get_str(ring), __entry->num_ibs) ); @@ -323,14 +323,15 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs, TRACE_EVENT(amdgpu_vm_set_ptes, TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint64_t flags), - TP_ARGS(pe, addr, count, incr, flags), + uint32_t incr, uint64_t flags, bool direct), + TP_ARGS(pe, addr, count, incr, flags, direct), TP_STRUCT__entry( __field(u64, pe) __field(u64, addr) __field(u32, count) __field(u32, incr) __field(u64, flags) + __field(bool, direct) ), TP_fast_assign( @@ -339,28 +340,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes, __entry->count = count; __entry->incr = incr; __entry->flags = flags; + __entry->direct = direct; ), - TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u", - __entry->pe, __entry->addr, __entry->incr, - __entry->flags, __entry->count) + TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, " + "direct=%d", __entry->pe, __entry->addr, __entry->incr, + __entry->flags, __entry->count, __entry->direct) ); TRACE_EVENT(amdgpu_vm_copy_ptes, - TP_PROTO(uint64_t pe, uint64_t src, unsigned count), - TP_ARGS(pe, src, count), + TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct), + TP_ARGS(pe, src, count, direct), TP_STRUCT__entry( __field(u64, pe) __field(u64, src) __field(u32, count) + __field(bool, direct) ), TP_fast_assign( __entry->pe = pe; __entry->src = src; __entry->count = count; + __entry->direct = direct; ), - TP_printk("pe=%010Lx, src=%010Lx, count=%u", - __entry->pe, __entry->src, __entry->count) + TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d", + __entry->pe, __entry->src, __entry->count, + __entry->direct) ); TRACE_EVENT(amdgpu_vm_flush, @@ -468,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence), TP_ARGS(sched_job, fence), TP_STRUCT__entry( - __field(const char *,name) + __string(ring, sched_job->base.sched->name) __field(uint64_t, id) __field(struct dma_fence *, fence) __field(uint64_t, ctx) @@ -476,14 +481,14 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, ), TP_fast_assign( - __entry->name = sched_job->base.sched->name; + __assign_str(ring, sched_job->base.sched->name) __entry->id = sched_job->base.id; __entry->fence = fence; __entry->ctx = fence->context; __entry->seqno = fence->seqno; ), TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u", - __entry->name, __entry->id, + __get_str(ring), __entry->id, __entry->fence, __entry->ctx, __entry->seqno) ); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e51b48ac48eb..dee446278417 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -35,10 +35,13 @@ #include <linux/hmm.h> #include <linux/pagemap.h> #include <linux/sched/task.h> +#include <linux/sched/mm.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/swiotlb.h> +#include <linux/dma-buf.h> +#include <linux/sizes.h> #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> @@ -54,6 +57,7 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_sdma.h" +#include "amdgpu_ras.h" #include "bif/bif_4_1_d.h" static int amdgpu_map_buffer(struct ttm_buffer_object *bo, @@ -227,7 +231,7 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; - return drm_vma_node_verify_access(&abo->gem_base.vma_node, + return drm_vma_node_verify_access(&abo->tbo.base.vma_node, filp->private_data); } @@ -303,7 +307,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_copy_mem *src, struct amdgpu_copy_mem *dst, uint64_t size, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **f) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -440,10 +444,26 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, new_mem->num_pages << PAGE_SHIFT, - bo->resv, &fence); + bo->base.resv, &fence); if (r) goto error; + /* clear the space being freed */ + if (old_mem->mem_type == TTM_PL_VRAM && + (ttm_to_amdgpu_bo(bo)->flags & + AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { + struct dma_fence *wipe_fence = NULL; + + r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, + NULL, &wipe_fence); + if (r) { + goto error; + } else if (wipe_fence) { + dma_fence_put(fence); + fence = wipe_fence; + } + } + /* Always block for VM page tables before committing the new location */ if (bo->type == ttm_bo_type_kernel) r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem); @@ -468,15 +488,12 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { - struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_place placements; struct ttm_placement placement; int r; - adev = amdgpu_ttm_adev(bo->bdev); - /* create space/pages for new_mem in GTT space */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; @@ -527,15 +544,12 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { - struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_placement placement; struct ttm_place placements; int r; - adev = amdgpu_ttm_adev(bo->bdev); - /* make space in GTT for old_mem buffer */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; @@ -747,6 +761,7 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, */ struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; + struct drm_gem_object *gobj; u64 offset; uint64_t userptr; struct task_struct *usertask; @@ -756,6 +771,20 @@ struct amdgpu_ttm_tt { #endif }; +#ifdef CONFIG_DRM_AMDGPU_USERPTR +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + /** * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user * memory and start HMM tracking CPU page table update @@ -763,97 +792,89 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - -#define MAX_RETRY_HMM_RANGE_FAULT 16 - int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { - struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL; struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct mm_struct *mm = gtt->usertask->mm; unsigned long start = gtt->userptr; struct vm_area_struct *vma; struct hmm_range *range; + unsigned long timeout; + struct mm_struct *mm; unsigned long i; - uint64_t *pfns; - int retry = 0; int r = 0; - if (!mm) /* Happens during process shutdown */ - return -ESRCH; - - if (unlikely(!mirror)) { - DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n"); - r = -EFAULT; - goto out; + mm = bo->notifier.mm; + if (unlikely(!mm)) { + DRM_DEBUG_DRIVER("BO is not registered?\n"); + return -EFAULT; } - vma = find_vma(mm, start); - if (unlikely(!vma || start < vma->vm_start)) { - r = -EFAULT; - goto out; - } - if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && - vma->vm_file)) { - r = -EPERM; - goto out; - } + /* Another get_user_pages is running at the same time?? */ + if (WARN_ON(gtt->range)) + return -EFAULT; + + if (!mmget_not_zero(mm)) /* Happens during process shutdown */ + return -ESRCH; range = kzalloc(sizeof(*range), GFP_KERNEL); if (unlikely(!range)) { r = -ENOMEM; goto out; } + range->notifier = &bo->notifier; + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->start = bo->notifier.interval_tree.start; + range->end = bo->notifier.interval_tree.last + 1; + range->default_flags = hmm_range_flags[HMM_PFN_VALID]; + if (!amdgpu_ttm_tt_is_readonly(ttm)) + range->default_flags |= range->flags[HMM_PFN_WRITE]; - pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); - if (unlikely(!pfns)) { + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns), + GFP_KERNEL); + if (unlikely(!range->pfns)) { r = -ENOMEM; goto out_free_ranges; } - amdgpu_hmm_init_range(range); - range->default_flags = range->flags[HMM_PFN_VALID]; - range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ? - 0 : range->flags[HMM_PFN_WRITE]; - range->pfn_flags_mask = 0; - range->pfns = pfns; - hmm_range_register(range, mirror, start, - start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); + if (unlikely(!vma || start < vma->vm_start)) { + r = -EFAULT; + goto out_unlock; + } + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + vma->vm_file)) { + r = -EPERM; + goto out_unlock; + } + up_read(&mm->mmap_sem); + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); retry: - /* - * Just wait for range to be valid, safe to ignore return value as we - * will use the return value of hmm_range_fault() below under the - * mmap_sem to ascertain the validity of the range. - */ - hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); + range->notifier_seq = mmu_interval_read_begin(&bo->notifier); down_read(&mm->mmap_sem); - - r = hmm_range_fault(range, true); - if (unlikely(r < 0)) { - if (likely(r == -EAGAIN)) { - /* - * return -EAGAIN, mmap_sem is dropped - */ - if (retry++ < MAX_RETRY_HMM_RANGE_FAULT) - goto retry; - else - pr_err("Retry hmm fault too many times\n"); - } - - goto out_up_read; - } - + r = hmm_range_fault(range, 0); up_read(&mm->mmap_sem); + if (unlikely(r <= 0)) { + /* + * FIXME: This timeout should encompass the retry from + * mmu_interval_read_retry() as well. + */ + if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) + goto retry; + goto out_free_pfns; + } for (i = 0; i < ttm->num_pages; i++) { - pages[i] = hmm_device_entry_to_page(range, pfns[i]); + /* FIXME: The pages cannot be touched outside the notifier_lock */ + pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); if (unlikely(!pages[i])) { pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", - i, pfns[i]); + i, range->pfns[i]); r = -ENOMEM; goto out_free_pfns; @@ -861,18 +882,18 @@ retry: } gtt->range = range; + mmput(mm); return 0; -out_up_read: - if (likely(r != -EAGAIN)) - up_read(&mm->mmap_sem); +out_unlock: + up_read(&mm->mmap_sem); out_free_pfns: - hmm_range_unregister(range); - kvfree(pfns); + kvfree(range->pfns); out_free_ranges: kfree(range); out: + mmput(mm); return r; } @@ -897,15 +918,18 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) "No user pages to check\n"); if (gtt->range) { - r = hmm_range_valid(gtt->range); - hmm_range_unregister(gtt->range); - + /* + * FIXME: Must always hold notifier_lock for this, and must + * not ignore the return code. + */ + r = mmu_interval_read_retry(gtt->range->notifier, + gtt->range->notifier_seq); kvfree(gtt->range->pfns); kfree(gtt->range); gtt->range = NULL; } - return r; + return !r; } #endif @@ -986,10 +1010,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) sg_free_table(ttm->sg); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - if (gtt->range && - ttm->pages[0] == hmm_device_entry_to_page(gtt->range, - gtt->range->pfns[0])) - WARN_ONCE(1, "Missing get_user_page_done\n"); + if (gtt->range) { + unsigned long i; + + for (i = 0; i < ttm->num_pages; i++) { + if (ttm->pages[i] != + hmm_device_entry_to_page(gtt->range, + gtt->range->pfns[i])) + break; + } + + WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); + } #endif } @@ -1216,16 +1248,14 @@ static struct ttm_backend_func amdgpu_backend_func = { static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { - struct amdgpu_device *adev; struct amdgpu_ttm_tt *gtt; - adev = amdgpu_ttm_adev(bo->bdev); - gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) { return NULL; } gtt->ttm.ttm.func = &amdgpu_backend_func; + gtt->gobj = &bo->base; /* allocate space for the uninitialized page entries */ if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { @@ -1246,7 +1276,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ if (gtt && gtt->userptr) { @@ -1259,7 +1288,19 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, return 0; } - if (slave && ttm->sg) { + if (ttm->page_flags & TTM_PAGE_FLAG_SG) { + if (!ttm->sg) { + struct dma_buf_attachment *attach; + struct sg_table *sgt; + + attach = gtt->gobj->import_attach; + sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + ttm->sg = sgt; + } + drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); @@ -1286,9 +1327,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, */ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) { - struct amdgpu_device *adev; struct amdgpu_ttm_tt *gtt = (void *)ttm; - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + struct amdgpu_device *adev; if (gtt && gtt->userptr) { amdgpu_ttm_tt_set_user_pages(ttm, NULL); @@ -1297,7 +1337,16 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) return; } - if (slave) + if (ttm->sg && gtt->gobj->import_attach) { + struct dma_buf_attachment *attach; + + attach = gtt->gobj->import_attach; + dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL); + ttm->sg = NULL; + return; + } + + if (ttm->page_flags & TTM_PAGE_FLAG_SG) return; adev = amdgpu_ttm_adev(ttm->bdev); @@ -1470,26 +1519,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, { unsigned long num_pages = bo->mem.num_pages; struct drm_mm_node *node = bo->mem.mm_node; - struct reservation_object_list *flist; + struct dma_resv_list *flist; struct dma_fence *f; int i; - /* Don't evict VM page tables while they are busy, otherwise we can't - * cleanly handle page faults. - */ if (bo->type == ttm_bo_type_kernel && - !reservation_object_test_signaled_rcu(bo->resv, true)) + !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo))) return false; /* If bo is a KFD BO, check if the bo belongs to the current process. * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - flist = reservation_object_get_list(bo->resv); + flist = dma_resv_get_list(bo->base.resv); if (flist) { for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], - reservation_object_held(bo->resv)); + dma_resv_held(bo->base.resv)); if (amdkfd_fence_check_mm(f, current->mm)) return false; } @@ -1599,6 +1645,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .move = &amdgpu_bo_move, .verify_access = &amdgpu_verify_access, .move_notify = &amdgpu_bo_move_notify, + .release_notify = &amdgpu_bo_release_notify, .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, @@ -1632,81 +1679,96 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) */ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) { - struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_bo_param bp; - int r = 0; - int i; - u64 vram_size = adev->gmc.visible_vram_size; - u64 offset = adev->fw_vram_usage.start_offset; - u64 size = adev->fw_vram_usage.size; - struct amdgpu_bo *bo; - - memset(&bp, 0, sizeof(bp)); - bp.size = adev->fw_vram_usage.size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; + uint64_t vram_size = adev->gmc.visible_vram_size; + adev->fw_vram_usage.va = NULL; adev->fw_vram_usage.reserved_bo = NULL; - if (adev->fw_vram_usage.size > 0 && - adev->fw_vram_usage.size <= vram_size) { + if (adev->fw_vram_usage.size == 0 || + adev->fw_vram_usage.size > vram_size) + return 0; - r = amdgpu_bo_create(adev, &bp, - &adev->fw_vram_usage.reserved_bo); - if (r) - goto error_create; + return amdgpu_bo_create_kernel_at(adev, + adev->fw_vram_usage.start_offset, + adev->fw_vram_usage.size, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); +} - r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); - if (r) - goto error_reserve; +/* + * Memoy training reservation functions + */ - /* remove the original mem node and create a new one at the - * request position - */ - bo = adev->fw_vram_usage.reserved_bo; - offset = ALIGN(offset, PAGE_SIZE); - for (i = 0; i < bo->placement.num_placement; ++i) { - bo->placements[i].fpfn = offset >> PAGE_SHIFT; - bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; - } +/** + * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram + * + * @adev: amdgpu_device pointer + * + * free memory training reserved vram if it has been reserved. + */ +static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) +{ + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; - ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); - r = ttm_bo_mem_space(&bo->tbo, &bo->placement, - &bo->tbo.mem, &ctx); - if (r) - goto error_pin; + ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; + amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL); + ctx->c2p_bo = NULL; - r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, - AMDGPU_GEM_DOMAIN_VRAM, - adev->fw_vram_usage.start_offset, - (adev->fw_vram_usage.start_offset + - adev->fw_vram_usage.size)); - if (r) - goto error_pin; - r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, - &adev->fw_vram_usage.va); - if (r) - goto error_kmap; + return 0; +} + +static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size) +{ + if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1) ) + vram_size -= SZ_1M; + + return ALIGN(vram_size, SZ_1M); +} + +/** + * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from memory training. + */ +static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev) +{ + int ret; + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; + + memset(ctx, 0, sizeof(*ctx)); + if (!adev->fw_vram_usage.mem_train_support) { + DRM_DEBUG("memory training does not support!\n"); + return 0; + } + + ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size); + ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); + ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); + DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); + + ret = amdgpu_bo_create_kernel_at(adev, + ctx->c2p_train_data_offset, + ctx->train_data_size, + AMDGPU_GEM_DOMAIN_VRAM, + &ctx->c2p_bo, + NULL); + if (ret) { + DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); + amdgpu_ttm_training_reserve_vram_fini(adev); + return ret; } - return r; -error_kmap: - amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); -error_pin: - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); -error_reserve: - amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); -error_create: - adev->fw_vram_usage.va = NULL; - adev->fw_vram_usage.reserved_bo = NULL; - return r; + ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; + return 0; } + /** * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. @@ -1721,6 +1783,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) uint64_t gtt_size; int r; u64 vis_vram_limit; + void *stolen_vga_buf; mutex_init(&adev->mman.gtt_window_lock); @@ -1728,7 +1791,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) r = ttm_bo_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->ddev->anon_inode->i_mapping, - adev->need_dma32); + adev->ddev->vma_offset_manager, + dma_addressing_limited(adev->dev)); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; @@ -1768,6 +1832,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } + /* + *The reserved vram for memory training must be pinned to the specified + *place on the VRAM, so reserve it early. + */ + r = amdgpu_ttm_training_reserve_vram_init(adev); + if (r) + return r; + /* allocate memory as required for VGA * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS @@ -1775,9 +1847,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->stolen_vga_memory, - NULL, NULL); + NULL, &stolen_vga_buf); if (r) return r; + + /* + * reserve one TMR (64K) memory at the top of VRAM which holds + * IP Discovery data and is protected by PSP. + */ + r = amdgpu_bo_create_kernel_at(adev, + adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE, + DISCOVERY_TMR_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->discovery_memory, + NULL); + if (r) + return r; + DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); @@ -1839,8 +1925,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) */ void amdgpu_ttm_late_init(struct amdgpu_device *adev) { + void *stolen_vga_buf; /* return the VGA stolen memory (if any) back to VRAM */ - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); } /** @@ -1852,7 +1939,11 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) return; amdgpu_ttm_debugfs_fini(adev); + amdgpu_ttm_training_reserve_vram_fini(adev); + /* return the IP Discovery TMR memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); + if (adev->mman.aper_base_kaddr) iounmap(adev->mman.aper_base_kaddr); adev->mman.aper_base_kaddr = NULL; @@ -1888,11 +1979,13 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) if (enable) { struct amdgpu_ring *ring; - struct drm_sched_rq *rq; + struct drm_gpu_scheduler *sched; ring = adev->mman.buffer_funcs_ring; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL); + sched = &ring->sched; + r = drm_sched_entity_init(&adev->mman.entity, + DRM_SCHED_PRIORITY_KERNEL, &sched, + 1, NULL); if (r) { DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", r); @@ -1948,10 +2041,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE; - num_dw = adev->mman.buffer_funcs->copy_num_dw; - while (num_dw & 0x7) - num_dw++; - + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = num_pages * 8; r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job); @@ -1992,7 +2082,7 @@ error_free: int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush) { @@ -2011,11 +2101,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); - num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; - - /* for IB padding */ - while (num_dw & 0x7) - num_dw++; + num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); if (r) @@ -2066,7 +2152,7 @@ error_free: int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index caa76c693700..0dddedc06ae3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -38,6 +38,8 @@ #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 +#define AMDGPU_POISON 0xd0bed0be + struct amdgpu_mman { struct ttm_bo_device bdev; bool mem_global_referenced; @@ -83,18 +85,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush); int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_copy_mem *src, struct amdgpu_copy_mem *dst, uint64_t size, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **fence); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index bfaa0eac3213..9ef312428231 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -83,8 +83,8 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr) const struct smc_firmware_header_v2_0 *v2_hdr = container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0); - DRM_INFO("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); - DRM_INFO("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); + DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); + DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); } else { DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor); } @@ -269,6 +269,16 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr) DRM_DEBUG("kdb_size_bytes: %u\n", le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes)); } + if (version_minor == 2) { + const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 = + container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0); + DRM_DEBUG("kdb_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_header_version)); + DRM_DEBUG("kdb_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_offset_bytes)); + DRM_DEBUG("kdb_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes)); + } } else { DRM_ERROR("Unknown PSP ucode version: %u.%u\n", version_major, version_minor); @@ -350,11 +360,16 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_RAVEN: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_ARCTURUS: + case CHIP_RENOIR: case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + default: DRM_ERROR("Unknown firmware load type\n"); } @@ -432,6 +447,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr = NULL; const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL; + const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL; if (NULL == ucode->fw) return 0; @@ -445,6 +461,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, header = (const struct common_firmware_header *)ucode->fw->data; cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data; + dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP || (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && @@ -455,7 +472,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM && ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM && ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM && - ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV)) { + ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV && + ucode->ucode_id != AMDGPU_UCODE_ID_DMCUB)) { ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + @@ -491,6 +509,12 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, le32_to_cpu(header->ucode_array_offset_bytes) + le32_to_cpu(dmcu_hdr->intv_offset_bytes)), ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCUB) { + ucode->ucode_size = le32_to_cpu(dmcub_hdr->inst_const_bytes); + memcpy(ucode->kaddr, + (void *)((uint8_t *)ucode->fw->data + + le32_to_cpu(header->ucode_array_offset_bytes)), + ucode->ucode_size); } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) { ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes; memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index c1fb6dc86440..b0e656409c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -90,6 +90,15 @@ struct psp_firmware_header_v1_1 { uint32_t kdb_size_bytes; }; +/* version_major=1, version_minor=2 */ +struct psp_firmware_header_v1_2 { + struct psp_firmware_header_v1_0 v1_0; + uint32_t reserve[3]; + uint32_t kdb_header_version; + uint32_t kdb_offset_bytes; + uint32_t kdb_size_bytes; +}; + /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; @@ -99,6 +108,12 @@ struct ta_firmware_header_v1_0 { uint32_t ta_ras_ucode_version; uint32_t ta_ras_offset_bytes; uint32_t ta_ras_size_bytes; + uint32_t ta_hdcp_ucode_version; + uint32_t ta_hdcp_offset_bytes; + uint32_t ta_hdcp_size_bytes; + uint32_t ta_dtm_ucode_version; + uint32_t ta_dtm_offset_bytes; + uint32_t ta_dtm_size_bytes; }; /* version_major=1, version_minor=0 */ @@ -236,6 +251,13 @@ struct dmcu_firmware_header_v1_0 { uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */ }; +/* version_major=1, version_minor=0 */ +struct dmcub_firmware_header_v1_0 { + struct common_firmware_header header; + uint32_t inst_const_bytes; /* size of instruction region, in bytes */ + uint32_t bss_data_bytes; /* size of bss/data region, in bytes */ +}; + /* header is fixed size */ union amdgpu_firmware_header { struct common_firmware_header common; @@ -253,6 +275,7 @@ union amdgpu_firmware_header { struct sdma_firmware_header_v1_1 sdma_v1_1; struct gpu_info_firmware_header_v1_0 gpu_info; struct dmcu_firmware_header_v1_0 dmcu; + struct dmcub_firmware_header_v1_0 dmcub; uint8_t raw[0x100]; }; @@ -262,6 +285,12 @@ union amdgpu_firmware_header { enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_SDMA0 = 0, AMDGPU_UCODE_ID_SDMA1, + AMDGPU_UCODE_ID_SDMA2, + AMDGPU_UCODE_ID_SDMA3, + AMDGPU_UCODE_ID_SDMA4, + AMDGPU_UCODE_ID_SDMA5, + AMDGPU_UCODE_ID_SDMA6, + AMDGPU_UCODE_ID_SDMA7, AMDGPU_UCODE_ID_CP_CE, AMDGPU_UCODE_ID_CP_PFP, AMDGPU_UCODE_ID_CP_ME, @@ -271,20 +300,22 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MEC2_JT, AMDGPU_UCODE_ID_CP_MES, AMDGPU_UCODE_ID_CP_MES_DATA, - AMDGPU_UCODE_ID_RLC_G, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, + AMDGPU_UCODE_ID_RLC_G, AMDGPU_UCODE_ID_STORAGE, AMDGPU_UCODE_ID_SMC, AMDGPU_UCODE_ID_UVD, AMDGPU_UCODE_ID_UVD1, AMDGPU_UCODE_ID_VCE, AMDGPU_UCODE_ID_VCN, + AMDGPU_UCODE_ID_VCN1, AMDGPU_UCODE_ID_DMCU_ERAM, AMDGPU_UCODE_ID_DMCU_INTV, AMDGPU_UCODE_ID_VCN0_RAM, AMDGPU_UCODE_ID_VCN1_RAM, + AMDGPU_UCODE_ID_DMCUB, AMDGPU_UCODE_ID_MAXIMUM, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c new file mode 100644 index 000000000000..f4d40855147b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -0,0 +1,152 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu_ras.h" + +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_fs_if fs_info = { + .sysfs_name = "umc_err_count", + .debugfs_name = "umc_err_inject", + }; + struct ras_ih_if ih_info = { + .cb = amdgpu_umc_process_ras_data_cb, + }; + + if (!adev->umc.ras_if) { + adev->umc.ras_if = + kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->umc.ras_if) + return -ENOMEM; + adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if->sub_block_index = 0; + strcpy(adev->umc.ras_if->name, "umc"); + } + ih_info.head = fs_info.head = *adev->umc.ras_if; + + r = amdgpu_ras_late_init(adev, adev->umc.ras_if, + &fs_info, &ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); + if (r) + goto late_fini; + } else { + r = 0; + goto free; + } + + /* ras init of specific umc version */ + if (adev->umc.funcs && adev->umc.funcs->err_cnt_init) + adev->umc.funcs->err_cnt_init(adev); + + return 0; + +late_fini: + amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); +free: + kfree(adev->umc.ras_if); + adev->umc.ras_if = NULL; + return r; +} + +void amdgpu_umc_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && + adev->umc.ras_if) { + struct ras_common_if *ras_if = adev->umc.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + .cb = amdgpu_umc_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->umc.funcs && + adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, ras_error_status); + + if (adev->umc.funcs && + adev->umc.funcs->query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + DRM_WARN("Failed to alloc memory for umc error address record!\n"); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.funcs->query_ras_error_address(adev, ras_error_status); + } + + /* only uncorrectable error needs gpu reset */ + if (err_data->ue_count) { + if (err_data->err_addr_cnt && + amdgpu_ras_add_bad_pages(adev, err_data->err_addr, + err_data->err_addr_cnt)) + DRM_WARN("Failed to add ras bad page!\n"); + + amdgpu_ras_reset_gpu(adev); + } + + kfree(err_data->err_addr); + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->umc.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h new file mode 100644 index 000000000000..a615a1eb750b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_UMC_H__ +#define __AMDGPU_UMC_H__ + +struct amdgpu_umc_funcs { + void (*err_cnt_init)(struct amdgpu_device *adev); + int (*ras_late_init)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + void (*query_ras_error_address)(struct amdgpu_device *adev, + void *ras_error_status); + void (*init_registers)(struct amdgpu_device *adev); +}; + +struct amdgpu_umc { + /* max error count in one ras query call */ + uint32_t max_ras_err_cnt_per_query; + /* number of umc channel instance with memory map register access */ + uint32_t channel_inst_num; + /* number of umc instance with memory map register access */ + uint32_t umc_inst_num; + /* UMC regiser per channel offset */ + uint32_t channel_offs; + /* channel index table of interleaved memory */ + const uint32_t *channel_idx_tbl; + struct ras_common_if *ras_if; + + const struct amdgpu_umc_funcs *funcs; +}; + +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); +void amdgpu_umc_ras_fini(struct amdgpu_device *adev); +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry); +int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 5b2fea3b4a2c..a92f3b18e657 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -39,6 +39,8 @@ #include "cikd.h" #include "uvd/uvd_4_2_d.h" +#include "amdgpu_ras.h" + /* 1 second timeout */ #define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000) @@ -297,6 +299,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int i, j; + cancel_delayed_work_sync(&adev->uvd.idle_work); drm_sched_entity_destroy(&adev->uvd.entity); for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { @@ -327,12 +330,13 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) int amdgpu_uvd_entity_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - struct drm_sched_rq *rq; + struct drm_gpu_scheduler *sched; int r; ring = &adev->uvd.inst[0].ring; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL); + sched = &ring->sched; + r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); if (r) { DRM_ERROR("Failed setting up UVD kernel entity.\n"); return r; @@ -346,6 +350,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) unsigned size; void *ptr; int i, j; + bool in_ras_intr = amdgpu_ras_intr_triggered(); cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -372,8 +377,16 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (!adev->uvd.inst[j].saved_bo) return -ENOMEM; - memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + /* re-write 0 since err_event_athub will corrupt VCPU buffer */ + if (in_ras_intr) + memset(adev->uvd.inst[j].saved_bo, 0, size); + else + memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); } + + if (in_ras_intr) + DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + return 0; } @@ -1073,7 +1086,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, msecs_to_jiffies(10)); if (r == 0) @@ -1085,7 +1098,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (r) goto err_free; } else { - r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, + r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv, AMDGPU_FENCE_OWNER_UNDEFINED, false); if (r) goto err_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index b70b3c45bb29..59ddba137946 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -80,6 +80,11 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12); MODULE_FIRMWARE(FIRMWARE_VEGA20); static void amdgpu_vce_idle_work_handler(struct work_struct *work); +static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, + struct dma_fence **fence); +static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence); /** * amdgpu_vce_init - allocate memory, load vce firmware @@ -211,6 +216,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; + cancel_delayed_work_sync(&adev->vce.idle_work); drm_sched_entity_destroy(&adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, @@ -234,12 +240,13 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) int amdgpu_vce_entity_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - struct drm_sched_rq *rq; + struct drm_gpu_scheduler *sched; int r; ring = &adev->vce.ring[0]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL); + sched = &ring->sched; + r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCE run queue.\n"); return r; @@ -428,14 +435,15 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) * * Open up a stream for HW test */ -int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) +static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 1024; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -444,7 +452,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); /* stitch together an VCE create msg */ ib->length_dw = 0; @@ -476,8 +484,8 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[ib->length_dw++] = 0x00000014; /* len */ ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000001; for (i = ib->length_dw; i < ib_size_dw; ++i) @@ -507,8 +515,8 @@ err: * * Close up a stream for HW test or if userspace failed to do so */ -int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence) +static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence) { const unsigned ib_size_dw = 1024; struct amdgpu_job *job; @@ -644,7 +652,7 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, if ((addr + (uint64_t)size) > (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", + DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n", addr, lo, hi); return -EINVAL; } @@ -1110,13 +1118,20 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; /* skip vce ring1/2 ib test for now, since it's not reliable */ if (ring != &ring->adev->vce.ring[0]) return 0; - r = amdgpu_vce_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL); if (r) goto error; @@ -1132,5 +1147,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 30ea54dd9117..d6d83a3ec803 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -58,10 +58,6 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev); int amdgpu_vce_entity_init(struct amdgpu_device *adev); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); -int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence); -int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 2e12eeb314a7..f96464e2c157 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -28,30 +28,29 @@ #include <linux/module.h> #include <linux/pci.h> -#include <drm/drm.h> - #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_vcn.h" #include "soc15d.h" -#include "soc15_common.h" - -#include "vcn/vcn_1_0_offset.h" -#include "vcn/vcn_1_0_sh_mask.h" - -/* 1 second timeout */ -#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) /* Firmware Names */ #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" #define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin" #define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin" +#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin" +#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin" #define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin" +#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin" +#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); MODULE_FIRMWARE(FIRMWARE_RAVEN2); +MODULE_FIRMWARE(FIRMWARE_ARCTURUS); +MODULE_FIRMWARE(FIRMWARE_RENOIR); MODULE_FIRMWARE(FIRMWARE_NAVI10); +MODULE_FIRMWARE(FIRMWARE_NAVI14); +MODULE_FIRMWARE(FIRMWARE_NAVI12); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -61,7 +60,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) const char *fw_name; const struct common_firmware_header *hdr; unsigned char fw_check; - int r; + int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -74,12 +73,36 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) else fw_name = FIRMWARE_RAVEN; break; + case CHIP_ARCTURUS: + fw_name = FIRMWARE_ARCTURUS; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case CHIP_RENOIR: + fw_name = FIRMWARE_RENOIR; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; case CHIP_NAVI10: fw_name = FIRMWARE_NAVI10; if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; break; + case CHIP_NAVI14: + fw_name = FIRMWARE_NAVI14; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case CHIP_NAVI12: + fw_name = FIRMWARE_NAVI12; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -133,22 +156,28 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, - &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); - return r; - } - if (adev->vcn.indirect_sram) { - r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo, + &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr); if (r) { - dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r); + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); return r; } + + if (adev->vcn.indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo, + &adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr); + if (r) { + dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r); + return r; + } + } } return 0; @@ -156,26 +185,29 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { - int i; - - kvfree(adev->vcn.saved_bo); + int i, j; - if (adev->vcn.indirect_sram) { - amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, - (void **)&adev->vcn.dpg_sram_cpu_addr); - } + cancel_delayed_work_sync(&adev->vcn.idle_work); - amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, - &adev->vcn.gpu_addr, - (void **)&adev->vcn.cpu_addr); + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + if (adev->vcn.indirect_sram) { + amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo, + &adev->vcn.inst[j].dpg_sram_gpu_addr, + (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr); + } + kvfree(adev->vcn.inst[j].saved_bo); - amdgpu_ring_fini(&adev->vcn.ring_dec); + amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, + &adev->vcn.inst[j].gpu_addr, + (void **)&adev->vcn.inst[j].cpu_addr); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->vcn.ring_enc[i]); + amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec); - amdgpu_ring_fini(&adev->vcn.ring_jpeg); + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); + } release_firmware(adev->vcn.fw); @@ -186,21 +218,25 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; cancel_delayed_work_sync(&adev->vcn.idle_work); - if (adev->vcn.vcpu_bo == NULL) - return 0; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return 0; - size = amdgpu_bo_size(adev->vcn.vcpu_bo); - ptr = adev->vcn.cpu_addr; + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; - adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL); - if (!adev->vcn.saved_bo) - return -ENOMEM; - - memcpy_fromio(adev->vcn.saved_bo, ptr, size); + adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); + if (!adev->vcn.inst[i].saved_bo) + return -ENOMEM; + memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); + } return 0; } @@ -208,32 +244,36 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; - if (adev->vcn.vcpu_bo == NULL) - return -EINVAL; - - size = amdgpu_bo_size(adev->vcn.vcpu_bo); - ptr = adev->vcn.cpu_addr; - - if (adev->vcn.saved_bo != NULL) { - memcpy_toio(ptr, adev->vcn.saved_bo, size); - kvfree(adev->vcn.saved_bo); - adev->vcn.saved_bo = NULL; - } else { - const struct common_firmware_header *hdr; - unsigned offset; - - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return -EINVAL; + + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; + + if (adev->vcn.inst[i].saved_bo != NULL) { + memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + kvfree(adev->vcn.inst[i].saved_bo); + adev->vcn.inst[i].saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + } + memset_io(ptr, 0, size); } - memset_io(ptr, 0, size); } - return 0; } @@ -241,39 +281,36 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vcn.idle_work.work); - unsigned int fences = 0; - unsigned int i; + unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; + unsigned int i, j; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); - } + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - struct dpg_pause_state new_state; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); + } - if (fences) - new_state.fw_based = VCN_DPG_STATE__PAUSE; - else - new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; - if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) - new_state.jpeg = VCN_DPG_STATE__PAUSE; - else - new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + if (fence[j]) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); - } + adev->vcn.pause_dpg_mode(adev, j, &new_state); + } - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec); + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); + fences += fence[j]; + } if (fences == 0) { amdgpu_gfx_off_ctrl(adev, true); - if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); - else - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_GATE); } else { schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); } @@ -286,11 +323,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); - if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); - else - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); } if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -299,24 +333,17 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) unsigned int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); + fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]); } if (fences) new_state.fw_based = VCN_DPG_STATE__PAUSE; else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) - new_state.jpeg = VCN_DPG_STATE__PAUSE; - else - new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) new_state.fw_based = VCN_DPG_STATE__PAUSE; - else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) - new_state.jpeg = VCN_DPG_STATE__PAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); } } @@ -332,7 +359,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; @@ -340,7 +367,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.scratch9); + tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); if (tmp == 0xDEADBEEF) break; udelay(1); @@ -466,9 +493,14 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) { + struct amdgpu_device *adev = ring->adev; struct dma_fence *fence; long r; + /* temporarily disable ib test for sriov */ + if (amdgpu_sriov_vf(adev)) + return 0; + r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); if (r) goto error; @@ -517,13 +549,14 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) } static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -531,14 +564,14 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x0000000b; ib->ptr[ib->length_dw++] = 0x00000014; @@ -569,13 +602,14 @@ err: } static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -583,14 +617,14 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x0000000b; ib->ptr[ib->length_dw++] = 0x00000014; @@ -622,129 +656,38 @@ err: int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { + struct amdgpu_device *adev = ring->adev; struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; - r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); - if (r) - goto error; - - r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); - if (r) - goto error; - - r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) - r = -ETIMEDOUT; - else if (r > 0) - r = 0; - -error: - dma_fence_put(fence); - return r; -} - -int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t tmp = 0; - unsigned i; - int r; + /* temporarily disable ib test for sriov */ + if (amdgpu_sriov_vf(adev)) + return 0; - WREG32(adev->vcn.external.jpeg_pitch, 0xCAFEDEAD); - r = amdgpu_ring_alloc(ring, 3); + r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); if (r) return r; - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.jpeg_pitch, 0)); - amdgpu_ring_write(ring, 0xDEADBEEF); - amdgpu_ring_commit(ring); - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.jpeg_pitch); - if (tmp == 0xDEADBEEF) - break; - udelay(1); - } - - if (i >= adev->usec_timeout) - r = -ETIMEDOUT; - - return r; -} - -static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) -{ - struct amdgpu_device *adev = ring->adev; - struct amdgpu_job *job; - struct amdgpu_ib *ib; - struct dma_fence *f = NULL; - const unsigned ib_size_dw = 16; - int i, r; - - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL); if (r) - return r; - - ib = &job->ibs[0]; - - ib->ptr[0] = PACKETJ(adev->vcn.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0); - ib->ptr[1] = 0xDEADBEEF; - for (i = 2; i < 16; i += 2) { - ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); - ib->ptr[i+1] = 0; - } - ib->length_dw = 16; - - r = amdgpu_job_submit_direct(job, ring, &f); - if (r) - goto err; - - if (fence) - *fence = dma_fence_get(f); - dma_fence_put(f); - - return 0; - -err: - amdgpu_job_free(job); - return r; -} - -int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t tmp = 0; - unsigned i; - struct dma_fence *fence = NULL; - long r = 0; + goto error; - r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence); + r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence); if (r) goto error; r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { + if (r == 0) r = -ETIMEDOUT; - goto error; - } else if (r < 0) { - goto error; - } else { + else if (r > 0) r = 0; - } - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.jpeg_pitch); - if (tmp == 0xDEADBEEF) - break; - udelay(1); - } - - if (i >= adev->usec_timeout) - r = -ETIMEDOUT; - dma_fence_put(fence); error: + dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 19661c645703..6fe057329de2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -30,6 +30,12 @@ #define AMDGPU_VCN_FIRMWARE_OFFSET 256 #define AMDGPU_VCN_MAX_ENC_RINGS 3 +#define AMDGPU_MAX_VCN_INSTANCES 2 +#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES + +#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) +#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1) + #define VCN_DEC_KMD_CMD 0x80000000 #define VCN_DEC_CMD_FENCE 0x00000000 #define VCN_DEC_CMD_TRAP 0x00000001 @@ -51,33 +57,41 @@ #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 -#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ - ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ +#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b +#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 +#define mmUVD_REG_XX_MASK 0x026c +#define mmUVD_REG_XX_MASK_BASE_IDX 1 + +/* 1 second timeout */ +#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) + +#define RREG32_SOC15_DPG_MODE(ip, inst_idx, reg, mask, sram_sel) \ + ({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \ UVD_DPG_LMA_CTL__MASK_EN_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ - RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \ + RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ +#define WREG32_SOC15_DPG_MODE(ip, inst_idx, reg, value, mask, sram_sel) \ do { \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \ UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ } while (0) -#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst, reg) \ +#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst_idx, reg) \ ({ \ uint32_t internal_reg_offset, addr; \ bool video_range, aon_range; \ \ - addr = (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ + addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ addr <<= 2; \ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \ @@ -95,27 +109,27 @@ internal_reg_offset >>= 2; \ }) -#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) \ - ({ \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ - (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); \ +#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \ + ({ \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect) \ - do { \ - if (!indirect) { \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ - (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - } else { \ - *adev->vcn.dpg_sram_curr_addr++ = offset; \ - *adev->vcn.dpg_sram_curr_addr++ = value; \ - } \ +#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \ + } \ } while (0) enum engine_status_constants { @@ -146,36 +160,52 @@ struct amdgpu_vcn_reg{ unsigned data1; unsigned cmd; unsigned nop; + unsigned context_id; + unsigned ib_vmid; + unsigned ib_bar_low; + unsigned ib_bar_high; + unsigned ib_size; + unsigned gp_scratch8; unsigned scratch9; - unsigned jpeg_pitch; }; -struct amdgpu_vcn { +struct amdgpu_vcn_inst { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; - unsigned fw_version; void *saved_bo; - struct delayed_work idle_work; - const struct firmware *fw; /* VCN firmware */ struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; - struct amdgpu_ring ring_jpeg; struct amdgpu_irq_src irq; - unsigned num_enc_rings; - enum amd_powergating_state cur_state; - struct dpg_pause_state pause_state; - struct amdgpu_vcn_reg internal, external; - int (*pause_dpg_mode)(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); - - bool indirect_sram; + struct amdgpu_vcn_reg external; struct amdgpu_bo *dpg_sram_bo; + struct dpg_pause_state pause_state; void *dpg_sram_cpu_addr; uint64_t dpg_sram_gpu_addr; uint32_t *dpg_sram_curr_addr; }; +struct amdgpu_vcn { + unsigned fw_version; + struct delayed_work idle_work; + const struct firmware *fw; /* VCN firmware */ + unsigned num_enc_rings; + enum amd_powergating_state cur_state; + bool indirect_sram; + + uint8_t num_vcn_inst; + struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; + struct amdgpu_vcn_reg internal; + struct drm_gpu_scheduler *vcn_enc_sched[AMDGPU_MAX_VCN_ENC_RINGS]; + struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; + uint32_t num_vcn_enc_sched; + uint32_t num_vcn_dec_sched; + + unsigned harvest_config; + int (*pause_dpg_mode)(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); +}; + int amdgpu_vcn_sw_init(struct amdgpu_device *adev); int amdgpu_vcn_sw_fini(struct amdgpu_device *adev); int amdgpu_vcn_suspend(struct amdgpu_device *adev); @@ -189,7 +219,4 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); -int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring); -int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout); - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 59dd204498c5..adc813cde8e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -45,98 +45,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) adev->pg_flags = 0; } -uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) -{ - signed long r, cnt = 0; - unsigned long flags; - uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *ring = &kiq->ring; - - BUG_ON(!ring->funcs->emit_rreg); - - spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_rreg(ring, reg); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - - /* don't wait anymore for gpu reset case because this way may - * block gpu_recover() routine forever, e.g. this virt_kiq_rreg - * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will - * never return if we keep waiting in virt_kiq_rreg, which cause - * gpu_recover() hang there. - * - * also don't wait anymore for IRQ context - * */ - if (r < 1 && (adev->in_gpu_reset || in_interrupt())) - goto failed_kiq_read; - - might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { - msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - } - - if (cnt > MAX_KIQ_REG_TRY) - goto failed_kiq_read; - - return adev->wb.wb[adev->virt.reg_val_offs]; - -failed_kiq_read: - pr_err("failed to read reg:%x\n", reg); - return ~0; -} - -void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) -{ - signed long r, cnt = 0; - unsigned long flags; - uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *ring = &kiq->ring; - - BUG_ON(!ring->funcs->emit_wreg); - - spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - - /* don't wait anymore for gpu reset case because this way may - * block gpu_recover() routine forever, e.g. this virt_kiq_rreg - * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will - * never return if we keep waiting in virt_kiq_rreg, which cause - * gpu_recover() hang there. - * - * also don't wait anymore for IRQ context - * */ - if (r < 1 && (adev->in_gpu_reset || in_interrupt())) - goto failed_kiq_write; - - might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { - - msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - } - - if (cnt > MAX_KIQ_REG_TRY) - goto failed_kiq_write; - - return; - -failed_kiq_write: - pr_err("failed to write reg:%x\n", reg); -} - void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask) @@ -379,99 +287,3 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) } } } - -static uint32_t parse_clk(char *buf, bool min) -{ - char *ptr = buf; - uint32_t clk = 0; - - do { - ptr = strchr(ptr, ':'); - if (!ptr) - break; - ptr+=2; - if (kstrtou32(ptr, 10, &clk)) - return 0; - } while (!min); - - return clk * 100; -} - -uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest) -{ - char *buf = NULL; - uint32_t clk = 0; - - buf = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf); - clk = parse_clk(buf, lowest); - - kfree(buf); - - return clk; -} - -uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest) -{ - char *buf = NULL; - uint32_t clk = 0; - - buf = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf); - clk = parse_clk(buf, lowest); - - kfree(buf); - - return clk; -} - -void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev) -{ - struct amdgpu_virt *virt = &adev->virt; - - if (virt->ops && virt->ops->init_reg_access_mode) - virt->ops->init_reg_access_mode(adev); -} - -bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev) -{ - bool ret = false; - struct amdgpu_virt *virt = &adev->virt; - - if (amdgpu_sriov_vf(adev) - && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH)) - ret = true; - - return ret; -} - -bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev) -{ - bool ret = false; - struct amdgpu_virt *virt = &adev->virt; - - if (amdgpu_sriov_vf(adev) - && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC) - && !(amdgpu_sriov_runtime(adev))) - ret = true; - - return ret; -} - -bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev) -{ - bool ret = false; - struct amdgpu_virt *virt = &adev->virt; - - if (amdgpu_sriov_vf(adev) - && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING)) - ret = true; - - return ret; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index f5107731e9c4..daaf909d009a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -48,12 +48,6 @@ struct amdgpu_vf_error_buffer { uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; }; -/* According to the fw feature, some new reg access modes are supported */ -#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */ -#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */ -#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */ -#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */ - /** * struct amdgpu_virt_ops - amdgpu device virt operations */ @@ -63,9 +57,6 @@ struct amdgpu_virt_ops { int (*reset_gpu)(struct amdgpu_device *adev); int (*wait_reset)(struct amdgpu_device *adev); void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); - int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); - int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); - void (*init_reg_access_mode)(struct amdgpu_device *adev); }; /* @@ -92,8 +83,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, /* VRAM LOST by GIM */ AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4, - /* HW PERF SIM in GIM */ - AMDGIM_FEATURE_HW_PERF_SIMULATION = (1 << 3), + /* PP ONE VF MODE in GIM */ + AMDGIM_FEATURE_PP_ONE_VF = (1 << 4), }; struct amd_sriov_msg_pf2vf_info_header { @@ -264,8 +255,6 @@ struct amdgpu_virt { struct amdgpu_vf_error_buffer vf_errors; struct amdgpu_virt_fw_reserve fw_reserve; uint32_t gim_feature; - /* protect DPM events to GIM */ - struct mutex dpm_mutex; uint32_t reg_access_mode; }; @@ -293,13 +282,11 @@ static inline bool is_virtual_machine(void) #endif } -#define amdgim_is_hwperf(adev) \ - ((adev)->virt.gim_feature & AMDGIM_FEATURE_HW_PERF_SIMULATION) +#define amdgpu_sriov_is_pp_one_vf(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); -uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); -void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t rreg1, uint32_t ref, uint32_t mask); @@ -313,12 +300,4 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, unsigned int key, unsigned int chksum); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); -uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); -uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); - -void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev); -bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev); -bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev); -bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev); - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 24c3c05e2fb7..d16231d6a790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -83,6 +83,32 @@ struct amdgpu_prt_cb { }; /** + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS + * happens while holding this lock anywhere to prevent deadlocks when + * an MMU notifier runs in reclaim-FS context. + */ +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm) +{ + mutex_lock(&vm->eviction_lock); + vm->saved_flags = memalloc_nofs_save(); +} + +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm) +{ + if (mutex_trylock(&vm->eviction_lock)) { + vm->saved_flags = memalloc_nofs_save(); + return 1; + } + return 0; +} + +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) +{ + memalloc_nofs_restore(vm->saved_flags); + mutex_unlock(&vm->eviction_lock); +} + +/** * amdgpu_vm_level_shift - return the addr shift for each level * * @adev: amdgpu_device pointer @@ -130,7 +156,8 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, if (level == adev->vm_manager.root_level) /* For the root directory */ - return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift; + return round_up(adev->vm_manager.max_pfn, 1ULL << shift) + >> shift; else if (level != AMDGPU_VM_PTB) /* Everything in between */ return 512; @@ -302,7 +329,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, base->next = bo->vm_bo; bo->vm_bo = base; - if (bo->tbo.resv != vm->root.base.bo->tbo.resv) + if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) return; vm->bulk_moveable = false; @@ -341,7 +368,7 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt) return container_of(parent->vm_bo, struct amdgpu_vm_pt, base); } -/** +/* * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt */ struct amdgpu_vm_pt_cursor { @@ -482,6 +509,7 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev, * * @adev: amdgpu_device structure * @vm: amdgpu_vm structure + * @start: optional cursor to start with * @cursor: state to initialize * * Starts a deep first traversal of the PD/PT tree. @@ -535,7 +563,7 @@ static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, amdgpu_vm_pt_ancestor(cursor); } -/** +/* * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs */ #define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ @@ -560,12 +588,20 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, { entry->priority = 0; entry->tv.bo = &vm->root.base.bo->tbo; - /* One for the VM updates, one for TTM and one for the CS job */ - entry->tv.num_shared = 3; + /* One for TTM and one for the CS job */ + entry->tv.num_shared = 2; entry->user_pages = NULL; list_add(&entry->tv.head, validated); } +/** + * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag + * + * @bo: BO which was removed from the LRU + * + * Make sure the bulk_moveable flag is updated when a BO is removed from the + * LRU. + */ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) { struct amdgpu_bo *abo; @@ -583,7 +619,7 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - if (abo->tbo.resv == vm->root.base.bo->tbo.resv) + if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) vm->bulk_moveable = false; } @@ -600,21 +636,18 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct ttm_bo_global *glob = adev->mman.bdev.glob; struct amdgpu_vm_bo_base *bo_base; -#if 0 if (vm->bulk_moveable) { - spin_lock(&glob->lru_lock); + spin_lock(&ttm_bo_glob.lru_lock); ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); - spin_unlock(&glob->lru_lock); + spin_unlock(&ttm_bo_glob.lru_lock); return; } -#endif memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); - spin_lock(&glob->lru_lock); + spin_lock(&ttm_bo_glob.lru_lock); list_for_each_entry(bo_base, &vm->idle, vm_status) { struct amdgpu_bo *bo = bo_base->bo; @@ -626,7 +659,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, ttm_bo_move_to_lru_tail(&bo->shadow->tbo, &vm->lru_bulk_move); } - spin_unlock(&glob->lru_lock); + spin_unlock(&ttm_bo_glob.lru_lock); vm->bulk_moveable = true; } @@ -649,7 +682,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, void *param) { struct amdgpu_vm_bo_base *bo_base, *tmp; - int r = 0; + int r; vm->bulk_moveable &= list_empty(&vm->evicted); @@ -658,7 +691,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = validate(param, bo); if (r) - break; + return r; if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); @@ -671,7 +704,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } } - return r; + amdgpu_vm_eviction_lock(vm); + vm->evicting = false; + amdgpu_vm_eviction_unlock(vm); + + return 0; } /** @@ -695,6 +732,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @vm: VM to clear BO from * @bo: BO to clear + * @direct: use a direct update * * Root PD needs to be reserved when calling this. * @@ -703,7 +741,8 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) */ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo *bo) + struct amdgpu_bo *bo, + bool direct) { struct ttm_operation_ctx ctx = { true, false }; unsigned level = adev->vm_manager.root_level; @@ -762,6 +801,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; + params.direct = direct; r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_KFD, NULL); if (r) @@ -815,10 +855,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requesting vm + * @level: the page table level + * @direct: use a direct update * @bp: resulting BO allocation parameters */ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int level, struct amdgpu_bo_param *bp) + int level, bool direct, + struct amdgpu_bo_param *bp) { memset(bp, 0, sizeof(*bp)); @@ -833,8 +876,9 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, else if (!vm->root.base.bo || vm->root.base.bo->shadow) bp->flags |= AMDGPU_GEM_CREATE_SHADOW; bp->type = ttm_bo_type_kernel; + bp->no_wait_gpu = direct; if (vm->root.base.bo) - bp->resv = vm->root.base.bo->tbo.resv; + bp->resv = vm->root.base.bo->tbo.base.resv; } /** @@ -843,6 +887,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, * @adev: amdgpu_device pointer * @vm: VM to allocate page tables for * @cursor: Which page table to allocate + * @direct: use a direct update * * Make sure a specific page table or directory is allocated. * @@ -852,7 +897,8 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, */ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *cursor) + struct amdgpu_vm_pt_cursor *cursor, + bool direct) { struct amdgpu_vm_pt *entry = cursor->entry; struct amdgpu_bo_param bp; @@ -873,7 +919,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, if (entry->base.bo) return 0; - amdgpu_vm_bo_param(adev, vm, cursor->level, &bp); + amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp); r = amdgpu_bo_create(adev, &bp, &pt); if (r) @@ -885,7 +931,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, pt->parent = amdgpu_bo_ref(cursor->parent->base.bo); amdgpu_vm_bo_base_init(&entry->base, vm, pt); - r = amdgpu_vm_clear_bo(adev, vm, pt); + r = amdgpu_vm_clear_bo(adev, vm, pt, direct); if (r) goto error_free_pt; @@ -1022,7 +1068,8 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, * Returns: * 0 on success, errno otherwise. */ -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, + bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -1036,10 +1083,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ id->oa_base != job->oa_base || id->oa_size != job->oa_size); bool vm_flush_needed = job->vm_needs_flush; - bool pasid_mapping_needed = id->pasid != job->pasid || - !id->pasid_mapping || - !dma_fence_is_signaled(id->pasid_mapping); struct dma_fence *fence = NULL; + bool pasid_mapping_needed = false; unsigned patch_offset = 0; int r; @@ -1049,6 +1094,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ pasid_mapping_needed = true; } + mutex_lock(&id_mgr->lock); + if (id->pasid != job->pasid || !id->pasid_mapping || + !dma_fence_is_signaled(id->pasid_mapping)) + pasid_mapping_needed = true; + mutex_unlock(&id_mgr->lock); + gds_switch_needed &= !!ring->funcs->emit_gds_switch; vm_flush_needed &= !!ring->funcs->emit_vm_flush && job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; @@ -1088,9 +1139,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ } if (pasid_mapping_needed) { + mutex_lock(&id_mgr->lock); id->pasid = job->pasid; dma_fence_put(id->pasid_mapping); id->pasid_mapping = dma_fence_get(fence); + mutex_unlock(&id_mgr->lock); } dma_fence_put(fence); @@ -1174,10 +1227,10 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } -/* +/** * amdgpu_vm_update_pde - update a single level in the hierarchy * - * @param: parameters for the update + * @params: parameters for the update * @vm: requested vm * @entry: entry to update * @@ -1201,7 +1254,7 @@ static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags); } -/* +/** * amdgpu_vm_invalidate_pds - mark all PDs as invalid * * @adev: amdgpu_device pointer @@ -1220,19 +1273,20 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, amdgpu_vm_bo_relocated(&entry->base); } -/* - * amdgpu_vm_update_directories - make sure that all directories are valid +/** + * amdgpu_vm_update_pdes - make sure that all directories are valid * * @adev: amdgpu_device pointer * @vm: requested vm + * @direct: submit directly to the paging queue * * Makes sure all directories are up to date. * * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_update_directories(struct amdgpu_device *adev, - struct amdgpu_vm *vm) +int amdgpu_vm_update_pdes(struct amdgpu_device *adev, + struct amdgpu_vm *vm, bool direct) { struct amdgpu_vm_update_params params; int r; @@ -1243,6 +1297,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; + params.direct = direct; r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_VM, NULL); if (r) @@ -1270,7 +1325,7 @@ error: return r; } -/** +/* * amdgpu_vm_update_flags - figure out flags for PTE updates * * Make sure to set the right flags for the PTEs at the desired level. @@ -1393,7 +1448,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t incr, entry_end, pe_start; struct amdgpu_bo *pt; - r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor); + /* make sure that the page tables covering the address range are + * actually allocated + */ + r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor, + params->direct); if (r) return r; @@ -1465,7 +1524,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, } while (frag_start < entry_end); if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Free all child entries */ + /* Free all child entries. + * Update the tables with the flags and addresses and free up subsequent + * tables in the case of huge pages or freed up areas. + * This is the maximum you can free, because all other page tables are not + * completely covered by the range and so potentially still in use. + */ while (cursor.pfn < frag_start) { amdgpu_vm_free_pts(adev, params->vm, &cursor); amdgpu_vm_pt_next(adev, &cursor); @@ -1484,13 +1548,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * * @adev: amdgpu_device pointer - * @exclusive: fence we need to sync to - * @pages_addr: DMA addresses to use for mapping * @vm: requested vm + * @direct: direct submission in a page fault + * @exclusive: fence we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries * @addr: addr to set the area to + * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence * * Fill in the page table entries between @start and @last. @@ -1499,11 +1564,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_vm *vm, bool direct, struct dma_fence *exclusive, - dma_addr_t *pages_addr, - struct amdgpu_vm *vm, uint64_t start, uint64_t last, uint64_t flags, uint64_t addr, + dma_addr_t *pages_addr, struct dma_fence **fence) { struct amdgpu_vm_update_params params; @@ -1513,21 +1578,32 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; + params.direct = direct; params.pages_addr = pages_addr; /* sync to everything except eviction fences on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; + amdgpu_vm_eviction_lock(vm); + if (vm->evicting) { + r = -EBUSY; + goto error_unlock; + } + r = vm->update_funcs->prepare(¶ms, owner, exclusive); if (r) - return r; + goto error_unlock; r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); if (r) - return r; + goto error_unlock; + + r = vm->update_funcs->commit(¶ms, fence); - return vm->update_funcs->commit(¶ms, fence); +error_unlock: + amdgpu_vm_eviction_unlock(vm); + return r; } /** @@ -1571,27 +1647,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) flags &= ~AMDGPU_PTE_WRITEABLE; - flags &= ~AMDGPU_PTE_EXECUTABLE; - flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; - - if (adev->asic_type == CHIP_NAVI10) { - flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; - flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); - } else { - flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; - flags |= (mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK); - } - - if ((mapping->flags & AMDGPU_PTE_PRT) && - (adev->asic_type >= CHIP_VEGA10)) { - flags |= AMDGPU_PTE_PRT; - if (adev->asic_type >= CHIP_NAVI10) { - flags |= AMDGPU_PTE_SNOOPED; - flags |= AMDGPU_PTE_LOG; - flags |= AMDGPU_PTE_SYSTEM; - } - flags &= ~AMDGPU_PTE_VALID; - } + /* Apply ASIC specific mapping flags */ + amdgpu_gmc_get_vm_pte(adev, mapping, &flags); trace_amdgpu_vm_bo_update(mapping); @@ -1635,7 +1692,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, dma_addr = pages_addr; } else { addr = pages_addr[pfn]; - max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE; + max_entries = count * + AMDGPU_GPU_PAGES_IN_CPU_PAGE; } } else if (flags & AMDGPU_PTE_VALID) { @@ -1644,9 +1702,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } last = min((uint64_t)mapping->last, start + max_entries - 1); - r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm, + r = amdgpu_vm_bo_update_mapping(adev, vm, false, exclusive, start, last, flags, addr, - fence); + dma_addr, fence); if (r) return r; @@ -1674,8 +1732,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, * Returns: * 0 for success, -EINVAL for failure. */ -int amdgpu_vm_bo_update(struct amdgpu_device *adev, - struct amdgpu_bo_va *bo_va, +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear) { struct amdgpu_bo *bo = bo_va->base.bo; @@ -1702,7 +1759,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); pages_addr = ttm->dma_address; } - exclusive = reservation_object_get_excl(bo->tbo.resv); + exclusive = bo->tbo.moving; } if (bo) { @@ -1712,7 +1769,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, flags = 0x0; } - if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) + if (clear || (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)) last_update = &vm->last_update; else last_update = &bo_va->last_pt_update; @@ -1733,20 +1790,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, return r; } - if (vm->use_cpu_for_update) { - /* Flush HDP */ - mb(); - amdgpu_asic_flush_hdp(adev, NULL); - } - /* If the BO is not in its preferred location add it back to * the evicted list so that it gets validated again on the * next command submission. */ - if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { uint32_t mem_type = bo->tbo.mem.mem_type; - if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) + if (!(bo->preferred_domains & + amdgpu_mem_type_to_domain(mem_type))) amdgpu_vm_bo_evicted(&bo_va->base); else amdgpu_vm_bo_idle(&bo_va->base); @@ -1879,18 +1931,18 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, */ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct reservation_object *resv = vm->root.base.bo->tbo.resv; + struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; struct dma_fence *excl, **shared; unsigned i, shared_count; int r; - r = reservation_object_get_fences_rcu(resv, &excl, + r = dma_resv_get_fences_rcu(resv, &excl, &shared_count, &shared); if (r) { /* Not enough memory to grab the fence list, as last resort * block for all the fences to complete. */ - reservation_object_wait_timeout_rcu(resv, true, false, + dma_resv_wait_timeout_rcu(resv, true, false, MAX_SCHEDULE_TIMEOUT); return; } @@ -1940,9 +1992,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, + r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL, mapping->start, mapping->last, - init_pte_value, 0, &f); + init_pte_value, 0, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -1978,7 +2030,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va *bo_va, *tmp; - struct reservation_object *resv; + struct dma_resv *resv; bool clear; int r; @@ -1993,11 +2045,11 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); - resv = bo_va->base.bo->tbo.resv; + resv = bo_va->base.bo->tbo.base.resv; spin_unlock(&vm->invalidated_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!amdgpu_vm_debug && reservation_object_trylock(resv)) + if (!amdgpu_vm_debug && dma_resv_trylock(resv)) clear = false; /* Somebody else is using the BO right now */ else @@ -2008,7 +2060,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, return r; if (!clear) - reservation_object_unlock(resv); + dma_resv_unlock(resv); spin_lock(&vm->invalidated_lock); } spin_unlock(&vm->invalidated_lock); @@ -2084,7 +2136,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, if (mapping->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && + if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv && !bo_va->base.moved) { list_move(&bo_va->base.vm_status, &vm->moved); } @@ -2416,7 +2468,8 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) struct amdgpu_bo *bo; bo = mapping->bo_va->base.bo; - if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket) + if (dma_resv_locking_ctx(bo->tbo.base.resv) != + ticket) continue; } @@ -2443,7 +2496,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_vm_bo_base **base; if (bo) { - if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) vm->bulk_moveable = false; for (base = &bo_va->base.bo->vm_bo; *base; @@ -2487,6 +2540,41 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, } /** + * amdgpu_vm_evictable - check if we can evict a VM + * + * @bo: A page table of the VM. + * + * Check if it is possible to evict a VM. + */ +bool amdgpu_vm_evictable(struct amdgpu_bo *bo) +{ + struct amdgpu_vm_bo_base *bo_base = bo->vm_bo; + + /* Page tables of a destroyed VM can go away immediately */ + if (!bo_base || !bo_base->vm) + return true; + + /* Don't evict VM page tables while they are busy */ + if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true)) + return false; + + /* Try to block ongoing updates */ + if (!amdgpu_vm_eviction_trylock(bo_base->vm)) + return false; + + /* Don't evict VM page tables while they are updated */ + if (!dma_fence_is_signaled(bo_base->vm->last_direct) || + !dma_fence_is_signaled(bo_base->vm->last_delayed)) { + amdgpu_vm_eviction_unlock(bo_base->vm); + return false; + } + + bo_base->vm->evicting = true; + amdgpu_vm_eviction_unlock(bo_base->vm); + return true; +} + +/** * amdgpu_vm_bo_invalidate - mark the bo as invalid * * @adev: amdgpu_device pointer @@ -2507,7 +2595,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { amdgpu_vm_bo_evicted(bo_base); continue; } @@ -2518,7 +2606,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, if (bo->tbo.type == ttm_bo_type_kernel) amdgpu_vm_bo_relocated(bo_base); - else if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) amdgpu_vm_bo_moved(bo_base); else amdgpu_vm_bo_invalidated(bo_base); @@ -2648,8 +2736,16 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - return reservation_object_wait_timeout_rcu(vm->root.base.bo->tbo.resv, - true, true, timeout); + timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv, + true, true, timeout); + if (timeout <= 0) + return timeout; + + timeout = dma_fence_wait_timeout(vm->last_direct, true, timeout); + if (timeout <= 0) + return timeout; + + return dma_fence_wait_timeout(vm->last_delayed, true, timeout); } /** @@ -2683,13 +2779,22 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_lock_init(&vm->invalidated_lock); INIT_LIST_HEAD(&vm->freed); - /* create scheduler entity for page table updates */ - r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs, - adev->vm_manager.vm_pte_num_rqs, NULL); + + /* create scheduler entities for page table updates */ + r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL, + adev->vm_manager.vm_pte_scheds, + adev->vm_manager.vm_pte_num_scheds, NULL); if (r) return r; + r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL, + adev->vm_manager.vm_pte_scheds, + adev->vm_manager.vm_pte_num_scheds, NULL); + if (r) + goto error_free_direct; + vm->pte_support_ats = false; + vm->is_compute_context = false; if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2703,7 +2808,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), + WARN_ONCE((vm->use_cpu_for_update && + !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); if (vm->use_cpu_for_update) @@ -2711,25 +2817,30 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, else vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; + vm->last_direct = dma_fence_get_stub(); + vm->last_delayed = dma_fence_get_stub(); + + mutex_init(&vm->eviction_lock); + vm->evicting = false; - amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp); + amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; r = amdgpu_bo_create(adev, &bp, &root); if (r) - goto error_free_sched_entity; + goto error_free_delayed; r = amdgpu_bo_reserve(root, true); if (r) goto error_free_root; - r = reservation_object_reserve_shared(root->tbo.resv, 1); + r = dma_resv_reserve_shared(root->tbo.base.resv, 1); if (r) goto error_unreserve; amdgpu_vm_bo_base_init(&vm->root.base, vm, root); - r = amdgpu_vm_clear_bo(adev, vm, root); + r = amdgpu_vm_clear_bo(adev, vm, root, false); if (r) goto error_unreserve; @@ -2760,8 +2871,13 @@ error_free_root: amdgpu_bo_unref(&vm->root.base.bo); vm->root.base.bo = NULL; -error_free_sched_entity: - drm_sched_entity_destroy(&vm->entity); +error_free_delayed: + dma_fence_put(vm->last_direct); + dma_fence_put(vm->last_delayed); + drm_sched_entity_destroy(&vm->delayed); + +error_free_direct: + drm_sched_entity_destroy(&vm->direct); return r; } @@ -2802,6 +2918,7 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm + * @pasid: pasid to use * * This only works on GFX VMs that don't have any BOs added and no * page tables allocated yet. @@ -2817,7 +2934,8 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * Returns: * 0 for success, -errno for errors. */ -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid) +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned int pasid) { bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; @@ -2849,7 +2967,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns */ if (pte_support_ats != vm->pte_support_ats) { vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo); + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false); if (r) goto free_idr; } @@ -2859,9 +2977,18 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns AMDGPU_VM_USE_CPU_FOR_COMPUTE); DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), + WARN_ONCE((vm->use_cpu_for_update && + !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); + if (vm->use_cpu_for_update) + vm->update_funcs = &amdgpu_vm_cpu_funcs; + else + vm->update_funcs = &amdgpu_vm_sdma_funcs; + dma_fence_put(vm->last_update); + vm->last_update = NULL; + vm->is_compute_context = true; + if (vm->pasid) { unsigned long flags; @@ -2915,6 +3042,7 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } vm->pasid = 0; + vm->is_compute_context = false; } /** @@ -2931,31 +3059,26 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; - int i, r; + int i; amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); + root = amdgpu_bo_ref(vm->root.base.bo); + amdgpu_bo_reserve(root, true); if (vm->pasid) { unsigned long flags; spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + vm->pasid = 0; } - drm_sched_entity_destroy(&vm->entity); + dma_fence_wait(vm->last_direct, false); + dma_fence_put(vm->last_direct); + dma_fence_wait(vm->last_delayed, false); + dma_fence_put(vm->last_delayed); - if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { - dev_err(adev->dev, "still active bo inside vm\n"); - } - rbtree_postorder_for_each_entry_safe(mapping, tmp, - &vm->va.rb_root, rb) { - /* Don't remove the mapping here, we don't want to trigger a - * rebalance and the tree is about to be destroyed anyway. - */ - list_del(&mapping->list); - kfree(mapping); - } list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { amdgpu_vm_prt_fini(adev, vm); @@ -2966,16 +3089,26 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_mapping(adev, vm, mapping, NULL); } - root = amdgpu_bo_ref(vm->root.base.bo); - r = amdgpu_bo_reserve(root, true); - if (r) { - dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); - } else { - amdgpu_vm_free_pts(adev, vm, NULL); - amdgpu_bo_unreserve(root); - } + amdgpu_vm_free_pts(adev, vm, NULL); + amdgpu_bo_unreserve(root); amdgpu_bo_unref(&root); WARN_ON(vm->root.base.bo); + + drm_sched_entity_destroy(&vm->direct); + drm_sched_entity_destroy(&vm->delayed); + + if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { + dev_err(adev->dev, "still active bo inside vm\n"); + } + rbtree_postorder_for_each_entry_safe(mapping, tmp, + &vm->va.rb_root, rb) { + /* Don't remove the mapping here, we don't want to trigger a + * rebalance and the tree is about to be destroyed anyway. + */ + list_del(&mapping->list); + kfree(mapping); + } + dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) amdgpu_vmid_free_reserved(adev, vm, i); @@ -3059,13 +3192,14 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: - /* current, we only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); + /* We only have requirement to reserve vmid from gfxhub */ + r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, + AMDGPU_GFXHUB_0); if (r) return r; break; case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); + amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); break; default: return -EINVAL; @@ -3103,13 +3237,97 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, */ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) { - if (!vm->task_info.pid) { - vm->task_info.pid = current->pid; - get_task_comm(vm->task_info.task_name, current); + if (vm->task_info.pid) + return; - if (current->group_leader->mm == current->mm) { - vm->task_info.tgid = current->group_leader->pid; - get_task_comm(vm->task_info.process_name, current->group_leader); - } + vm->task_info.pid = current->pid; + get_task_comm(vm->task_info.task_name, current); + + if (current->group_leader->mm != current->mm) + return; + + vm->task_info.tgid = current->group_leader->pid; + get_task_comm(vm->task_info.process_name, current->group_leader); +} + +/** + * amdgpu_vm_handle_fault - graceful handling of VM faults. + * @adev: amdgpu device pointer + * @pasid: PASID of the VM + * @addr: Address of the fault + * + * Try to gracefully handle a VM fault. Return true if the fault was handled and + * shouldn't be reported any more. + */ +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, + uint64_t addr) +{ + struct amdgpu_bo *root; + uint64_t value, flags; + struct amdgpu_vm *vm; + long r; + + spin_lock(&adev->vm_manager.pasid_lock); + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + if (vm) + root = amdgpu_bo_ref(vm->root.base.bo); + else + root = NULL; + spin_unlock(&adev->vm_manager.pasid_lock); + + if (!root) + return false; + + r = amdgpu_bo_reserve(root, true); + if (r) + goto error_unref; + + /* Double check that the VM still exists */ + spin_lock(&adev->vm_manager.pasid_lock); + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + if (vm && vm->root.base.bo != root) + vm = NULL; + spin_unlock(&adev->vm_manager.pasid_lock); + if (!vm) + goto error_unlock; + + addr /= AMDGPU_GPU_PAGE_SIZE; + flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | + AMDGPU_PTE_SYSTEM; + + if (vm->is_compute_context) { + /* Intentionally setting invalid PTE flag + * combination to force a no-retry-fault + */ + flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | + AMDGPU_PTE_TF; + value = 0; + + } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { + /* Redirect the access to the dummy page */ + value = adev->dummy_page_addr; + flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | + AMDGPU_PTE_WRITEABLE; + + } else { + /* Let the hw retry silently on the PTE */ + value = 0; } + + r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1, + flags, value, NULL, NULL); + if (r) + goto error_unlock; + + r = amdgpu_vm_update_pdes(adev, vm, true); + +error_unlock: + amdgpu_bo_unreserve(root); + if (r < 0) + DRM_ERROR("Can't handle page fault (%ld)\n", r); + +error_unref: + amdgpu_bo_unref(&root); + + return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 489a162ca620..b4640ab38c95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,6 +30,7 @@ #include <drm/gpu_scheduler.h> #include <drm/drm_file.h> #include <drm/ttm/ttm_bo_driver.h> +#include <linux/sched/mm.h> #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -90,7 +91,7 @@ struct amdgpu_bo_list_entry; | AMDGPU_PTE_WRITEABLE \ | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC)) -/* NAVI10 only */ +/* gfx10 */ #define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48) #define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL) @@ -99,10 +100,14 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_FAULT_STOP_FIRST 1 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 +/* Reserve 4MB VRAM for page tables */ +#define AMDGPU_VM_RESERVED_VRAM (4ULL << 20) + /* max number of VMHUB */ -#define AMDGPU_MAX_VMHUBS 2 -#define AMDGPU_GFXHUB 0 -#define AMDGPU_MMHUB 1 +#define AMDGPU_MAX_VMHUBS 3 +#define AMDGPU_GFXHUB_0 0 +#define AMDGPU_MMHUB_0 1 +#define AMDGPU_MMHUB_1 2 /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (1ULL << 20) @@ -198,6 +203,11 @@ struct amdgpu_vm_update_params { struct amdgpu_vm *vm; /** + * @direct: if changes should be made directly + */ + bool direct; + + /** * @pages_addr: * * DMA addresses to use for mapping @@ -230,6 +240,13 @@ struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; + /* Lock to prevent eviction while we are updating page tables + * use vm_eviction_lock/unlock(vm) + */ + struct mutex eviction_lock; + bool evicting; + unsigned int saved_flags; + /* BOs who needs a validation */ struct list_head evicted; @@ -253,8 +270,13 @@ struct amdgpu_vm { struct amdgpu_vm_pt root; struct dma_fence *last_update; - /* Scheduler entity for page table updates */ - struct drm_sched_entity entity; + /* Scheduler entities for page table updates */ + struct drm_sched_entity direct; + struct drm_sched_entity delayed; + + /* Last submission to the scheduler entities */ + struct dma_fence *last_direct; + struct dma_fence *last_delayed; unsigned int pasid; /* dedicated to vm */ @@ -288,6 +310,8 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* mark whether can do the bulk move */ bool bulk_moveable; + /* Flag to indicate if VM is used for compute */ + bool is_compute_context; }; struct amdgpu_vm_manager { @@ -307,8 +331,8 @@ struct amdgpu_vm_manager { u64 vram_base_offset; /* vm pte handling */ const struct amdgpu_vm_pte_funcs *vm_pte_funcs; - struct drm_sched_rq *vm_pte_rqs[AMDGPU_MAX_RINGS]; - unsigned vm_pte_num_rqs; + struct drm_gpu_scheduler *vm_pte_scheds[AMDGPU_MAX_RINGS]; + unsigned vm_pte_num_scheds; struct amdgpu_ring *page_fault; /* partial resident texture handling */ @@ -356,8 +380,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); -int amdgpu_vm_update_directories(struct amdgpu_device *adev, - struct amdgpu_vm *vm); +int amdgpu_vm_update_pdes(struct amdgpu_device *adev, + struct amdgpu_vm *vm, bool direct); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); @@ -366,6 +390,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); +bool amdgpu_vm_evictable(struct amdgpu_bo *bo); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); @@ -403,6 +428,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, struct amdgpu_task_info *task_info); +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, + uint64_t addr); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 5222d165abfc..73fec7a0ced5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -49,13 +49,6 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner, { int r; - /* Wait for PT BOs to be idle. PTs share the same resv. object - * as the root PD BO - */ - r = amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true); - if (unlikely(r)) - return r; - /* Wait for any BO move to be completed */ if (exclusive) { r = dma_fence_wait(exclusive, true); @@ -63,7 +56,14 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner, return r; } - return 0; + /* Don't wait for submissions during page fault */ + if (p->direct) + return 0; + + /* Wait for PT BOs to be idle. PTs share the same resv. object + * as the root PD BO + */ + return amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true); } /** @@ -89,7 +89,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, pe += (unsigned long)amdgpu_bo_kptr(bo); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct); for (i = 0; i < count; i++) { value = p->pages_addr ? diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index ddd181f5ed37..19b7f80758f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -68,17 +68,19 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, if (r) return r; - r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false); - if (r) - return r; + p->num_dw_left = ndw; - r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv, - owner, false); + /* Wait for moves to be completed */ + r = amdgpu_sync_fence(&p->job->sync, exclusive, false); if (r) return r; - p->num_dw_left = ndw; - return 0; + /* Don't wait for any submissions during page fault handling */ + if (p->direct) + return 0; + + return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv, + owner, false); } /** @@ -93,24 +95,30 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { - struct amdgpu_bo *root = p->vm->root.base.bo; struct amdgpu_ib *ib = p->job->ibs; + struct drm_sched_entity *entity; + struct dma_fence *f, *tmp; struct amdgpu_ring *ring; - struct dma_fence *f; int r; - ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched); + entity = p->direct ? &p->vm->direct : &p->vm->delayed; + ring = container_of(entity->rq->sched, struct amdgpu_ring, sched); WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); WARN_ON(ib->length_dw > p->num_dw_left); - r = amdgpu_job_submit(p->job, &p->vm->entity, - AMDGPU_FENCE_OWNER_VM, &f); + r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f); if (r) goto error; - amdgpu_bo_fence(root, f, true); - if (fence) + tmp = dma_fence_get(f); + if (p->direct) + swap(p->vm->last_direct, tmp); + else + swap(p->vm->last_delayed, tmp); + dma_fence_put(tmp); + + if (fence && !p->direct) swap(*fence, f); dma_fence_put(f); return 0; @@ -120,7 +128,6 @@ error: return r; } - /** * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping * @@ -141,7 +148,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p, src += p->num_dw_left * 4; pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_copy_ptes(pe, src, count); + trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct); amdgpu_vm_copy_pte(p->adev, ib, pe, src, count); } @@ -168,7 +175,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, struct amdgpu_ib *ib = p->job->ibs; pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct); if (count < 3) { amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags, count, incr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 3a9d8c15fe9f..82a3299e53c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -23,6 +23,9 @@ */ #include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_atomfirmware.h" +#include "atom.h" struct amdgpu_vram_mgr { struct drm_mm mm; @@ -101,6 +104,39 @@ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev, amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM])); } +static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + switch (adev->gmc.vram_vendor) { + case SAMSUNG: + return snprintf(buf, PAGE_SIZE, "samsung\n"); + case INFINEON: + return snprintf(buf, PAGE_SIZE, "infineon\n"); + case ELPIDA: + return snprintf(buf, PAGE_SIZE, "elpida\n"); + case ETRON: + return snprintf(buf, PAGE_SIZE, "etron\n"); + case NANYA: + return snprintf(buf, PAGE_SIZE, "nanya\n"); + case HYNIX: + return snprintf(buf, PAGE_SIZE, "hynix\n"); + case MOSEL: + return snprintf(buf, PAGE_SIZE, "mosel\n"); + case WINBOND: + return snprintf(buf, PAGE_SIZE, "winbond\n"); + case ESMT: + return snprintf(buf, PAGE_SIZE, "esmt\n"); + case MICRON: + return snprintf(buf, PAGE_SIZE, "micron\n"); + default: + return snprintf(buf, PAGE_SIZE, "unknown\n"); + } +} + static DEVICE_ATTR(mem_info_vram_total, S_IRUGO, amdgpu_mem_info_vram_total_show, NULL); static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO, @@ -109,6 +145,8 @@ static DEVICE_ATTR(mem_info_vram_used, S_IRUGO, amdgpu_mem_info_vram_used_show, NULL); static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO, amdgpu_mem_info_vis_vram_used_show, NULL); +static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO, + amdgpu_mem_info_vram_vendor, NULL); /** * amdgpu_vram_mgr_init - init VRAM manager and DRM MM @@ -154,6 +192,11 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n"); return ret; } + ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vram_vendor\n"); + return ret; + } return 0; } @@ -180,6 +223,7 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total); device_remove_file(adev->dev, &dev_attr_mem_info_vram_used); device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used); + device_remove_file(adev->dev, &dev_attr_mem_info_vram_vendor); return 0; } @@ -275,7 +319,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; unsigned long lpfn, num_nodes, pages_per_node, pages_left; - uint64_t vis_usage = 0, mem_bytes; + uint64_t vis_usage = 0, mem_bytes, max_bytes; unsigned i; int r; @@ -283,9 +327,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (!lpfn) lpfn = man->size; + max_bytes = adev->gmc.mc_vram_size; + if (tbo->type != ttm_bo_type_kernel) + max_bytes -= AMDGPU_VM_RESERVED_VRAM; + /* bail out quickly if there's likely not enough VRAM for this BO */ mem_bytes = (u64)mem->num_pages << PAGE_SHIFT; - if (atomic64_add_return(mem_bytes, &mgr->usage) > adev->gmc.mc_vram_size) { + if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) { atomic64_sub(mem_bytes, &mgr->usage); mem->mm_node = NULL; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index d11eba09eadd..a97af422575a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -25,7 +25,8 @@ #include "amdgpu.h" #include "amdgpu_xgmi.h" #include "amdgpu_smu.h" - +#include "amdgpu_ras.h" +#include "df/df_3_6_offset.h" static DEFINE_MUTEX(xgmi_mutex); @@ -131,9 +132,37 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev, } +#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) +static ssize_t amdgpu_xgmi_show_error(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in; + uint64_t fica_out; + unsigned int error_count = 0; + + ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); + ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); -static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in); + if (fica_out != 0x1f) + pr_err("xGMI error counters not enabled!\n"); + + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in); + + if ((fica_out & 0xffff) == 2) + error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63); + adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); + + return snprintf(buf, PAGE_SIZE, "%d\n", error_count); +} + + +static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); +static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) @@ -148,6 +177,12 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, return ret; } + /* Create xgmi error file */ + ret = device_create_file(adev->dev, &dev_attr_xgmi_error); + if (ret) + pr_err("failed to create xgmi_error\n"); + + /* Create sysfs link to hive info folder on the first device */ if (adev != hive->adev) { ret = sysfs_create_link(&adev->dev->kobj, hive->kobj, @@ -226,6 +261,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo INIT_LIST_HEAD(&tmp->device_list); mutex_init(&tmp->hive_lock); mutex_init(&tmp->reset_lock); + task_barrier_init(&tmp->tb); if (lock) mutex_lock(&tmp->hive_lock); @@ -239,22 +275,49 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) { int ret = 0; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + struct amdgpu_device *tmp_adev; + bool update_hive_pstate = true; + bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20; if (!hive) return 0; - if (hive->pstate == pstate) - return 0; + mutex_lock(&hive->hive_lock); + + if (hive->pstate == pstate) { + adev->pstate = is_high_pstate ? pstate : adev->pstate; + goto out; + } dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); - if (is_support_sw_smu(adev)) - ret = smu_set_xgmi_pstate(&adev->smu, pstate); - if (ret) + ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate); + if (ret) { dev_err(adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret); + goto out; + } + + /* Update device pstate */ + adev->pstate = pstate; + + /* + * Update the hive pstate only all devices of the hive + * are in the same pstate + */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + if (tmp_adev->pstate != adev->pstate) { + update_hive_pstate = false; + break; + } + } + if (update_hive_pstate || is_high_pstate) + hive->pstate = pstate; + +out: + mutex_unlock(&hive->hive_lock); return ret; } @@ -296,23 +359,28 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) struct amdgpu_xgmi *entry; struct amdgpu_device *tmp_adev = NULL; - int count = 0, ret = -EINVAL; + int count = 0, ret = 0; if (!adev->gmc.xgmi.supported) return 0; - ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); - if (ret) { - dev_err(adev->dev, - "XGMI: Failed to get node id\n"); - return ret; - } + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { + ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); + if (ret) { + dev_err(adev->dev, + "XGMI: Failed to get hive id\n"); + return ret; + } - ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); - if (ret) { - dev_err(adev->dev, - "XGMI: Failed to get hive id\n"); - return ret; + ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); + if (ret) { + dev_err(adev->dev, + "XGMI: Failed to get node id\n"); + return ret; + } + } else { + adev->gmc.xgmi.hive_id = 16; + adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16; } hive = amdgpu_get_xgmi_hive(adev, 1); @@ -324,6 +392,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } + /* Set default device pstate */ + adev->pstate = -1; + top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); @@ -332,29 +403,34 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) top_info->num_nodes = count; hive->number_devices = count; - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - /* update node list for other device in the hive */ - if (tmp_adev != adev) { - top_info = &tmp_adev->psp.xgmi_context.top_info; - top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id; - top_info->num_nodes = count; + task_barrier_add_task(&hive->tb); + + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + /* update node list for other device in the hive */ + if (tmp_adev != adev) { + top_info = &tmp_adev->psp.xgmi_context.top_info; + top_info->nodes[count - 1].node_id = + adev->gmc.xgmi.node_id; + top_info->num_nodes = count; + } + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); + if (ret) + goto exit; } - ret = amdgpu_xgmi_update_topology(hive, tmp_adev); - if (ret) - goto exit; - } - /* get latest topology info for each device from psp */ - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, - &tmp_adev->psp.xgmi_context.top_info); - if (ret) { - dev_err(tmp_adev->dev, - "XGMI: Get topology failure on device %llx, hive %llx, ret %d", - tmp_adev->gmc.xgmi.node_id, - tmp_adev->gmc.xgmi.hive_id, ret); - /* To do : continue with some node failed or disable the whole hive */ - goto exit; + /* get latest topology info for each device from psp */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Get topology failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, + tmp_adev->gmc.xgmi.hive_id, ret); + /* To do : continue with some node failed or disable the whole hive */ + goto exit; + } } } @@ -391,7 +467,57 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) mutex_destroy(&hive->hive_lock); mutex_destroy(&hive->reset_lock); } else { + task_barrier_rem_task(&hive->tb); amdgpu_xgmi_sysfs_rem_dev_info(adev, hive); mutex_unlock(&hive->hive_lock); } } + +int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "xgmi_wafl_err_count", + .debugfs_name = "xgmi_wafl_err_inject", + }; + + if (!adev->gmc.xgmi.supported || + adev->gmc.xgmi.num_physical_nodes == 0) + return 0; + + if (!adev->gmc.xgmi.ras_if) { + adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->gmc.xgmi.ras_if) + return -ENOMEM; + adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL; + adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gmc.xgmi.ras_if->sub_block_index = 0; + strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl"); + } + ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if; + r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) { + kfree(adev->gmc.xgmi.ras_if); + adev->gmc.xgmi.ras_if = NULL; + } + + return r; +} + +void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) && + adev->gmc.xgmi.ras_if) { + struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index fbcee31788c4..74011fbc2251 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -22,6 +22,7 @@ #ifndef __AMDGPU_XGMI_H__ #define __AMDGPU_XGMI_H__ +#include <drm/task_barrier.h> #include "amdgpu_psp.h" struct amdgpu_hive_info { @@ -33,6 +34,7 @@ struct amdgpu_hive_info { struct device_attribute dev_attr; struct amdgpu_device *adev; int pstate; /*0 -- low , 1 -- high , -1 unknown*/ + struct task_barrier tb; }; struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock); @@ -42,6 +44,8 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); +int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev); +void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c new file mode 100644 index 000000000000..fda99c958c3b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c @@ -0,0 +1,60 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "arct_ip_offset.h" + +int arct_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the block needed by our driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); + adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i])); + adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i])); + adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i])); + adev->reg_offset[SDMA5_HWIP][i] = (uint32_t *)(&(SDMA5_BASE.instance[i])); + adev->reg_offset[SDMA6_HWIP][i] = (uint32_t *)(&(SDMA6_BASE.instance[i])); + adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i])); + adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i])); + } + return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c new file mode 100644 index 000000000000..847ca9b3ce4e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -0,0 +1,103 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "athub_v1_0.h" + +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" + +#include "soc15_common.h" + +static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if(def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +int athub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_VEGA20: + case CHIP_RAVEN: + athub_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + athub_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + break; + default: + break; + } + + return 0; +} + +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_ATHUB_MGCG */ + data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + + /* AMD_CG_SUPPORT_ATHUB_LS */ + if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h new file mode 100644 index 000000000000..b279af59e34f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __ATHUB_V1_0_H__ +#define __ATHUB_V1_0_H__ + +int athub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index 89b32b6b81c8..921a69abda55 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -74,10 +74,12 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: athub_v2_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); athub_v2_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 6858cde9fc5d..ea702a64f807 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -361,7 +361,6 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder, struct drm_connector *connector) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - struct amdgpu_connector_atom_dig *dig_connector; int panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE; u16 dp_bridge = amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector); u8 tmp; @@ -369,8 +368,6 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder, if (!amdgpu_connector->con_priv) return panel_mode; - dig_connector = amdgpu_connector->con_priv; - if (dp_bridge != ENCODER_OBJECT_ID_NONE) { /* DP bridge chips */ if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux, @@ -713,7 +710,6 @@ void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder, struct drm_device *dev = encoder->dev; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig; struct amdgpu_connector *amdgpu_connector; struct amdgpu_connector_atom_dig *dig_connector; struct amdgpu_atombios_dp_link_train_info dp_info; @@ -721,7 +717,6 @@ void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder, if (!amdgpu_encoder->enc_priv) return; - dig = amdgpu_encoder->enc_priv; amdgpu_connector = to_amdgpu_connector(connector); if (!amdgpu_connector->con_priv) diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index 980c363b1a0a..b4cc7c55fa16 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ -76,11 +76,6 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan, } args.lpI2CDataOut = cpu_to_le16(out); } else { - if (num > ATOM_MAX_HW_I2C_READ) { - DRM_ERROR("hw i2c: tried to read too many bytes (%d vs 255)\n", num); - r = -EINVAL; - goto done; - } args.ucRegIndex = 0; args.lpI2CDataOut = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 1ffbc0d3d7a1..006f21ef7ddf 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -966,6 +966,25 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { {mmGRBM_STATUS}, + {mmGRBM_STATUS2}, + {mmGRBM_STATUS_SE0}, + {mmGRBM_STATUS_SE1}, + {mmGRBM_STATUS_SE2}, + {mmGRBM_STATUS_SE3}, + {mmSRBM_STATUS}, + {mmSRBM_STATUS2}, + {mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET}, + {mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET}, + {mmCP_STAT}, + {mmCP_STALLED_STAT1}, + {mmCP_STALLED_STAT2}, + {mmCP_STALLED_STAT3}, + {mmCP_CPF_BUSY_STAT}, + {mmCP_CPF_STALLED_STAT1}, + {mmCP_CPF_STATUS}, + {mmCP_CPC_BUSY_STAT}, + {mmCP_CPC_STALLED_STAT1}, + {mmCP_CPC_STATUS}, {mmGB_ADDR_CONFIG}, {mmMC_ARB_RAMCFG}, {mmGB_TILE_MODE0}, @@ -1270,15 +1289,15 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev) } /** - * cik_asic_reset - soft reset GPU + * cik_asic_pci_config_reset - soft reset GPU * * @adev: amdgpu_device pointer * - * Look up which blocks are hung and attempt - * to reset them. + * Use PCI Config method to reset the GPU. + * * Returns 0 for success. */ -static int cik_asic_reset(struct amdgpu_device *adev) +static int cik_asic_pci_config_reset(struct amdgpu_device *adev) { int r; @@ -1291,6 +1310,64 @@ static int cik_asic_reset(struct amdgpu_device *adev) return r; } +static bool cik_asic_supports_baco(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: + return amdgpu_dpm_is_baco_supported(adev); + default: + return false; + } +} + +static enum amd_reset_method +cik_asic_reset_method(struct amdgpu_device *adev) +{ + bool baco_reset; + + switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: + /* disable baco reset until it works */ + /* smu7_asic_get_baco_capability(adev, &baco_reset); */ + baco_reset = false; + break; + default: + baco_reset = false; + break; + } + + if (baco_reset) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_LEGACY; +} + +/** + * cik_asic_reset - soft reset GPU + * + * @adev: amdgpu_device pointer + * + * Look up which blocks are hung and attempt + * to reset them. + * Returns 0 for success. + */ +static int cik_asic_reset(struct amdgpu_device *adev) +{ + int r; + + if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); + r = amdgpu_dpm_baco_reset(adev); + } else { + r = cik_asic_pci_config_reset(adev); + } + + return r; +} + static u32 cik_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1378,7 +1455,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) static void cik_pcie_gen3_enable(struct amdgpu_device *adev) { struct pci_dev *root = adev->pdev->bus->self; - int bridge_pos, gpu_pos; u32 speed_cntl, current_data_rate; int i; u16 tmp16; @@ -1413,12 +1489,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n"); } - bridge_pos = pci_pcie_cap(root); - if (!bridge_pos) - return; - - gpu_pos = pci_pcie_cap(adev->pdev); - if (!gpu_pos) + if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev)) return; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { @@ -1428,14 +1499,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) u16 bridge_cfg2, gpu_cfg2; u32 max_lw, current_lw, tmp; - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &bridge_cfg); + pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, + &gpu_cfg); tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, + tmp16); tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> @@ -1459,15 +1533,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) for (i = 0; i < 10; i++) { /* check status */ - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16); + pcie_capability_read_word(adev->pdev, + PCI_EXP_DEVSTA, + &tmp16); if (tmp16 & PCI_EXP_DEVSTA_TRPND) break; - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &bridge_cfg); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL, + &gpu_cfg); - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2); + pcie_capability_read_word(root, PCI_EXP_LNKCTL2, + &bridge_cfg2); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL2, + &gpu_cfg2); tmp = RREG32_PCIE(ixPCIE_LC_CNTL4); tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; @@ -1480,26 +1562,45 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) msleep(100); /* linkctl */ - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &tmp16); tmp16 &= ~PCI_EXP_LNKCTL_HAWD; tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(root, PCI_EXP_LNKCTL, + tmp16); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL, + &tmp16); tmp16 &= ~PCI_EXP_LNKCTL_HAWD; tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(adev->pdev, + PCI_EXP_LNKCTL, + tmp16); /* linkctl2 */ - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~((1 << 4) | (7 << 9)); - tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9))); - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16); - - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~((1 << 4) | (7 << 9)); - tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9))); - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + pcie_capability_read_word(root, PCI_EXP_LNKCTL2, + &tmp16); + tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN); + tmp16 |= (bridge_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_write_word(root, + PCI_EXP_LNKCTL2, + tmp16); + + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL2, + &tmp16); + tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN); + tmp16 |= (gpu_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_write_word(adev->pdev, + PCI_EXP_LNKCTL2, + tmp16); tmp = RREG32_PCIE(ixPCIE_LC_CNTL4); tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; @@ -1514,15 +1615,16 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK; WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~0xf; + pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16); + tmp16 &= ~PCI_EXP_LNKCTL2_TLS; + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) - tmp16 |= 3; /* gen3 */ + tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) - tmp16 |= 2; /* gen2 */ + tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */ else - tmp16 |= 1; /* gen1 */ - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */ + pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16); speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL); speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK; @@ -1823,6 +1925,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .read_bios_from_rom = &cik_read_bios_from_rom, .read_register = &cik_read_register, .reset = &cik_asic_reset, + .reset_method = &cik_asic_reset_method, .set_vga_state = &cik_vga_set_state, .get_xclk = &cik_get_xclk, .set_uvd_clocks = &cik_set_uvd_clocks, @@ -1835,6 +1938,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_pcie_usage = &cik_get_pcie_usage, .need_reset_on_init = &cik_need_reset_on_init, .get_pcie_replay_count = &cik_get_pcie_replay_count, + .supports_baco = &cik_asic_supports_baco, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index 54c625a2e570..f91ab4c246b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -31,4 +31,5 @@ void cik_srbm_select(struct amdgpu_device *adev, int cik_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c45304f1047c..580d3f93d670 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, u32 extra_bits = vmid & 0xf; /* IB packet must end on a 8 DW boundary */ - cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); + cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) u32 pad_count; int i; - pad_count = (8 - (ib->length_dw & 0x7)) % 8; + pad_count = (-ib->length_dw) & 7; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = @@ -1372,16 +1372,14 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) { - struct drm_gpu_scheduler *sched; unsigned i; adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } const struct amdgpu_ip_block_version cik_sdma_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 1ffd1963e765..40d2ac723dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -236,6 +236,7 @@ static void dce_v10_0_page_flip(struct amdgpu_device *adev, int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; u32 tmp; /* flip at hsync for async, default is vsync */ @@ -243,6 +244,9 @@ static void dce_v10_0_page_flip(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0); WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the primary scanout address */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -326,9 +330,11 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -364,6 +370,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } /** @@ -378,9 +385,11 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -393,6 +402,7 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v10_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1215,10 +1225,12 @@ static void dce_v10_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; int interlace = 0; @@ -1226,12 +1238,14 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1257,10 +1271,12 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; u8 *sadb = NULL; @@ -1269,12 +1285,14 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1309,10 +1327,12 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1335,12 +1355,14 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1348,10 +1370,10 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 9e0782b54066..898ef72d423c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -254,6 +254,7 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev, int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; u32 tmp; /* flip immediate for async, default is vsync */ @@ -261,6 +262,9 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0); WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -344,9 +348,11 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -381,6 +387,7 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } /** @@ -395,9 +402,11 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -409,6 +418,7 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1241,10 +1251,12 @@ static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; int interlace = 0; @@ -1252,12 +1264,14 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1283,10 +1297,12 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; u8 *sadb = NULL; @@ -1295,12 +1311,14 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1335,10 +1353,12 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1361,12 +1381,14 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1374,10 +1396,10 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 4bf453e07dca..db15a112becc 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -191,10 +191,14 @@ static void dce_v6_0_page_flip(struct amdgpu_device *adev, int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; /* flip at hsync for async, default is vsync */ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -277,9 +281,11 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -305,7 +311,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - + drm_connector_list_iter_end(&iter); } /** @@ -320,9 +326,11 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -334,6 +342,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1120,20 +1129,24 @@ static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder) static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; int interlace = 0; u32 tmp; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1160,21 +1173,25 @@ static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u8 *sadb = NULL; int sad_count; u32 tmp; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1217,10 +1234,12 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1240,12 +1259,14 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1253,10 +1274,10 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { u32 tmp = 0; @@ -1628,6 +1649,7 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); int bpc = 8; @@ -1635,12 +1657,14 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, if (!dig || !dig->afmt) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index b23418ca8f6a..f06c9022c1fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -184,10 +184,14 @@ static void dce_v8_0_page_flip(struct amdgpu_device *adev, int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; /* flip at hsync for async, default is vsync */ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the primary scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -271,9 +275,11 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -299,6 +305,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } /** @@ -313,9 +320,11 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -327,6 +336,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v8_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1153,10 +1163,12 @@ static void dce_v8_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp = 0, offset; @@ -1165,12 +1177,14 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, offset = dig->afmt->pin->offset; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1210,10 +1224,12 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 offset, tmp; u8 *sadb = NULL; @@ -1224,12 +1240,14 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) offset = dig->afmt->pin->offset; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1259,11 +1277,13 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; u32 offset; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1288,12 +1308,14 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) offset = dig->afmt->pin->offset; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1301,10 +1323,10 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 3cc0a16649f9..e4f94863332c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -260,15 +260,14 @@ static struct drm_encoder * dce_virtual_encoder(struct drm_connector *connector) { struct drm_encoder *encoder; - int i; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL) return encoder; } /* pick the first one */ - drm_connector_for_each_possible_encoder(connector, encoder, i) + drm_connector_for_each_possible_encoder(connector, encoder) return encoder; return NULL; @@ -454,13 +453,8 @@ static int dce_virtual_hw_init(void *handle) #endif /* no DCE */ break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_NAVI10: - break; default: - DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); + break; } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 844c03868248..d6aca1c08068 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -31,6 +31,13 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; static void df_v1_7_sw_init(struct amdgpu_device *adev) { + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; +} + +static void df_v1_7_sw_fini(struct amdgpu_device *adev) +{ } static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev, @@ -62,7 +69,7 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev) { int fb_channel_number; - fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); return df_v1_7_channel_number[fb_channel_number]; } @@ -73,7 +80,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, u32 tmp; /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); + adev->df.funcs->enable_broadcast_mode(adev, true); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); @@ -88,7 +95,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, } /* Exit boradcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + adev->df.funcs->enable_broadcast_mode(adev, false); } static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, @@ -111,6 +118,7 @@ static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev, const struct amdgpu_df_funcs df_v1_7_funcs = { .sw_init = df_v1_7_sw_init, + .sw_fini = df_v1_7_sw_fini, .enable_broadcast_mode = df_v1_7_enable_broadcast_mode, .get_fb_channel_number = df_v1_7_get_fb_channel_number, .get_hbm_channel_number = df_v1_7_get_hbm_channel_number, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index ef6e91f9f51c..5a1bd8ed1a6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -27,6 +27,9 @@ #include "df/df_3_6_offset.h" #include "df/df_3_6_sh_mask.h" +#define DF_3_6_SMN_REG_INST_DIST 0x8 +#define DF_3_6_INST_CNT 8 + static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 32, 0, 0, 0, 2, 4, 8}; @@ -93,6 +96,151 @@ const struct attribute_group *df_v3_6_attr_groups[] = { NULL }; +static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev, + uint32_t ficaa_val) +{ + unsigned long flags, address, data; + uint32_t ficadl_val, ficadh_val; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); + WREG32(data, ficaa_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); + ficadl_val = RREG32(data); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); + ficadh_val = RREG32(data); + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + + return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val); +} + +static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); + WREG32(data, ficaa_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); + WREG32(data, ficadl_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); + WREG32(data, ficadh_val); + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* + * df_v3_6_perfmon_rreg - read perfmon lo and hi + * + * required to be atomic. no mmio method provided so subsequent reads for lo + * and hi require to preserve df finite state machine + */ +static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev, + uint32_t lo_addr, uint32_t *lo_val, + uint32_t hi_addr, uint32_t *hi_val) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, lo_addr); + *lo_val = RREG32(data); + WREG32(address, hi_addr); + *hi_val = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* + * df_v3_6_perfmon_wreg - write to perfmon lo and hi + * + * required to be atomic. no mmio method provided so subsequent reads after + * data writes cannot occur to preserve data fabrics finite state machine. + */ +static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr, + uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, lo_addr); + WREG32(data, lo_val); + WREG32(address, hi_addr); + WREG32(data, hi_val); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* same as perfmon_wreg but return status on write value check */ +static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev, + uint32_t lo_addr, uint32_t lo_val, + uint32_t hi_addr, uint32_t hi_val) +{ + unsigned long flags, address, data; + uint32_t lo_val_rb, hi_val_rb; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, lo_addr); + WREG32(data, lo_val); + WREG32(address, hi_addr); + WREG32(data, hi_val); + + WREG32(address, lo_addr); + lo_val_rb = RREG32(data); + WREG32(address, hi_addr); + hi_val_rb = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + + if (!(lo_val == lo_val_rb && hi_val == hi_val_rb)) + return -EBUSY; + + return 0; +} + + +/* + * retry arming counters every 100 usecs within 1 millisecond interval. + * if retry fails after time out, return error. + */ +#define ARM_RETRY_USEC_TIMEOUT 1000 +#define ARM_RETRY_USEC_INTERVAL 100 +static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev, + uint32_t lo_addr, uint32_t lo_val, + uint32_t hi_addr, uint32_t hi_val) +{ + int countdown = ARM_RETRY_USEC_TIMEOUT; + + while (countdown) { + + if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val, + hi_addr, hi_val)) + break; + + countdown -= ARM_RETRY_USEC_INTERVAL; + udelay(ARM_RETRY_USEC_INTERVAL); + } + + return countdown > 0 ? 0 : -ETIME; +} + /* get the number of df counters available */ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, struct device_attribute *attr, @@ -117,6 +265,32 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, /* device attr for available perfmon counters */ static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL); +static void df_v3_6_query_hashes(struct amdgpu_device *adev) +{ + u32 tmp; + + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + /* encoding for hash-enabled on Arcturus */ + if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) { + tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl); + adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl64K); + adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl2M); + adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl1G); + } +} + /* init perfmons */ static void df_v3_6_sw_init(struct amdgpu_device *adev) { @@ -128,6 +302,15 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++) adev->df_perfmon_config_assign_mask[i] = 0; + + df_v3_6_query_hashes(adev); +} + +static void df_v3_6_sw_fini(struct amdgpu_device *adev) +{ + + device_remove_file(adev->dev, &dev_attr_df_cntr_avail); + } static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev, @@ -159,7 +342,7 @@ static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev) { int fb_channel_number; - fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number)) fb_channel_number = 0; @@ -171,23 +354,29 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, { u32 tmp; - /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { - tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); - tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY; - WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); - } else { - tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); - tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - tmp |= DF_V3_6_MGCG_DISABLE; - WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); - } + if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) { + /* Put DF on broadcast mode */ + adev->df.funcs->enable_broadcast_mode(adev, true); + + if (enable) { + tmp = RREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY; + WREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } else { + tmp = RREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V3_6_MGCG_DISABLE; + WREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } - /* Exit broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + /* Exit broadcast mode */ + adev->df.funcs->enable_broadcast_mode(adev, false); + } } static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, @@ -231,20 +420,20 @@ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, switch (target_cntr) { case 0: - *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0; - *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0; + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4; break; case 1: - *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1; - *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1; + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5; break; case 2: - *lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2; - *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2; + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6; break; case 3: - *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3; - *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3; + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7; break; } @@ -268,6 +457,10 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, uint32_t *lo_val, uint32_t *hi_val) { + + uint32_t eventsel, instance, unitmask; + uint32_t instance_10, instance_5432, instance_76; + df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { @@ -276,40 +469,33 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, return -ENXIO; } - if (lo_val && hi_val) { - uint32_t eventsel, instance, unitmask; - uint32_t instance_10, instance_5432, instance_76; + eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; + unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; + instance = DF_V3_6_GET_INSTANCE(config); - eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; - unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; - instance = DF_V3_6_GET_INSTANCE(config); + instance_10 = instance & 0x3; + instance_5432 = (instance >> 2) & 0xf; + instance_76 = (instance >> 6) & 0x3; - instance_10 = instance & 0x3; - instance_5432 = (instance >> 2) & 0xf; - instance_76 = (instance >> 6) & 0x3; + *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22); + *hi_val = (instance_76 << 29) | instance_5432; - *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel; - *hi_val = (instance_76 << 29) | instance_5432; - } + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", + config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val); return 0; } -/* assign df performance counters for read */ -static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, - uint64_t config, - int *is_assigned) +/* add df performance counters for read */ +static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev, + uint64_t config) { int i, target_cntr; - *is_assigned = 0; - target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - if (target_cntr >= 0) { - *is_assigned = 1; + if (target_cntr >= 0) return 0; - } for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { if (adev->df_perfmon_config_assign_mask[i] == 0U) { @@ -322,6 +508,44 @@ static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, return -ENOSPC; } +#define DEFERRED_ARM_MASK (1 << 31) +static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev, + uint64_t config, bool is_deferred) +{ + int target_cntr; + + target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); + + if (target_cntr < 0) + return -EINVAL; + + if (is_deferred) + adev->df_perfmon_config_assign_mask[target_cntr] |= + DEFERRED_ARM_MASK; + else + adev->df_perfmon_config_assign_mask[target_cntr] &= + ~DEFERRED_ARM_MASK; + + return 0; +} + +static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev, + uint64_t config) +{ + int target_cntr; + + target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); + + /* + * we never get target_cntr < 0 since this funciton is only called in + * pmc_count for now but we should check anyways. + */ + return (target_cntr >= 0 && + (adev->df_perfmon_config_assign_mask[target_cntr] + & DEFERRED_ARM_MASK)); + +} + /* release performance counter */ static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, uint64_t config) @@ -344,72 +568,40 @@ static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev, if ((lo_base_addr == 0) || (hi_base_addr == 0)) return; - WREG32_PCIE(lo_base_addr, 0UL); - WREG32_PCIE(hi_base_addr, 0UL); -} - - -static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev, - uint64_t config) -{ - uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; - int ret, is_assigned; - - ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); - - if (ret || is_assigned) - return ret; - - ret = df_v3_6_pmc_get_ctrl_settings(adev, - config, - &lo_base_addr, - &hi_base_addr, - &lo_val, - &hi_val); - - if (ret) - return ret; - - DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", - config, lo_base_addr, hi_base_addr, lo_val, hi_val); - - WREG32_PCIE(lo_base_addr, lo_val); - WREG32_PCIE(hi_base_addr, hi_val); - - return ret; + df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); } static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, int is_enable) { - uint32_t lo_base_addr, hi_base_addr, lo_val; - int ret = 0; + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + int err = 0, ret = 0; switch (adev->asic_type) { case CHIP_VEGA20: + if (is_enable) + return df_v3_6_pmc_add_cntr(adev, config); df_v3_6_reset_perfmon_cntr(adev, config); - if (is_enable) { - ret = df_v3_6_add_perfmon_cntr(adev, config); - } else { - ret = df_v3_6_pmc_get_ctrl_settings(adev, + ret = df_v3_6_pmc_get_ctrl_settings(adev, config, &lo_base_addr, &hi_base_addr, - NULL, - NULL); - - if (ret) - return ret; + &lo_val, + &hi_val); - lo_val = RREG32_PCIE(lo_base_addr); + if (ret) + return ret; - DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", - config, lo_base_addr, hi_base_addr, lo_val); + err = df_v3_6_perfmon_arm_with_retry(adev, + lo_base_addr, + lo_val, + hi_base_addr, + hi_val); - WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); - } + if (err) + ret = df_v3_6_pmc_set_deferred(adev, config, true); break; default: @@ -422,7 +614,7 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, int is_disable) { - uint32_t lo_base_addr, hi_base_addr, lo_val; + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; int ret = 0; switch (adev->asic_type) { @@ -431,18 +623,13 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, config, &lo_base_addr, &hi_base_addr, - NULL, - NULL); + &lo_val, + &hi_val); if (ret) return ret; - lo_val = RREG32_PCIE(lo_base_addr); - - DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", - config, lo_base_addr, hi_base_addr, lo_val); - - WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); + df_v3_6_reset_perfmon_cntr(adev, config); if (is_disable) df_v3_6_pmc_release_cntr(adev, config); @@ -459,20 +646,31 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, uint64_t config, uint64_t *count) { - uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + uint32_t lo_base_addr, hi_base_addr, lo_val = 0, hi_val = 0; *count = 0; switch (adev->asic_type) { case CHIP_VEGA20: - df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, &hi_base_addr); if ((lo_base_addr == 0) || (hi_base_addr == 0)) return; - lo_val = RREG32_PCIE(lo_base_addr); - hi_val = RREG32_PCIE(hi_base_addr); + /* rearm the counter or throw away count value on failure */ + if (df_v3_6_pmc_is_deferred(adev, config)) { + int rearm_err = df_v3_6_perfmon_arm_with_status(adev, + lo_base_addr, lo_val, + hi_base_addr, hi_val); + + if (rearm_err) + return; + + df_v3_6_pmc_set_deferred(adev, config, false); + } + + df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val, + hi_base_addr, &hi_val); *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); @@ -480,17 +678,69 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, *count = 0; DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", - config, lo_base_addr, hi_base_addr, lo_val, hi_val); + config, lo_base_addr, hi_base_addr, lo_val, hi_val); break; - default: break; } } +static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev, + uint32_t df_inst) +{ + uint32_t base_addr_reg_val = 0; + uint64_t base_addr = 0; + + base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 + + df_inst * DF_3_6_SMN_REG_INST_DIST); + + if (REG_GET_FIELD(base_addr_reg_val, + DF_CS_UMC_AON0_DramBaseAddress0, + AddrRngVal) == 0) { + DRM_WARN("address range not valid"); + return 0; + } + + base_addr = REG_GET_FIELD(base_addr_reg_val, + DF_CS_UMC_AON0_DramBaseAddress0, + DramBaseAddr); + + return base_addr << 28; +} + +static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev) +{ + uint32_t xgmi_node_id = 0; + uint32_t df_inst_id = 0; + + /* Walk through DF dst nodes to find current XGMI node */ + for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) { + + xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 + + df_inst_id * DF_3_6_SMN_REG_INST_DIST); + xgmi_node_id = REG_GET_FIELD(xgmi_node_id, + DF_CS_UMC_AON0_DramLimitAddress0, + DstFabricID); + + /* TODO: establish reason dest fabric id is offset by 7 */ + xgmi_node_id = xgmi_node_id >> 7; + + if (adev->gmc.xgmi.physical_node_id == xgmi_node_id) + break; + } + + if (df_inst_id == DF_3_6_INST_CNT) { + DRM_WARN("cant match df dst id with gpu node"); + return 0; + } + + return df_inst_id; +} + const struct amdgpu_df_funcs df_v3_6_funcs = { .sw_init = df_v3_6_sw_init, + .sw_fini = df_v3_6_sw_fini, .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, .get_fb_channel_number = df_v3_6_get_fb_channel_number, .get_hbm_channel_number = df_v3_6_get_hbm_channel_number, @@ -499,5 +749,9 @@ const struct amdgpu_df_funcs df_v3_6_funcs = { .get_clockgating_state = df_v3_6_get_clockgating_state, .pmc_start = df_v3_6_pmc_start, .pmc_stop = df_v3_6_pmc_stop, - .pmc_get_count = df_v3_6_pmc_get_count + .pmc_get_count = df_v3_6_pmc_get_count, + .get_fica = df_v3_6_get_fica, + .set_fica = df_v3_6_set_fica, + .get_dram_base_addr = df_v3_6_get_dram_base_addr, + .get_df_inst_id = df_v3_6_get_df_inst_id }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f41287f9000d..1785fdad6ecb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -20,8 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + +#include <linux/delay.h> +#include <linux/kernel.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_psp.h" @@ -36,6 +40,7 @@ #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "clearstate_gfx10.h" #include "v10_structs.h" @@ -46,9 +51,6 @@ * Navi10 has two graphic rings to share each graphic pipe. * 1. Primary ring * 2. Async ring - * - * In bring-up phase, it just used primary ring so set gfx ring number as 1 at - * first. */ #define GFX10_NUM_GFX_RINGS 2 #define GFX10_MEC_HPD_SIZE 2048 @@ -56,6 +58,9 @@ #define F32_CE_PROGRAM_RAM_SIZE 65536 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define mmCGTT_GS_NGG_CLK_CTRL 0x5087 +#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1 + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -63,11 +68,30 @@ MODULE_FIRMWARE("amdgpu/navi10_mec.bin"); MODULE_FIRMWARE("amdgpu/navi10_mec2.bin"); MODULE_FIRMWARE("amdgpu/navi10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi14_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi14_me.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi14_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/navi12_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi12_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi12_me.bin"); +MODULE_FIRMWARE("amdgpu/navi12_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi12_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi12_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), @@ -91,17 +115,20 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000) }; static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = @@ -109,6 +136,102 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = /* Pending on emulation bring up */ }; +static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = +{ + /* Pending on emulation bring up */ +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = +{ + /* Pending on emulation bring up */ +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -223,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { .kiq_set_resources = gfx10_kiq_set_resources, .kiq_map_queues = gfx10_kiq_map_queues, .kiq_unmap_queues = gfx10_kiq_unmap_queues, .kiq_query_status = gfx10_kiq_query_status, + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 2, }; static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) @@ -250,6 +387,22 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_0_nv10, (const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10)); break; + case CHIP_NAVI14: + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_1, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_1)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_nv14, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14)); + break; + case CHIP_NAVI12: + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_2, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_2_nv12, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12)); + break; default: break; } @@ -331,20 +484,12 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) if (amdgpu_emu_mode == 1) msleep(1); else - DRM_UDELAY(1); - } - if (i < adev->usec_timeout) { - if (amdgpu_emu_mode == 1) - DRM_INFO("ring test on %d succeeded in %d msecs\n", - ring->idx, i); - else - DRM_INFO("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; + udelay(1); } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + amdgpu_gfx_scratch_free(adev, scratch); return r; @@ -394,14 +539,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); + else r = -EINVAL; - } err2: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); @@ -429,6 +570,31 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.cp_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: + if ((adev->gfx.me_fw_version >= 0x00000046) && + (adev->gfx.me_feature_version >= 27) && + (adev->gfx.pfp_fw_version >= 0x00000068) && + (adev->gfx.pfp_feature_version >= 27) && + (adev->gfx.mec_fw_version >= 0x0000005b) && + (adev->gfx.mec_feature_version >= 27)) + adev->gfx.cp_fw_write_wait = true; + break; + default: + break; + } + + if (adev->gfx.cp_fw_write_wait == false) + DRM_WARN_ONCE("CP firmware version too old, please update!"); +} + + static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) { const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -450,11 +616,29 @@ static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); } +static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) +{ + bool ret = false; + + switch (adev->pdev->revision) { + case 0xc2: + case 0xc3: + ret = true; + break; + default: + ret = false; + break; + } + + return ret ; +} + static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_NAVI10: - adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + if (!gfx_v10_0_navi10_gfxoff_should_enable(adev)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; break; default: break; @@ -464,7 +648,8 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; + char fw_name[40]; + char wks[10]; int err; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; @@ -477,15 +662,25 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); + memset(wks, 0, sizeof(wks)); switch (adev->asic_type) { case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + if (!(adev->pdev->device == 0x7340 && + adev->pdev->revision != 0x00)) + snprintf(wks, sizeof(wks), "_wks"); + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; default: BUG(); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); if (err) goto out; @@ -496,7 +691,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); if (err) goto out; @@ -507,7 +702,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); if (err) goto out; @@ -518,61 +713,63 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); - err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->gfx.rlc_fw); - rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; - version_major = le16_to_cpu(rlc_hdr->header.header_version_major); - version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - if (version_major == 2 && version_minor == 1) - adev->gfx.rlc.is_rlc_v2_1 = true; - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = + if (!amdgpu_sriov_vf(adev)) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = + adev->gfx.rlc.clear_state_descriptor_offset = le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = + adev->gfx.rlc.avail_scratch_ram_locations = le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = + adev->gfx.rlc.reg_restore_list_size = le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = + adev->gfx.rlc.reg_list_format_start = le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = + adev->gfx.rlc.reg_list_format_separate_start = le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = + adev->gfx.rlc.starting_offsets_start = le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = + adev->gfx.rlc.reg_list_format_size_bytes = le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = + adev->gfx.rlc.reg_list_size_bytes = le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = + adev->gfx.rlc.register_list_format = kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; - goto out; - } + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - if (adev->gfx.rlc.is_rlc_v2_1) - gfx_v10_0_init_rlc_ext_microcode(adev); + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v10_0_init_rlc_ext_microcode(adev); + } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) goto out; @@ -583,7 +780,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); if (!err) { err = amdgpu_ucode_validate(adev->gfx.mec2_fw); @@ -625,10 +822,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; info->ucode_id = AMDGPU_UCODE_ID_RLC_G; info->fw = adev->gfx.rlc_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - + if (info->fw) { + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } if (adev->gfx.rlc.is_rlc_v2_1 && adev->gfx.rlc.save_restore_list_cntl_size_bytes && adev->gfx.rlc.save_restore_list_gpm_size_bytes && @@ -686,6 +884,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) } } + gfx_v10_0_check_fw_write_wait(adev); out: if (err) { dev_err(adev->dev, @@ -820,39 +1019,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v10_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -1026,6 +1192,8 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1133,6 +1301,8 @@ static int gfx_v10_0_sw_init(void *handle) switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 2; adev->gfx.me.num_queue_per_pipe = 1; @@ -1292,7 +1462,7 @@ static int gfx_v10_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_gfx_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); amdgpu_gfx_kiq_fini(adev); gfx_v10_0_pfp_fini(adev); @@ -1452,6 +1622,25 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) } } +static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); + } +} + + static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) { int i, j, k; @@ -1461,7 +1650,8 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) u32 utcl_invreq_disable = 0; /* * GCRD_TARGETS_DISABLE field contains - * for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0] + * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0] + * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0] */ u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask( 2 * max_wgp_per_sh + /* TCP */ @@ -1469,7 +1659,8 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) 4); /* GL1C */ /* * UTCL1_UTCL0_INVREQ_DISABLE field contains - * for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0] + * for Navi10Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0] + * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0] */ u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask( 2 * max_wgp_per_sh + /* TCP */ @@ -1477,7 +1668,9 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) 4 + /* RMI */ 1); /* SQG */ - if (adev->asic_type == CHIP_NAVI10) { + if (adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14 || + adev->asic_type == CHIP_NAVI12) { mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { @@ -1518,6 +1711,17 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) } } +static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) +{ + /* TCCs are global (not instanced). */ + uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) | + RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE); + + adev->gfx.config.tcc_disabled_mask = + REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | + (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); +} + static void gfx_v10_0_constants_init(struct amdgpu_device *adev) { u32 tmp; @@ -1529,13 +1733,14 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) gfx_v10_0_setup_rb(adev); gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); + gfx_v10_0_get_tcc_info(adev); adev->gfx.config.pa_sc_tile_steering_override = gfx_v10_0_init_pa_sc_tile_steering_override(adev); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { nv_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -1552,6 +1757,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v10_0_init_compute_vmid(adev); + gfx_v10_0_init_gds_vmid(adev); } @@ -1572,24 +1778,18 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); } -static void gfx_v10_0_init_csb(struct amdgpu_device *adev) +static int gfx_v10_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); + /* csib */ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); -} - -static void gfx_v10_0_init_pg(struct amdgpu_device *adev) -{ - gfx_v10_0_init_csb(adev); - - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); - /* TODO: init power gating */ - return; + return 0; } void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) @@ -1624,9 +1824,9 @@ static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, * hence no handshake between SMU & RLC * GFXOFF will be disabled */ - rlc_pg_cntl |= 0x80000; + rlc_pg_cntl |= 0x800000; } else - rlc_pg_cntl &= ~0x80000; + rlc_pg_cntl &= ~0x800000; WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl); } @@ -1684,18 +1884,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) { int r; - if (amdgpu_sriov_vf(adev)) - return 0; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); if (r) return r; - gfx_v10_0_init_pg(adev); - /* enable RLC SRM */ - gfx_v10_0_rlc_enable_srm(adev); + gfx_v10_0_init_csb(adev); + if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ + gfx_v10_0_rlc_enable_srm(adev); } else { adev->gfx.rlc.funcs->stop(adev); @@ -1717,7 +1915,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) return r; } - gfx_v10_0_init_pg(adev); + gfx_v10_0_init_csb(adev); + adev->gfx.rlc.funcs->start(adev); if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { @@ -1767,7 +1966,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4; rlc_toc++; - }; + } return 0; } @@ -2184,7 +2383,7 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) return 0; } -static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) +static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { int i; u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); @@ -2197,7 +2396,17 @@ static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) adev->gfx.gfx_ring[i].sched.ready = false; } WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); - udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); + + return 0; } static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) @@ -2254,7 +2463,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -2324,7 +2533,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); @@ -2393,7 +2602,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -2568,7 +2777,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) /* Init gfx ring 0 for pipe 0 */ mutex_lock(&adev->srbm_mutex); gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); - mutex_unlock(&adev->srbm_mutex); + /* Set ring buffer size */ ring = &adev->gfx.gfx_ring[0]; rb_bufsz = order_base_2(ring->ring_size / 8); @@ -2606,11 +2815,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1); gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); /* Init gfx ring 1 for pipe 1 */ mutex_lock(&adev->srbm_mutex); gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); - mutex_unlock(&adev->srbm_mutex); ring = &adev->gfx.gfx_ring[1]; rb_bufsz = order_base_2(ring->ring_size / 8); tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); @@ -2640,6 +2849,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); /* Switch to pipe 0 */ mutex_lock(&adev->srbm_mutex); @@ -2714,7 +2924,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -2898,6 +3108,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct v10_gfx_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.gfx_ring[0]; if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); @@ -2909,14 +3120,15 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) #endif nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) - memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd)); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else if (adev->in_gpu_reset) { /* reset mqd with the backup copy */ - if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) - memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd)); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; + adev->wb.wb[ring->wptr_offs] = 0; amdgpu_ring_clear_ring(ring); #ifdef BRING_UP_DEBUG mutex_lock(&adev->srbm_mutex); @@ -3125,8 +3337,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } @@ -3397,23 +3612,16 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - DRM_INFO("gfx %d ring me %d pipe %d q %d\n", - i, ring->me, ring->pipe, ring->queue); - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } } for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - ring->sched.ready = true; - DRM_INFO("compute ring %d mec %d pipe %d q %d\n", - i, ring->me, ring->pipe, ring->queue); - r = amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_helper(ring); if (r) - ring->sched.ready = false; + return r; } return 0; @@ -3516,10 +3724,6 @@ static int gfx_v10_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gfx_v10_0_csb_vram_pin(adev); - if (r) - return r; - if (!amdgpu_emu_mode) gfx_v10_0_init_golden_registers(adev); @@ -3602,32 +3806,23 @@ static int gfx_v10_0_hw_fini(void *handle) if (amdgpu_gfx_disable_kcq(adev)) DRM_ERROR("KCQ disable failed\n"); if (amdgpu_sriov_vf(adev)) { - pr_debug("For SRIOV client, shouldn't do anything.\n"); + gfx_v10_0_cp_gfx_enable(adev, false); return 0; } gfx_v10_0_cp_enable(adev, false); gfx_v10_0_enable_gui_idle_interrupt(adev, false); - gfx_v10_0_csb_vram_unpin(adev); return 0; } static int gfx_v10_0_suspend(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - adev->in_suspend = true; - return gfx_v10_0_hw_fini(adev); + return gfx_v10_0_hw_fini(handle); } static int gfx_v10_0_resume(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; - - r = gfx_v10_0_hw_init(adev); - adev->in_suspend = false; - return r; + return gfx_v10_0_hw_init(handle); } static bool gfx_v10_0_is_idle(void *handle) @@ -4034,9 +4229,10 @@ static int gfx_v10_0_set_powergating_state(void *handle, enum amd_powergating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + bool enable = (state == AMD_PG_STATE_GATE); switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: if (!enable) { amdgpu_gfx_off_ctrl(adev, false); cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); @@ -4056,8 +4252,10 @@ static int gfx_v10_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: gfx_v10_0_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -4173,7 +4371,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { switch (ring->me) { @@ -4193,8 +4391,8 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, - adev->nbio_funcs->get_hdp_flush_req_offset(adev), - adev->nbio_funcs->get_hdp_flush_done_offset(adev), + adev->nbio.funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), ref_and_mask, ref_and_mask, 0x20); } @@ -4453,7 +4651,7 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) if (ring->trail_seq == le32_to_cpu(*(ring->trail_fence_cpu_addr))) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) { @@ -4539,6 +4737,7 @@ static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register*/ @@ -4547,9 +4746,9 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); } static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, @@ -4581,6 +4780,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); } +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = false; + + fw_version_ok = adev->gfx.cp_fw_write_wait; + + if (fw_version_ok) + gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); + else + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + static void gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -4927,7 +5144,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_gfx, .get_wptr = gfx_v10_0_ring_get_wptr_gfx, .set_wptr = gfx_v10_0_ring_set_wptr_gfx, @@ -4971,6 +5188,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_tmz = gfx_v10_0_ring_emit_tmz, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { @@ -4978,7 +5196,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_compute, .get_wptr = gfx_v10_0_ring_get_wptr_compute, .set_wptr = gfx_v10_0_ring_set_wptr_compute, @@ -5004,6 +5222,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { @@ -5011,7 +5230,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_compute, .get_wptr = gfx_v10_0_ring_get_wptr_compute, .set_wptr = gfx_v10_0_ring_set_wptr_compute, @@ -5034,6 +5253,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .emit_rreg = gfx_v10_0_ring_emit_rreg, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) @@ -5088,6 +5308,8 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; default: @@ -5097,15 +5319,12 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev) { - /* init asic gds info */ - switch (adev->asic_type) { - case CHIP_NAVI10: - default: - adev->gds.gds_size = 0x10000; - adev->gds.gds_compute_max_wave_id = 0x4ff; - break; - } + unsigned total_cu = adev->gfx.config.max_cu_per_sh * + adev->gfx.config.max_sh_per_se * + adev->gfx.config.max_shader_engines; + adev->gds.gds_size = 0x10000; + adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; adev->gds.gws_size = 64; adev->gds.oa_size = 16; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 7f0a63628c43..31f44d05e606 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1576,7 +1576,7 @@ static void gfx_v6_0_config_init(struct amdgpu_device *adev) static void gfx_v6_0_constants_init(struct amdgpu_device *adev) { u32 gb_addr_config = 0; - u32 mc_shared_chmap, mc_arb_ramcfg; + u32 mc_arb_ramcfg; u32 sx_debug_1; u32 hdp_host_path_cntl; u32 tmp; @@ -1678,7 +1678,6 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev) WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 21187275dfd3..8f20a5dd44fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1890,6 +1890,24 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) } } +static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); + } +} + static void gfx_v7_0_config_init(struct amdgpu_device *adev) { adev->gfx.config.double_offchip_lds_buf = 1; @@ -1968,6 +1986,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v7_0_init_compute_vmid(adev); + gfx_v7_0_init_gds_vmid(adev); WREG32(mmSX_DEBUG_1, 0x20); @@ -4239,7 +4258,7 @@ static int gfx_v7_0_late_init(void *handle) static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; - u32 mc_shared_chmap, mc_arb_ramcfg; + u32 mc_arb_ramcfg; u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; u32 tmp; @@ -4316,7 +4335,6 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) break; } - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; @@ -4535,6 +4553,8 @@ static int gfx_v7_0_hw_init(void *handle) gfx_v7_0_constants_init(adev); + /* init CSB */ + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* init rlc */ r = adev->gfx.rlc.funcs->resume(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ee1ccdcf2d30..fa245973de12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1321,39 +1321,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -1710,7 +1677,7 @@ fail: static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; - u32 mc_shared_chmap, mc_arb_ramcfg; + u32 mc_arb_ramcfg; u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; u32 tmp; int ret; @@ -1850,7 +1817,6 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) break; } - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; @@ -2103,7 +2069,7 @@ static int gfx_v8_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_gfx_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); amdgpu_gfx_kiq_fini(adev); gfx_v8_0_mec_fini(adev); @@ -3750,6 +3716,24 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) } } +static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); + } +} + static void gfx_v8_0_config_init(struct amdgpu_device *adev) { switch (adev->asic_type) { @@ -3816,6 +3800,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v8_0_init_compute_vmid(adev); + gfx_v8_0_init_gds_vmid(adev); mutex_lock(&adev->grbm_idx_mutex); /* @@ -3898,6 +3883,7 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v8_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32(mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); @@ -4572,8 +4558,11 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } @@ -4818,10 +4807,6 @@ static int gfx_v8_0_hw_init(void *handle) gfx_v8_0_init_golden_registers(adev); gfx_v8_0_constants_init(adev); - r = gfx_v8_0_csb_vram_pin(adev); - if (r) - return r; - r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4939,8 +4924,6 @@ static int gfx_v8_0_hw_fini(void *handle) pr_err("rlc is busy, skip halt rlc\n"); amdgpu_gfx_rlc_exit_safe_mode(adev); - gfx_v8_0_csb_vram_unpin(adev); - return 0; } @@ -6165,7 +6148,23 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; - /* EVENT_WRITE_EOP - flush caches, send int */ + /* Workaround for cache flush problems. First send a dummy EOP + * event down the pipe with seq one below. + */ + amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); + amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5))); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | + DATA_SEL(1) | INT_SEL(0)); + amdgpu_ring_write(ring, lower_32_bits(seq - 1)); + amdgpu_ring_write(ring, upper_32_bits(seq - 1)); + + /* Then send the real EOP event down the pipe: + * EVENT_WRITE_EOP - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | @@ -6450,6 +6449,7 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register*/ @@ -6458,9 +6458,9 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); } static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, @@ -6907,7 +6907,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 5 + /* COND_EXEC */ 7 + /* PIPELINE_SYNC */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */ - 8 + /* FENCE for VM_FLUSH */ + 12 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, the first COND_EXEC jump to the place just @@ -6919,7 +6919,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ - 8 + 8 + /* FENCE x2 */ + 12 + 12 + /* FENCE x2 */ 2, /* SWITCH_BUFFER */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ .emit_ib = gfx_v8_0_ring_emit_ib_gfx, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 04b8ac4432c7..b33a4eb39193 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -36,10 +36,10 @@ #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" + #include "vega10_enum.h" #include "hdp/hdp_4_0_offset.h" -#include "soc15.h" #include "soc15_common.h" #include "clearstate_gfx9.h" #include "v9_structs.h" @@ -48,6 +48,8 @@ #include "amdgpu_ras.h" +#include "gfx_v9_4.h" + #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -60,6 +62,9 @@ #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L +#define mmGCEA_PROBE_MAP 0x070c +#define mmGCEA_PROBE_MAP_BASE_IDX 0 + MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); MODULE_FIRMWARE("amdgpu/vega10_me.bin"); @@ -104,6 +109,397 @@ MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); +MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); +MODULE_FIRMWARE("amdgpu/renoir_me.bin"); +MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); +MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); +MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); + +#define mmTCP_CHAN_STEER_0_ARCT 0x0b03 +#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_1_ARCT 0x0b04 +#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_2_ARCT 0x0b09 +#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a +#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b +#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c +#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 + +enum ta_ras_gfx_subblock { + /*CPC*/ + TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, + TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, + TA_RAS_BLOCK__GFX_CPC_UCODE, + TA_RAS_BLOCK__GFX_DC_STATE_ME1, + TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, + TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, + TA_RAS_BLOCK__GFX_DC_STATE_ME2, + TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, + TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, + TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, + /* CPF*/ + TA_RAS_BLOCK__GFX_CPF_INDEX_START, + TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, + TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, + TA_RAS_BLOCK__GFX_CPF_TAG, + TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, + /* CPG*/ + TA_RAS_BLOCK__GFX_CPG_INDEX_START, + TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, + TA_RAS_BLOCK__GFX_CPG_DMA_TAG, + TA_RAS_BLOCK__GFX_CPG_TAG, + TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, + /* GDS*/ + TA_RAS_BLOCK__GFX_GDS_INDEX_START, + TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, + TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, + TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, + TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, + TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + /* SPI*/ + TA_RAS_BLOCK__GFX_SPI_SR_MEM, + /* SQ*/ + TA_RAS_BLOCK__GFX_SQ_INDEX_START, + TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, + TA_RAS_BLOCK__GFX_SQ_LDS_D, + TA_RAS_BLOCK__GFX_SQ_LDS_I, + TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ + TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, + /* SQC (3 ranges)*/ + TA_RAS_BLOCK__GFX_SQC_INDEX_START, + /* SQC range 0*/ + TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, + TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = + TA_RAS_BLOCK__GFX_SQC_INDEX0_START, + TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_INDEX0_END = + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1*/ + TA_RAS_BLOCK__GFX_SQC_INDEX1_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = + TA_RAS_BLOCK__GFX_SQC_INDEX1_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX1_END = + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2*/ + TA_RAS_BLOCK__GFX_SQC_INDEX2_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = + TA_RAS_BLOCK__GFX_SQC_INDEX2_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX2_END = + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, + /* TA*/ + TA_RAS_BLOCK__GFX_TA_INDEX_START, + TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, + TA_RAS_BLOCK__GFX_TA_FS_AFIFO, + TA_RAS_BLOCK__GFX_TA_FL_LFIFO, + TA_RAS_BLOCK__GFX_TA_FX_LFIFO, + TA_RAS_BLOCK__GFX_TA_FS_CFIFO, + TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, + /* TCA*/ + TA_RAS_BLOCK__GFX_TCA_INDEX_START, + TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, + TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, + TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges)*/ + TA_RAS_BLOCK__GFX_TCC_INDEX_START, + /* TCC range 0*/ + TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, + TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, + TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, + TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, + TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1*/ + TA_RAS_BLOCK__GFX_TCC_INDEX1_START, + TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, + TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + TA_RAS_BLOCK__GFX_TCC_INDEX1_END = + TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2*/ + TA_RAS_BLOCK__GFX_TCC_INDEX2_START, + TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, + TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, + TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, + TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, + TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, + TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, + TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, + TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + TA_RAS_BLOCK__GFX_TCC_INDEX2_END = + TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3*/ + TA_RAS_BLOCK__GFX_TCC_INDEX3_START, + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + TA_RAS_BLOCK__GFX_TCC_INDEX3_END = + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4*/ + TA_RAS_BLOCK__GFX_TCC_INDEX4_START, + TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = + TA_RAS_BLOCK__GFX_TCC_INDEX4_START, + TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + TA_RAS_BLOCK__GFX_TCC_INDEX4_END = + TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, + /* TCI*/ + TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, + /* TCP*/ + TA_RAS_BLOCK__GFX_TCP_INDEX_START, + TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, + TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, + TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, + TA_RAS_BLOCK__GFX_TCP_VM_FIFO, + TA_RAS_BLOCK__GFX_TCP_DB_RAM, + TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, + TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + /* TD*/ + TA_RAS_BLOCK__GFX_TD_INDEX_START, + TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, + TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, + TA_RAS_BLOCK__GFX_TD_CS_FIFO, + TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges)*/ + TA_RAS_BLOCK__GFX_EA_INDEX_START, + /* EA range 0*/ + TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, + TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, + TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, + TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, + TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + /* EA range 1*/ + TA_RAS_BLOCK__GFX_EA_INDEX1_START, + TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, + TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, + TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2*/ + TA_RAS_BLOCK__GFX_EA_INDEX2_START, + TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, + TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, + TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, + TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, + TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, + TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, + /* UTC VM L2 bank*/ + TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, + /* UTC VM walker*/ + TA_RAS_BLOCK__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache*/ + TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache*/ + TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, + TA_RAS_BLOCK__GFX_MAX +}; + +struct ras_gfx_subblock { + unsigned char *name; + int ta_subblock; + int hw_supported_error_type; + int sw_supported_error_type; +}; + +#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ + [AMDGPU_RAS_BLOCK__##subblock] = { \ + #subblock, \ + TA_RAS_BLOCK__##subblock, \ + ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ + (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ + } + +static const struct ras_gfx_subblock ras_gfx_subblocks[] = { + AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, + 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, + 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), +}; + static const struct soc15_reg_golden golden_settings_gc_9_0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), @@ -123,9 +519,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = @@ -188,9 +584,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = @@ -227,6 +623,22 @@ static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), }; +static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), +}; + static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), @@ -266,9 +678,23 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) +}; + +static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = @@ -310,19 +736,150 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); +static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); +static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev); +static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, + void *inject_if); + +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, + PACKET3_SET_RESOURCES_VMID_MASK(0) | + /* vmid_mask:0* queue_type:0 (KIQ) */ + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); + amdgpu_ring_write(kiq_ring, + lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, + upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | + /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | + /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + /* num_queues: must be 1 */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { + .kiq_set_resources = gfx_v9_0_kiq_set_resources, + .kiq_map_queues = gfx_v9_0_kiq_map_queues, + .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, + .kiq_query_status = gfx_v9_0_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, + .invalidate_tlbs_size = 2, +}; + +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs; +} static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - if (!amdgpu_virt_support_skip_setting(adev)) { - soc15_program_register_sequence(adev, - golden_settings_gc_9_0, - ARRAY_SIZE(golden_settings_gc_9_0)); - soc15_program_register_sequence(adev, - golden_settings_gc_9_0_vg10, - ARRAY_SIZE(golden_settings_gc_9_0_vg10)); - } + soc15_program_register_sequence(adev, + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_9_0_vg10, + ARRAY_SIZE(golden_settings_gc_9_0_vg10)); break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -340,6 +897,11 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_9_0_vg20, ARRAY_SIZE(golden_settings_gc_9_0_vg20)); break; + case CHIP_ARCTURUS: + soc15_program_register_sequence(adev, + golden_settings_gc_9_4_1_arct, + ARRAY_SIZE(golden_settings_gc_9_4_1_arct)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_gc_9_1, ARRAY_SIZE(golden_settings_gc_9_1)); @@ -352,12 +914,18 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_9_1_rv1, ARRAY_SIZE(golden_settings_gc_9_1_rv1)); break; + case CHIP_RENOIR: + soc15_program_register_sequence(adev, + golden_settings_gc_9_1_rn, + ARRAY_SIZE(golden_settings_gc_9_1_rn)); + return; /* for renoir, don't need common goldensetting */ default: break; } - soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, - (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); + if (adev->asic_type != CHIP_ARCTURUS) + soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, + (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); } static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) @@ -538,6 +1106,12 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.me_fw_write_wait = false; adev->gfx.mec_fw_write_wait = false; + if ((adev->gfx.mec_fw_version < 0x000001a5) || + (adev->gfx.mec_feature_version < 46) || + (adev->gfx.pfp_fw_version < 0x000000b7) || + (adev->gfx.pfp_feature_version < 46)) + DRM_WARN_ONCE("CP firmware version too old, please update!"); + switch (adev->asic_type) { case CHIP_VEGA10: if ((adev->gfx.me_fw_version >= 0x0000009c) && @@ -588,66 +1162,80 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) } } +struct amdgpu_gfxoff_quirk { + u16 chip_vendor; + u16 chip_device; + u16 subsys_vendor; + u16 subsys_device; + u8 revision; +}; + +static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { + /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ + { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, + { 0, 0, 0, 0, 0 }, +}; + +static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) +{ + const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; + + while (p && p->chip_device != 0) { + if (pdev->vendor == p->chip_vendor && + pdev->device == p->chip_device && + pdev->subsystem_vendor == p->subsys_vendor && + pdev->subsystem_device == p->subsys_device && + pdev->revision == p->revision) { + return true; + } + ++p; + } + return false; +} + static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) { + if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: break; case CHIP_RAVEN: - if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) - break; - if ((adev->gfx.rlc_fw_version != 106 && - adev->gfx.rlc_fw_version < 531) || - (adev->gfx.rlc_fw_version == 53815) || - (adev->gfx.rlc_feature_version < 1) || - !adev->gfx.rlc.is_rlc_v2_1) + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) && + ((adev->gfx.rlc_fw_version != 106 && + adev->gfx.rlc_fw_version < 531) || + (adev->gfx.rlc_fw_version == 53815) || + (adev->gfx.rlc_feature_version < 1) || + !adev->gfx.rlc.is_rlc_v2_1)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; + break; + case CHIP_RENOIR: + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; break; default: break; } } -static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, + const char *chip_name) { - const char *chip_name; char fw_name[30]; int err; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr; - const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; - uint16_t version_major; - uint16_t version_minor; - uint32_t smu_version; - - DRM_DEBUG("\n"); - - switch (adev->asic_type) { - case CHIP_VEGA10: - chip_name = "vega10"; - break; - case CHIP_VEGA12: - chip_name = "vega12"; - break; - case CHIP_VEGA20: - chip_name = "vega20"; - break; - case CHIP_RAVEN: - if (adev->rev_id >= 8) - chip_name = "raven2"; - else if (adev->pdev->device == 0x15d8) - chip_name = "picasso"; - else - chip_name = "raven"; - break; - default: - BUG(); - } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); @@ -682,6 +1270,58 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; + info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; + info->fw = adev->gfx.pfp_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; + info->ucode_id = AMDGPU_UCODE_ID_CP_ME; + info->fw = adev->gfx.me_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; + info->ucode_id = AMDGPU_UCODE_ID_CP_CE; + info->fw = adev->gfx.ce_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + +out: + if (err) { + dev_err(adev->dev, + "gfx9: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + } + return err; +} + +static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL; + unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; + uint32_t smu_version; + /* * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin * instead of picasso_rlc.bin. @@ -756,57 +1396,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) if (adev->gfx.rlc.is_rlc_v2_1) gfx_v9_0_init_rlc_ext_microcode(adev); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); - err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->gfx.mec_fw); - if (err) - goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - - - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); - err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); - if (!err) { - err = amdgpu_ucode_validate(adev->gfx.mec2_fw); - if (err) - goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.mec2_fw->data; - adev->gfx.mec2_fw_version = - le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec2_feature_version = - le32_to_cpu(cp_hdr->ucode_feature_version); - } else { - err = 0; - adev->gfx.mec2_fw = NULL; - } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; - info->ucode_id = AMDGPU_UCODE_ID_CP_CE; - info->fw = adev->gfx.ce_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; info->ucode_id = AMDGPU_UCODE_ID_RLC_G; info->fw = adev->gfx.rlc_fw; @@ -836,7 +1426,58 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); } + } + +out: + if (err) { + dev_err(adev->dev, + "gfx9: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + } + return err; +} + +static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.mec_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); + if (!err) { + err = amdgpu_ucode_validate(adev->gfx.mec2_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + } else { + err = 0; + adev->gfx.mec2_fw = NULL; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; @@ -859,13 +1500,19 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; adev->firmware.fw_size += ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; - info->fw = adev->gfx.mec2_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - } + /* TODO: Determine if MEC2 JT FW loading can be removed + for all GFX V9 asic and above */ + if (adev->asic_type != CHIP_ARCTURUS && + adev->asic_type != CHIP_RENOIR) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; + info->fw = adev->gfx.mec2_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, + PAGE_SIZE); + } + } } out: @@ -875,14 +1522,6 @@ out: dev_err(adev->dev, "gfx9: Failed to load firmware \"%s\"\n", fw_name); - release_firmware(adev->gfx.pfp_fw); - adev->gfx.pfp_fw = NULL; - release_firmware(adev->gfx.me_fw); - adev->gfx.me_fw = NULL; - release_firmware(adev->gfx.ce_fw); - adev->gfx.ce_fw = NULL; - release_firmware(adev->gfx.rlc_fw); - adev->gfx.rlc_fw = NULL; release_firmware(adev->gfx.mec_fw); adev->gfx.mec_fw = NULL; release_firmware(adev->gfx.mec2_fw); @@ -891,6 +1530,59 @@ out: return err; } +static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + int r; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_VEGA10: + chip_name = "vega10"; + break; + case CHIP_VEGA12: + chip_name = "vega12"; + break; + case CHIP_VEGA20: + chip_name = "vega20"; + break; + case CHIP_RAVEN: + if (adev->rev_id >= 8) + chip_name = "raven2"; + else if (adev->pdev->device == 0x15d8) + chip_name = "picasso"; + else + chip_name = "raven"; + break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; + case CHIP_RENOIR: + chip_name = "renoir"; + break; + default: + BUG(); + } + + /* No CPG in Arcturus */ + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); + if (r) + return r; + } + + r = gfx_v9_0_init_rlc_microcode(adev, chip_name); + if (r) + return r; + + r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); + if (r) + return r; + + return r; +} + static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) { u32 count = 0; @@ -1128,7 +1820,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) return r; } - if (adev->asic_type == CHIP_RAVEN) { + if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { /* TODO: double check the cp_table_size for RV */ adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ r = amdgpu_gfx_rlc_init_cpt(adev); @@ -1150,39 +1842,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -1324,7 +1983,20 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { .read_wave_data = &gfx_v9_0_read_wave_data, .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, - .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q + .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, + .ras_error_inject = &gfx_v9_0_ras_error_inject, + .query_ras_error_count = &gfx_v9_0_query_ras_error_count +}; + +static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, + .select_se_sh = &gfx_v9_0_select_se_sh, + .read_wave_data = &gfx_v9_0_read_wave_data, + .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, + .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, + .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, + .ras_error_inject = &gfx_v9_4_ras_error_inject, + .query_ras_error_count = &gfx_v9_4_query_ras_error_count }; static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) @@ -1377,6 +2049,27 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) else gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; break; + case CHIP_ARCTURUS: + adev->gfx.funcs = &gfx_v9_4_gfx_funcs; + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + gb_addr_config &= ~0xf3e777ff; + gb_addr_config |= 0x22014042; + break; + case CHIP_RENOIR: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + gb_addr_config &= ~0xf3e777ff; + gb_addr_config |= 0x22010042; + break; default: BUG(); break; @@ -1422,190 +2115,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) return 0; } -static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, - struct amdgpu_ngg_buf *ngg_buf, - int size_se, - int default_size_se) -{ - int r; - - if (size_se < 0) { - dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); - return -EINVAL; - } - size_se = size_se ? size_se : default_size_se; - - ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; - r = amdgpu_bo_create_kernel(adev, ngg_buf->size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &ngg_buf->bo, - &ngg_buf->gpu_addr, - NULL); - if (r) { - dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); - return r; - } - ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); - - return r; -} - -static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < NGG_BUF_MAX; i++) - amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, - &adev->gfx.ngg.buf[i].gpu_addr, - NULL); - - memset(&adev->gfx.ngg.buf[0], 0, - sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); - - adev->gfx.ngg.init = false; - - return 0; -} - -static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) -{ - int r; - - if (!amdgpu_ngg || adev->gfx.ngg.init == true) - return 0; - - /* GDS reserve memory: 64 bytes alignment */ - adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); - adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; - adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); - adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); - - /* Primitive Buffer */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], - amdgpu_prim_buf_per_se, - 64 * 1024); - if (r) { - dev_err(adev->dev, "Failed to create Primitive Buffer\n"); - goto err; - } - - /* Position Buffer */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], - amdgpu_pos_buf_per_se, - 256 * 1024); - if (r) { - dev_err(adev->dev, "Failed to create Position Buffer\n"); - goto err; - } - - /* Control Sideband */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], - amdgpu_cntl_sb_buf_per_se, - 256); - if (r) { - dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); - goto err; - } - - /* Parameter Cache, not created by default */ - if (amdgpu_param_buf_per_se <= 0) - goto out; - - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], - amdgpu_param_buf_per_se, - 512 * 1024); - if (r) { - dev_err(adev->dev, "Failed to create Parameter Cache\n"); - goto err; - } - -out: - adev->gfx.ngg.init = true; - return 0; -err: - gfx_v9_0_ngg_fini(adev); - return r; -} - -static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; - int r; - u32 data, base; - - if (!amdgpu_ngg) - return 0; - - /* Program buffer size */ - data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, - adev->gfx.ngg.buf[NGG_PRIM].size >> 8); - data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, - adev->gfx.ngg.buf[NGG_POS].size >> 8); - WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); - - data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, - adev->gfx.ngg.buf[NGG_CNTL].size >> 8); - data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, - adev->gfx.ngg.buf[NGG_PARAM].size >> 10); - WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); - - /* Program buffer base address */ - base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); - data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); - WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); - - base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); - data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); - WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); - - base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); - data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); - WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); - - base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); - data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); - WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); - - base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); - data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); - WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); - - base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); - data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); - WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); - - /* Clear GDS reserved memory */ - r = amdgpu_ring_alloc(ring, 17); - if (r) { - DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", - ring->name, r); - return r; - } - - gfx_v9_0_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), - (adev->gds.gds_size + - adev->gfx.ngg.gds_reserve_size)); - - amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); - amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | - PACKET3_DMA_DATA_DST_SEL(1) | - PACKET3_DMA_DATA_SRC_SEL(2))); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | - adev->gfx.ngg.gds_reserve_size); - - gfx_v9_0_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); - - amdgpu_ring_commit(ring); - - return 0; -} - static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, int mec, int pipe, int queue) { @@ -1653,6 +2162,8 @@ static int gfx_v9_0_sw_init(void *handle) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: adev->gfx.mec.num_mec = 2; break; default: @@ -1771,10 +2282,6 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; - r = gfx_v9_0_ngg_init(adev); - if (r) - return r; - return 0; } @@ -1784,19 +2291,7 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && - adev->gfx.ras_if) { - struct ras_common_if *ras_if = adev->gfx.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - }; - - amdgpu_ras_debugfs_remove(adev, ras_if); - amdgpu_ras_sysfs_remove(adev, ras_if); - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); - amdgpu_ras_feature_enable(adev, ras_if, 0); - kfree(ras_if); - } + amdgpu_gfx_ras_fini(adev); for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -1804,13 +2299,12 @@ static int gfx_v9_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_gfx_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); amdgpu_gfx_kiq_fini(adev); gfx_v9_0_mec_fini(adev); - gfx_v9_0_ngg_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); - if (adev->asic_type == CHIP_RAVEN) { + if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, &adev->gfx.rlc.cp_table_gpu_addr, (void **)&adev->gfx.rlc.cp_table_ptr); @@ -1929,6 +2423,40 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) } } +static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); + } +} + +static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) +{ + uint32_t tmp; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); + tmp = REG_SET_FIELD(tmp, SQ_CONFIG, + DISABLE_BARRIER_WAITCNT, 1); + WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); + break; + default: + break; + }; +} + static void gfx_v9_0_constants_init(struct amdgpu_device *adev) { u32 tmp; @@ -1945,7 +2473,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ if (i == 0) { @@ -1973,6 +2501,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v9_0_init_compute_vmid(adev); + gfx_v9_0_init_gds_vmid(adev); + gfx_v9_0_init_sq_config(adev); } static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) @@ -2028,6 +2558,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); @@ -2357,7 +2888,10 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) * And it's needed by gfxoff feature. */ if (adev->gfx.rlc.is_rlc_v2_1) { - gfx_v9_1_init_rlc_save_restore_list(adev); + if (adev->asic_type == CHIP_VEGA12 || + (adev->asic_type == CHIP_RAVEN && + adev->rev_id >= 8)) + gfx_v9_1_init_rlc_save_restore_list(adev); gfx_v9_0_enable_save_restore_machine(adev); } @@ -2768,74 +3302,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) -{ - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint64_t queue_mask = 0; - int r, i; - - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - if (!test_bit(i, adev->gfx.mec.queue_bitmap)) - continue; - - /* This situation may be hit in the future if a new HW - * generation exposes more than 64 queues. If so, the - * definition of queue_mask needs updating */ - if (WARN_ON(i >= (sizeof(queue_mask)*8))) { - DRM_ERROR("Invalid KCQ enabled: %d\n", i); - break; - } - - queue_mask |= (1ull << i); - } - - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - /* set resources */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | - PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ - amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* oac mask */ - amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ - PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ - PACKET3_MAP_QUEUES_QUEUE(ring->queue) | - PACKET3_MAP_QUEUES_PIPE(ring->pipe) | - PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | - PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ - PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ - PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ - amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - } - - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ enable failed\n"); - - return r; -} - static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2849,6 +3315,10 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) mqd->compute_static_thread_mgmt_se1 = 0xffffffff; mqd->compute_static_thread_mgmt_se2 = 0xffffffff; mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_static_thread_mgmt_se4 = 0xffffffff; + mqd->compute_static_thread_mgmt_se5 = 0xffffffff; + mqd->compute_static_thread_mgmt_se6 = 0xffffffff; + mqd->compute_static_thread_mgmt_se7 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; mqd->dynamic_cu_mask_addr_lo = @@ -2968,8 +3438,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } @@ -3238,7 +3711,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = gfx_v9_0_kiq_kcq_enable(adev); + r = amdgpu_gfx_enable_kcq(adev); done: return r; } @@ -3252,10 +3725,12 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) gfx_v9_0_enable_gui_idle_interrupt(adev, false); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - /* legacy firmware loading */ - r = gfx_v9_0_cp_gfx_load_microcode(adev); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + /* legacy firmware loading */ + r = gfx_v9_0_cp_gfx_load_microcode(adev); + if (r) + return r; + } r = gfx_v9_0_cp_compute_load_microcode(adev); if (r) @@ -3266,18 +3741,22 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - r = gfx_v9_0_cp_gfx_resume(adev); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_cp_gfx_resume(adev); + if (r) + return r; + } r = gfx_v9_0_kcq_resume(adev); if (r) return r; - ring = &adev->gfx.gfx_ring[0]; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + ring = &adev->gfx.gfx_ring[0]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -3289,9 +3768,27 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return 0; } +static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) +{ + u32 tmp; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, + adev->df.hash_status.hash_64k); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, + adev->df.hash_status.hash_2m); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, + adev->df.hash_status.hash_1g); + WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); +} + static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { - gfx_v9_0_cp_gfx_enable(adev, enable); + if (adev->asic_type != CHIP_ARCTURUS) + gfx_v9_0_cp_gfx_enable(adev, enable); gfx_v9_0_cp_compute_enable(adev, enable); } @@ -3300,13 +3797,12 @@ static int gfx_v9_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v9_0_init_golden_registers(adev); + if (!amdgpu_sriov_vf(adev)) + gfx_v9_0_init_golden_registers(adev); gfx_v9_0_constants_init(adev); - r = gfx_v9_0_csb_vram_pin(adev); - if (r) - return r; + gfx_v9_0_init_tcp_config(adev); r = adev->gfx.rlc.funcs->resume(adev); if (r) @@ -3316,40 +3812,6 @@ static int gfx_v9_0_hw_init(void *handle) if (r) return r; - r = gfx_v9_0_ngg_en(adev); - if (r) - return r; - - return r; -} - -static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) -{ - int r, i; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - - r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); - if (r) - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ - PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | - PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | - PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - } - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ disable failed\n"); - return r; } @@ -3361,8 +3823,10 @@ static int gfx_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); - /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + /* DF freeze and kcq disable will fail */ + if (!amdgpu_ras_intr_triggered()) + /* disable KCQ to avoid CPC touch memory not valid anymore */ + amdgpu_gfx_disable_kcq(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); @@ -3391,8 +3855,6 @@ static int gfx_v9_0_hw_fini(void *handle) gfx_v9_0_cp_enable(adev, false); adev->gfx.rlc.funcs->stop(adev); - gfx_v9_0_csb_vram_unpin(adev); - return 0; } @@ -3466,8 +3928,9 @@ static int gfx_v9_0_soft_reset(void *handle) /* stop the rlc */ adev->gfx.rlc.funcs->stop(adev); - /* Disable GFX parsing/prefetching */ - gfx_v9_0_cp_gfx_enable(adev, false); + if (adev->asic_type != CHIP_ARCTURUS) + /* Disable GFX parsing/prefetching */ + gfx_v9_0_cp_gfx_enable(adev, false); /* Disable MEC parsing/prefetching */ gfx_v9_0_cp_compute_enable(adev, false); @@ -3497,9 +3960,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { + uint32_t tmp, lsb, msb, i = 0; + do { + if (i != 0) + udelay(1); + tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); + msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + i++; + } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); + clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); + } else { + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + } mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } @@ -3567,33 +4043,61 @@ static const u32 sgpr_init_compute_shader[] = 0xbe800080, 0xbf810000, }; +/* When below register arrays changed, please update gpr_reg_size, + and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, + to cover all gfx9 ASICs */ static const struct soc15_reg_entry vgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +static const struct soc15_reg_entry sgpr1_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, }; -static const struct soc15_reg_entry sgpr_init_regs[] = { - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, +static const struct soc15_reg_entry sgpr2_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, }; -static const struct soc15_reg_entry sec_ded_counter_registers[] = { +static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, @@ -3614,6 +4118,7 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, @@ -3626,6 +4131,7 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, + { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, }; static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) @@ -3633,6 +4139,10 @@ static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; int i, r; + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + r = amdgpu_ring_alloc(ring, 7); if (r) { DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", @@ -3676,10 +4186,16 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; struct amdgpu_ib ib; struct dma_fence *f = NULL; - int r, i, j, k; + int r, i; unsigned total_size, vgpr_offset, sgpr_offset; u64 gpu_addr; + int compute_dim_x = adev->gfx.config.max_shader_engines * + adev->gfx.config.max_cu_per_sh * + adev->gfx.config.max_sh_per_se; + int sgpr_work_group_size = 5; + int gpr_reg_size = compute_dim_x / 16 + 6; + /* only support when RAS is enabled */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return 0; @@ -3689,9 +4205,11 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) return 0; total_size = - ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ + total_size += + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ total_size += - ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ total_size = ALIGN(total_size, 256); vgpr_offset = total_size; total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); @@ -3718,7 +4236,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* VGPR */ /* write the register state for the compute dispatch */ - for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { + for (i = 0; i < gpr_reg_size; i++) { ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) - PACKET3_SET_SH_REG_START; @@ -3734,7 +4252,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = compute_dim_x; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -3744,13 +4262,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); - /* SGPR */ + /* SGPR1 */ /* write the register state for the compute dispatch */ - for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { + for (i = 0; i < gpr_reg_size; i++) { ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); - ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) - PACKET3_SET_SH_REG_START; - ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; + ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; } /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; @@ -3762,7 +4280,35 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* SGPR2 */ + /* write the register state for the compute dispatch */ + for (i = 0; i < gpr_reg_size; i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -3786,18 +4332,17 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) goto fail; } - /* read back registers to clear the counters */ - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { - for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { - for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { - gfx_v9_0_select_se_sh(adev, j, 0x0, k); - RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); - } - } + switch (adev->asic_type) + { + case CHIP_VEGA20: + gfx_v9_0_clear_ras_edc_counter(adev); + break; + case CHIP_ARCTURUS: + gfx_v9_4_clear_ras_edc_counter(adev); + break; + default: + break; } - WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); - mutex_unlock(&adev->grbm_idx_mutex); fail: amdgpu_ib_free(adev, &ib, NULL); @@ -3810,8 +4355,12 @@ static int gfx_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; + if (adev->asic_type == CHIP_ARCTURUS) + adev->gfx.num_gfx_rings = 0; + else + adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); gfx_v9_0_set_gds_init(adev); @@ -3820,108 +4369,33 @@ static int gfx_v9_0_early_init(void *handle) return 0; } -static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry); - static int gfx_v9_0_ecc_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_common_if **ras_if = &adev->gfx.ras_if; - struct ras_ih_if ih_info = { - .cb = gfx_v9_0_process_ras_data_cb, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "gfx_err_count", - .debugfs_name = "gfx_err_inject", - }; - struct ras_common_if ras_block = { - .block = AMDGPU_RAS_BLOCK__GFX, - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - .sub_block_index = 0, - .name = "gfx", - }; int r; - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { - amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); - return 0; + /* + * Temp workaround to fix the issue that CP firmware fails to + * update read pointer when CPDMA is writing clearing operation + * to GDS in suspend/resume sequence on several cards. So just + * limit this operation in cold boot sequence. + */ + if (!adev->in_suspend) { + r = gfx_v9_0_do_edc_gds_workarounds(adev); + if (r) + return r; } - r = gfx_v9_0_do_edc_gds_workarounds(adev); - if (r) - return r; - /* requires IBs so do in late init after IB pool is initialized */ r = gfx_v9_0_do_edc_gpr_workarounds(adev); if (r) return r; - /* handle resume path. */ - if (*ras_if) { - /* resend ras TA enable cmd during resume. - * prepare to handle failure. - */ - ih_info.head = **ras_if; - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - /* request a gpu reset. will run again. */ - amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__GFX); - return 0; - } - /* fail to enable ras, cleanup all. */ - goto irq; - } - /* enable successfully. continue. */ - goto resume; - } - - *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); - if (!*ras_if) - return -ENOMEM; - - **ras_if = ras_block; - - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__GFX); - r = 0; - } - goto feature; - } - - ih_info.head = **ras_if; - fs_info.head = **ras_if; - - r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); - if (r) - goto interrupt; - - amdgpu_ras_debugfs_create(adev, &fs_info); - - r = amdgpu_ras_sysfs_create(adev, &fs_info); - if (r) - goto sysfs; -resume: - r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); + r = amdgpu_gfx_ras_late_init(adev); if (r) - goto irq; + return r; return 0; -irq: - amdgpu_ras_sysfs_remove(adev, *ras_if); -sysfs: - amdgpu_ras_debugfs_remove(adev, *ras_if); - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); -interrupt: - amdgpu_ras_feature_enable(adev, *ras_if, 0); -feature: - kfree(*ras_if); - *ras_if = NULL; - return r; } static int gfx_v9_0_late_init(void *handle) @@ -3992,7 +4466,8 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); } else { gfx_v9_0_enable_gfx_cg_power_gating(adev, false); - gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); + if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) + gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } amdgpu_gfx_rlc_exit_safe_mode(adev); @@ -4097,6 +4572,9 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, { uint32_t data, def; + if (adev->asic_type == CHIP_ARCTURUS) + return; + amdgpu_gfx_rlc_enter_safe_mode(adev); /* Enable 3D CGCG/CGLS */ @@ -4162,8 +4640,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev /* enable cgcg FSM(0x0000363F) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); - data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->asic_type == CHIP_ARCTURUS) + data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + else + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; @@ -4231,10 +4713,11 @@ static int gfx_v9_0_set_powergating_state(void *handle, enum amd_powergating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + bool enable = (state == AMD_PG_STATE_GATE); switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: if (!enable) { amdgpu_gfx_off_ctrl(adev, false); cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); @@ -4289,8 +4772,10 @@ static int gfx_v9_0_set_clockgating_state(void *handle, case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: gfx_v9_0_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -4307,12 +4792,12 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) *flags = 0; /* AMD_CG_SUPPORT_GFX_MGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) *flags |= AMD_CG_SUPPORT_GFX_MGCG; /* AMD_CG_SUPPORT_GFX_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CGCG; @@ -4321,23 +4806,25 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) *flags |= AMD_CG_SUPPORT_GFX_CGLS; /* AMD_CG_SUPPORT_GFX_RLC_LS */ - data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_CP_LS */ - data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; - /* AMD_CG_SUPPORT_GFX_3D_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); - if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) - *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; + if (adev->asic_type != CHIP_ARCTURUS) { + /* AMD_CG_SUPPORT_GFX_3D_CGCG */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); + if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; - /* AMD_CG_SUPPORT_GFX_3D_CGLS */ - if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) - *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; + /* AMD_CG_SUPPORT_GFX_3D_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; + } } static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) @@ -4379,7 +4866,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { switch (ring->me) { @@ -4399,8 +4886,8 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, - adev->nbio_funcs->get_hdp_flush_req_offset(adev), - adev->nbio_funcs->get_hdp_flush_done_offset(adev), + adev->nbio.funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), ref_and_mask, ref_and_mask, 0x20); } @@ -4801,6 +5288,7 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register*/ @@ -4809,9 +5297,9 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); } static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, @@ -5132,31 +5620,788 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry) + +static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { + { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), + SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) + }, + { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), + SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) + }, + { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), + 0, 0 + }, + { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), + 0, 0 + }, + { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), + SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) + }, + { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), + SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), + 0, 0 + }, + { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), + SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), + SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) + }, + { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), + SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) + }, + { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), + 0, 0 + }, + { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), + 0, 0 + }, + { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), + SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), + 0, 0 + }, + { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) + }, + { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), + 0, 0 + }, + { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) + }, + { "GDS_OA_PHY_PHY_CMD_RAM_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) + }, + { "GDS_OA_PHY_PHY_DATA_RAM_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), + 0, 0 + }, + { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) + }, + { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) + }, + { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) + }, + { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) + }, + { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), + 0, 0 + }, + { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) + }, + { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), + 0, 0 + }, + { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), + 0, 0 + }, + { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), + 0, 0 + }, + { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), + 0, 0 + }, + { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), + SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), + 0, 0 + }, + { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), + SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), + 0, 0 + }, + { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) + }, + { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) + }, + { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) + }, + { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) + }, + { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) + }, + { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), + 0, 0 + }, + { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), + 0, 0 + }, + { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), + 0, 0 + }, + { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), + 0, 0 + }, + { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), + 0, 0 + }, + { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), + 0, 0 + }, + { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), + 0, 0 + }, + { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), + 0, 0 + }, + { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), + 0, 0 + }, + { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), + 0, 0 + }, + { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), + 0, 0 + }, + { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), + 0, 0 + }, + { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), + 0, 0 + }, + { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), + SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), + 0, 0 + }, + { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) + }, + { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) + }, + { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), + 0, 0 + }, + { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), + 0, 0 + }, + { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), + 0, 0 + }, + { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) + }, + { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) + }, + { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) + }, + { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) + }, + { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) + }, + { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) + }, + { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) + }, + { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) + }, + { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) + }, + { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) + }, + { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) + }, + { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) + }, + { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) + }, + { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) + }, + { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) + }, + { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) + }, + { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) + }, + { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) + }, + { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) + }, + { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) + }, + { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) + }, + { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), + 0, 0 + }, + { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) + }, + { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) + }, + { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) + }, + { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) + }, + { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) + }, + { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), + 0, 0 + }, + { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) + }, + { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) + }, + { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) + }, + { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) + }, + { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) + }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0 + }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0 + }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0 + }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0 + }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0 + }, + { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) + }, + { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) + }, + { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) + }, + { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0 + }, + { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0 + }, + { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), + 0, 0 + }, + { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), + 0, 0 + }, + { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), + 0, 0 + }, + { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), + 0, 0 + } +}; + +static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, + void *inject_if) +{ + struct ras_inject_if *info = (struct ras_inject_if *)inject_if; + int ret; + struct ta_ras_trigger_error_input block_info = { 0 }; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return -EINVAL; + + if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) + return -EINVAL; + + if (!ras_gfx_subblocks[info->head.sub_block_index].name) + return -EPERM; + + if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & + info->head.type)) { + DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", + ras_gfx_subblocks[info->head.sub_block_index].name, + info->head.type); + return -EPERM; + } + + if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & + info->head.type)) { + DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", + ras_gfx_subblocks[info->head.sub_block_index].name, + info->head.type); + return -EPERM; + } + + block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); + block_info.sub_block_index = + ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; + block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); + block_info.address = info->address; + block_info.value = info->value; + + mutex_lock(&adev->grbm_idx_mutex); + ret = psp_ras_trigger_error(&adev->psp, &block_info); + mutex_unlock(&adev->grbm_idx_mutex); + + return ret; +} + +static const char *vml2_mems[] = { + "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_0_4K_MEM0", + "UTC_VML2_BANK_CACHE_0_4K_MEM1", + "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_1_4K_MEM0", + "UTC_VML2_BANK_CACHE_1_4K_MEM1", + "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_2_4K_MEM0", + "UTC_VML2_BANK_CACHE_2_4K_MEM1", + "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_3_4K_MEM0", + "UTC_VML2_BANK_CACHE_3_4K_MEM1", +}; + +static const char *vml2_walker_mems[] = { + "UTC_VML2_CACHE_PDE0_MEM0", + "UTC_VML2_CACHE_PDE0_MEM1", + "UTC_VML2_CACHE_PDE1_MEM0", + "UTC_VML2_CACHE_PDE1_MEM1", + "UTC_VML2_CACHE_PDE2_MEM0", + "UTC_VML2_CACHE_PDE2_MEM1", + "UTC_VML2_RDIF_LOG_FIFO", +}; + +static const char *atc_l2_cache_2m_mems[] = { + "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", + "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", + "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", + "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", +}; + +static const char *atc_l2_cache_4k_mems[] = { + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", +}; + +static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, + struct ras_err_data *err_data) +{ + uint32_t i, data; + uint32_t sec_count, ded_count; + + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); + + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); + + sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + vml2_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + vml2_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); + + sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + vml2_walker_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + vml2_walker_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); + + sec_count = (data & 0x00006000L) >> 0xd; + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + atc_l2_cache_2m_mems[i], sec_count); + err_data->ce_count += sec_count; + } + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); + + sec_count = (data & 0x00006000L) >> 0xd; + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + atc_l2_cache_4k_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = (data & 0x00018000L) >> 0xf; + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + atc_l2_cache_4k_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); + + return 0; +} + +static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg, + uint32_t se_id, uint32_t inst_id, uint32_t value, + uint32_t *sec_count, uint32_t *ded_count) +{ + uint32_t i; + uint32_t sec_cnt, ded_cnt; + + for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { + if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || + gfx_v9_0_ras_fields[i].seg != reg->seg || + gfx_v9_0_ras_fields[i].inst != reg->inst) + continue; + + sec_cnt = (value & + gfx_v9_0_ras_fields[i].sec_count_mask) >> + gfx_v9_0_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", + gfx_v9_0_ras_fields[i].name, + se_id, inst_id, + sec_cnt); + *sec_count += sec_cnt; + } + + ded_cnt = (value & + gfx_v9_0_ras_fields[i].ded_count_mask) >> + gfx_v9_0_ras_fields[i].ded_count_shift; + if (ded_cnt) { + DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n", + gfx_v9_0_ras_fields[i].name, + se_id, inst_id, + ded_cnt); + *ded_count += ded_cnt; + } + } + + return 0; +} + +static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev) { - /* TODO ue will trigger an interrupt. */ - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - amdgpu_ras_reset_gpu(adev, 0); - return AMDGPU_RAS_UE; + int i, j, k; + + /* read back registers to clear the counters */ + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { + gfx_v9_0_select_se_sh(adev, j, 0x0, k); + RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); + } + } + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); + + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); + } + + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); + RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); + RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); + } + + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); } -static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) +static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) { - struct ras_common_if *ras_if = adev->gfx.ras_if; - struct ras_dispatch_if ih_data = { - .entry = entry, - }; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0, ded_count = 0; + uint32_t i, j, k; + uint32_t reg_value; - if (!ras_if) - return 0; + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return -EINVAL; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { + gfx_v9_0_select_se_sh(adev, j, 0, k); + reg_value = + RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); + if (reg_value) + gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i], + j, k, reg_value, + &sec_count, &ded_count); + } + } + } + + err_data->ce_count += sec_count; + err_data->ue_count += ded_count; + + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); - ih_data.head = *ras_if; + gfx_v9_0_query_utc_edc_status(adev, err_data); - DRM_ERROR("CP ECC ERROR IRQ\n"); - amdgpu_ras_interrupt_dispatch(adev, &ih_data); return 0; } @@ -5183,7 +6428,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = gfx_v9_0_ring_get_wptr_gfx, .set_wptr = gfx_v9_0_ring_set_wptr_gfx, @@ -5234,7 +6479,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -5269,7 +6514,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -5323,7 +6568,7 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { .set = gfx_v9_0_set_cp_ecc_error_state, - .process = gfx_v9_0_cp_ecc_error_irq, + .process = amdgpu_gfx_cp_ecc_error_irq, }; @@ -5349,6 +6594,8 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; break; default: @@ -5366,6 +6613,7 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) adev->gds.gds_size = 0x10000; break; case CHIP_RAVEN: + case CHIP_ARCTURUS: adev->gds.gds_size = 0x1000; break; default: @@ -5387,6 +6635,9 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) else adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ break; + case CHIP_ARCTURUS: + adev->gds.gds_compute_max_wave_id = 0xfff; + break; default: /* this really depends on the chip */ adev->gds.gds_compute_max_wave_id = 0x7ff; @@ -5431,12 +6682,21 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; - unsigned disable_masks[4 * 2]; + unsigned disable_masks[4 * 4]; if (!adev || !cu_info) return -EINVAL; - amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + /* + * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs + */ + if (adev->gfx.config.max_shader_engines * + adev->gfx.config.max_sh_per_se > 16) + return -EINVAL; + + amdgpu_gfx_parse_disable_cu(disable_masks, + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { @@ -5445,11 +6705,23 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, ao_bitmap = 0; counter = 0; gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); - if (i < 4 && j < 2) - gfx_v9_0_set_user_cu_inactive_bitmap( - adev, disable_masks[i * 2 + j]); + gfx_v9_0_set_user_cu_inactive_bitmap( + adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + + /* + * The bitmap(and ao_cu_bitmap) in cu_info structure is + * 4x4 size array, and it's usually suitable for Vega + * ASICs which has 4*2 SE/SH layout. + * But for Arcturus, SE/SH layout is changed to 8*1. + * To mostly reduce the impact, we make it compatible + * with current bitmap array as below: + * SE4,SH0 --> bitmap[0][1] + * SE5,SH0 --> bitmap[1][1] + * SE6,SH0 --> bitmap[2][1] + * SE7,SH0 --> bitmap[3][1] + */ + cu_info->bitmap[i % 4][j + i / 4] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { @@ -5462,7 +6734,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, active_cu_number += counter; if (i < 2 && j < 2) ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); - cu_info->ao_cu_bitmap[i][j] = ao_bitmap; + cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c new file mode 100644 index 000000000000..f099f13d7f1e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -0,0 +1,978 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/kernel.h> + +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "soc15.h" +#include "soc15d.h" +#include "amdgpu_atomfirmware.h" +#include "amdgpu_pm.h" + +#include "gc/gc_9_4_1_offset.h" +#include "gc/gc_9_4_1_sh_mask.h" +#include "soc15_common.h" + +#include "gfx_v9_4.h" +#include "amdgpu_ras.h" + +static const struct soc15_reg_entry gfx_v9_4_edc_counter_regs[] = { + /* CPC */ + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1 }, + /* DC */ + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1 }, + /* CPF */ + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1 }, + /* GDS */ + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1 }, + /* SPI */ + { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1 }, + /* SQ */ + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16 }, + /* SQC */ + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6 }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6 }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6 }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), 0, 4, 6 }, + /* TA */ + { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16 }, + /* TCA */ + { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2 }, + /* TCC */ + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16 }, + /* TCI */ + { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72 }, + /* TCP */ + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16 }, + /* TD */ + { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16 }, + /* GCEA */ + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32 }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32 }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 1, 32 }, + /* RLC */ + { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), 0, 1, 1 }, +}; + +static void gfx_v9_4_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance) +{ + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, + instance); + + if (se_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + + if (sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); + + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); +} + +static const struct soc15_ras_field_entry gfx_v9_4_ras_fields[] = { + /* CPC */ + { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), + SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, + { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), + SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) }, + { "CPC_DC_STATE_RAM_ME1", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), + SOC15_REG_FIELD(DC_EDC_STATE_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(DC_EDC_STATE_CNT, DED_COUNT_ME1) }, + { "CPC_DC_CSINVOC_RAM_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT_ME1) }, + { "CPC_DC_RESTORE_RAM_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT_ME1) }, + { "CPC_DC_CSINVOC_RAM1_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT1_ME1), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT1_ME1) }, + { "CPC_DC_RESTORE_RAM1_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT1_ME1), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT1_ME1) }, + + /* CPF */ + { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME2), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME2) }, + { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME1) }, + { "CPF_TCIU_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), + SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) }, + + /* GDS */ + { "GDS_GRBM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), + SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, SEC), + SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, DED) }, + { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) }, + { "GDS_PHY_CMD_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, + { "GDS_PHY_DATA_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_DED) }, + { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE0_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE1_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE2_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE3_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, + + /* SPI */ + { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_DED_COUNT) }, + { "SPI_GDS_EXPREQ", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_DED_COUNT) }, + { "SPI_WB_GRANT_30", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_DED_COUNT) }, + { "SPI_WB_GRANT_61", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_DED_COUNT) }, + { "SPI_LIFE_CNT", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_DED_COUNT) }, + + /* SQ */ + { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) }, + { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) }, + { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) }, + { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) }, + { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) }, + { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) }, + { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) }, + + /* SQC */ + { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKA_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKA_UTCL1_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKA_UTCL1_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKA_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKA_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_HIT_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKA_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + DATA_BANKA_MISS_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKB_UTCL1_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKB_UTCL1_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKB_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKB_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_HIT_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKB_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + DATA_BANKB_MISS_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, + + /* TA */ + { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, + { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_DED_COUNT) }, + { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_DED_COUNT) }, + { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_DED_COUNT) }, + { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_DED_COUNT) }, + + /* TCA */ + { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), + SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_DED_COUNT) }, + { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), + SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_DED_COUNT) }, + + /* TCC */ + { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, + { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, + { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, + { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, + { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_DED_COUNT) }, + { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_DED_COUNT) }, + { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_DED_COUNT) }, + { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_DED_COUNT) }, + { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_DED_COUNT) }, + { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_DED_COUNT) }, + { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_DED_COUNT) }, + { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, + { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_DED_COUNT) }, + { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_DED_COUNT) }, + { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_DED_COUNT) }, + + /* TCI */ + { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), + SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_DED_COUNT) }, + + /* TCP */ + { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, + { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, + { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_DED_COUNT) }, + { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_DED_COUNT) }, + { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0, 0 }, + { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, + { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, + + /* TD */ + { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, + { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, + { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_DED_COUNT) }, + + /* EA */ + { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, + { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, + { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, + { "EA_GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, + { "EA_GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, + { "EA_GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT) }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT) }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0, 0 }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, IORD_CMDMEM_DED_COUNT) }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0, 0 }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_CMDMEM_DED_COUNT) }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0, 0 }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_DATAMEM_DED_COUNT) }, + { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT) }, + { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT) }, + { "EA_MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_DED_COUNT) }, + { "EA_MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_DED_COUNT) }, + { "EA_MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_DED_COUNT) }, + { "EA_MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_DED_COUNT) }, + { "EA_MAM_A0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_DED_COUNT) }, + { "EA_MAM_A1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_DED_COUNT) }, + { "EA_MAM_A2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_DED_COUNT) }, + { "EA_MAM_A3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_DED_COUNT) }, + { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, MAM_AFMEM_SEC_COUNT), 0, 0 }, + { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_AFMEM_DED_COUNT) }, + + /* RLC */ + { "RLCG_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_DED_COUNT) }, + { "RLCG_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_DED_COUNT) }, + { "RLCV_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_DED_COUNT) }, + { "RLCV_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_DED_COUNT) }, + { "RLC_TCTAG_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_DED_COUNT) }, + { "RLC_SPM_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SRM_DATA_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_DED_COUNT) }, + { "RLC_SRM_ADDR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_DED_COUNT) }, + { "RLC_SPM_SE0_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE1_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE2_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE3_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE4_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE5_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE6_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE7_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_DED_COUNT) }, +}; + +static const char * const vml2_mems[] = { + "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_0_4K_MEM0", + "UTC_VML2_BANK_CACHE_0_4K_MEM1", + "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_1_4K_MEM0", + "UTC_VML2_BANK_CACHE_1_4K_MEM1", + "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_2_4K_MEM0", + "UTC_VML2_BANK_CACHE_2_4K_MEM1", + "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_3_4K_MEM0", + "UTC_VML2_BANK_CACHE_3_4K_MEM1", + "UTC_VML2_IFIFO_GROUP0", + "UTC_VML2_IFIFO_GROUP1", + "UTC_VML2_IFIFO_GROUP2", + "UTC_VML2_IFIFO_GROUP3", + "UTC_VML2_IFIFO_GROUP4", + "UTC_VML2_IFIFO_GROUP5", + "UTC_VML2_IFIFO_GROUP6", + "UTC_VML2_IFIFO_GROUP7", + "UTC_VML2_IFIFO_GROUP8", + "UTC_VML2_IFIFO_GROUP9", + "UTC_VML2_IFIFO_GROUP10", + "UTC_VML2_IFIFO_GROUP11", + "UTC_VML2_IFIFO_GROUP12", + "UTC_VML2_IFIFO_GROUP13", + "UTC_VML2_IFIFO_GROUP14", + "UTC_VML2_IFIFO_GROUP15", + "UTC_VML2_IFIFO_GROUP16", + "UTC_VML2_IFIFO_GROUP17", + "UTC_VML2_IFIFO_GROUP18", + "UTC_VML2_IFIFO_GROUP19", + "UTC_VML2_IFIFO_GROUP20", + "UTC_VML2_IFIFO_GROUP21", + "UTC_VML2_IFIFO_GROUP22", + "UTC_VML2_IFIFO_GROUP23", + "UTC_VML2_IFIFO_GROUP24", +}; + +static const char * const vml2_walker_mems[] = { + "UTC_VML2_CACHE_PDE0_MEM0", + "UTC_VML2_CACHE_PDE0_MEM1", + "UTC_VML2_CACHE_PDE1_MEM0", + "UTC_VML2_CACHE_PDE1_MEM1", + "UTC_VML2_CACHE_PDE2_MEM0", + "UTC_VML2_CACHE_PDE2_MEM1", + "UTC_VML2_RDIF_ARADDRS", + "UTC_VML2_RDIF_LOG_FIFO", + "UTC_VML2_QUEUE_REQ", + "UTC_VML2_QUEUE_RET", +}; + +static const char * const utcl2_router_mems[] = { + "UTCL2_ROUTER_GROUP0_VML2_REQ_FIFO0", + "UTCL2_ROUTER_GROUP1_VML2_REQ_FIFO1", + "UTCL2_ROUTER_GROUP2_VML2_REQ_FIFO2", + "UTCL2_ROUTER_GROUP3_VML2_REQ_FIFO3", + "UTCL2_ROUTER_GROUP4_VML2_REQ_FIFO4", + "UTCL2_ROUTER_GROUP5_VML2_REQ_FIFO5", + "UTCL2_ROUTER_GROUP6_VML2_REQ_FIFO6", + "UTCL2_ROUTER_GROUP7_VML2_REQ_FIFO7", + "UTCL2_ROUTER_GROUP8_VML2_REQ_FIFO8", + "UTCL2_ROUTER_GROUP9_VML2_REQ_FIFO9", + "UTCL2_ROUTER_GROUP10_VML2_REQ_FIFO10", + "UTCL2_ROUTER_GROUP11_VML2_REQ_FIFO11", + "UTCL2_ROUTER_GROUP12_VML2_REQ_FIFO12", + "UTCL2_ROUTER_GROUP13_VML2_REQ_FIFO13", + "UTCL2_ROUTER_GROUP14_VML2_REQ_FIFO14", + "UTCL2_ROUTER_GROUP15_VML2_REQ_FIFO15", + "UTCL2_ROUTER_GROUP16_VML2_REQ_FIFO16", + "UTCL2_ROUTER_GROUP17_VML2_REQ_FIFO17", + "UTCL2_ROUTER_GROUP18_VML2_REQ_FIFO18", + "UTCL2_ROUTER_GROUP19_VML2_REQ_FIFO19", + "UTCL2_ROUTER_GROUP20_VML2_REQ_FIFO20", + "UTCL2_ROUTER_GROUP21_VML2_REQ_FIFO21", + "UTCL2_ROUTER_GROUP22_VML2_REQ_FIFO22", + "UTCL2_ROUTER_GROUP23_VML2_REQ_FIFO23", + "UTCL2_ROUTER_GROUP24_VML2_REQ_FIFO24", +}; + +static const char * const atc_l2_cache_2m_mems[] = { + "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", + "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", + "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", + "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", +}; + +static const char * const atc_l2_cache_4k_mems[] = { + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", +}; + +static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev, + struct ras_err_data *err_data) +{ + uint32_t i, data; + uint32_t sec_count, ded_count; + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0); + + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0); + + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL); + + sec_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + vml2_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + vml2_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL); + + sec_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + vml2_walker_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + vml2_walker_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) { + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL); + + sec_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + utcl2_router_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + utcl2_router_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i); + data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL); + + sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + atc_l2_cache_2m_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + atc_l2_cache_2m_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i); + data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL); + + sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + atc_l2_cache_4k_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + atc_l2_cache_4k_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); + + return 0; +} + +static int gfx_v9_4_ras_error_count(const struct soc15_reg_entry *reg, + uint32_t se_id, uint32_t inst_id, + uint32_t value, uint32_t *sec_count, + uint32_t *ded_count) +{ + uint32_t i; + uint32_t sec_cnt, ded_cnt; + + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_ras_fields); i++) { + if (gfx_v9_4_ras_fields[i].reg_offset != reg->reg_offset || + gfx_v9_4_ras_fields[i].seg != reg->seg || + gfx_v9_4_ras_fields[i].inst != reg->inst) + continue; + + sec_cnt = (value & gfx_v9_4_ras_fields[i].sec_count_mask) >> + gfx_v9_4_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", + gfx_v9_4_ras_fields[i].name, se_id, inst_id, + sec_cnt); + *sec_count += sec_cnt; + } + + ded_cnt = (value & gfx_v9_4_ras_fields[i].ded_count_mask) >> + gfx_v9_4_ras_fields[i].ded_count_shift; + if (ded_cnt) { + DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n", + gfx_v9_4_ras_fields[i].name, se_id, inst_id, + ded_cnt); + *ded_count += ded_cnt; + } + } + + return 0; +} + +int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0, ded_count = 0; + uint32_t i, j, k; + uint32_t reg_value; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return -EINVAL; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance; + k++) { + gfx_v9_4_select_se_sh(adev, j, 0, k); + reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( + gfx_v9_4_edc_counter_regs[i])); + if (reg_value) + gfx_v9_4_ras_error_count( + &gfx_v9_4_edc_counter_regs[i], + j, k, reg_value, &sec_count, + &ded_count); + } + } + } + + err_data->ce_count += sec_count; + err_data->ue_count += ded_count; + + gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + gfx_v9_4_query_utc_edc_status(adev, err_data); + + return 0; +} + +void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev) +{ + int i, j, k; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance; + k++) { + gfx_v9_4_select_se_sh(adev, j, 0x0, k); + RREG32(SOC15_REG_ENTRY_OFFSET( + gfx_v9_4_edc_counter_regs[i])); + } + } + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0); + + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0); + + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL); + } + + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL); + } + + for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) { + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL); + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i); + RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL); + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i); + RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL); + } + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); +} + +int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if) +{ + struct ras_inject_if *info = (struct ras_inject_if *)inject_if; + int ret; + struct ta_ras_trigger_error_input block_info = { 0 }; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return -EINVAL; + + block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); + block_info.sub_block_index = info->head.sub_block_index; + block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); + block_info.address = info->address; + block_info.value = info->value; + + mutex_lock(&adev->grbm_idx_mutex); + ret = psp_ras_trigger_error(&adev->psp, &block_info); + mutex_unlock(&adev->grbm_idx_mutex); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h new file mode 100644 index 000000000000..2e3f6f755ad4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h @@ -0,0 +1,35 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFX_V9_4_H__ +#define __GFX_V9_4_H__ + +void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev); + +int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); + +int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, + void *inject_if); + +#endif /* __GFX_V9_4_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 15986748f59f..1a2f18b908fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -75,40 +75,45 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); - /* Program the system aperture low logical page number. */ - WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - - if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) - /* - * Raven2 has a HW issue that it is unable to use the vram which - * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the - * workaround that increase system aperture high address (add 1) - * to get rid of the VM fault and hardware hang. - */ - WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max((adev->gmc.fb_end >> 18) + 0x1, - adev->gmc.agp_end >> 18)); - else - WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); - - /* Set default page address. */ - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start - + adev->vm_manager.vram_base_offset; - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, - (u32)(value >> 12)); - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, - (u32)(value >> 44)); - - /* Program "protection fault". */ - WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, - (u32)(adev->dummy_page_addr >> 12)); - WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, - (u32)((u64)adev->dummy_page_addr >> 44)); - - WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2, - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) { + /* Program the system aperture low logical page number. */ + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + + if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) + /* + * Raven2 has a HW issue that it is unable to use the + * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. + * So here is the workaround that increase system + * aperture high address (add 1) to get rid of the VM + * fault and hardware hang. + */ + WREG32_SOC15_RLC(GC, 0, + mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max((adev->gmc.fb_end >> 18) + 0x1, + adev->gmc.agp_end >> 18)); + else + WREG32_SOC15_RLC( + GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + } } static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) @@ -178,6 +183,8 @@ static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp); } @@ -262,7 +269,7 @@ static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev)) { + if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) { /* * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are * VF copy registers so vbios post doesn't program them, for @@ -278,10 +285,12 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) gfxhub_v1_0_init_gart_aperture_regs(adev); gfxhub_v1_0_init_system_aperture_regs(adev); gfxhub_v1_0_init_tlb_regs(adev); - gfxhub_v1_0_init_cache_regs(adev); + if (!amdgpu_sriov_vf(adev)) + gfxhub_v1_0_init_cache_regs(adev); gfxhub_v1_0_enable_system_domain(adev); - gfxhub_v1_0_disable_identity_aperture(adev); + if (!amdgpu_sriov_vf(adev)) + gfxhub_v1_0_disable_identity_aperture(adev); gfxhub_v1_0_setup_vmid_config(adev); gfxhub_v1_0_program_invalidation(adev); @@ -357,7 +366,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, void gfxhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, @@ -365,6 +374,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c index 5e9ab8eb214a..c0ab71df0d90 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c @@ -33,16 +33,31 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL); u32 max_region = REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); + u32 max_num_physical_nodes = 0; + u32 max_physical_node_id = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + max_num_physical_nodes = 4; + max_physical_node_id = 3; + break; + case CHIP_ARCTURUS: + max_num_physical_nodes = 8; + max_physical_node_id = 7; + break; + default: + return -EINVAL; + } /* PF_MAX_REGION=0 means xgmi is disabled */ if (max_region) { adev->gmc.xgmi.num_physical_nodes = max_region + 1; - if (adev->gmc.xgmi.num_physical_nodes > 4) + if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) return -EINVAL; adev->gmc.xgmi.physical_node_id = REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); - if (adev->gmc.xgmi.physical_node_id > 3) + if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) return -EINVAL; adev->gmc.xgmi.node_segment_size = REG_GET_FIELD( RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE), diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index d605b4963f8a..b70c7b483c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -46,21 +46,25 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24; } -static void gfxhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev) +void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) { - uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); + /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */ + int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + offset * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - lower_32_bits(value)); - - WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - upper_32_bits(value)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + offset * vmid, upper_32_bits(page_table_base)); } static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) { - gfxhub_v2_0_init_gart_pt_regs(adev); + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + gfxhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base); WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); @@ -140,7 +144,7 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev) /* XXX for emulation, Refer to closed source code.*/ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); - tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp); @@ -151,6 +155,15 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp); tmp = mmGCVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp); tmp = mmGCVM_L2_CNTL4_DEFAULT; @@ -166,6 +179,8 @@ static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp); } @@ -333,7 +348,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, void gfxhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, @@ -341,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h index 06807940748b..392b8cd94fc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h @@ -31,5 +31,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value); void gfxhub_v2_0_init(struct amdgpu_device *adev); u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev); +void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5eeb72fcc123..9775eca6fe43 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -30,6 +30,8 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include "gc/gc_10_1_0_sh_mask.h" #include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_offset.h" #include "dcn/dcn_2_0_0_offset.h" #include "dcn/dcn_2_0_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" @@ -37,6 +39,7 @@ #include "navi10_enum.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "nbio_v2_3.h" @@ -62,7 +65,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_vmhub *hub; u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i; - bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + bits[AMDGPU_GFXHUB_0] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | @@ -70,7 +73,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; - bits[AMDGPU_MMHUB] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + bits[AMDGPU_MMHUB_0] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | @@ -81,39 +84,39 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* MM HUB */ - hub = &adev->vmhub[AMDGPU_MMHUB]; + hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp &= ~bits[AMDGPU_MMHUB]; + tmp &= ~bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); } /* GFX HUB */ - hub = &adev->vmhub[AMDGPU_GFXHUB]; + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp &= ~bits[AMDGPU_GFXHUB]; + tmp &= ~bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); } break; case AMDGPU_IRQ_STATE_ENABLE: /* MM HUB */ - hub = &adev->vmhub[AMDGPU_MMHUB]; + hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp |= bits[AMDGPU_MMHUB]; + tmp |= bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); } /* GFX HUB */ - hub = &adev->vmhub[AMDGPU_GFXHUB]; + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp |= bits[AMDGPU_GFXHUB]; + tmp |= bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); } break; @@ -136,22 +139,53 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, addr |= ((u64)entry->src_data[1] & 0xf) << 44; if (!amdgpu_sriov_vf(adev)) { + /* + * Issue a dummy read to wait for the status register to + * be updated to avoid reading an incorrect value due to + * the new fast GRBM interface. + */ + if (entry->vmid_src == AMDGPU_GFXHUB_0) + RREG32(hub->vm_l2_pro_fault_status); + status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } if (printk_ratelimit()) { + struct amdgpu_task_info task_info; + + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); + amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + dev_err(adev->dev, - "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " + "for process %s pid %d thread %s pid %d)\n", entry->vmid_src ? "mmhub" : "gfxhub", entry->src_id, entry->ring_id, entry->vmid, - entry->pasid); - dev_err(adev->dev, " at page 0x%016llx from %d\n", + entry->pasid, task_info.process_name, task_info.tgid, + task_info.task_name, task_info.pid); + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", addr, entry->client_id); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { dev_err(adev->dev, - "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, RW)); + } } return 0; @@ -188,6 +222,34 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid, return req; } +/** + * gmc_v10_0_use_invalidate_semaphore - judge whether to use semaphore + * + * @adev: amdgpu_device pointer + * @vmhub: vmhub type + * + */ +static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, + uint32_t vmhub) +{ + return ((vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) && + (!amdgpu_sriov_vf(adev))); +} + +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( + struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -198,13 +260,44 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid, static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, unsigned int vmhub, uint32_t flush_type) { + bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub); struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; - u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type); + u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type); + u32 tmp; /* Use register 17 for GART */ const unsigned eng = 17; unsigned int i; - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + spin_lock(&adev->gmc.invalidate_lock); + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (use_semaphore) { + for (i = 0; i < adev->usec_timeout; i++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); + + /* + * Issue a dummy read to wait for the ACK register to be cleared + * to avoid a false ACK due to the new fast GRBM interface. + */ + if (vmhub == AMDGPU_GFXHUB_0) + RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { @@ -216,6 +309,16 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, udelay(1); } + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (use_semaphore) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + + spin_unlock(&adev->gmc.invalidate_lock); + if (i < adev->usec_timeout) return; @@ -230,8 +333,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * * Flush the TLB for the requested page table. */ -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct dma_fence *fence; @@ -240,15 +343,23 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, int r; /* flush hdp cache */ - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); mutex_lock(&adev->mman.gtt_window_lock); - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0); + if (vmhub == AMDGPU_MMHUB_0) { + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); + mutex_unlock(&adev->mman.gtt_window_lock); + return; + } + + BUG_ON(vmhub != AMDGPU_GFXHUB_0); + if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || - adev->in_gpu_reset) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0); + adev->in_gpu_reset || + ring->sched.ready == false) { + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); mutex_unlock(&adev->mman.gtt_window_lock); return; } @@ -264,6 +375,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); job->vm_needs_flush = true; + job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; amdgpu_ring_pad_ib(ring, &job->ibs[0]); r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); @@ -285,24 +397,102 @@ error_alloc: DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); } +/** + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (amdgpu_emu_mode == 0 && ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + /* 2 dwords flush + 8 dwords fence */ + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v10_0_flush_gpu_tlb(adev, vmid, + i, flush_type); + } else { + gmc_v10_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, flush_type); + } + break; + } + } + + return 0; +} + static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { + bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (use_semaphore) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); - /* wait for the invalidate to complete */ - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, - 1 << vmid, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (use_semaphore) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); return pd_addr; } @@ -313,7 +503,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid struct amdgpu_device *adev = ring->adev; uint32_t reg; - if (ring->funcs->vmhub == AMDGPU_GFXHUB) + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -352,43 +542,23 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid * 1 system * 0 valid */ -static uint64_t gmc_v10_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - - switch (flags & AMDGPU_VM_MTYPE_MASK) { +static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) +{ + switch (flags) { case AMDGPU_VM_MTYPE_DEFAULT: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); case AMDGPU_VM_MTYPE_NC: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); case AMDGPU_VM_MTYPE_WC: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); case AMDGPU_VM_MTYPE_CC: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); case AMDGPU_VM_MTYPE_UC: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); default: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); } - - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; } static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, @@ -415,12 +585,33 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, } } +static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + + *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; + *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + + if (mapping->flags & AMDGPU_PTE_PRT) { + *flags |= AMDGPU_PTE_PRT; + *flags |= AMDGPU_PTE_SNOOPED; + *flags |= AMDGPU_PTE_LOG; + *flags |= AMDGPU_PTE_SYSTEM; + *flags &= ~AMDGPU_PTE_VALID; + } +} + static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, - .get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags, - .get_vm_pde = gmc_v10_0_get_vm_pde + .map_mtype = gmc_v10_0_map_mtype, + .get_vm_pde = gmc_v10_0_get_vm_pde, + .get_vm_pte = gmc_v10_0_get_vm_pte }; static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -449,22 +640,13 @@ static int gmc_v10_0_early_init(void *handle) static int gmc_v10_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 }; - unsigned i; + int r; - for(i = 0; i < adev->num_rings; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - unsigned vmhub = ring->funcs->vmhub; + amdgpu_bo_late_init(adev); - ring->vm_inv_eng = vm_inv_eng[vmhub]++; - dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n", - ring->idx, ring->name, ring->vm_inv_eng, - ring->funcs->vmhub); - } - - /* Engine 17 is used for GART flushes */ - for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) - BUG_ON(vm_inv_eng[i] > 17); + r = amdgpu_gmc_allocate_vm_inv_eng(adev); + if (r) + return r; return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); } @@ -474,8 +656,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = 0; - if (!amdgpu_sriov_vf(adev)) - base = gfxhub_v2_0_get_fb_location(adev); + base = gfxhub_v2_0_get_fb_location(adev); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc); @@ -495,24 +676,13 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, */ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) { - int chansize, numchan; - - if (!amdgpu_emu_mode) - adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); - else { - /* hard code vram_width for emulation */ - chansize = 128; - numchan = 1; - adev->gmc.vram_width = numchan * chansize; - } - /* Could aper size report 0 ? */ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); /* size in MB on si */ adev->gmc.mc_vram_size = - adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; adev->gmc.real_vram_size = adev->gmc.mc_vram_size; adev->gmc.visible_vram_size = adev->gmc.aper_size; @@ -524,6 +694,8 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) if (amdgpu_gart_size == -1) { switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -589,8 +761,7 @@ static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v10_0_sw_init(void *handle) { - int r; - int dma_bits; + int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfxhub_v2_0_init(adev); @@ -598,12 +769,23 @@ static int gmc_v10_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + if (!amdgpu_emu_mode) + adev->gmc.vram_width = vram_width; + else + adev->gmc.vram_width = 1 * 128; /* numchan * chansize */ + + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->num_vmhubs = 2; /* * To fulfill 4-level page support, - * vm size is 256TB (48bit), maximum size of Navi10, + * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12, * block size 512 (9bit) */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); @@ -616,6 +798,10 @@ static int gmc_v10_0_sw_init(void *handle) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault); + + if (r) + return r; + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); @@ -628,35 +814,10 @@ static int gmc_v10_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - /* - * Reserve 8M stolen memory for navi10 like vega10 - * TODO: will check if it's really needed on asic. - */ - if (amdgpu_emu_mode == 1) - adev->gmc.stolen_size = 0; - else - adev->gmc.stolen_size = 9 * 1024 *1024; - - /* - * Set DMA mask + need_dma32 flags. - * PCIE - can handle 44-bits. - * IGP - can handle 44-bits - * PCI - dma32 for legacy pci gart, 44 bits on navi10 - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 32 : 44; - - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); - } - - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); + return r; } r = gmc_v10_0_mc_init(adev); @@ -680,8 +841,8 @@ static int gmc_v10_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; amdgpu_vm_manager_init(adev); @@ -717,6 +878,8 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: break; default: break; @@ -759,14 +922,15 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); /* Flush HDP after it is initialized */ - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; gfxhub_v2_0_set_fault_enable_default(adev, value); mmhub_v2_0_set_fault_enable_default(adev, value); - gmc_v10_0_flush_gpu_tlb(adev, 0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index ca8dbe91cc8b..b205039350b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -362,8 +362,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) return 0; } -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } @@ -386,27 +386,20 @@ static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } -static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; -} - static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } +static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags &= ~AMDGPU_PTE_PRT; +} + static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { @@ -571,7 +564,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) else gmc_v6_0_set_fault_enable_default(adev, true); - gmc_v6_0_flush_gpu_tlb(adev, 0, 0); + gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); @@ -839,9 +832,10 @@ static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v6_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->num_vmhubs = 1; + if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { @@ -862,20 +856,12 @@ static int gmc_v6_0_sw_init(void *handle) adev->gmc.mc_mask = 0xffffffffffULL; - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 32 : 40; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - dev_warn(adev->dev, "amdgpu: No coherent DMA available.\n"); - } - adev->need_swiotlb = drm_need_swiotlb(dma_bits); + adev->need_swiotlb = drm_need_swiotlb(44); r = gmc_v6_0_init_microcode(adev); if (r) { @@ -1160,7 +1146,7 @@ static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb, .set_prt = gmc_v6_0_set_prt, .get_vm_pde = gmc_v6_0_get_vm_pde, - .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags + .get_vm_pte = gmc_v6_0_get_vm_pte, }; static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 57f80065d57a..9da9596a3638 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -381,7 +381,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if (adev->flags & AMD_IS_APU && + adev->gmc.real_vram_size > adev->gmc.aper_size) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } @@ -418,6 +419,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) return 0; } +/** + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid; + unsigned int tmp; + + if (adev->in_gpu_reset) + return -EIO; + + for (vmid = 1; vmid < 16; vmid++) { + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -433,8 +466,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * * Flush the TLB for the requested page table (CIK). */ -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -463,27 +496,20 @@ static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } -static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; -} - static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } +static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags &= ~AMDGPU_PTE_PRT; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -677,7 +703,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) WREG32(mmCHUB_CONTROL, tmp); } - gmc_v7_0_flush_gpu_tlb(adev, 0, 0); + gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); @@ -959,9 +985,10 @@ static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v7_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->num_vmhubs = 1; + if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { @@ -990,25 +1017,12 @@ static int gmc_v7_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - /* set DMA mask + need_dma32 flags. - * PCIE - can handle 40-bits. - * IGP - can handle 40-bits - * PCI - dma32 for legacy pci gart, 40 bits on newer asics - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 32 : 40; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; pr_warn("amdgpu: No suitable DMA available\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - pr_warn("amdgpu: No coherent DMA available\n"); - } - adev->need_swiotlb = drm_need_swiotlb(dma_bits); + adev->need_swiotlb = drm_need_swiotlb(40); r = gmc_v7_0_init_microcode(adev); if (r) { @@ -1352,11 +1366,12 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, .set_prt = gmc_v7_0_set_prt, - .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, - .get_vm_pde = gmc_v7_0_get_vm_pde + .get_vm_pde = gmc_v7_0_get_vm_pde, + .get_vm_pte = gmc_v7_0_get_vm_pte }; static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 9238280d1ff7..27d83204fa2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) return 0; } +/** + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid; + unsigned int tmp; + + if (adev->in_gpu_reset) + return -EIO; + + for (vmid = 1; vmid < 16; vmid++) { + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; + +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -635,8 +668,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * * Flush the TLB for the requested page table (VI). */ -static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -686,29 +719,21 @@ static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, * 0 valid */ -static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; -} - static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } +static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + *flags &= ~AMDGPU_PTE_PRT; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -921,7 +946,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) else gmc_v8_0_set_fault_enable_default(adev, true); - gmc_v8_0_flush_gpu_tlb(adev, 0, 0); + gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); @@ -1079,9 +1104,10 @@ static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v8_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->num_vmhubs = 1; + if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { @@ -1116,25 +1142,12 @@ static int gmc_v8_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - /* set DMA mask + need_dma32 flags. - * PCIE - can handle 40-bits. - * IGP - can handle 40-bits - * PCI - dma32 for legacy pci gart, 40 bits on newer asics - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 32 : 40; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; pr_warn("amdgpu: No suitable DMA available\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - pr_warn("amdgpu: No coherent DMA available\n"); - } - adev->need_swiotlb = drm_need_swiotlb(dma_bits); + adev->need_swiotlb = drm_need_swiotlb(40); r = gmc_v8_0_init_microcode(adev); if (r) { @@ -1720,11 +1733,12 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, .set_prt = gmc_v8_0_set_prt, - .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, - .get_vm_pde = gmc_v8_0_get_vm_pde + .get_vm_pde = gmc_v8_0_get_vm_pde, + .get_vm_pte = gmc_v8_0_get_vm_pte }; static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 73f3b79ab131..90216abf14a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -38,20 +38,27 @@ #include "dce/dce_12_0_sh_mask.h" #include "vega10_enum.h" #include "mmhub/mmhub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" #include "athub/athub_1_0_offset.h" #include "oss/osssys_4_0_offset.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "umc/umc_6_0_sh_mask.h" #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" +#include "athub_v1_0.h" #include "gfxhub_v1_1.h" +#include "mmhub_v9_4.h" +#include "umc_v6_1.h" +#include "umc_v6_0.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" #include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" /* add these here since we already include dce12 headers and these are for DCN */ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d @@ -202,6 +209,11 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, { u32 bits, i, tmp, reg; + /* Devices newer then VEGA10/12 shall have these programming + sequences performed by PSP BL */ + if (adev->asic_type >= CHIP_VEGA20) + return 0; + bits = 0x7f; switch (state) { @@ -240,32 +252,6 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, return 0; } -static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry) -{ - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - amdgpu_ras_reset_gpu(adev, 0); - return AMDGPU_RAS_UE; -} - -static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - struct ras_common_if *ras_if = adev->gmc.ras_if; - struct ras_dispatch_if ih_data = { - .entry = entry, - }; - - if (!ras_if) - return 0; - - ih_data.head = *ras_if; - - amdgpu_ras_interrupt_dispatch(adev, &ih_data); - return 0; -} - static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -284,7 +270,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { + for (j = 0; j < adev->num_vmhubs; j++) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -295,7 +281,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, } break; case AMDGPU_IRQ_STATE_ENABLE: - for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { + for (j = 0; j < adev->num_vmhubs; j++) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -315,10 +301,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; + struct amdgpu_vmhub *hub; bool retry_fault = !!(entry->src_data[1] & 0x80); uint32_t status = 0; u64 addr; + char hub_name[10]; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; @@ -327,8 +314,31 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, entry->timestamp)) return 1; /* This also prevents sending it to KFD */ + if (entry->client_id == SOC15_IH_CLIENTID_VMC) { + snprintf(hub_name, sizeof(hub_name), "mmhub0"); + hub = &adev->vmhub[AMDGPU_MMHUB_0]; + } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { + snprintf(hub_name, sizeof(hub_name), "mmhub1"); + hub = &adev->vmhub[AMDGPU_MMHUB_1]; + } else { + snprintf(hub_name, sizeof(hub_name), "gfxhub0"); + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + } + /* If it's the first fault for this address, process it normally */ + if (retry_fault && !in_interrupt() && + amdgpu_vm_handle_fault(adev, entry->pasid, addr)) + return 1; /* This also prevents sending it to KFD */ + if (!amdgpu_sriov_vf(adev)) { + /* + * Issue a dummy read to wait for the status register to + * be updated to avoid reading an incorrect value due to + * the new fast GRBM interface. + */ + if (entry->vmid_src == AMDGPU_GFXHUB_0) + RREG32(hub->vm_l2_pro_fault_status); + status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } @@ -342,17 +352,33 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, dev_err(adev->dev, "[%s] %s page fault (src_id:%u ring:%u vmid:%u " "pasid:%u, for process %s pid %d thread %s pid %d)\n", - entry->vmid_src ? "mmhub" : "gfxhub", - retry_fault ? "retry" : "no-retry", + hub_name, retry_fault ? "retry" : "no-retry", entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, task_info.task_name, task_info.pid); - dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n", + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", addr, entry->client_id); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, RW)); + + } } return 0; @@ -366,7 +392,7 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = { static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = { .set = gmc_v9_0_ecc_interrupt_state, - .process = gmc_v9_0_process_ecc_irq, + .process = amdgpu_umc_process_ecc_irq, }; static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) @@ -374,8 +400,10 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gmc.vm_fault.num_types = 1; adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; - adev->gmc.ecc_irq.num_types = 1; - adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs; + if (!amdgpu_sriov_vf(adev)) { + adev->gmc.ecc_irq.num_types = 1; + adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs; + } } static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, @@ -397,6 +425,36 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, return req; } +/** + * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore + * + * @adev: amdgpu_device pointer + * @vmhub: vmhub type + * + */ +static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, + uint32_t vmhub) +{ + return ((vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) && + (!amdgpu_sriov_vf(adev)) && + (!(adev->asic_type == CHIP_RAVEN && + adev->rev_id < 0x8 && + adev->pdev->device == 0x15d8))); +} + +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -413,54 +471,172 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, * * Flush the TLB for the requested page table using certain type. */ -static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { + bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); const unsigned eng = 17; - unsigned i, j; + u32 j, inv_req, tmp; + struct amdgpu_vmhub *hub; - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &adev->vmhub[i]; - u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); + BUG_ON(vmhub >= adev->num_vmhubs); - /* This is necessary for a HW workaround under SRIOV as well - * as GFXOFF under bare metal - */ - if (adev->gfx.kiq.ring.sched.ready && - (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - !adev->in_gpu_reset) { - uint32_t req = hub->vm_inv_eng0_req + eng; - uint32_t ack = hub->vm_inv_eng0_ack + eng; - - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, - 1 << vmid); - continue; - } + hub = &adev->vmhub[vmhub]; + inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); + + /* This is necessary for a HW workaround under SRIOV as well + * as GFXOFF under bare metal + */ + if (adev->gfx.kiq.ring.sched.ready && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && + !adev->in_gpu_reset) { + uint32_t req = hub->vm_inv_eng0_req + eng; + uint32_t ack = hub->vm_inv_eng0_ack + eng; + + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid); + return; + } + + spin_lock(&adev->gmc.invalidate_lock); + + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ - spin_lock(&adev->gmc.invalidate_lock); - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); - if (tmp & (1 << vmid)) + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) break; udelay(1); } - spin_unlock(&adev->gmc.invalidate_lock); - if (j < adev->usec_timeout) - continue; - DRM_ERROR("Timeout waiting for VM flush ACK!\n"); + if (j >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); + + /* + * Issue a dummy read to wait for the ACK register to be cleared + * to avoid a false ACK due to the new fast GRBM interface. + */ + if (vmhub == AMDGPU_GFXHUB_0) + RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + + for (j = 0; j < adev->usec_timeout; j++) { + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + if (tmp & (1 << vmid)) + break; + udelay(1); } + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (use_semaphore) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + + spin_unlock(&adev->gmc.invalidate_lock); + + if (j < adev->usec_timeout) + return; + + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); +} + +/** + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (adev->in_gpu_reset) + return -EIO; + + if (ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + /* 2 dwords flush + 8 dwords fence */ + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v9_0_flush_gpu_tlb(adev, vmid, + i, flush_type); + } else { + gmc_v9_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, flush_type); + } + break; + } + } + + return 0; + } static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { + bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); struct amdgpu_device *adev = ring->adev; struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (use_semaphore) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -471,6 +647,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, hub->vm_inv_eng0_ack + eng, req, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (use_semaphore) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + return pd_addr; } @@ -480,7 +664,11 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, struct amdgpu_device *adev = ring->adev; uint32_t reg; - if (ring->funcs->vmhub == AMDGPU_GFXHUB) + /* Do nothing because there's no lut register for mmhub1. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_1) + return; + + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -520,44 +708,25 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, * 0 valid */ -static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) +static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) { - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - - switch (flags & AMDGPU_VM_MTYPE_MASK) { + switch (flags) { case AMDGPU_VM_MTYPE_DEFAULT: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); case AMDGPU_VM_MTYPE_NC: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); case AMDGPU_VM_MTYPE_WC: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_WC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC); + case AMDGPU_VM_MTYPE_RW: + return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW); case AMDGPU_VM_MTYPE_CC: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); case AMDGPU_VM_MTYPE_UC: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_UC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC); default: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); } - - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; } static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, @@ -584,12 +753,35 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, } } +static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + + *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; + *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK; + + if (mapping->flags & AMDGPU_PTE_PRT) { + *flags |= AMDGPU_PTE_PRT; + *flags &= ~AMDGPU_PTE_VALID; + } + + if (adev->asic_type == CHIP_ARCTURUS && + !(*flags & AMDGPU_PTE_SYSTEM) && + mapping->bo_va->is_xgmi) + *flags |= AMDGPU_PTE_SNOOPED; +} + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, - .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, - .get_vm_pde = gmc_v9_0_get_vm_pde + .map_mtype = gmc_v9_0_map_mtype, + .get_vm_pde = gmc_v9_0_get_vm_pde, + .get_vm_pte = gmc_v9_0_get_vm_pte }; static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -597,12 +789,55 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; } +static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + adev->umc.funcs = &umc_v6_0_funcs; + break; + case CHIP_VEGA20: + adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; + adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20; + adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; + adev->umc.funcs = &umc_v6_1_funcs; + break; + case CHIP_ARCTURUS: + adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; + adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT; + adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; + adev->umc.funcs = &umc_v6_1_funcs; + break; + default: + break; + } +} + +static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA20: + adev->mmhub.funcs = &mmhub_v1_0_funcs; + break; + case CHIP_ARCTURUS: + adev->mmhub.funcs = &mmhub_v9_4_funcs; + break; + default: + break; + } +} + static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; gmc_v9_0_set_gmc_funcs(adev); gmc_v9_0_set_irq_funcs(adev); + gmc_v9_0_set_umc_funcs(adev); + gmc_v9_0_set_mmhub_funcs(adev); adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = @@ -629,6 +864,8 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: return true; case CHIP_VEGA12: case CHIP_VEGA20: @@ -637,136 +874,15 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) } } -static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring; - unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = - {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP}; - unsigned i; - unsigned vmhub, inv_eng; - - for (i = 0; i < adev->num_rings; ++i) { - ring = adev->rings[i]; - vmhub = ring->funcs->vmhub; - - inv_eng = ffs(vm_inv_engs[vmhub]); - if (!inv_eng) { - dev_err(adev->dev, "no VM inv eng for ring %s\n", - ring->name); - return -EINVAL; - } - - ring->vm_inv_eng = inv_eng - 1; - vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng); - - dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", - ring->name, ring->vm_inv_eng, ring->funcs->vmhub); - } - - return 0; -} - -static int gmc_v9_0_ecc_late_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_common_if **ras_if = &adev->gmc.ras_if; - struct ras_ih_if ih_info = { - .cb = gmc_v9_0_process_ras_data_cb, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "umc_err_count", - .debugfs_name = "umc_err_inject", - }; - struct ras_common_if ras_block = { - .block = AMDGPU_RAS_BLOCK__UMC, - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - .sub_block_index = 0, - .name = "umc", - }; - int r; - - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) { - amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); - return 0; - } - /* handle resume path. */ - if (*ras_if) { - /* resend ras TA enable cmd during resume. - * prepare to handle failure. - */ - ih_info.head = **ras_if; - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - /* request a gpu reset. will run again. */ - amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__UMC); - return 0; - } - /* fail to enable ras, cleanup all. */ - goto irq; - } - /* enable successfully. continue. */ - goto resume; - } - - *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); - if (!*ras_if) - return -ENOMEM; - - **ras_if = ras_block; - - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__UMC); - r = 0; - } - goto feature; - } - - ih_info.head = **ras_if; - fs_info.head = **ras_if; - - r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); - if (r) - goto interrupt; - - amdgpu_ras_debugfs_create(adev, &fs_info); - - r = amdgpu_ras_sysfs_create(adev, &fs_info); - if (r) - goto sysfs; -resume: - r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); - if (r) - goto irq; - - return 0; -irq: - amdgpu_ras_sysfs_remove(adev, *ras_if); -sysfs: - amdgpu_ras_debugfs_remove(adev, *ras_if); - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); -interrupt: - amdgpu_ras_feature_enable(adev, *ras_if, 0); -feature: - kfree(*ras_if); - *ras_if = NULL; - return r; -} - - static int gmc_v9_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool r; + int r; if (!gmc_v9_0_keep_stolen_memory(adev)) amdgpu_bo_late_init(adev); - r = gmc_v9_0_allocate_vm_inv_eng(adev); + r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) return r; /* Check if ecc is available */ @@ -774,11 +890,12 @@ static int gmc_v9_0_late_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA20: + case CHIP_ARCTURUS: r = amdgpu_atomfirmware_mem_ecc_supported(adev); if (!r) { DRM_INFO("ECC is not present.\n"); - if (adev->df_funcs->enable_ecc_force_par_wr_rmw) - adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); + if (adev->df.funcs->enable_ecc_force_par_wr_rmw) + adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); } else { DRM_INFO("ECC is active.\n"); } @@ -795,7 +912,7 @@ static int gmc_v9_0_late_init(void *handle) } } - r = gmc_v9_0_ecc_late_init(handle); + r = amdgpu_gmc_ras_late_init(adev); if (r) return r; @@ -806,14 +923,17 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) { u64 base = 0; - if (!amdgpu_sriov_vf(adev)) + + if (adev->asic_type == CHIP_ARCTURUS) + base = mmhub_v9_4_get_fb_location(adev); + else if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); + /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc); - if (!amdgpu_sriov_vf(adev)) - amdgpu_gmc_agp_location(adev, mc); + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); @@ -833,33 +953,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, */ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) { - int chansize, numchan; int r; - if (amdgpu_sriov_vf(adev)) { - /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, - * and DF related registers is not readable, seems hardcord is the - * only way to set the correct vram_width - */ - adev->gmc.vram_width = 2048; - } else if (amdgpu_emu_mode != 1) { - adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); - } - - if (!adev->gmc.vram_width) { - /* hbm memory channel size */ - if (adev->flags & AMD_IS_APU) - chansize = 64; - else - chansize = 128; - - numchan = adev->df_funcs->get_hbm_channel_number(adev); - adev->gmc.vram_width = numchan * chansize; - } - /* size in MB on si */ adev->gmc.mc_vram_size = - adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU)) { @@ -887,10 +985,12 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) case CHIP_VEGA10: /* all engines support GPUVM */ case CHIP_VEGA12: /* all engines support GPUVM */ case CHIP_VEGA20: + case CHIP_ARCTURUS: default: adev->gmc.gart_size = 512ULL << 20; break; case CHIP_RAVEN: /* DCE SG support */ + case CHIP_RENOIR: adev->gmc.gart_size = 1024ULL << 20; break; } @@ -923,7 +1023,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) { - u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); + u32 d1vga_control; unsigned size; /* @@ -933,6 +1033,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) if (gmc_v9_0_keep_stolen_memory(adev)) return 9 * 1024 * 1024; + d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ } else { @@ -940,6 +1041,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); size = (REG_GET_FIELD(viewport, HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * @@ -967,18 +1069,47 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v9_0_sw_init(void *handle) { - int r; - int dma_bits; + int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfxhub_v1_0_init(adev); - mmhub_v1_0_init(adev); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_init(adev); + else + mmhub_v1_0_init(adev); spin_lock_init(&adev->gmc.invalidate_lock); - adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + if (amdgpu_sriov_vf(adev)) + /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, + * and DF related registers is not readable, seems hardcord is the + * only way to set the correct vram_width + */ + adev->gmc.vram_width = 2048; + else if (amdgpu_emu_mode != 1) + adev->gmc.vram_width = vram_width; + + if (!adev->gmc.vram_width) { + int chansize, numchan; + + /* hbm memory channel size */ + if (adev->flags & AMD_IS_APU) + chansize = 64; + else + chansize = 128; + + numchan = adev->df.funcs->get_hbm_channel_number(adev); + adev->gmc.vram_width = numchan * chansize; + } + + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; switch (adev->asic_type) { case CHIP_RAVEN: + adev->num_vmhubs = 2; + if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); } else { @@ -991,6 +1122,10 @@ static int gmc_v9_0_sw_init(void *handle) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_RENOIR: + adev->num_vmhubs = 2; + + /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Vega10, @@ -1002,6 +1137,12 @@ static int gmc_v9_0_sw_init(void *handle) else amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; + case CHIP_ARCTURUS: + adev->num_vmhubs = 3; + + /* Keep the vm size same with Vega20 */ + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + break; default: break; } @@ -1012,17 +1153,26 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + if (adev->asic_type == CHIP_ARCTURUS) { + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT, + &adev->gmc.vm_fault); + if (r) + return r; + } + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); if (r) return r; - /* interrupt sent to DF. */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0, - &adev->gmc.ecc_irq); - if (r) - return r; + if (!amdgpu_sriov_vf(adev)) { + /* interrupt sent to DF. */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0, + &adev->gmc.ecc_irq); + if (r) + return r; + } /* Set the internal MC address mask * This is the max address of the GPU's @@ -1030,25 +1180,12 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - /* set DMA mask + need_dma32 flags. - * PCIE - can handle 44-bits. - * IGP - can handle 44-bits - * PCI - dma32 for legacy pci gart, 44 bits on vega10 - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 32 : 44; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); - } - adev->need_swiotlb = drm_need_swiotlb(dma_bits); + adev->need_swiotlb = drm_need_swiotlb(44); if (adev->gmc.xgmi.supported) { r = gfxhub_v1_1_get_xgmi_info(adev); @@ -1077,8 +1214,9 @@ static int gmc_v9_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS; amdgpu_vm_manager_init(adev); @@ -1088,28 +1226,14 @@ static int gmc_v9_0_sw_init(void *handle) static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + void *stolen_vga_buf; - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && - adev->gmc.ras_if) { - struct ras_common_if *ras_if = adev->gmc.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - }; - - /*remove fs first*/ - amdgpu_ras_debugfs_remove(adev, ras_if); - amdgpu_ras_sysfs_remove(adev, ras_if); - /*remove the IH*/ - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); - amdgpu_ras_feature_enable(adev, ras_if, 0); - kfree(ras_if); - } - + amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); if (gmc_v9_0_keep_stolen_memory(adev)) - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); @@ -1123,7 +1247,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) break; /* fall through */ case CHIP_VEGA20: @@ -1137,6 +1261,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) case CHIP_VEGA12: break; case CHIP_RAVEN: + /* TODO for renoir */ soc15_program_register_sequence(adev, golden_settings_athub_1_0_0, ARRAY_SIZE(golden_settings_athub_1_0_0)); @@ -1154,12 +1279,6 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) { int r; - bool value; - u32 tmp; - - amdgpu_device_program_register_sequence(adev, - golden_settings_vega10_hdp, - ARRAY_SIZE(golden_settings_vega10_hdp)); if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -1169,42 +1288,17 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - switch (adev->asic_type) { - case CHIP_RAVEN: - mmhub_v1_0_update_power_gating(adev, true); - break; - default: - break; - } - r = gfxhub_v1_0_gart_enable(adev); if (r) return r; - r = mmhub_v1_0_gart_enable(adev); + if (adev->asic_type == CHIP_ARCTURUS) + r = mmhub_v9_4_gart_enable(adev); + else + r = mmhub_v1_0_gart_enable(adev); if (r) return r; - WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); - - tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); - WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); - - WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); - WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); - - /* After HDP is initialized, flush HDP.*/ - adev->nbio_funcs->hdp_flush(adev, NULL); - - if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) - value = false; - else - value = true; - - gfxhub_v1_0_set_fault_enable_default(adev, value); - mmhub_v1_0_set_fault_enable_default(adev, value); - gmc_v9_0_flush_gpu_tlb(adev, 0, 0); - DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); @@ -1214,20 +1308,69 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) static int gmc_v9_0_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool value; + int r, i; + u32 tmp; /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); if (adev->mode_info.num_crtc) { - /* Lockout access through VGA aperture*/ - WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + if (adev->asic_type != CHIP_ARCTURUS) { + /* Lockout access through VGA aperture*/ + WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); - /* disable VGA render */ - WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + /* disable VGA render */ + WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + } } + amdgpu_device_program_register_sequence(adev, + golden_settings_vega10_hdp, + ARRAY_SIZE(golden_settings_vega10_hdp)); + + switch (adev->asic_type) { + case CHIP_RAVEN: + /* TODO for renoir */ + mmhub_v1_0_update_power_gating(adev, true); + break; + case CHIP_ARCTURUS: + WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1); + break; + default: + break; + } + + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); + + tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); + WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); + + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); + + /* After HDP is initialized, flush HDP.*/ + adev->nbio.funcs->hdp_flush(adev, NULL); + + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) + value = false; + else + value = true; + + if (!amdgpu_sriov_vf(adev)) { + gfxhub_v1_0_set_fault_enable_default(adev, value); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_set_fault_enable_default(adev, value); + else + mmhub_v1_0_set_fault_enable_default(adev, value); + } + for (i = 0; i < adev->num_vmhubs; ++i) + gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); + + if (adev->umc.funcs && adev->umc.funcs->init_registers) + adev->umc.funcs->init_registers(adev); + r = gmc_v9_0_gart_enable(adev); return r; @@ -1243,7 +1386,10 @@ static int gmc_v9_0_hw_init(void *handle) static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) { gfxhub_v1_0_gart_disable(adev); - mmhub_v1_0_gart_disable(adev); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_gart_disable(adev); + else + mmhub_v1_0_gart_disable(adev); amdgpu_gart_table_vram_unpin(adev); } @@ -1308,14 +1454,26 @@ static int gmc_v9_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return mmhub_v1_0_set_clockgating(adev, state); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_set_clockgating(adev, state); + else + mmhub_v1_0_set_clockgating(adev, state); + + athub_v1_0_set_clockgating(adev, state); + + return 0; } static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - mmhub_v1_0_get_clockgating(adev, flags); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_get_clockgating(adev, flags); + else + mmhub_v1_0_get_clockgating(adev, flags); + + athub_v1_0_get_clockgating(adev, flags); } static int gmc_v9_0_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h index 5c8deac65580..e0585e8c6c1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h @@ -24,17 +24,6 @@ #ifndef __GMC_V9_0_H__ #define __GMC_V9_0_H__ - /* - * The latest engine allocation on gfx9 is: - * Engine 2, 3: firmware - * Engine 0, 1, 4~16: amdgpu ring, - * subject to change when ring number changes - * Engine 17: Gart flushes - */ -#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 -#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 - extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c new file mode 100644 index 000000000000..0debfd9f428c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -0,0 +1,586 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "soc15.h" +#include "soc15d.h" +#include "vcn_v1_0.h" + +#include "vcn/vcn_1_0_offset.h" +#include "vcn/vcn_1_0_sh_mask.h" + +static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev); + +static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) +{ + struct amdgpu_device *adev = ring->adev; + ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + ring->ring[(*ptr)++] = 0; + ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0); + } else { + ring->ring[(*ptr)++] = reg_offset; + ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0); + } + ring->ring[(*ptr)++] = val; +} + +static void jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr) +{ + struct amdgpu_device *adev = ring->adev; + + uint32_t reg, reg_offset, val, mask, i; + + // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW); + reg_offset = (reg << 2); + val = lower_32_bits(ring->gpu_addr); + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH); + reg_offset = (reg << 2); + val = upper_32_bits(ring->gpu_addr); + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 3rd to 5th: issue MEM_READ commands + for (i = 0; i <= 2; i++) { + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2); + ring->ring[ptr++] = 0; + } + + // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x13; + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 7th: program mmUVD_JRBC_RB_REF_DATA + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA); + reg_offset = (reg << 2); + val = 0x1; + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x1; + mask = 0x1; + + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0); + ring->ring[ptr++] = 0x01400200; + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0); + ring->ring[ptr++] = val; + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + ring->ring[ptr++] = 0; + ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3); + } else { + ring->ring[ptr++] = reg_offset; + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3); + } + ring->ring[ptr++] = mask; + + //9th to 21st: insert no-op + for (i = 0; i <= 12; i++) { + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); + ring->ring[ptr++] = 0; + } + + //22nd: reset mmUVD_JRBC_RB_RPTR + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_RPTR); + reg_offset = (reg << 2); + val = 0; + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); + + //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x12; + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); +} + +/** + * jpeg_v1_0_decode_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v1_0_decode_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v1_0_decode_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); +} + +/** + * jpeg_v1_0_decode_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +static void jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80010000); +} + +/** + * jpeg_v1_0_decode_ring_insert_end - insert a end command + * + * @ring: amdgpu_ring pointer + * + * Write a end command to the ring. + */ +static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00010000); +} + +/** + * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + struct amdgpu_device *adev = ring->adev; + + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0xffffffff); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x3fbc); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x1); + + /* emit trap */ + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); + amdgpu_ring_write(ring, 0); +} + +/** + * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer. + */ +static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, ib->length_dw); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); + amdgpu_ring_write(ring, 0x2); +} + +static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val, + uint32_t mask) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE3)); + } + amdgpu_ring_write(ring, mask); +} + +static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + } + amdgpu_ring_write(ring, val); +} + +static void jpeg_v1_0_decode_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } +} + +static int jpeg_v1_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: JPEG decode TRAP\n"); + + switch (entry->src_id) { + case 126: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +/** + * jpeg_v1_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +int jpeg_v1_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->jpeg.num_jpeg_inst = 1; + + jpeg_v1_0_set_dec_ring_funcs(adev); + jpeg_v1_0_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v1_0_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + */ +int jpeg_v1_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->jpeg.inst->irq); + if (r) + return r; + + ring = &adev->jpeg.inst->ring_dec; + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch = adev->jpeg.inst->external.jpeg_pitch = + SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); + + return 0; +} + +/** + * jpeg_v1_0_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG free up sw allocation + */ +void jpeg_v1_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_ring_fini(&adev->jpeg.inst[0].ring_dec); +} + +/** + * jpeg_v1_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +void jpeg_v1_0_start(struct amdgpu_device *adev, int mode) +{ + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + + if (mode == 0) { + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + } + + /* initialize wptr */ + ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); + + /* copy patch commands to the jpeg ring */ + jpeg_v1_0_decode_ring_set_patch_ring(ring, + (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); +} + +static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .nop = PACKET0(0x81ff, 0), + .support_64bit_ptrs = false, + .no_user_fence = true, + .vmhub = AMDGPU_MMHUB_0, + .extra_dw = 64, + .get_rptr = jpeg_v1_0_decode_ring_get_rptr, + .get_wptr = jpeg_v1_0_decode_ring_get_wptr, + .set_wptr = jpeg_v1_0_decode_ring_set_wptr, + .emit_frame_size = + 6 + 6 + /* hdp invalidate / flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v1_0_decode_ring_emit_vm_flush */ + 26 + 26 + /* jpeg_v1_0_decode_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 22, /* jpeg_v1_0_decode_ring_emit_ib */ + .emit_ib = jpeg_v1_0_decode_ring_emit_ib, + .emit_fence = jpeg_v1_0_decode_ring_emit_fence, + .emit_vm_flush = jpeg_v1_0_decode_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v1_0_decode_ring_nop, + .insert_start = jpeg_v1_0_decode_ring_insert_start, + .insert_end = jpeg_v1_0_decode_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = vcn_v1_0_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = jpeg_v1_0_decode_ring_emit_wreg, + .emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec.funcs = &jpeg_v1_0_decode_ring_vm_funcs; + DRM_INFO("JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = { + .set = jpeg_v1_0_set_interrupt_state, + .process = jpeg_v1_0_process_interrupt, +}; + +static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h new file mode 100644 index 000000000000..bbf33a6a3972 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h @@ -0,0 +1,32 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V1_0_H__ +#define __JPEG_V1_0_H__ + +int jpeg_v1_0_early_init(void *handle); +int jpeg_v1_0_sw_init(void *handle); +void jpeg_v1_0_sw_fini(void *handle); +void jpeg_v1_0_start(struct amdgpu_device *adev, int mode); + +#endif /*__JPEG_V1_0_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c new file mode 100644 index 000000000000..ff2e6e1ccde7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -0,0 +1,827 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" + +#include "vcn/vcn_2_0_0_offset.h" +#include "vcn/vcn_2_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff +#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029 +#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a +#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb +#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf +#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1 +#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8 +#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9 +#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082 +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed +#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085 +#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 +#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 +#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + +#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 + +static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v2_0_set_powergating_state(void *handle, + enum amd_powergating_state state); + +/** + * jpeg_v2_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v2_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->jpeg.num_jpeg_inst = 1; + + jpeg_v2_0_set_dec_ring_funcs(adev); + jpeg_v2_0_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v2_0_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v2_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + if (r) + return r; + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + ring = &adev->jpeg.inst->ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); + + return 0; +} + +/** + * jpeg_v2_0_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v2_0_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v2_0_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v2_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + int r; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + r = amdgpu_ring_test_helper(ring); + if (!r) + DRM_INFO("JPEG decode initialized successfully.\n"); + + return r; +} + +/** + * jpeg_v2_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v2_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) + jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + + ring->sched.ready = false; + + return 0; +} + +/** + * jpeg_v2_0_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v2_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v2_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v2_0_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v2_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v2_0_hw_init(adev); + + return r; +} + +static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev) +{ + uint32_t data; + int r = 0; + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; + WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data); + + SOC15_WAIT_ON_RREG(JPEG, 0, + mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); + + if (r) { + DRM_ERROR("amdgpu: JPEG disable power gating failed\n"); + return r; + } + } + + /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */ + data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1; + WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data); + + return 0; +} + +static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev) +{ + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + uint32_t data; + int r = 0; + + data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS)); + data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK; + data |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF; + WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data); + + data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; + WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data); + + SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS, + (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT), + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); + + if (r) { + DRM_ERROR("amdgpu: JPEG enable power gating failed\n"); + return r; + } + } + + return 0; +} + +static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device* adev) +{ + uint32_t data; + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE); + data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JPEG_ENC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data); +} + +static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device* adev) +{ + uint32_t data; + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JPEG_ENC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data); +} + +/** + * jpeg_v2_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v2_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + int r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + /* disable power gating */ + r = jpeg_v2_0_disable_power_gating(adev); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v2_0_disable_clock_gating(adev); + + WREG32_SOC15(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v2_0_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v2_0_stop(struct amdgpu_device *adev) +{ + int r; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable JPEG CGC */ + jpeg_v2_0_enable_clock_gating(adev); + + /* enable power gating */ + r = jpeg_v2_0_enable_power_gating(adev); + if (r) + return r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, false); + + return 0; +} + +/** + * jpeg_v2_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v2_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v2_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +/** + * jpeg_v2_0_dec_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80010000); +} + +/** + * jpeg_v2_0_dec_ring_insert_end - insert a end command + * + * @ring: amdgpu_ring pointer + * + * Write a end command to the ring. + */ +void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00010000); +} + +/** + * jpeg_v2_0_dec_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x3fbc); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x1); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); + amdgpu_ring_write(ring, 0); +} + +/** + * jpeg_v2_0_dec_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer. + */ +void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, ib->length_dw); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); + amdgpu_ring_write(ring, 0x2); +} + +void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE3)); + } + amdgpu_ring_write(ring, mask); +} + +void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + jpeg_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); +} + +void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + } + amdgpu_ring_write(ring, val); +} + +void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } +} + +static bool jpeg_v2_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); +} + +static int jpeg_v2_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 0; + + SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret); + + return ret; +} + +static int jpeg_v2_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE); + + if (enable) { + if (jpeg_v2_0_is_idle(handle)) + return -EBUSY; + jpeg_v2_0_enable_clock_gating(adev); + } else { + jpeg_v2_0_disable_clock_gating(adev); + } + + return 0; +} + +static int jpeg_v2_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v2_0_stop(adev); + else + ret = jpeg_v2_0_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v2_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { + .name = "jpeg_v2_0", + .early_init = jpeg_v2_0_early_init, + .late_init = NULL, + .sw_init = jpeg_v2_0_sw_init, + .sw_fini = jpeg_v2_0_sw_fini, + .hw_init = jpeg_v2_0_hw_init, + .hw_fini = jpeg_v2_0_hw_fini, + .suspend = jpeg_v2_0_suspend, + .resume = jpeg_v2_0_resume, + .is_idle = jpeg_v2_0_is_idle, + .wait_for_idle = jpeg_v2_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v2_0_set_clockgating_state, + .set_powergating_state = jpeg_v2_0_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = jpeg_v2_0_dec_ring_get_rptr, + .get_wptr = jpeg_v2_0_dec_ring_get_wptr, + .set_wptr = jpeg_v2_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */ + 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */ + .emit_ib = jpeg_v2_0_dec_ring_emit_ib, + .emit_fence = jpeg_v2_0_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v2_0_dec_ring_nop, + .insert_start = jpeg_v2_0_dec_ring_insert_start, + .insert_end = jpeg_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec.funcs = &jpeg_v2_0_dec_ring_vm_funcs; + DRM_INFO("JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v2_0_irq_funcs = { + .set = jpeg_v2_0_set_interrupt_state, + .process = jpeg_v2_0_process_interrupt, +}; + +static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.num_types = 1; + adev->jpeg.inst->irq.funcs = &jpeg_v2_0_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &jpeg_v2_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h new file mode 100644 index 000000000000..15a344ed340f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -0,0 +1,42 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V2_0_H__ +#define __JPEG_V2_0_H__ + +void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); +void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); +void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags); +void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr); +void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); + +extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block; + +#endif /* __JPEG_V2_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c new file mode 100644 index 000000000000..c6d046df4b70 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -0,0 +1,641 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v2_0.h" + +#include "vcn/vcn_2_5_offset.h" +#include "vcn/vcn_2_5_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + +#define JPEG25_MAX_HW_INSTANCES_ARCTURUS 2 + +static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state); + +static int amdgpu_ih_clientid_jpeg[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +/** + * jpeg_v2_5_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v2_5_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_ARCTURUS) { + u32 harvest; + int i; + + adev->jpeg.num_jpeg_inst = JPEG25_MAX_HW_INSTANCES_ARCTURUS; + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + harvest = RREG32_SOC15(JPEG, i, mmCC_UVD_HARVESTING); + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + adev->jpeg.harvest_config |= 1 << i; + } + + if (adev->jpeg.harvest_config == (AMDGPU_JPEG_HARVEST_JPEG0 | + AMDGPU_JPEG_HARVEST_JPEG1)) + return -ENOENT; + } else + adev->jpeg.num_jpeg_inst = 1; + + jpeg_v2_5_set_dec_ring_funcs(adev); + jpeg_v2_5_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v2_5_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v2_5_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int i, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq); + if (r) + return r; + } + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = &adev->jpeg.inst[i].ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; + sprintf(ring->name, "jpeg_dec_%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, 0); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH); + } + + return 0; +} + +/** + * jpeg_v2_5_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v2_5_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v2_5_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v2_5_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = &adev->jpeg.inst[i].ring_dec; + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + DRM_INFO("JPEG decode initialized successfully.\n"); + + return 0; +} + +/** + * jpeg_v2_5_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v2_5_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = &adev->jpeg.inst[i].ring_dec; + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS)) + jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + + ring->sched.ready = false; + } + + return 0; +} + +/** + * jpeg_v2_5_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v2_5_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v2_5_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v2_5_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v2_5_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v2_5_hw_init(adev); + + return r; +} + +static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device* adev, int inst) +{ + uint32_t data; + + data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE); + data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JPEG_ENC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data); + + data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL); + data &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK + | JPEG_CGC_CTRL__JMCIF_MODE_MASK + | JPEG_CGC_CTRL__JRBBM_MODE_MASK); + WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data); +} + +static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device* adev, int inst) +{ + uint32_t data; + + data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JPEG_ENC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data); +} + +/** + * jpeg_v2_5_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v2_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = &adev->jpeg.inst[i].ring_dec; + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + jpeg_v2_5_disable_clock_gating(adev, i); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR); + } + + return 0; +} + +/** + * jpeg_v2_5_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v2_5_stop(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v2_5_enable_clock_gating(adev, i); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + } + + return 0; +} + +/** + * jpeg_v2_5_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v2_5_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v2_5_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v2_5_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ret &= (((RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + } + + return ret; +} + +static int jpeg_v2_5_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret); + if (ret) + return ret; + } + + return ret; +} + +static int jpeg_v2_5_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE); + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (enable) { + if (jpeg_v2_5_is_idle(handle)) + return -EBUSY; + jpeg_v2_5_enable_clock_gating(adev, i); + } else { + jpeg_v2_5_disable_clock_gating(adev, i); + } + } + + return 0; +} + +static int jpeg_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if(state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v2_5_stop(adev); + else + ret = jpeg_v2_5_start(adev); + + if(!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v2_5_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + + DRM_DEBUG("IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { + .name = "jpeg_v2_5", + .early_init = jpeg_v2_5_early_init, + .late_init = NULL, + .sw_init = jpeg_v2_5_sw_init, + .sw_fini = jpeg_v2_5_sw_fini, + .hw_init = jpeg_v2_5_hw_init, + .hw_fini = jpeg_v2_5_hw_fini, + .suspend = jpeg_v2_5_suspend, + .resume = jpeg_v2_5_resume, + .is_idle = jpeg_v2_5_is_idle, + .wait_for_idle = jpeg_v2_5_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v2_5_set_clockgating_state, + .set_powergating_state = jpeg_v2_5_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = jpeg_v2_5_dec_ring_get_rptr, + .get_wptr = jpeg_v2_5_dec_ring_get_wptr, + .set_wptr = jpeg_v2_5_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v2_5_dec_ring_emit_vm_flush */ + 18 + 18 + /* jpeg_v2_5_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v2_5_dec_ring_emit_ib */ + .emit_ib = jpeg_v2_0_dec_ring_emit_ib, + .emit_fence = jpeg_v2_0_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v2_0_dec_ring_nop, + .insert_start = jpeg_v2_0_dec_ring_insert_start, + .insert_end = jpeg_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_5_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec.me = i; + DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i); + } +} + +static const struct amdgpu_irq_src_funcs jpeg_v2_5_irq_funcs = { + .set = jpeg_v2_5_set_interrupt_state, + .process = jpeg_v2_5_process_interrupt, +}; + +static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + adev->jpeg.inst[i].irq.num_types = 1; + adev->jpeg.inst[i].irq.funcs = &jpeg_v2_5_irq_funcs; + } +} + +const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 2, + .minor = 5, + .rev = 0, + .funcs = &jpeg_v2_5_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h new file mode 100644 index 000000000000..2b4087c02620 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V2_5_H__ +#define __JPEG_V2_5_H__ + +extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block; + +#endif /* __JPEG_V2_5_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index dc5ce03034d3..49a3a56ec017 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -21,15 +21,14 @@ * */ #include "amdgpu.h" +#include "amdgpu_ras.h" #include "mmhub_v1_0.h" #include "mmhub/mmhub_1_0_offset.h" #include "mmhub/mmhub_1_0_sh_mask.h" #include "mmhub/mmhub_1_0_default.h" -#include "athub/athub_1_0_offset.h" -#include "athub/athub_1_0_sh_mask.h" #include "vega10_enum.h" - +#include "soc15.h" #include "soc15_common.h" #define mmDAGB0_CNTL_MISC2_RV 0x008f @@ -111,7 +110,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; /* Set default page address. */ @@ -159,7 +158,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; /* Setup L2 cache */ @@ -203,12 +202,14 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp); } static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) { - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, @@ -348,7 +349,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) 0); WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); - if (!amdgpu_virt_support_skip_setting(adev)) { + if (!amdgpu_sriov_vf(adev)) { /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); @@ -367,7 +368,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); @@ -407,7 +408,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) void mmhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, @@ -415,6 +416,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = @@ -491,22 +494,6 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2); } -static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) -{ - uint32_t def, data; - - def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) - data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; - else - data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; - - if (def != data) - WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); -} - static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable) { @@ -523,23 +510,6 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data); } -static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, - bool enable) -{ - uint32_t def, data; - - def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && - (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) - data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; - else - data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; - - if(def != data) - WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); -} - int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state) { @@ -551,14 +521,11 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_RENOIR: mmhub_v1_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); - athub_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); mmhub_v1_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); - athub_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -569,18 +536,218 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) { - int data; + int data, data1; if (amdgpu_sriov_vf(adev)) *flags = 0; + data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); + + data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + /* AMD_CG_SUPPORT_MC_MGCG */ - data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + if ((data & ATC_L2_MISC_CG__ENABLE_MASK) && + !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) *flags |= AMD_CG_SUPPORT_MC_MGCG; /* AMD_CG_SUPPORT_MC_LS */ - data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_MC_LS; } + +static const struct soc15_ras_field_entry mmhub_v1_0_ras_fields[] = { + { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + } +}; + +static const struct soc15_reg_entry mmhub_v1_0_edc_cnt_regs[] = { + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), 0, 0, 0}, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), 0, 0, 0}, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), 0, 0, 0}, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), 0, 0, 0}, +}; + +static int mmhub_v1_0_get_ras_error_count(const struct soc15_reg_entry *reg, + uint32_t value, uint32_t *sec_count, uint32_t *ded_count) +{ + uint32_t i; + uint32_t sec_cnt, ded_cnt; + + for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_ras_fields); i++) { + if(mmhub_v1_0_ras_fields[i].reg_offset != reg->reg_offset) + continue; + + sec_cnt = (value & + mmhub_v1_0_ras_fields[i].sec_count_mask) >> + mmhub_v1_0_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("MMHUB SubBlock %s, SEC %d\n", + mmhub_v1_0_ras_fields[i].name, + sec_cnt); + *sec_count += sec_cnt; + } + + ded_cnt = (value & + mmhub_v1_0_ras_fields[i].ded_count_mask) >> + mmhub_v1_0_ras_fields[i].ded_count_shift; + if (ded_cnt) { + DRM_INFO("MMHUB SubBlock %s, DED %d\n", + mmhub_v1_0_ras_fields[i].name, + ded_cnt); + *ded_count += ded_cnt; + } + } + + return 0; +} + +static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0, ded_count = 0; + uint32_t i; + uint32_t reg_value; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_edc_cnt_regs); i++) { + reg_value = + RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i])); + if (reg_value) + mmhub_v1_0_get_ras_error_count(&mmhub_v1_0_edc_cnt_regs[i], + reg_value, &sec_count, &ded_count); + } + + err_data->ce_count += sec_count; + err_data->ue_count += ded_count; +} + +const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = { + .ras_late_init = amdgpu_mmhub_ras_late_init, + .query_ras_error_count = mmhub_v1_0_query_ras_error_count, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index 0de0fdf98c00..c43319e8f945 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -23,6 +23,8 @@ #ifndef __MMHUB_V1_0_H__ #define __MMHUB_V1_0_H__ +extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs; + u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev); int mmhub_v1_0_gart_enable(struct amdgpu_device *adev); void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 0f9549f19ade..bde189680521 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -31,20 +31,25 @@ #include "soc15_common.h" -static void mmhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev) +void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) { - uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); + /* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */ + int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; - WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - lower_32_bits(value)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + offset * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - upper_32_bits(value)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + offset * vmid, upper_32_bits(page_table_base)); } static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) { - mmhub_v2_0_init_gart_pt_regs(adev); + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base); WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); @@ -126,7 +131,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) /* XXX for emulation, Refer to closed source code.*/ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); - tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); @@ -137,6 +142,15 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp); tmp = mmMMVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp); tmp = mmMMVM_L2_CNTL4_DEFAULT; @@ -152,6 +166,8 @@ static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp); } @@ -324,7 +340,7 @@ void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) void mmhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, @@ -332,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = @@ -406,10 +424,12 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: mmhub_v2_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); mmhub_v2_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h index db16f3ece218..3ea4344f0315 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h @@ -31,5 +31,7 @@ void mmhub_v2_0_init(struct amdgpu_device *adev); int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c new file mode 100644 index 000000000000..a5281df8d84f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -0,0 +1,1602 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_ras.h" +#include "mmhub_v9_4.h" + +#include "mmhub/mmhub_9_4_1_offset.h" +#include "mmhub/mmhub_9_4_1_sh_mask.h" +#include "mmhub/mmhub_9_4_1_default.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" +#include "soc15.h" +#include "soc15_common.h" + +#define MMHUB_NUM_INSTANCES 2 +#define MMHUB_INSTANCE_REGISTER_OFFSET 0x3000 + +u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) +{ + /* The base should be same b/t 2 mmhubs on Acrturus. Read one here. */ + u64 base = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE); + u64 top = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP); + + base &= VMSHAREDVC0_MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + top &= VMSHAREDVC0_MC_VM_FB_LOCATION_TOP__FB_TOP_MASK; + top <<= 24; + + adev->gmc.fb_start = base; + adev->gmc.fb_end = top; + + return base; +} + +static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid, + uint32_t vmid, uint64_t value) +{ + /* two registers distance between mmVML2VC0_VM_CONTEXT0_* to + * mmVML2VC0_VM_CONTEXT1_* + */ + int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + lower_32_bits(value)); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + upper_32_bits(value)); + +} + +static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, + int hubid) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_end >> 44)); +} + +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) +{ + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) + mmhub_v9_4_setup_hubid_vm_pt_regs(adev, i, vmid, + page_table_base); +} + +static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, + int hubid) +{ + uint64_t value; + uint32_t tmp; + + /* Program the AGP BAR */ + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BASE, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_TOP, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.agp_end >> 24); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BOT, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.agp_start >> 24); + + if (!amdgpu_sriov_vf(adev)) { + /* Program the system aperture low logical page number. */ + WREG32_SOC15_OFFSET( + MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + WREG32_SOC15_OFFSET( + MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15_OFFSET( + MMHUB, 0, + mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(value >> 12)); + WREG32_SOC15_OFFSET( + MMHUB, 0, + mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15_OFFSET( + MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15_OFFSET( + MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15_OFFSET( + MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + tmp); + } +} + +static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ATC_EN, 1); + + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, + INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, + INVALIDATE_L2_CACHE, 1); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = mmVML2PF0_VM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL4, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev, + int hubid) +{ + uint32_t tmp; + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, + int hubid) +{ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0XFFFFFFFF); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0x0000000F); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); +} + +static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + int i; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i, + tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } +} + +static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, + int hubid) +{ + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + 0x1f); + } +} + +int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + /* GART Enable. */ + mmhub_v9_4_init_gart_aperture_regs(adev, i); + mmhub_v9_4_init_system_aperture_regs(adev, i); + mmhub_v9_4_init_tlb_regs(adev, i); + if (!amdgpu_sriov_vf(adev)) + mmhub_v9_4_init_cache_regs(adev, i); + + mmhub_v9_4_enable_system_domain(adev, i); + if (!amdgpu_sriov_vf(adev)) + mmhub_v9_4_disable_identity_aperture(adev, i); + mmhub_v9_4_setup_vmid_config(adev, i); + mmhub_v9_4_program_invalidation(adev, i); + } + + return 0; +} + +void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i, j; + + for (j = 0; j < MMHUB_NUM_INSTANCES; j++) { + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET + + i, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, + VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_CACHE, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, + j * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + } +} + +/** + * mmhub_v1_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool value) +{ + u32 tmp; + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + i * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, + VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + if (!value) { + tmp = REG_SET_FIELD(tmp, + VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, + VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + i * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + } +} + +void mmhub_v9_4_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = + {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]}; + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + hub[i]->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_SEM) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ACK) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_CNTL) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_STATUS) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + } +} + +static void mmhub_v9_4_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data, def1, data1; + int i, j; + int dist = mmDAGB1_CNTL_MISC2 - mmDAGB0_CNTL_MISC2; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + def = data = RREG32_SOC15_OFFSET(MMHUB, 0, + mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; + else + data &= ~ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; + + if (def != data) + WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET, data); + + for (j = 0; j < 5; j++) { + def1 = data1 = RREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_CNTL_MISC2, + i * MMHUB_INSTANCE_REGISTER_OFFSET + + j * dist); + if (enable && + (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + data1 &= + ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } else { + data1 |= + (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } + + if (def1 != data1) + WREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_CNTL_MISC2, + i * MMHUB_INSTANCE_REGISTER_OFFSET + + j * dist, data1); + + if (i == 1 && j == 3) + break; + } + } +} + +static void mmhub_v9_4_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + def = data = RREG32_SOC15_OFFSET(MMHUB, 0, + mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + data |= ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET, data); + } +} + +int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + mmhub_v9_4_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + mmhub_v9_4_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + break; + default: + break; + } + + return 0; +} + +void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data, data1; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_MC_MGCG */ + data = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG); + + data1 = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG); + + if ((data & ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK) && + !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +} + +static const struct soc15_ras_field_entry mmhub_v9_4_ras_fields[] = { + /* MMHUB Range 0 */ + { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA0_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA0_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA0_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA0_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 1 */ + { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA1_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA1_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA1_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA1_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHAB Range 2*/ + { "MMEA2_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA2_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA2_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA2_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA2_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA2_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA2_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA2_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA2_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA2_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA2_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Rang 3 */ + { "MMEA3_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA3_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA3_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA3_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA3_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA3_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA3_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA3_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA3_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA3_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA3_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 4 */ + { "MMEA4_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA4_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA4_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA4_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA4_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA4_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA4_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA4_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA4_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA4_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA4_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUAB Range 5 */ + { "MMEA5_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA5_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA5_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA5_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA5_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA5_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA5_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA5_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA5_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA5_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA5_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 6 */ + { "MMEA6_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA6_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA6_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA6_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA6_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA6_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA6_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA6_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA6_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA6_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA6_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 7*/ + { "MMEA7_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA7_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA7_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA7_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA7_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA7_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA7_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA7_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA7_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA7_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA7_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_DED_COUNT), + } +}; + +static const struct soc15_reg_entry mmhub_v9_4_edc_cnt_regs[] = { + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), 0, 0, 0 }, +}; + +static int mmhub_v9_4_get_ras_error_count(const struct soc15_reg_entry *reg, + uint32_t value, uint32_t *sec_count, uint32_t *ded_count) +{ + uint32_t i; + uint32_t sec_cnt, ded_cnt; + + for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) { + if(mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset) + continue; + + sec_cnt = (value & + mmhub_v9_4_ras_fields[i].sec_count_mask) >> + mmhub_v9_4_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("MMHUB SubBlock %s, SEC %d\n", + mmhub_v9_4_ras_fields[i].name, + sec_cnt); + *sec_count += sec_cnt; + } + + ded_cnt = (value & + mmhub_v9_4_ras_fields[i].ded_count_mask) >> + mmhub_v9_4_ras_fields[i].ded_count_shift; + if (ded_cnt) { + DRM_INFO("MMHUB SubBlock %s, DED %d\n", + mmhub_v9_4_ras_fields[i].name, + ded_cnt); + *ded_count += ded_cnt; + } + } + + return 0; +} + +static void mmhub_v9_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0, ded_count = 0; + uint32_t i; + uint32_t reg_value; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); i++) { + reg_value = + RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i])); + if (reg_value) + mmhub_v9_4_get_ras_error_count(&mmhub_v9_4_edc_cnt_regs[i], + reg_value, &sec_count, &ded_count); + } + + err_data->ce_count += sec_count; + err_data->ue_count += ded_count; +} + +const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = { + .ras_late_init = amdgpu_mmhub_ras_late_init, + .query_ras_error_count = mmhub_v9_4_query_ras_error_count, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h new file mode 100644 index 000000000000..1b979773776c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h @@ -0,0 +1,40 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __MMHUB_V9_4_H__ +#define __MMHUB_V9_4_H__ + +extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs; + +u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev); +int mmhub_v9_4_gart_enable(struct amdgpu_device *adev); +void mmhub_v9_4_gart_disable(struct amdgpu_device *adev); +void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, + bool value); +void mmhub_v9_4_init(struct amdgpu_device *adev); +int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h index 8af0bddf85e4..20958639b601 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h @@ -47,6 +47,18 @@ struct mmsch_v1_0_init_header { uint32_t uvd_table_size; }; +struct mmsch_vf_eng_init_header { + uint32_t init_status; + uint32_t table_offset; + uint32_t table_size; +}; + +struct mmsch_v1_1_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_vf_eng_init_header eng[2]; +}; + struct mmsch_v1_0_cmd_direct_reg_header { uint32_t reg_offset : 28; uint32_t command_type : 4; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 235548c0b41f..5fd67e1cc2a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -158,82 +158,6 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, xgpu_ai_mailbox_set_valid(adev, false); } -static int xgpu_ai_get_pp_clk(struct amdgpu_device *adev, u32 type, char *buf) -{ - int r = 0; - u32 req, val, size; - - if (!amdgim_is_hwperf(adev) || buf == NULL) - return -EBADRQC; - - switch(type) { - case PP_SCLK: - req = IDH_IRQ_GET_PP_SCLK; - break; - case PP_MCLK: - req = IDH_IRQ_GET_PP_MCLK; - break; - default: - return -EBADRQC; - } - - mutex_lock(&adev->virt.dpm_mutex); - - xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0); - - r = xgpu_ai_poll_msg(adev, IDH_SUCCESS); - if (!r && adev->fw_vram_usage.va != NULL) { - val = RREG32_NO_KIQ( - SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW1)); - size = strnlen((((char *)adev->virt.fw_reserve.p_pf2vf) + - val), PAGE_SIZE); - - if (size < PAGE_SIZE) - strcpy(buf,((char *)adev->virt.fw_reserve.p_pf2vf + val)); - else - size = 0; - - r = size; - goto out; - } - - r = xgpu_ai_poll_msg(adev, IDH_FAIL); - if(r) - pr_info("%s DPM request failed", - (type == PP_SCLK)? "SCLK" : "MCLK"); - -out: - mutex_unlock(&adev->virt.dpm_mutex); - return r; -} - -static int xgpu_ai_force_dpm_level(struct amdgpu_device *adev, u32 level) -{ - int r = 0; - u32 req = IDH_IRQ_FORCE_DPM_LEVEL; - - if (!amdgim_is_hwperf(adev)) - return -EBADRQC; - - mutex_lock(&adev->virt.dpm_mutex); - xgpu_ai_mailbox_trans_msg(adev, req, level, 0, 0); - - r = xgpu_ai_poll_msg(adev, IDH_SUCCESS); - if (!r) - goto out; - - r = xgpu_ai_poll_msg(adev, IDH_FAIL); - if (!r) - pr_info("DPM request failed"); - else - pr_info("Mailbox is broken"); - -out: - mutex_unlock(&adev->virt.dpm_mutex); - return r; -} - static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, enum idh_request req) { @@ -326,7 +250,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) */ locked = mutex_trylock(&adev->lock_reset); if (locked) - adev->in_gpu_reset = 1; + adev->in_gpu_reset = true; do { if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) @@ -338,7 +262,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) flr_done: if (locked) { - adev->in_gpu_reset = 0; + adev->in_gpu_reset = false; mutex_unlock(&adev->lock_reset); } @@ -449,27 +373,10 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } -static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev) -{ - adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY; - - /* Enable L1 security reg access mode by defaul, as non-security VF - * will no longer be supported. - */ - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; - - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; - - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; -} - const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = xgpu_ai_release_full_gpu_access, .reset_gpu = xgpu_ai_request_reset, .wait_reset = NULL, .trans_msg = xgpu_ai_mailbox_trans_msg, - .get_pp_clk = xgpu_ai_get_pp_clk, - .force_dpm_level = xgpu_ai_force_dpm_level, - .init_reg_access_mode = xgpu_ai_init_reg_access_mode, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 077e91a33d62..37dbe0f2142f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -35,10 +35,6 @@ enum idh_request { IDH_REL_GPU_FINI_ACCESS, IDH_REQ_GPU_RESET_ACCESS, - IDH_IRQ_FORCE_DPM_LEVEL = 10, - IDH_IRQ_GET_PP_SCLK, - IDH_IRQ_GET_PP_MCLK, - IDH_LOG_VF_ERROR = 200, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c new file mode 100644 index 000000000000..237fa5e16b7c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -0,0 +1,384 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "nbio/nbio_2_3_offset.h" +#include "nbio/nbio_2_3_sh_mask.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "soc15.h" +#include "navi10_ih.h" +#include "soc15_common.h" +#include "mxgpu_nv.h" +#include "mxgpu_ai.h" + +static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev) +{ + WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2); +} + +static void xgpu_nv_mailbox_set_valid(struct amdgpu_device *adev, bool val) +{ + WREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0); +} + +/* + * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine + * RCV_MSG_VALID filed of BIF_BX_PF_MAILBOX_CONTROL must already be set to 1 + * by host. + * + * if called no in IRQ routine, this peek_msg cannot guaranteed to return the + * correct value since it doesn't return the RCV_DW0 under the case that + * RCV_MSG_VALID is set by host. + */ +static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev) +{ + return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0)); +} + + +static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev, + enum idh_event event) +{ + u32 reg; + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0)); + if (reg != event) + return -ENOENT; + + xgpu_nv_mailbox_send_ack(adev); + + return 0; +} + +static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev) +{ + return RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2; +} + +static int xgpu_nv_poll_ack(struct amdgpu_device *adev) +{ + int timeout = NV_MAILBOX_POLL_ACK_TIMEDOUT; + u8 reg; + + do { + reg = RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE); + if (reg & 2) + return 0; + + mdelay(5); + timeout -= 5; + } while (timeout > 1); + + pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT); + + return -ETIME; +} + +static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) +{ + int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT; + + do { + r = xgpu_nv_mailbox_rcv_msg(adev, event); + if (!r) + return 0; + + msleep(10); + timeout -= 10; + } while (timeout > 1); + + pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); + + return -ETIME; +} + +static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, + enum idh_request req, u32 data1, u32 data2, u32 data3) +{ + u32 reg; + int r; + uint8_t trn; + + /* IMPORTANT: + * clear TRN_MSG_VALID valid to clear host's RCV_MSG_ACK + * and with host's RCV_MSG_ACK cleared hw automatically clear host's RCV_MSG_ACK + * which lead to VF's TRN_MSG_ACK cleared, otherwise below xgpu_nv_poll_ack() + * will return immediatly + */ + do { + xgpu_nv_mailbox_set_valid(adev, false); + trn = xgpu_nv_peek_ack(adev); + if (trn) { + pr_err("trn=%x ACK should not assert! wait again !\n", trn); + msleep(1); + } + } while (trn); + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0)); + reg = REG_SET_FIELD(reg, BIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0, + MSGBUF_DATA, req); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0), + reg); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW1), + data1); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW2), + data2); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW3), + data3); + + xgpu_nv_mailbox_set_valid(adev, true); + + /* start to poll ack */ + r = xgpu_nv_poll_ack(adev); + if (r) + pr_err("Doesn't get ack from pf, continue\n"); + + xgpu_nv_mailbox_set_valid(adev, false); +} + +static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, + enum idh_request req) +{ + int r; + + xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0); + + /* start to check msg if request is idh_req_gpu_init_access */ + if (req == IDH_REQ_GPU_INIT_ACCESS || + req == IDH_REQ_GPU_FINI_ACCESS || + req == IDH_REQ_GPU_RESET_ACCESS) { + r = xgpu_nv_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); + if (r) { + pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); + return r; + } + /* Retrieve checksum from mailbox2 */ + if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) { + adev->virt.fw_reserve.checksum_key = + RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW2)); + } + } + + return 0; +} + +static int xgpu_nv_request_reset(struct amdgpu_device *adev) +{ + return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS); +} + +static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev, + bool init) +{ + enum idh_request req; + + req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS; + return xgpu_nv_send_access_requests(adev, req); +} + +static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev, + bool init) +{ + enum idh_request req; + int r = 0; + + req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS; + r = xgpu_nv_send_access_requests(adev, req); + + return r; +} + +static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("get ack intr and do nothing.\n"); + return 0; +} + +static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL)); + + tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, ACK_INT_EN, + (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp); + + return 0; +} + +static void xgpu_nv_mailbox_flr_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT; + int locked; + + /* block amdgpu_gpu_recover till msg FLR COMPLETE received, + * otherwise the mailbox msg will be ruined/reseted by + * the VF FLR. + * + * we can unlock the lock_reset to allow "amdgpu_job_timedout" + * to run gpu_recover() after FLR_NOTIFICATION_CMPL received + * which means host side had finished this VF's FLR. + */ + locked = mutex_trylock(&adev->lock_reset); + if (locked) + adev->in_gpu_reset = true; + + do { + if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) + goto flr_done; + + msleep(10); + timeout -= 10; + } while (timeout > 1); + +flr_done: + if (locked) { + adev->in_gpu_reset = false; + mutex_unlock(&adev->lock_reset); + } + + /* Trigger recovery for world switch failure if no TDR */ + if (amdgpu_device_should_recover_gpu(adev) + && (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT || + adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || + adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || + adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) + amdgpu_device_gpu_recover(adev, NULL); +} + +static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL)); + + tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, VALID_INT_EN, + (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp); + + return 0; +} + +static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + enum idh_event event = xgpu_nv_mailbox_peek_msg(adev); + + switch (event) { + case IDH_FLR_NOTIFICATION: + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.flr_work); + break; + /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore + * it byfar since that polling thread will handle it, + * other msg like flr complete is not handled here. + */ + case IDH_CLR_MSG_BUF: + case IDH_FLR_NOTIFICATION_CMPL: + case IDH_READY_TO_ACCESS_GPU: + default: + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_ack_irq_funcs = { + .set = xgpu_nv_set_mailbox_ack_irq, + .process = xgpu_nv_mailbox_ack_irq, +}; + +static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_rcv_irq_funcs = { + .set = xgpu_nv_set_mailbox_rcv_irq, + .process = xgpu_nv_mailbox_rcv_irq, +}; + +void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->virt.ack_irq.num_types = 1; + adev->virt.ack_irq.funcs = &xgpu_nv_mailbox_ack_irq_funcs; + adev->virt.rcv_irq.num_types = 1; + adev->virt.rcv_irq.funcs = &xgpu_nv_mailbox_rcv_irq_funcs; +} + +int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq); + if (r) + return r; + + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq); + if (r) { + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); + return r; + } + + return 0; +} + +int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0); + if (r) + return r; + r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0); + if (r) { + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); + return r; + } + + INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work); + + return 0; +} + +void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev) +{ + amdgpu_irq_put(adev, &adev->virt.ack_irq, 0); + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); +} + +const struct amdgpu_virt_ops xgpu_nv_virt_ops = { + .req_full_gpu = xgpu_nv_request_full_gpu_access, + .rel_full_gpu = xgpu_nv_release_full_gpu_access, + .reset_gpu = xgpu_nv_request_reset, + .wait_reset = NULL, + .trans_msg = xgpu_nv_mailbox_trans_msg, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h new file mode 100644 index 000000000000..99b15f6865cb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -0,0 +1,41 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MXGPU_NV_H__ +#define __MXGPU_NV_H__ + +#define NV_MAILBOX_POLL_ACK_TIMEDOUT 500 +#define NV_MAILBOX_POLL_MSG_TIMEDOUT 12000 +#define NV_MAILBOX_POLL_FLR_TIMEDOUT 500 + +extern const struct amdgpu_virt_ops xgpu_nv_virt_ops; + +void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev); +int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev); +int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev); +void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev); + +#define NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4) +#define NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4 + 1) + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index e963746be11c..cf557a428298 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -21,7 +21,8 @@ * */ -#include <drm/drmP.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ih.h" @@ -109,14 +110,13 @@ static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl static int navi10_ih_irq_init(struct amdgpu_device *adev) { struct amdgpu_ih_ring *ih = &adev->irq.ih; - int ret = 0; u32 ih_rb_cntl, ih_doorbell_rtpr, ih_chicken; u32 tmp; /* disable irqs */ navi10_ih_disable_interrupts(adev); - adev->nbio_funcs->ih_control(adev); + adev->nbio.funcs->ih_control(adev); /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8); @@ -161,7 +161,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) } WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); - adev->nbio_funcs->ih_doorbell_range(adev, ih->use_doorbell, + adev->nbio.funcs->ih_doorbell_range(adev, ih->use_doorbell, ih->doorbell_index); tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); @@ -178,7 +178,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) /* enable interrupts */ navi10_ih_enable_interrupts(adev); - return ret; + return 0; } /** @@ -426,7 +426,7 @@ static int navi10_ih_set_clockgating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; navi10_ih_update_clockgating_state(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c index 55014ce8670a..88efaecf9f70 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c @@ -24,25 +24,12 @@ #include "nv.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "navi10_ip_offset.h" int navi10_reg_base_init(struct amdgpu_device *adev) { - int r, i; + int i; - if (amdgpu_discovery) { - r = amdgpu_discovery_reg_base_init(adev); - if (r) { - DRM_WARN("failed to init reg base from ip discovery table, " - "fallback to legacy init method\n"); - goto legacy_init; - } - - return 0; - } - -legacy_init: for (i = 0 ; i < MAX_INSTANCE ; ++i) { adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c new file mode 100644 index 000000000000..a786d159e5e9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c @@ -0,0 +1,52 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "navi12_ip_offset.h" + +int navi12_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the blocks needed by driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c new file mode 100644 index 000000000000..4ea1e8fbb601 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c @@ -0,0 +1,53 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "navi14_ip_offset.h" + +int navi14_reg_base_init(struct amdgpu_device *adev) +{ + int i; + + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 835d7b1a841f..f3a3fe746222 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -27,11 +27,21 @@ #include "nbio/nbio_2_3_default.h" #include "nbio/nbio_2_3_offset.h" #include "nbio/nbio_2_3_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> #define smnPCIE_CONFIG_CNTL 0x11180044 #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 + +static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -56,10 +66,9 @@ static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev) @@ -92,7 +101,7 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan } static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, - int doorbell_index) + int doorbell_index, int instance) { u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); @@ -311,7 +320,6 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { - .hdp_flush_reg = &nbio_v2_3_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v2_3_get_pcie_index_offset, @@ -331,4 +339,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .ih_control = nbio_v2_3_ih_control, .init_registers = nbio_v2_3_init_registers, .detect_hw_virt = nbio_v2_3_detect_hw_virt, + .remap_hdp_registers = nbio_v2_3_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index 5ae52085f6b7..a43b60acf7f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 6590143c3f75..635d9e1fc0a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -226,7 +226,7 @@ static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev) return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); } -static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { +const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -277,7 +277,6 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { - .hdp_flush_reg = &nbio_v6_1_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h index 0743a6f016f3..6dc743b73218 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 73419fa38159..d6cbf26074bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -91,6 +91,26 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan WREG32(reg, doorbell_range); } +static void nbio_v7_0_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance) +{ + u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + + u32 doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); + + WREG32(reg, doorbell_range); +} + static void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { @@ -272,7 +292,6 @@ static void nbio_v7_0_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { - .hdp_flush_reg = &nbio_v7_0_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset, @@ -282,6 +301,7 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { .hdp_flush = nbio_v7_0_hdp_flush, .get_memsize = nbio_v7_0_get_memsize, .sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v7_0_vcn_doorbell_range, .enable_doorbell_aperture = nbio_v7_0_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_0_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_0_ih_doorbell_range, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h index 508d549c5029..e7aefb252550 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index bfaaa327ae3c..65eb378fa035 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -23,14 +23,38 @@ #include "amdgpu.h" #include "amdgpu_atombios.h" #include "nbio_v7_4.h" +#include "amdgpu_ras.h" #include "nbio/nbio_7_4_offset.h" #include "nbio/nbio_7_4_sh_mask.h" #include "nbio/nbio_7_4_0_smn.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c +/* + * These are nbio v7_4_1 registers mask. Temporarily define these here since + * nbio v7_4_1 header is incomplete. + */ +#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L + +#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01dc +#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2 +//BIF_MMSCH1_DOORBELL_RANGE +#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET__SHIFT 0x2 +#define BIF_MMSCH1_DOORBELL_RANGE__SIZE__SHIFT 0x10 +#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL +#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L + +static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); + static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -75,10 +99,24 @@ static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index, int doorbell_size) { - u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); + u32 reg, doorbell_range; - u32 doorbell_range = RREG32(reg); + if (instance < 2) + reg = instance + + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE); + else + /* + * These registers address of SDMA2~7 is not consecutive + * from SDMA0~1. Need plus 4 dwords offset. + * + * BIF_SDMA0_DOORBELL_RANGE: 0x3bc0 + * BIF_SDMA1_DOORBELL_RANGE: 0x3bc4 + * BIF_SDMA2_DOORBELL_RANGE: 0x3bd8 + */ + reg = instance + 0x4 + + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE); + + doorbell_range = RREG32(reg); if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -89,6 +127,32 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan WREG32(reg, doorbell_range); } +static void nbio_v7_4_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance) +{ + u32 reg; + u32 doorbell_range; + + if (instance) + reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE); + else + reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + + doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); + + WREG32(reg, doorbell_range); +} + static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { @@ -207,7 +271,7 @@ static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev) return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); } -static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { +const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -220,6 +284,12 @@ static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK, .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, + .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK, + .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, + .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, + .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, + .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, + .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, }; static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) @@ -241,17 +311,224 @@ static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { - uint32_t def, data; - def = data = RREG32_PCIE(smnPCIE_CI_CNTL); - data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1); +} - if (def != data) - WREG32_PCIE(smnPCIE_CI_CNTL, data); +static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_intr_cntl; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if); + + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, + RAS_CNTLR_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + /* + * clear error status after ras_controller_intr according to + * hw team and count ue number for query + */ + nbio_v7_4_query_ras_error_count(adev, &obj->err_data); + + DRM_WARN("RAS controller interrupt triggered by NBIF error\n"); + + /* ras_controller_int is dedicated for nbif ras error, + * not the global interrupt for sync flood + */ + amdgpu_ras_reset_gpu(adev); + } +} + +static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_intr_cntl; + + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + amdgpu_ras_global_ras_isr(adev); + } +} + + +static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* The ras_controller_irq enablement should be done in psp bl when it + * tries to enable ras feature. Driver only need to set the correct interrupt + * vector for bare-metal and sriov use case respectively + */ + uint32_t bif_intr_cntl; + + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { + /* set interrupt vector select bit to 0 to select + * vetcor 1 for bare metal case */ + bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, + BIF_INTR_CNTL, + RAS_INTR_VEC_SEL, 0); + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + } + + return 0; +} + +static int nbio_v7_4_process_ras_controller_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for ras_controller_irq should be written + * to BIFring instead of general iv ring. However, due to known bif ring + * hw bug, it has to be disabled. There is no chance the process function + * will be involked. Just left it as a dummy one. + */ + return 0; +} + +static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* The ras_controller_irq enablement should be done in psp bl when it + * tries to enable ras feature. Driver only need to set the correct interrupt + * vector for bare-metal and sriov use case respectively + */ + uint32_t bif_intr_cntl; + + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { + /* set interrupt vector select bit to 0 to select + * vetcor 1 for bare metal case */ + bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, + BIF_INTR_CNTL, + RAS_INTR_VEC_SEL, 0); + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + } + + return 0; +} + +static int nbio_v7_4_process_err_event_athub_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for err_event_athub_irq should be written + * to BIFring instead of general iv ring. However, due to known bif ring + * hw bug, it has to be disabled. There is no chance the process function + * will be involked. Just left it as a dummy one. + */ + return 0; +} + +static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_controller_irq_funcs = { + .set = nbio_v7_4_set_ras_controller_irq_state, + .process = nbio_v7_4_process_ras_controller_irq, +}; + +static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_err_event_athub_irq_funcs = { + .set = nbio_v7_4_set_ras_err_event_athub_irq_state, + .process = nbio_v7_4_process_err_event_athub_irq, +}; + +static int nbio_v7_4_init_ras_controller_interrupt (struct amdgpu_device *adev) +{ + int r; + + /* init the irq funcs */ + adev->nbio.ras_controller_irq.funcs = + &nbio_v7_4_ras_controller_irq_funcs; + adev->nbio.ras_controller_irq.num_types = 1; + + /* register ras controller interrupt */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT, + &adev->nbio.ras_controller_irq); + + return r; +} + +static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev) +{ + + int r; + + /* init the irq funcs */ + adev->nbio.ras_err_event_athub_irq.funcs = + &nbio_v7_4_ras_err_event_athub_irq_funcs; + adev->nbio.ras_err_event_athub_irq.num_types = 1; + + /* register ras err event athub interrupt */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT, + &adev->nbio.ras_err_event_athub_irq); + + return r; +} + +#define smnPARITY_ERROR_STATUS_UNCORR_GRP2 0x13a20030 + +static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + uint32_t global_sts, central_sts, int_eoi, parity_sts; + uint32_t corr, fatal, non_fatal; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO); + corr = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrCorr); + fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal); + non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, + ParityErrNonFatal); + parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2); + + if (corr) + err_data->ce_count++; + if (fatal) + err_data->ue_count++; + + if (corr || fatal || non_fatal) { + central_sts = RREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS); + /* clear error status register */ + WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts); + + if (fatal) + /* clear parity fatal error indication field */ + WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2, + parity_sts); + + if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS, + BIFL_RasContller_Intr_Recv)) { + /* clear interrupt status register */ + WREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS, central_sts); + int_eoi = RREG32_PCIE(smnIOHC_INTERRUPT_EOI); + int_eoi = REG_SET_FIELD(int_eoi, + IOHC_INTERRUPT_EOI, SMI_EOI, 1); + WREG32_PCIE(smnIOHC_INTERRUPT_EOI, int_eoi); + } + } +} + +static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL, + DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1); } const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { - .hdp_flush_reg = &nbio_v7_4_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset, @@ -261,9 +538,11 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .hdp_flush = nbio_v7_4_hdp_flush, .get_memsize = nbio_v7_4_get_memsize, .sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v7_4_vcn_doorbell_range, .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_4_ih_doorbell_range, + .enable_doorbell_interrupt = nbio_v7_4_enable_doorbell_interrupt, .update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating, .update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep, .get_clockgating_state = nbio_v7_4_get_clockgating_state, @@ -271,4 +550,10 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .init_registers = nbio_v7_4_init_registers, .detect_hw_virt = nbio_v7_4_detect_hw_virt, .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, + .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring, + .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring, + .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt, + .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt, + .query_ras_error_count = nbio_v7_4_query_ras_error_count, + .ras_late_init = amdgpu_nbio_ras_late_init, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index c442865bac4f..b1ac82872752 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 662612f89c70..2d1bebdf1603 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -23,7 +23,8 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include <drm/drmP.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" @@ -39,19 +40,23 @@ #include "gc/gc_10_1_0_sh_mask.h" #include "hdp/hdp_5_0_0_offset.h" #include "hdp/hdp_5_0_0_sh_mask.h" +#include "smuio/smuio_11_0_0_offset.h" #include "soc15.h" #include "soc15_common.h" #include "gmc_v10_0.h" #include "gfxhub_v2_0.h" #include "mmhub_v2_0.h" +#include "nbio_v2_3.h" #include "nv.h" #include "navi10_ih.h" #include "gfx_v10_0.h" #include "sdma_v5_0.h" #include "vcn_v2_0.h" +#include "jpeg_v2_0.h" #include "dce_virtual.h" #include "mes_v10_1.h" +#include "mxgpu_nv.h" static const struct amd_ip_funcs nv_common_ip_funcs; @@ -62,8 +67,8 @@ static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u32 r; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -77,8 +82,8 @@ static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags, address, data; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -118,7 +123,7 @@ static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static u32 nv_get_config_memsize(struct amdgpu_device *adev) { - return adev->nbio_funcs->get_memsize(adev); + return adev->nbio.funcs->get_memsize(adev); } static u32 nv_get_xclk(struct amdgpu_device *adev) @@ -153,8 +158,27 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev) static bool nv_read_bios_from_rom(struct amdgpu_device *adev, u8 *bios, u32 length_bytes) { - /* TODO: will implement it when SMU header is available */ - return false; + u32 *dw_ptr; + u32 i, length_dw; + + if (bios == NULL) + return false; + if (length_bytes == 0) + return false; + /* APU vbios image is part of sbios image */ + if (adev->flags & AMD_IS_APU) + return false; + + dw_ptr = (u32 *)bios; + length_dw = ALIGN(length_bytes, 4) / 4; + + /* set rom index to 0 */ + WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0); + /* read out the rom data */ + for (i = 0; i < length_dw; i++) + dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA)); + + return true; } static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { @@ -175,6 +199,7 @@ static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, @@ -278,7 +303,7 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev) /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = adev->nbio_funcs->get_memsize(adev); + u32 memsize = adev->nbio.funcs->get_memsize(adev); if (memsize != 0xffffffff) break; @@ -289,6 +314,28 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev) return ret; } + +static bool nv_asic_supports_baco(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (smu_baco_is_support(smu)) + return true; + else + return false; +} + +static enum amd_reset_method +nv_asic_reset_method(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu)) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_MODE1; +} + static int nv_asic_reset(struct amdgpu_device *adev) { @@ -303,10 +350,20 @@ static int nv_asic_reset(struct amdgpu_device *adev) int ret = 0; struct smu_context *smu = &adev->smu; - if (smu_baco_is_support(smu)) - ret = smu_baco_reset(smu); - else + if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); + ret = smu_baco_enter(smu); + if (ret) + return ret; + ret = smu_baco_exit(smu); + if (ret) + return ret; + } else { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); ret = nv_asic_mode1_reset(adev); + } return ret; } @@ -350,8 +407,8 @@ static void nv_program_aspm(struct amdgpu_device *adev) static void nv_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - adev->nbio_funcs->enable_doorbell_aperture(adev, enable); - adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); } static const struct amdgpu_ip_block_version nv_common_ip_block = @@ -363,29 +420,65 @@ static const struct amdgpu_ip_block_version nv_common_ip_block = .funcs = &nv_common_ip_funcs, }; -int nv_set_ip_blocks(struct amdgpu_device *adev) +static int nv_reg_base_init(struct amdgpu_device *adev) { - /* Set IP register base before any HW register access */ + int r; + + if (amdgpu_discovery) { + r = amdgpu_discovery_reg_base_init(adev); + if (r) { + DRM_WARN("failed to init reg base from ip discovery table, " + "fallback to legacy init method\n"); + goto legacy_init; + } + + return 0; + } + +legacy_init: switch (adev->asic_type) { case CHIP_NAVI10: navi10_reg_base_init(adev); break; + case CHIP_NAVI14: + navi14_reg_base_init(adev); + break; + case CHIP_NAVI12: + navi12_reg_base_init(adev); + break; default: return -EINVAL; } - adev->nbio_funcs = &nbio_v2_3_funcs; + return 0; +} - adev->nbio_funcs->detect_hw_virt(adev); +int nv_set_ip_blocks(struct amdgpu_device *adev) +{ + int r; + + /* Set IP register base before any HW register access */ + r = nv_reg_base_init(adev); + if (r) + return r; + + adev->nbio.funcs = &nbio_v2_3_funcs; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + + adev->nbio.funcs->detect_hw_virt(adev); + + if (amdgpu_sriov_vf(adev)) + adev->virt.ops = &xgpu_nv_virt_ops; switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: amdgpu_device_ip_block_add(adev, &nv_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -396,12 +489,35 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); if (adev->enable_mes) amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; + case CHIP_NAVI12: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + !amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && + !amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); + break; default: return -EINVAL; } @@ -411,12 +527,12 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) static uint32_t nv_get_rev_id(struct amdgpu_device *adev) { - return adev->nbio_funcs->get_rev_id(adev); + return adev->nbio.funcs->get_rev_id(adev); } static void nv_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - adev->nbio_funcs->hdp_flush(adev, ring); + adev->nbio.funcs->hdp_flush(adev, ring); } static void nv_invalidate_hdp(struct amdgpu_device *adev, @@ -461,6 +577,16 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev) +{ + + /* TODO + * dummy implement for pcie_replay_count sysfs interface + * */ + + return 0; +} + static void nv_init_doorbell_index(struct amdgpu_device *adev) { adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; @@ -496,6 +622,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .read_bios_from_rom = &nv_read_bios_from_rom, .read_register = &nv_read_register, .reset = &nv_asic_reset, + .reset_method = &nv_asic_reset_method, .set_vga_state = &nv_vga_set_state, .get_xclk = &nv_get_xclk, .set_uvd_clocks = &nv_set_uvd_clocks, @@ -507,13 +634,17 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .need_full_reset = &nv_need_full_reset, .get_pcie_usage = &nv_get_pcie_usage, .need_reset_on_init = &nv_need_reset_on_init, + .get_pcie_replay_count = &nv_get_pcie_replay_count, + .supports_baco = &nv_asic_supports_baco, }; static int nv_common_early_init(void *handle) { - bool psp_enabled = false; +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &nv_pcie_rreg; @@ -528,10 +659,6 @@ static int nv_common_early_init(void *handle) adev->asic_funcs = &nv_asic_funcs; - if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) && - (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) - psp_enabled = true; - adev->rev_id = nv_get_rev_id(adev); adev->external_rev_id = 0xff; switch (adev->asic_type) { @@ -548,29 +675,95 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | AMD_CG_SUPPORT_BIF_MGCG | AMD_CG_SUPPORT_BIF_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | - AMD_PG_SUPPORT_MMHUB | + AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_ATHUB; adev->external_rev_id = adev->rev_id + 0x1; break; + case CHIP_NAVI14: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_VCN_DPG; + adev->external_rev_id = adev->rev_id + 20; + break; + case CHIP_NAVI12: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_ATHUB; + /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, + * as a consequence, the rev_id and external_rev_id are wrong. + * workaround it by hardcoding rev_id to 0 (default value). + */ + if (amdgpu_sriov_vf(adev)) + adev->rev_id = 0; + adev->external_rev_id = adev->rev_id + 0xa; + break; default: /* FIXME: not supported yet */ return -EINVAL; } + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_setting(adev); + xgpu_nv_mailbox_set_irq_funcs(adev); + } + return 0; } static int nv_common_late_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_get_irq(adev); + return 0; } static int nv_common_sw_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_add_irq_id(adev); + return 0; } @@ -588,7 +781,13 @@ static int nv_common_hw_init(void *handle) /* enable aspm */ nv_program_aspm(adev); /* setup nbio registers */ - adev->nbio_funcs->init_registers(adev); + adev->nbio.funcs->init_registers(adev); + /* remap HDP registers to a hole in mmio space, + * for the purpose of expose those registers + * to process space + */ + if (adev->nbio.funcs->remap_hdp_registers) + adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ nv_enable_doorbell_aperture(adev, true); @@ -748,14 +947,16 @@ static int nv_common_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_NAVI10: - adev->nbio_funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); - adev->nbio_funcs->update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->nbio.funcs->update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + adev->nbio.funcs->update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); nv_update_hdp_mem_power_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); nv_update_hdp_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -778,7 +979,7 @@ static void nv_common_get_clockgating_state(void *handle, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio_funcs->get_clockgating_state(adev, flags); + adev->nbio.funcs->get_clockgating_state(adev, flags); /* AMD_CG_SUPPORT_HDP_MGCG */ tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 639c54933cc5..82e6cb432f3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -30,4 +30,6 @@ void nv_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int nv_set_ip_blocks(struct amdgpu_device *adev); int navi10_reg_base_init(struct amdgpu_device *adev); +int navi14_reg_base_init(struct amdgpu_device *adev); +int navi12_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 5080a73a95a5..36b65797434e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -233,8 +233,16 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_RLCP_CAM = 46, /* RLCP CAM NV */ GFX_FW_TYPE_RLC_SPP_CAM_EXT = 47, /* RLC SPP CAM EXT NV */ GFX_FW_TYPE_RLX6_DRAM_BOOT = 48, /* RLX6 DRAM BOOT NV */ - GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV */ - GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV */ + GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV + RN */ + GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV + RN */ + GFX_FW_TYPE_DMUB = 51, /* DMUB RN */ + GFX_FW_TYPE_SDMA2 = 52, /* SDMA2 MI */ + GFX_FW_TYPE_SDMA3 = 53, /* SDMA3 MI */ + GFX_FW_TYPE_SDMA4 = 54, /* SDMA4 MI */ + GFX_FW_TYPE_SDMA5 = 55, /* SDMA5 MI */ + GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */ + GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ + GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */ GFX_FW_TYPE_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index ce1ea31feee0..7539104175e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -40,6 +40,9 @@ MODULE_FIRMWARE("amdgpu/raven_asd.bin"); MODULE_FIRMWARE("amdgpu/picasso_asd.bin"); MODULE_FIRMWARE("amdgpu/raven2_asd.bin"); +MODULE_FIRMWARE("amdgpu/picasso_ta.bin"); +MODULE_FIRMWARE("amdgpu/raven2_ta.bin"); +MODULE_FIRMWARE("amdgpu/raven_ta.bin"); static int psp_v10_0_init_microcode(struct psp_context *psp) { @@ -48,7 +51,7 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) char fw_name[30]; int err = 0; const struct psp_firmware_header_v1_0 *hdr; - + const struct ta_firmware_header_v1_0 *ta_hdr; DRM_DEBUG("\n"); switch (adev->asic_type) { @@ -79,7 +82,45 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) adev->psp.asd_start_addr = (uint8_t *)hdr + le32_to_cpu(hdr->header.ucode_array_offset_bytes); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + dev_info(adev->dev, + "psp v10.0: Failed to load firmware \"%s\"\n", + fw_name); + } else { + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; + + ta_hdr = (const struct ta_firmware_header_v1_0 *) + adev->psp.ta_fw->data; + adev->psp.ta_hdcp_ucode_version = + le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); + adev->psp.ta_hdcp_ucode_size = + le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); + adev->psp.ta_hdcp_start_addr = + (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + + adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); + + adev->psp.ta_dtm_ucode_version = + le32_to_cpu(ta_hdr->ta_dtm_ucode_version); + adev->psp.ta_dtm_ucode_size = + le32_to_cpu(ta_hdr->ta_dtm_size_bytes); + adev->psp.ta_dtm_start_addr = + (uint8_t *)adev->psp.ta_hdcp_start_addr + + le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + } + return 0; + +out2: + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; out: if (err) { dev_err(adev->dev, @@ -189,54 +230,6 @@ static int psp_v10_0_ring_destroy(struct psp_context *psp, return ret; } -static int psp_v10_0_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index) -{ - unsigned int psp_write_ptr_reg = 0; - struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; - struct psp_ring *ring = &psp->km_ring; - struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; - struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + - ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; - struct amdgpu_device *adev = psp->adev; - uint32_t ring_size_dw = ring->ring_size / 4; - uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; - - /* KM (GPCOM) prepare write pointer */ - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); - - /* Update KM RB frame pointer to new frame */ - if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring_buffer_start; - else - write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); - /* Check invalid write_frame ptr address */ - if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { - DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", - ring_buffer_start, ring_buffer_end, write_frame); - DRM_ERROR("write_frame is pointing to address out of bounds\n"); - return -EINVAL; - } - - /* Initialize KM RB frame */ - memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); - - /* Update KM RB frame */ - write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); - write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); - write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); - write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); - write_frame->fence_value = index; - - /* Update the write Pointer in DWORDs */ - psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); - - return 0; -} - static int psp_v10_0_sram_map(struct amdgpu_device *adev, unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, @@ -366,15 +359,30 @@ static int psp_v10_0_mode1_reset(struct psp_context *psp) return -EINVAL; } +static uint32_t psp_v10_0_ring_get_wptr(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + return RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); +} + +static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + static const struct psp_funcs psp_v10_0_funcs = { .init_microcode = psp_v10_0_init_microcode, .ring_init = psp_v10_0_ring_init, .ring_create = psp_v10_0_ring_create, .ring_stop = psp_v10_0_ring_stop, .ring_destroy = psp_v10_0_ring_destroy, - .cmd_submit = psp_v10_0_cmd_submit, .compare_sram_data = psp_v10_0_compare_sram_data, .mode1_reset = psp_v10_0_mode1_reset, + .ring_get_wptr = psp_v10_0_ring_get_wptr, + .ring_set_wptr = psp_v10_0_ring_set_wptr, }; void psp_v10_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 41b72588adcf..0829188c1a5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -22,6 +22,7 @@ #include <linux/firmware.h> #include <linux/module.h> +#include <linux/vmalloc.h> #include "amdgpu.h" #include "amdgpu_psp.h" @@ -43,6 +44,16 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); MODULE_FIRMWARE("amdgpu/navi10_sos.bin"); MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi10_ta.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi14_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ta.bin"); +MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi12_ta.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -51,6 +62,8 @@ MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); #define mmRLC_GPM_UCODE_DATA_NV10 0x5b62 #define mmSDMA0_UCODE_ADDR_NV10 0x5880 #define mmSDMA0_UCODE_DATA_NV10 0x5881 +/* memory training timeout define */ +#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 static int psp_v11_0_init_microcode(struct psp_context *psp) { @@ -60,6 +73,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) int err = 0; const struct psp_firmware_header_v1_0 *sos_hdr; const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; + const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; const struct psp_firmware_header_v1_0 *asd_hdr; const struct ta_firmware_header_v1_0 *ta_hdr; @@ -72,6 +86,15 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; default: BUG(); } @@ -107,6 +130,12 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes); } + if (sos_hdr->header.header_version_minor == 2) { + sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data; + adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes); + adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes); + } break; default: dev_err(adev->dev, @@ -133,6 +162,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) switch (adev->asic_type) { case CHIP_VEGA20: + case CHIP_ARCTURUS: snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); if (err) { @@ -158,6 +188,33 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) } break; case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + dev_info(adev->dev, + "psp v11.0: Failed to load firmware \"%s\"\n", fw_name); + } else { + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; + + ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; + adev->psp.ta_hdcp_ucode_version = le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); + adev->psp.ta_hdcp_ucode_size = le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); + adev->psp.ta_hdcp_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + + adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); + + adev->psp.ta_dtm_ucode_version = le32_to_cpu(ta_hdr->ta_dtm_ucode_version); + adev->psp.ta_dtm_ucode_size = le32_to_cpu(ta_hdr->ta_dtm_size_bytes); + adev->psp.ta_dtm_start_addr = (uint8_t *)adev->psp.ta_hdcp_start_addr + + le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + } break; default: BUG(); @@ -180,26 +237,55 @@ out: return err; } +int psp_v11_0_wait_for_bootloader(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + int ret; + int retry_loop; + + for (retry_loop = 0; retry_loop < 10; retry_loop++) { + /* Wait for bootloader to signify that is + ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, + 0x80000000, + false); + + if (ret == 0) + return 0; + } + + return ret; +} + +static bool psp_v11_0_is_sos_alive(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + return sol_reg != 0x0; +} + static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) { int ret; uint32_t psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; - uint32_t sol_reg; /* Check tOS sign of life register to confirm sys driver and sOS * are already been loaded. */ - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - if (sol_reg) { + if (psp_v11_0_is_sos_alive(psp)) { psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version); return 0; } - /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_v11_0_wait_for_bootloader(psp); if (ret) return ret; @@ -208,16 +294,14 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) /* Copy PSP KDB binary to memory */ memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size); - /* Provide the sys driver to bootloader */ + /* Provide the PSP KDB to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); - /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1*/ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_v11_0_wait_for_bootloader(psp); return ret; } @@ -227,21 +311,17 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) int ret; uint32_t psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; - uint32_t sol_reg; /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. */ - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - if (sol_reg) { + if (psp_v11_0_is_sos_alive(psp)) { psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version); return 0; } - /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_v11_0_wait_for_bootloader(psp); if (ret) return ret; @@ -260,8 +340,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_v11_0_wait_for_bootloader(psp); return ret; } @@ -271,18 +350,14 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) int ret; unsigned int psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; - uint32_t sol_reg; /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. */ - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - if (sol_reg) + if (psp_v11_0_is_sos_alive(psp)) return 0; - /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_v11_0_wait_for_bootloader(psp); if (ret) return ret; @@ -373,6 +448,34 @@ static bool psp_v11_0_support_vmr_ring(struct psp_context *psp) return false; } +static int psp_v11_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + /* Write the ring destroy command*/ + if (psp_v11_0_support_vmr_ring(psp)) + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) */ + if (psp_v11_0_support_vmr_ring(psp)) + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + + return ret; +} + static int psp_v11_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -382,6 +485,12 @@ static int psp_v11_0_ring_create(struct psp_context *psp, struct amdgpu_device *adev = psp->adev; if (psp_v11_0_support_vmr_ring(psp)) { + ret = psp_v11_0_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n"); + return ret; + } + /* Write low address of the ring to C2PMSG_102 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); @@ -401,6 +510,14 @@ static int psp_v11_0_ring_create(struct psp_context *psp, 0x80000000, 0x8000FFFF, false); } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for sOS ready for ring creation\n"); + return ret; + } + /* Write low address of the ring to C2PMSG_69 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); @@ -426,33 +543,6 @@ static int psp_v11_0_ring_create(struct psp_context *psp, return ret; } -static int psp_v11_0_ring_stop(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - struct amdgpu_device *adev = psp->adev; - - /* Write the ring destroy command*/ - if (psp_v11_0_support_vmr_ring(psp)) - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, - GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); - else - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, - GFX_CTRL_CMD_ID_DESTROY_RINGS); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) */ - if (psp_v11_0_support_vmr_ring(psp)) - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); - else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); - - return ret; -} static int psp_v11_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) @@ -472,63 +562,6 @@ static int psp_v11_0_ring_destroy(struct psp_context *psp, return ret; } -static int psp_v11_0_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index) -{ - unsigned int psp_write_ptr_reg = 0; - struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem; - struct psp_ring *ring = &psp->km_ring; - struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; - struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + - ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; - struct amdgpu_device *adev = psp->adev; - uint32_t ring_size_dw = ring->ring_size / 4; - uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; - - /* KM (GPCOM) prepare write pointer */ - if (psp_v11_0_support_vmr_ring(psp)) - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); - else - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); - - /* Update KM RB frame pointer to new frame */ - /* write_frame ptr increments by size of rb_frame in bytes */ - /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ - if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring_buffer_start; - else - write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); - /* Check invalid write_frame ptr address */ - if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { - DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", - ring_buffer_start, ring_buffer_end, write_frame); - DRM_ERROR("write_frame is pointing to address out of bounds\n"); - return -EINVAL; - } - - /* Initialize KM RB frame */ - memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); - - /* Update KM RB frame */ - write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); - write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); - write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); - write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); - write_frame->fence_value = index; - - /* Update the write Pointer in DWORDs */ - psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - if (psp_v11_0_support_vmr_ring(psp)) { - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); - } else - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); - - return 0; -} - static int psp_v11_0_sram_map(struct amdgpu_device *adev, unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, @@ -865,6 +898,216 @@ static int psp_v11_0_rlc_autoload_start(struct psp_context *psp) return psp_rlc_autoload_start(psp); } +static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) +{ + int ret; + int i; + uint32_t data_32; + int max_wait; + struct amdgpu_device *adev = psp->adev; + + data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg); + + max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; + for (i = 0; i < max_wait; i++) { + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret == 0) + break; + } + if (i < max_wait) + ret = 0; + else + ret = -ETIME; + + DRM_DEBUG("training %s %s, cost %d @ %d ms\n", + (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long", + (ret == 0) ? "succeed" : "failed", + i, adev->usec_timeout/1000); + return ret; +} + +static void psp_v11_0_memory_training_fini(struct psp_context *psp) +{ + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + + ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; + kfree(ctx->sys_cache); + ctx->sys_cache = NULL; +} + +static int psp_v11_0_memory_training_init(struct psp_context *psp) +{ + int ret; + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + + if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) { + DRM_DEBUG("memory training is not supported!\n"); + return 0; + } + + ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL); + if (ctx->sys_cache == NULL) { + DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n"); + ret = -ENOMEM; + goto Err_out; + } + + DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); + ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS; + return 0; + +Err_out: + psp_v11_0_memory_training_fini(psp); + return ret; +} + +/* + * save and restore proces + */ +static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) +{ + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + uint32_t *pcache = (uint32_t*)ctx->sys_cache; + struct amdgpu_device *adev = psp->adev; + uint32_t p2c_header[4]; + uint32_t sz; + void *buf; + int ret; + + if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { + DRM_DEBUG("Memory training is not supported.\n"); + return 0; + } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) { + DRM_ERROR("Memory training initialization failure.\n"); + return -EINVAL; + } + + if (psp_v11_0_is_sos_alive(psp)) { + DRM_DEBUG("SOS is alive, skip memory training.\n"); + return 0; + } + + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); + DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", + pcache[0], pcache[1], pcache[2], pcache[3], + p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + DRM_DEBUG("Short training depends on restore.\n"); + ops |= PSP_MEM_TRAIN_RESTORE; + } + + if ((ops & PSP_MEM_TRAIN_RESTORE) && + pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + DRM_DEBUG("sys_cache[0] is invalid, restore depends on save.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + pcache[3] == p2c_header[3])) { + DRM_DEBUG("sys_cache is invalid or out-of-date, need save training data to sys_cache.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if ((ops & PSP_MEM_TRAIN_SAVE) && + p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + DRM_DEBUG("p2c_header[0] is invalid, save depends on long training.\n"); + ops |= PSP_MEM_TRAIN_SEND_LONG_MSG; + } + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG; + ops |= PSP_MEM_TRAIN_SAVE; + } + + DRM_DEBUG("Memory training ops:%x.\n", ops); + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + /* + * Long traing will encroach certain mount of bottom VRAM, + * saving the content of this bottom VRAM to system memory + * before training, and restoring it after training to avoid + * VRAM corruption. + */ + sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + + if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { + DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", + adev->gmc.visible_vram_size, + adev->mman.aper_base_kaddr); + return -EINVAL; + } + + buf = vmalloc(sz); + if (!buf) { + DRM_ERROR("failed to allocate system memory.\n"); + return -ENOMEM; + } + + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); + ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); + if (ret) { + DRM_ERROR("Send long training msg failed.\n"); + vfree(buf); + return ret; + } + + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + adev->nbio.funcs->hdp_flush(adev, NULL); + vfree(buf); + } + + if (ops & PSP_MEM_TRAIN_SAVE) { + amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false); + } + + if (ops & PSP_MEM_TRAIN_RESTORE) { + amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true); + } + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + ret = psp_v11_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ? + PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN); + if (ret) { + DRM_ERROR("send training msg failed.\n"); + return ret; + } + } + ctx->training_cnt++; + return 0; +} + +static uint32_t psp_v11_0_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (psp_v11_0_support_vmr_ring(psp)) + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + return data; +} + +static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (psp_v11_0_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + static const struct psp_funcs psp_v11_0_funcs = { .init_microcode = psp_v11_0_init_microcode, .bootloader_load_kdb = psp_v11_0_bootloader_load_kdb, @@ -874,7 +1117,6 @@ static const struct psp_funcs psp_v11_0_funcs = { .ring_create = psp_v11_0_ring_create, .ring_stop = psp_v11_0_ring_stop, .ring_destroy = psp_v11_0_ring_destroy, - .cmd_submit = psp_v11_0_cmd_submit, .compare_sram_data = psp_v11_0_compare_sram_data, .mode1_reset = psp_v11_0_mode1_reset, .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info, @@ -885,6 +1127,11 @@ static const struct psp_funcs psp_v11_0_funcs = { .ras_trigger_error = psp_v11_0_ras_trigger_error, .ras_cure_posion = psp_v11_0_ras_cure_posion, .rlc_autoload_start = psp_v11_0_rlc_autoload_start, + .mem_training_init = psp_v11_0_memory_training_init, + .mem_training_fini = psp_v11_0_memory_training_fini, + .mem_training = psp_v11_0_memory_training, + .ring_get_wptr = psp_v11_0_ring_get_wptr, + .ring_set_wptr = psp_v11_0_ring_set_wptr, }; void psp_v11_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c new file mode 100644 index 000000000000..58d8b6d732e8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -0,0 +1,534 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v12_0.h" + +#include "mp/mp_12_0_0_offset.h" +#include "mp/mp_12_0_0_sh_mask.h" +#include "gc/gc_9_0_offset.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "nbio/nbio_7_4_offset.h" + +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/renoir_asd.bin"); +/* address block */ +#define smnMP1_FIRMWARE_FLAGS 0x3010024 + +static int psp_v12_0_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + const char *chip_name; + char fw_name[30]; + int err = 0; + const struct psp_firmware_header_v1_0 *asd_hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_RENOIR: + chip_name = "renoir"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); + err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev); + if (err) + goto out1; + + err = amdgpu_ucode_validate(adev->psp.asd_fw); + if (err) + goto out1; + + asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; + adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version); + adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version); + adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes); + adev->psp.asd_start_addr = (uint8_t *)asd_hdr + + le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes); + + return 0; + +out1: + release_firmware(adev->psp.asd_fw); + adev->psp.asd_fw = NULL; + + return err; +} + +static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) { + psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + printk("sos fw version = 0x%x.\n", psp->sos_fw_version); + return 0; + } + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy PSP System Driver binary to memory */ + memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); + + /* Provide the sys driver to bootloader */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = 1 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + + return ret; +} + +static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) +{ + int ret; + unsigned int psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) + return 0; + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy Secure OS binary to PSP memory */ + memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + + /* Provide the PSP secure OS to bootloader */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = 2 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), + 0, true); + + return ret; +} + +static void psp_v12_0_reroute_ih(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t tmp; + + /* Change IH ring for VMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + + /* Change IH ring for UMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); +} + +static int psp_v12_0_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + psp_v12_0_reroute_ih(psp); + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + return 0; +} + +static bool psp_v12_0_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045) + return true; + return false; +} + +static int psp_v12_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (psp_v12_0_support_vmr_ring(psp)) { + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v12_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + /* Write the ring destroy command*/ + if (psp_v12_0_support_vmr_ring(psp)) + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) */ + if (psp_v12_0_support_vmr_ring(psp)) + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + + return ret; +} + +static int psp_v12_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v12_0_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static int +psp_v12_0_sram_map(struct amdgpu_device *adev, + unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) +{ + int ret = 0; + + switch (ucode_id) { +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SMC: + *sram_offset = 0; + *sram_addr_reg_offset = 0; + *sram_data_reg_offset = 0; + break; +#endif + + case AMDGPU_UCODE_ID_CP_CE: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_PFP: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_ME: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC1: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC2: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_RLC_G: + *sram_offset = 0x2000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_SDMA0: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + break; + +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SDMA1: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_UVD: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_VCE: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; +#endif + + case AMDGPU_UCODE_ID_MAXIMUM: + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static bool psp_v12_0_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type) +{ + int err = 0; + unsigned int fw_sram_reg_val = 0; + unsigned int fw_sram_addr_reg_offset = 0; + unsigned int fw_sram_data_reg_offset = 0; + unsigned int ucode_size; + uint32_t *ucode_mem = NULL; + struct amdgpu_device *adev = psp->adev; + + err = psp_v12_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset, + &fw_sram_data_reg_offset, ucode_type); + if (err) + return false; + + WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val); + + ucode_size = ucode->ucode_size; + ucode_mem = (uint32_t *)ucode->kaddr; + while (ucode_size) { + fw_sram_reg_val = RREG32(fw_sram_data_reg_offset); + + if (*ucode_mem != fw_sram_reg_val) + return false; + + ucode_mem++; + /* 4 bytes */ + ucode_size -= 4; + } + + return true; +} + +static int psp_v12_0_mode1_reset(struct psp_context *psp) +{ + int ret; + uint32_t offset; + struct amdgpu_device *adev = psp->adev; + + offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); + + ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + + if (ret) { + DRM_INFO("psp is not working correctly before mode1 reset!\n"); + return -EINVAL; + } + + /*send the mode 1 reset command*/ + WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); + + msleep(500); + + offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); + + ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); + + if (ret) { + DRM_INFO("psp mode 1 reset failed!\n"); + return -EINVAL; + } + + DRM_INFO("psp mode1 reset succeed \n"); + + return 0; +} + +static uint32_t psp_v12_0_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (psp_v12_0_support_vmr_ring(psp)) + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + return data; +} + +static void psp_v12_0_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (psp_v12_0_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + +static const struct psp_funcs psp_v12_0_funcs = { + .init_microcode = psp_v12_0_init_microcode, + .bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv, + .bootloader_load_sos = psp_v12_0_bootloader_load_sos, + .ring_init = psp_v12_0_ring_init, + .ring_create = psp_v12_0_ring_create, + .ring_stop = psp_v12_0_ring_stop, + .ring_destroy = psp_v12_0_ring_destroy, + .compare_sram_data = psp_v12_0_compare_sram_data, + .mode1_reset = psp_v12_0_mode1_reset, + .ring_get_wptr = psp_v12_0_ring_get_wptr, + .ring_set_wptr = psp_v12_0_ring_set_wptr, +}; + +void psp_v12_0_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v12_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h new file mode 100644 index 000000000000..241693ab1990 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V12_0_H__ +#define __PSP_V12_0_H__ + +#include "amdgpu_psp.h" + +void psp_v12_0_set_psp_funcs(struct psp_context *psp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 019c47feee42..735c43c7daab 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -179,7 +179,7 @@ static bool psp_v3_1_match_version(struct amdgpu_device *adev, uint32_t ver) * Double check if the latest four legacy versions. * If yes, it is still the right version. */ - for (i = 0; i < sizeof(sos_old_versions) / sizeof(uint32_t); i++) { + for (i = 0; i < ARRAY_SIZE(sos_old_versions); i++) { if (sos_old_versions[i] == adev->psp.sos_fw_version) return true; } @@ -410,65 +410,6 @@ static int psp_v3_1_ring_destroy(struct psp_context *psp, return ret; } -static int psp_v3_1_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index) -{ - unsigned int psp_write_ptr_reg = 0; - struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; - struct psp_ring *ring = &psp->km_ring; - struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; - struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + - ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; - struct amdgpu_device *adev = psp->adev; - uint32_t ring_size_dw = ring->ring_size / 4; - uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; - - /* KM (GPCOM) prepare write pointer */ - if (psp_v3_1_support_vmr_ring(psp)) - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); - else - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); - - /* Update KM RB frame pointer to new frame */ - /* write_frame ptr increments by size of rb_frame in bytes */ - /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ - if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring_buffer_start; - else - write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); - /* Check invalid write_frame ptr address */ - if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { - DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", - ring_buffer_start, ring_buffer_end, write_frame); - DRM_ERROR("write_frame is pointing to address out of bounds\n"); - return -EINVAL; - } - - /* Initialize KM RB frame */ - memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); - - /* Update KM RB frame */ - write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); - write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); - write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); - write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); - write_frame->fence_value = index; - - /* Update the write Pointer in DWORDs */ - psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - if (psp_v3_1_support_vmr_ring(psp)) { - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); - /* send interrupt to PSP for SRIOV ring write pointer update */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, - GFX_CTRL_CMD_ID_CONSUME_CMD); - } else - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); - - return 0; -} - static int psp_v3_1_sram_map(struct amdgpu_device *adev, unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, @@ -636,12 +577,37 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) static bool psp_v3_1_support_vmr_ring(struct psp_context *psp) { - if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455) + if (amdgpu_sriov_vf(psp->adev)) return true; return false; } +static uint32_t psp_v3_1_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (psp_v3_1_support_vmr_ring(psp)) + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + return data; +} + +static void psp_v3_1_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (psp_v3_1_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value); + /* send interrupt to PSP for SRIOV ring write pointer update */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + static const struct psp_funcs psp_v3_1_funcs = { .init_microcode = psp_v3_1_init_microcode, .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv, @@ -650,11 +616,12 @@ static const struct psp_funcs psp_v3_1_funcs = { .ring_create = psp_v3_1_ring_create, .ring_stop = psp_v3_1_ring_stop, .ring_destroy = psp_v3_1_ring_destroy, - .cmd_submit = psp_v3_1_cmd_submit, .compare_sram_data = psp_v3_1_compare_sram_data, .smu_reload_quirk = psp_v3_1_smu_reload_quirk, .mode1_reset = psp_v3_1_mode1_reset, .support_vmr_ring = psp_v3_1_support_vmr_ring, + .ring_get_wptr = psp_v3_1_ring_get_wptr, + .ring_set_wptr = psp_v3_1_ring_set_wptr, }; void psp_v3_1_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index a10175838013..7d509a40076f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); /* IB packet must end on a 8 DW boundary */ - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib u32 pad_count; int i; - pad_count = (8 - (ib->length_dw & 0x7)) % 8; + pad_count = (-ib->length_dw) & 7; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = @@ -1260,16 +1260,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) { - struct drm_gpu_scheduler *sched; unsigned i; adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v2_4_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 5f4e2c616241..b6109a99fc43 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); /* IB packet must end on a 8 DW boundary */ - sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib u32 pad_count; int i; - pad_count = (8 - (ib->length_dw & 0x7)) % 8; + pad_count = (-ib->length_dw) & 7; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = @@ -1698,16 +1698,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) { - struct drm_gpu_scheduler *sched; unsigned i; adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v3_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 4428018672d3..e55884d204bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -34,6 +34,18 @@ #include "sdma0/sdma0_4_2_sh_mask.h" #include "sdma1/sdma1_4_2_offset.h" #include "sdma1/sdma1_4_2_sh_mask.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma2/sdma2_4_2_2_sh_mask.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_sh_mask.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_sh_mask.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_sh_mask.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_sh_mask.h" +#include "sdma7/sdma7_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_sh_mask.h" #include "hdp/hdp_4_0_offset.h" #include "sdma0/sdma0_4_1_default.h" @@ -55,6 +67,8 @@ MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin"); MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); MODULE_FIRMWARE("amdgpu/picasso_sdma.bin"); MODULE_FIRMWARE("amdgpu/raven2_sdma.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin"); +MODULE_FIRMWARE("amdgpu/renoir_sdma.bin"); #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -68,6 +82,7 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); +static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev); static const struct soc15_reg_golden golden_settings_sdma_4[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), @@ -202,25 +217,232 @@ static const struct soc15_reg_golden golden_settings_sdma_rv2[] = SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001) }; +static const struct soc15_reg_golden golden_settings_sdma_arct[] = +{ + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002) +}; + +static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe) +}; + +static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = { + { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED), + 0, 0, + }, + { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED), + 0, 0, + }, + { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED), + 0, 0, + }, + { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED), + 0, 0, + }, + { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED), + 0, 0, + }, + { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED), + 0, 0, + }, + { "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED), + 0, 0, + }, + { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED), + 0, 0, + }, +}; + static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) { - return ( 0 == instance ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) : - (adev->reg_offset[SDMA1_HWIP][0][0] + offset)); + switch (instance) { + case 0: + return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); + case 1: + return (adev->reg_offset[SDMA1_HWIP][0][0] + offset); + case 2: + return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); + case 3: + return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); + case 4: + return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); + case 5: + return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); + case 6: + return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); + case 7: + return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); + default: + break; + } + return 0; +} + +static unsigned sdma_v4_0_seq_to_irq_id(int seq_num) +{ + switch (seq_num) { + case 0: + return SOC15_IH_CLIENTID_SDMA0; + case 1: + return SOC15_IH_CLIENTID_SDMA1; + case 2: + return SOC15_IH_CLIENTID_SDMA2; + case 3: + return SOC15_IH_CLIENTID_SDMA3; + case 4: + return SOC15_IH_CLIENTID_SDMA4; + case 5: + return SOC15_IH_CLIENTID_SDMA5; + case 6: + return SOC15_IH_CLIENTID_SDMA6; + case 7: + return SOC15_IH_CLIENTID_SDMA7; + default: + break; + } + return -EINVAL; +} + +static int sdma_v4_0_irq_id_to_seq(unsigned client_id) +{ + switch (client_id) { + case SOC15_IH_CLIENTID_SDMA0: + return 0; + case SOC15_IH_CLIENTID_SDMA1: + return 1; + case SOC15_IH_CLIENTID_SDMA2: + return 2; + case SOC15_IH_CLIENTID_SDMA3: + return 3; + case SOC15_IH_CLIENTID_SDMA4: + return 4; + case SOC15_IH_CLIENTID_SDMA5: + return 5; + case SOC15_IH_CLIENTID_SDMA6: + return 6; + case SOC15_IH_CLIENTID_SDMA7: + return 7; + default: + break; + } + return -EINVAL; } static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - if (!amdgpu_virt_support_skip_setting(adev)) { - soc15_program_register_sequence(adev, - golden_settings_sdma_4, - ARRAY_SIZE(golden_settings_sdma_4)); - soc15_program_register_sequence(adev, - golden_settings_sdma_vg10, - ARRAY_SIZE(golden_settings_sdma_vg10)); - } + soc15_program_register_sequence(adev, + golden_settings_sdma_4, + ARRAY_SIZE(golden_settings_sdma_4)); + soc15_program_register_sequence(adev, + golden_settings_sdma_vg10, + ARRAY_SIZE(golden_settings_sdma_vg10)); break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -241,6 +463,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma1_4_2, ARRAY_SIZE(golden_settings_sdma1_4_2)); break; + case CHIP_ARCTURUS: + soc15_program_register_sequence(adev, + golden_settings_sdma_arct, + ARRAY_SIZE(golden_settings_sdma_arct)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_sdma_4_1, @@ -254,11 +481,53 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_rv1, ARRAY_SIZE(golden_settings_sdma_rv1)); break; + case CHIP_RENOIR: + soc15_program_register_sequence(adev, + golden_settings_sdma_4_3, + ARRAY_SIZE(golden_settings_sdma_4_3)); + break; default: break; } } +static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) +{ + int err = 0; + const struct sdma_firmware_header_v1_0 *hdr; + + err = amdgpu_ucode_validate(sdma_inst->fw); + if (err) + return err; + + hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; + sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); + sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); + + if (sdma_inst->feature_version >= 20) + sdma_inst->burst_nop = true; + + return 0; +} + +static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.instance[i].fw != NULL) + release_firmware(adev->sdma.instance[i].fw); + + /* arcturus shares the same FW memory across + all SDMA isntances */ + if (adev->asic_type == CHIP_ARCTURUS) + break; + } + + memset((void*)adev->sdma.instance, 0, + sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); +} + /** * sdma_v4_0_init_microcode - load ucode images from disk * @@ -278,7 +547,6 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) int err = 0, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; - const struct sdma_firmware_header_v1_0 *hdr; DRM_DEBUG("\n"); @@ -300,30 +568,52 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) else chip_name = "raven"; break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; + case CHIP_RENOIR: + chip_name = "renoir"; + break; default: BUG(); } - for (i = 0; i < adev->sdma.num_instances; i++) { - if (i == 0) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); - else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); - err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); - if (err) - goto out; - hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; - adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); - adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); - if (adev->sdma.instance[i].feature_version >= 20) - adev->sdma.instance[i].burst_nop = true; - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + + err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); + if (err) + goto out; + + err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]); + if (err) + goto out; + + for (i = 1; i < adev->sdma.num_instances; i++) { + if (adev->asic_type == CHIP_ARCTURUS) { + /* Acturus will leverage the same FW memory + for every SDMA instance */ + memcpy((void*)&adev->sdma.instance[i], + (void*)&adev->sdma.instance[0], + sizeof(struct amdgpu_sdma_instance)); + } + else { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); + + err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); + if (err) + goto out; + + err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]); + if (err) + goto out; + } + } + + DRM_DEBUG("psp_load == '%s'\n", + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + for (i = 0; i < adev->sdma.num_instances; i++) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; info->fw = adev->sdma.instance[i].fw; @@ -332,13 +622,11 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); } } + out: if (err) { DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - } + sdma_v4_0_destroy_inst_ctx(adev); } return err; } @@ -510,7 +798,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); /* IB packet must end on a 8 DW boundary */ - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); @@ -559,16 +847,13 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask = 0; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - if (ring->me == 0) - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; - else - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; sdma_v4_0_wait_reg_mem(ring, 0, 1, - adev->nbio_funcs->get_hdp_flush_done_offset(adev), - adev->nbio_funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), + adev->nbio.funcs->get_hdp_flush_req_offset(adev), ref_and_mask, ref_and_mask, 10); } @@ -620,26 +905,27 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; + struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; - int i; + int i, unset = 0; + + for (i = 0; i < adev->sdma.num_instances; i++) { + sdma[i] = &adev->sdma.instance[i].ring; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) + if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { amdgpu_ttm_set_buffer_funcs_status(adev, false); + unset = 1; + } - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); - } - sdma0->sched.ready = false; - sdma1->sched.ready = false; + sdma[i]->sched.ready = false; + } } /** @@ -663,16 +949,20 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page; + struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; - - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + bool unset = false; for (i = 0; i < adev->sdma.num_instances; i++) { + sdma[i] = &adev->sdma.instance[i].page; + + if ((adev->mman.buffer_funcs_ring == sdma[i]) && + (unset == false)) { + amdgpu_ttm_set_buffer_funcs_status(adev, false); + unset = true; + } + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 0); @@ -681,10 +971,9 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); - } - sdma0->sched.ready = false; - sdma1->sched.ready = false; + sdma[i]->sched.ready = false; + } } /** @@ -1018,6 +1307,7 @@ static void sdma_v4_0_init_pg(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: sdma_v4_1_init_power_gating(adev); sdma_v4_1_update_power_gating(adev, true); break; @@ -1389,7 +1679,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib u32 pad_count; int i; - pad_count = (8 - (ib->length_dw & 0x7)) % 8; + pad_count = (-ib->length_dw) & 7; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = @@ -1473,8 +1763,10 @@ static int sdma_v4_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - if (adev->asic_type == CHIP_RAVEN) + if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) adev->sdma.num_instances = 1; + else if (adev->asic_type == CHIP_ARCTURUS) + adev->sdma.num_instances = 8; else adev->sdma.num_instances = 2; @@ -1494,109 +1786,33 @@ static int sdma_v4_0_early_init(void *handle) sdma_v4_0_set_buffer_funcs(adev); sdma_v4_0_set_vm_pte_funcs(adev); sdma_v4_0_set_irq_funcs(adev); + sdma_v4_0_set_ras_funcs(adev); return 0; } static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, struct amdgpu_iv_entry *entry); static int sdma_v4_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_common_if **ras_if = &adev->sdma.ras_if; struct ras_ih_if ih_info = { .cb = sdma_v4_0_process_ras_data_cb, }; - struct ras_fs_if fs_info = { - .sysfs_name = "sdma_err_count", - .debugfs_name = "sdma_err_inject", - }; - struct ras_common_if ras_block = { - .block = AMDGPU_RAS_BLOCK__SDMA, - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - .sub_block_index = 0, - .name = "sdma", - }; - int r; - - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { - amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); - return 0; - } - - /* handle resume path. */ - if (*ras_if) { - /* resend ras TA enable cmd during resume. - * prepare to handle failure. - */ - ih_info.head = **ras_if; - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - /* request a gpu reset. will run again. */ - amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__SDMA); - return 0; - } - /* fail to enable ras, cleanup all. */ - goto irq; - } - /* enable successfully. continue. */ - goto resume; - } - - *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); - if (!*ras_if) - return -ENOMEM; - - **ras_if = ras_block; + int i; - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__SDMA); - r = 0; - } - goto feature; + /* read back edc counter registers to clear the counters */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for (i = 0; i < adev->sdma.num_instances; i++) + RREG32_SDMA(i, mmSDMA0_EDC_COUNTER); } - ih_info.head = **ras_if; - fs_info.head = **ras_if; - - r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); - if (r) - goto interrupt; - - amdgpu_ras_debugfs_create(adev, &fs_info); - - r = amdgpu_ras_sysfs_create(adev, &fs_info); - if (r) - goto sysfs; -resume: - r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); - if (r) - goto irq; - - r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); - if (r) { - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); - goto irq; - } - - return 0; -irq: - amdgpu_ras_sysfs_remove(adev, *ras_if); -sysfs: - amdgpu_ras_debugfs_remove(adev, *ras_if); - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); -interrupt: - amdgpu_ras_feature_enable(adev, *ras_if, 0); -feature: - kfree(*ras_if); - *ras_if = NULL; - return r; + if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) + return adev->sdma.funcs->ras_late_init(adev, &ih_info); + else + return 0; } static int sdma_v4_0_sw_init(void *handle) @@ -1606,28 +1822,22 @@ static int sdma_v4_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA SRAM ECC event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC, - &adev->sdma.ecc_irq); - if (r) - return r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), + SDMA0_4_0__SRCID__SDMA_TRAP, + &adev->sdma.trap_irq); + if (r) + return r; + } /* SDMA SRAM ECC event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC, - &adev->sdma.ecc_irq); - if (r) - return r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), + SDMA0_4_0__SRCID__SDMA_SRAM_ECC, + &adev->sdma.ecc_irq); + if (r) + return r; + } for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; @@ -1641,11 +1851,8 @@ static int sdma_v4_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; sprintf(ring->name, "sdma%d", i); - r = amdgpu_ring_init(adev, ring, 1024, - &adev->sdma.trap_irq, - (i == 0) ? - AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); if (r) return r; @@ -1663,9 +1870,7 @@ static int sdma_v4_0_sw_init(void *handle) sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, - (i == 0) ? - AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + AMDGPU_SDMA_IRQ_INSTANCE0 + i); if (r) return r; } @@ -1679,21 +1884,8 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && - adev->sdma.ras_if) { - struct ras_common_if *ras_if = adev->sdma.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - }; - - /*remove fs first*/ - amdgpu_ras_debugfs_remove(adev, ras_if); - amdgpu_ras_sysfs_remove(adev, ras_if); - /*remove the IH*/ - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); - amdgpu_ras_feature_enable(adev, ras_if, 0); - kfree(ras_if); - } + if (adev->sdma.funcs && adev->sdma.funcs->ras_fini) + adev->sdma.funcs->ras_fini(adev); for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); @@ -1701,10 +1893,7 @@ static int sdma_v4_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - } + sdma_v4_0_destroy_inst_ctx(adev); return 0; } @@ -1714,11 +1903,13 @@ static int sdma_v4_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) + if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) || + (adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset)) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); - sdma_v4_0_init_golden_registers(adev); + if (!amdgpu_sriov_vf(adev)) + sdma_v4_0_init_golden_registers(adev); r = sdma_v4_0_start(adev); @@ -1728,18 +1919,22 @@ static int sdma_v4_0_hw_init(void *handle) static int sdma_v4_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; if (amdgpu_sriov_vf(adev)) return 0; - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); + for (i = 0; i < adev->sdma.num_instances; i++) { + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + } sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); - if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs - && adev->powerplay.pp_funcs->set_powergating_by_smu) + if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs + && adev->powerplay.pp_funcs->set_powergating_by_smu) || + adev->asic_type == CHIP_RENOIR) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); return 0; @@ -1776,15 +1971,17 @@ static bool sdma_v4_0_is_idle(void *handle) static int sdma_v4_0_wait_for_idle(void *handle) { - unsigned i; - u32 sdma0, sdma1; + unsigned i, j; + u32 sdma[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG); - sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG); - - if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) + for (j = 0; j < adev->sdma.num_instances; j++) { + sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG); + if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK)) + break; + } + if (j == adev->sdma.num_instances) return 0; udelay(1); } @@ -1820,17 +2017,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, uint32_t instance; DRM_DEBUG("IH: SDMA trap\n"); - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: - return 0; - } - + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); @@ -1851,55 +2038,26 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, } static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, struct amdgpu_iv_entry *entry) { - uint32_t instance, err_source; - - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: - return 0; - } - - switch (entry->src_id) { - case SDMA0_4_0__SRCID__SDMA_SRAM_ECC: - err_source = 0; - break; - case SDMA0_4_0__SRCID__SDMA_ECC: - err_source = 1; - break; - default: - return 0; - } - - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - - amdgpu_ras_reset_gpu(adev, 0); - - return AMDGPU_RAS_UE; -} + int instance; -static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - struct ras_common_if *ras_if = adev->sdma.ras_if; - struct ras_dispatch_if ih_data = { - .entry = entry, - }; + /* When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + goto out; - if (!ras_if) - return 0; + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) + goto out; - ih_data.head = *ras_if; + amdgpu_sdma_process_ras_data_cb(adev, err_data, entry); - amdgpu_ras_interrupt_dispatch(adev, &ih_data); - return 0; +out: + return AMDGPU_RAS_SUCCESS; } static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, @@ -1910,16 +2068,9 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, DRM_ERROR("Illegal instruction in SDMA command stream\n"); - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) return 0; - } switch (entry->ring_id) { case 0: @@ -1936,14 +2087,10 @@ static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev, { u32 sdma_edc_config; - u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ? - sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG) : - sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG); - - sdma_edc_config = RREG32(reg_offset); + sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG); sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(reg_offset, sdma_edc_config); + WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config); return 0; } @@ -1953,61 +2100,35 @@ static void sdma_v4_0_update_medium_grain_clock_gating( bool enable) { uint32_t data, def; + int i; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { - /* enable sdma0 clock gating */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); - data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); - data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); + for (i = 0; i < adev->sdma.num_instances; i++) { + def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); + WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data); } } else { - /* disable sdma0 clock gating */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); - data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); - - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); - data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); + for (i = 0; i < adev->sdma.num_instances; i++) { + def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); + data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); + WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data); } } } @@ -2018,34 +2139,23 @@ static void sdma_v4_0_update_medium_grain_light_sleep( bool enable) { uint32_t data, def; + int i; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { - /* 1-not override: enable sdma0 mem light sleep */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); - data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); - - /* 1-not override: enable sdma1 mem light sleep */ - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); - data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + for (i = 0; i < adev->sdma.num_instances; i++) { + /* 1-not override: enable sdma mem light sleep */ + def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL); + data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); + WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data); } } else { - /* 0-override:disable sdma0 mem light sleep */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); - data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); - - /* 0-override:disable sdma1 mem light sleep */ - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); - data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + for (i = 0; i < adev->sdma.num_instances; i++) { + /* 0-override:disable sdma mem light sleep */ + def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL); + data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); + WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data); } } } @@ -2063,10 +2173,12 @@ static int sdma_v4_0_set_clockgating_state(void *handle, case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: sdma_v4_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); sdma_v4_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -2133,7 +2245,43 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_ring_get_wptr, + .set_wptr = sdma_v4_0_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/* + * On Arcturus, SDMA instance 5~7 has a different vmhub type(AMDGPU_MMHUB_1). + * So create a individual constant ring_funcs for those instances. + */ +static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, .set_wptr = sdma_v4_0_ring_set_wptr, @@ -2165,7 +2313,39 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_page_ring_get_wptr, + .set_wptr = sdma_v4_0_page_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_page_ring_get_wptr, .set_wptr = sdma_v4_0_page_ring_set_wptr, @@ -2197,10 +2377,20 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->sdma.num_instances; i++) { - adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; + if (adev->asic_type == CHIP_ARCTURUS && i >= 5) + adev->sdma.instance[i].ring.funcs = + &sdma_v4_0_ring_funcs_2nd_mmhub; + else + adev->sdma.instance[i].ring.funcs = + &sdma_v4_0_ring_funcs; adev->sdma.instance[i].ring.me = i; if (adev->sdma.has_page_queue) { - adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; + if (adev->asic_type == CHIP_ARCTURUS && i >= 5) + adev->sdma.instance[i].page.funcs = + &sdma_v4_0_page_ring_funcs_2nd_mmhub; + else + adev->sdma.instance[i].page.funcs = + &sdma_v4_0_page_ring_funcs; adev->sdma.instance[i].page.me = i; } } @@ -2217,17 +2407,30 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = { static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = { .set = sdma_v4_0_set_ecc_irq_state, - .process = sdma_v4_0_process_ecc_irq, + .process = amdgpu_sdma_process_ecc_irq, }; static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + switch (adev->sdma.num_instances) { + case 1: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; + break; + case 8: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + break; + case 2: + default: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; + break; + } adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs; } @@ -2293,8 +2496,8 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = { static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) { adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; - if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) - adev->mman.buffer_funcs_ring = &adev->sdma.instance[1].page; + if (adev->sdma.has_page_queue) + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page; else adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } @@ -2313,21 +2516,77 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) unsigned i; adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; - if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) { - for (i = 1; i < adev->sdma.num_instances; i++) { + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.has_page_queue) sched = &adev->sdma.instance[i].page.sched; - adev->vm_manager.vm_pte_rqs[i - 1] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances - 1; - adev->vm_manager.page_fault = &adev->sdma.instance[0].page; - } else { - for (i = 0; i < adev->sdma.num_instances; i++) { + else sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = sched; + } + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; +} + +static void sdma_v4_0_get_ras_error_count(uint32_t value, + uint32_t instance, + uint32_t *sec_count) +{ + uint32_t i; + uint32_t sec_cnt; + + /* double bits error (multiple bits) error detection is not supported */ + for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) { + /* the SDMA_EDC_COUNTER register in each sdma instance + * shares the same sed shift_mask + * */ + sec_cnt = (value & + sdma_v4_0_ras_fields[i].sec_count_mask) >> + sdma_v4_0_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("Detected %s in SDMA%d, SED %d\n", + sdma_v4_0_ras_fields[i].name, + instance, sec_cnt); + *sec_count += sec_cnt; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + } +} + +static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, + uint32_t instance, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0; + uint32_t reg_value = 0; + + reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER); + /* double bit error is not supported */ + if (reg_value) + sdma_v4_0_get_ras_error_count(reg_value, + instance, &sec_count); + /* err_data->ce_count should be initialized to 0 + * before calling into this function */ + err_data->ce_count += sec_count; + /* double bit error is not supported + * set ue count to 0 */ + err_data->ue_count = 0; + + return 0; +}; + +static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { + .ras_late_init = amdgpu_sdma_ras_late_init, + .ras_fini = amdgpu_sdma_ras_fini, + .query_ras_error_count = sdma_v4_0_query_ras_error_count, +}; + +static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + adev->sdma.funcs = &sdma_v4_0_ras_funcs; + break; + default: + break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 3747c3f1f0cc..67b9830b7c7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -21,8 +21,11 @@ * */ +#include <linux/delay.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ucode.h" #include "amdgpu_trace.h" @@ -42,6 +45,12 @@ MODULE_FIRMWARE("amdgpu/navi10_sdma.bin"); MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); + +MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); + #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 #define SDMA0_HYP_DEC_REG_END 0x5893 @@ -59,7 +68,7 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), @@ -71,7 +80,7 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), @@ -80,6 +89,18 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = { }; static const struct soc15_reg_golden golden_settings_sdma_nv10[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv14[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), }; static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) @@ -111,6 +132,22 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_nv10, (const u32)ARRAY_SIZE(golden_settings_sdma_nv10)); break; + case CHIP_NAVI14: + soc15_program_register_sequence(adev, + golden_settings_sdma_5, + (const u32)ARRAY_SIZE(golden_settings_sdma_5)); + soc15_program_register_sequence(adev, + golden_settings_sdma_nv14, + (const u32)ARRAY_SIZE(golden_settings_sdma_nv14)); + break; + case CHIP_NAVI12: + soc15_program_register_sequence(adev, + golden_settings_sdma_5, + (const u32)ARRAY_SIZE(golden_settings_sdma_5)); + soc15_program_register_sequence(adev, + golden_settings_sdma_nv12, + (const u32)ARRAY_SIZE(golden_settings_sdma_nv12)); + break; default: break; } @@ -143,6 +180,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; default: BUG(); } @@ -339,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); - /* IB packet must end on a 8 DW boundary */ - sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + /* An IB packet must end on a 8 DW boundary--the next dword + * must be on a 8-dword boundary. Our IB packet below is 6 + * dwords long, thus add x number of NOPs, such that, in + * modular arithmetic, + * wptr + 6 + x = 8k, k >= 0, which in C is, + * (wptr + 6 + x) % 8 = 0. + * The expression below, is a solution of x. + */ + sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); @@ -363,7 +413,7 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask = 0; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; if (ring->me == 0) ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; @@ -373,8 +423,8 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); amdgpu_ring_write(ring, ref_and_mask); /* reference */ amdgpu_ring_write(ring, ref_and_mask); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | @@ -640,7 +690,7 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index, 20); if (amdgpu_sriov_vf(adev)) @@ -861,19 +911,12 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) if (amdgpu_emu_mode == 1) msleep(1); else - DRM_UDELAY(1); + udelay(1); } - if (i < adev->usec_timeout) { - if (amdgpu_emu_mode == 1) - DRM_INFO("ring test on %d succeeded in %d msecs\n", ring->idx, i); - else - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + amdgpu_device_wb_free(adev, index); return r; @@ -938,13 +981,10 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); @@ -1043,10 +1083,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib, } /** - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw - * + * sdma_v5_0_ring_pad_ib - pad the IB * @ib: indirect buffer to fill with padding * + * Pad the IB with NOPs to a boundary multiple of 8. */ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { @@ -1054,7 +1094,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib u32 pad_count; int i; - pad_count = (8 - (ib->length_dw & 0x7)) % 8; + pad_count = (-ib->length_dw) & 0x7; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = @@ -1086,7 +1126,7 @@ static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); amdgpu_ring_write(ring, seq); /* reference */ - amdgpu_ring_write(ring, 0xfffffff); /* mask */ + amdgpu_ring_write(ring, 0xffffffff); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ } @@ -1130,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + /* wait for a cycle to reset vm_inv_eng*_ack */ + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + static int sdma_v5_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1316,7 +1366,7 @@ static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring) if (ring->trail_seq == le32_to_cpu(*(ring->trail_fence_cpu_addr))) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) { @@ -1472,10 +1522,12 @@ static int sdma_v5_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: sdma_v5_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); sdma_v5_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -1532,7 +1584,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v5_0_ring_get_rptr, .get_wptr = sdma_v5_0_ring_get_wptr, .set_wptr = sdma_v5_0_ring_set_wptr, @@ -1543,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ /* sdma_v5_0_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ .emit_ib = sdma_v5_0_ring_emit_ib, @@ -1557,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .pad_ib = sdma_v5_0_ring_pad_ib, .emit_wreg = sdma_v5_0_ring_emit_wreg, .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_0_ring_init_cond_exec, .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, .preempt_ib = sdma_v5_0_ring_preempt_ib, @@ -1583,7 +1636,8 @@ static const struct amdgpu_irq_src_funcs sdma_v5_0_illegal_inst_irq_funcs = { static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + + adev->sdma.num_instances; adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs; } @@ -1664,17 +1718,15 @@ static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = { static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev) { - struct drm_gpu_scheduler *sched; unsigned i; if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 4d74453f3cfb..4d415bfdb42f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -975,6 +975,17 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {GRBM_STATUS}, + {mmGRBM_STATUS2}, + {mmGRBM_STATUS_SE0}, + {mmGRBM_STATUS_SE1}, + {mmSRBM_STATUS}, + {mmSRBM_STATUS2}, + {DMA_STATUS_REG + DMA0_REGISTER_OFFSET}, + {DMA_STATUS_REG + DMA1_REGISTER_OFFSET}, + {mmCP_STAT}, + {mmCP_STALLED_STAT1}, + {mmCP_STALLED_STAT2}, + {mmCP_STALLED_STAT3}, {GB_ADDR_CONFIG}, {MC_ARB_RAMCFG}, {GB_TILE_MODE0}, @@ -1186,6 +1197,17 @@ static int si_asic_reset(struct amdgpu_device *adev) return 0; } +static bool si_asic_supports_baco(struct amdgpu_device *adev) +{ + return false; +} + +static enum amd_reset_method +si_asic_reset_method(struct amdgpu_device *adev) +{ + return AMD_RESET_METHOD_LEGACY; +} + static u32 si_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1394,6 +1416,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .read_bios_from_rom = &si_read_bios_from_rom, .read_register = &si_read_register, .reset = &si_asic_reset, + .reset_method = &si_asic_reset_method, .set_vga_state = &si_vga_set_state, .get_xclk = &si_get_xclk, .set_uvd_clocks = &si_set_uvd_clocks, @@ -1407,6 +1430,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .get_pcie_usage = &si_get_pcie_usage, .need_reset_on_init = &si_need_reset_on_init, .get_pcie_replay_count = &si_get_pcie_replay_count, + .supports_baco = &si_asic_supports_baco, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) @@ -1626,7 +1650,6 @@ static void si_init_golden_registers(struct amdgpu_device *adev) static void si_pcie_gen3_enable(struct amdgpu_device *adev) { struct pci_dev *root = adev->pdev->bus->self; - int bridge_pos, gpu_pos; u32 speed_cntl, current_data_rate; int i; u16 tmp16; @@ -1661,12 +1684,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n"); } - bridge_pos = pci_pcie_cap(root); - if (!bridge_pos) - return; - - gpu_pos = pci_pcie_cap(adev->pdev); - if (!gpu_pos) + if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev)) return; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { @@ -1675,14 +1693,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) u16 bridge_cfg2, gpu_cfg2; u32 max_lw, current_lw, tmp; - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &bridge_cfg); + pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, + &gpu_cfg); tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, + tmp16); tmp = RREG32_PCIE(PCIE_LC_STATUS1); max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT; @@ -1699,15 +1720,23 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) } for (i = 0; i < 10; i++) { - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16); + pcie_capability_read_word(adev->pdev, + PCI_EXP_DEVSTA, + &tmp16); if (tmp16 & PCI_EXP_DEVSTA_TRPND) break; - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &bridge_cfg); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL, + &gpu_cfg); - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2); + pcie_capability_read_word(root, PCI_EXP_LNKCTL2, + &bridge_cfg2); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL2, + &gpu_cfg2); tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); tmp |= LC_SET_QUIESCE; @@ -1719,25 +1748,44 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) mdelay(100); - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &tmp16); tmp16 &= ~PCI_EXP_LNKCTL_HAWD; tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(root, PCI_EXP_LNKCTL, + tmp16); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL, + &tmp16); tmp16 &= ~PCI_EXP_LNKCTL_HAWD; tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); - - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~((1 << 4) | (7 << 9)); - tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9))); - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16); - - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~((1 << 4) | (7 << 9)); - tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9))); - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + pcie_capability_write_word(adev->pdev, + PCI_EXP_LNKCTL, + tmp16); + + pcie_capability_read_word(root, PCI_EXP_LNKCTL2, + &tmp16); + tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN); + tmp16 |= (bridge_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_write_word(root, + PCI_EXP_LNKCTL2, + tmp16); + + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL2, + &tmp16); + tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN); + tmp16 |= (gpu_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_write_word(adev->pdev, + PCI_EXP_LNKCTL2, + tmp16); tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); tmp &= ~LC_SET_QUIESCE; @@ -1750,15 +1798,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~0xf; + pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16); + tmp16 &= ~PCI_EXP_LNKCTL2_TLS; + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) - tmp16 |= 3; + tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) - tmp16 |= 2; + tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */ else - tmp16 |= 1; - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */ + pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16); speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; @@ -1881,7 +1930,7 @@ static void si_program_aspm(struct amdgpu_device *adev) if (orig != data) si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); - if ((adev->family != CHIP_OLAND) && (adev->family != CHIP_HAINAN)) { + if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) { orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); data &= ~PLL_RAMP_UP_TIME_0_MASK; if (orig != data) @@ -1930,14 +1979,14 @@ static void si_program_aspm(struct amdgpu_device *adev) orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL); data &= ~LS2_EXIT_TIME_MASK; - if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN)) + if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) data |= LS2_EXIT_TIME(5); if (orig != data) si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data); orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL); data &= ~LS2_EXIT_TIME_MASK; - if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN)) + if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) data |= LS2_EXIT_TIME(5); if (orig != data) si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index bdda8b4e03f0..42d5601b6bf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -648,7 +648,7 @@ static int si_dma_set_clockgating_state(void *handle, bool enable; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - enable = (state == AMD_CG_STATE_GATE) ? true : false; + enable = (state == AMD_CG_STATE_GATE); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { for (i = 0; i < adev->sdma.num_instances; i++) { @@ -834,16 +834,14 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev) { - struct drm_gpu_scheduler *sched; unsigned i; adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } const struct amdgpu_ip_block_version si_dma_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 57bb5f9e08b2..88ae27a5a03d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -64,7 +64,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev) u32 interrupt_cntl, ih_cntl, ih_rb_cntl; si_ih_disable_interrupts(adev); - WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c new file mode 100644 index 000000000000..c902f26cf50d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -0,0 +1,732 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "smuio/smuio_11_0_0_offset.h" +#include "smuio/smuio_11_0_0_sh_mask.h" + +#include "smu_v11_0_i2c.h" +#include "amdgpu.h" +#include "soc15_common.h" +#include <drm/drm_fixed.h> +#include <drm/drm_drv.h> +#include "amdgpu_amdkfd.h" +#include <linux/i2c.h> +#include <linux/pci.h> +#include "amdgpu_ras.h" + +/* error codes */ +#define I2C_OK 0 +#define I2C_NAK_7B_ADDR_NOACK 1 +#define I2C_NAK_TXDATA_NOACK 2 +#define I2C_TIMEOUT 4 +#define I2C_SW_TIMEOUT 8 +#define I2C_ABORT 0x10 + +/* I2C transaction flags */ +#define I2C_NO_STOP 1 +#define I2C_RESTART 2 + +#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control.eeprom_accessor))->adev +#define to_eeprom_control(x) container_of(x, struct amdgpu_ras_eeprom_control, eeprom_accessor) + +static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg = RREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT); + + reg = REG_SET_FIELD(reg, SMUIO_PWRMGT, i2c_clk_gate_en, en ? 1 : 0); + WREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT, reg); +} + + +static void smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0); +} + +static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + /* do */ + { + RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_INTR); + + } /* while (reg_CKSVII2C_ic_clr_intr == 0) */ +} + +static void smu_v11_0_i2c_configure(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg = 0; + + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_SLAVE_DISABLE, 1); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_RESTART_EN, 1); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_MASTER, 0); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_SLAVE, 0); + /* Standard mode */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, 2); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MASTER_MODE, 1); + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CON, reg); +} + +static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* + * Standard mode speed, These values are taken from SMUIO MAS, + * but are different from what is given is + * Synopsys spec. The values here are based on assumption + * that refclock is 100MHz + * + * Configuration for standard mode; Speed = 100kbps + * Scale linearly, for now only support standard speed clock + * This will work only with 100M ref clock + * + * TBD:Change the calculation to take into account ref clock values also. + */ + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_FS_SPKLEN, 2); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_HCNT, 120); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_LCNT, 130); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SDA_HOLD, 20); +} + +static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, uint8_t address) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* Convert fromr 8-bit to 7-bit address */ + address >>= 1; + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, (address & 0xFF)); +} + +static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t ret = I2C_OK; + uint32_t reg, reg_c_tx_abrt_source; + + /*Check if transmission is completed */ + unsigned long timeout_counter = jiffies + msecs_to_jiffies(20); + + do { + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + break; + } + + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + + } while (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFE) == 0); + + if (ret != I2C_OK) + return ret; + + /* This only checks if NAK is received and transaction got aborted */ + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_INTR_STAT); + + if (REG_GET_FIELD(reg, CKSVII2C_IC_INTR_STAT, R_TX_ABRT) == 1) { + reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE); + DRM_INFO("TX was terminated, IC_TX_ABRT_SOURCE val is:%x", reg_c_tx_abrt_source); + + /* Check for stop due to NACK */ + if (REG_GET_FIELD(reg_c_tx_abrt_source, + CKSVII2C_IC_TX_ABRT_SOURCE, + ABRT_TXDATA_NOACK) == 1) { + + ret |= I2C_NAK_TXDATA_NOACK; + + } else if (REG_GET_FIELD(reg_c_tx_abrt_source, + CKSVII2C_IC_TX_ABRT_SOURCE, + ABRT_7B_ADDR_NOACK) == 1) { + + ret |= I2C_NAK_7B_ADDR_NOACK; + } else { + ret |= I2C_ABORT; + } + + smu_v11_0_i2c_clear_status(control); + } + + return ret; +} + +static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t ret = I2C_OK; + uint32_t reg_ic_status, reg_c_tx_abrt_source; + + reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE); + + /* If slave is not present */ + if (REG_GET_FIELD(reg_c_tx_abrt_source, + CKSVII2C_IC_TX_ABRT_SOURCE, + ABRT_7B_ADDR_NOACK) == 1) { + ret |= I2C_NAK_7B_ADDR_NOACK; + + smu_v11_0_i2c_clear_status(control); + } else { /* wait till some data is there in RXFIFO */ + /* Poll for some byte in RXFIFO */ + unsigned long timeout_counter = jiffies + msecs_to_jiffies(20); + + do { + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + break; + } + + reg_ic_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + + } while (REG_GET_FIELD(reg_ic_status, CKSVII2C_IC_STATUS, RFNE) == 0); + } + + return ret; +} + + + + +/** + * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device. + * + * @address: The I2C address of the slave device. + * @data: The data to transmit over the bus. + * @numbytes: The amount of data to transmit. + * @i2c_flag: Flags for transmission + * + * Returns 0 on success or error. + */ +static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, + uint8_t address, uint8_t *data, + uint32_t numbytes, uint32_t i2c_flag) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t bytes_sent, reg, ret = 0; + unsigned long timeout_counter; + + bytes_sent = 0; + + DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ", + (uint16_t)address, numbytes); + + if (drm_debug_enabled(DRM_UT_DRIVER)) { + print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, + 16, 1, data, numbytes, false); + } + + /* Set the I2C slave address */ + smu_v11_0_i2c_set_address(control, address); + /* Enable I2C */ + smu_v11_0_i2c_enable(control, true); + + /* Clear status bits */ + smu_v11_0_i2c_clear_status(control); + + + timeout_counter = jiffies + msecs_to_jiffies(20); + + while (numbytes > 0) { + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + if (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) { + do { + reg = 0; + /* + * Prepare transaction, no need to set RESTART. I2C engine will send + * START as soon as it sees data in TXFIFO + */ + if (bytes_sent == 0) + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, + (i2c_flag & I2C_RESTART) ? 1 : 0); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, data[bytes_sent]); + + /* determine if we need to send STOP bit or not */ + if (numbytes == 1) + /* Final transaction, so send stop unless I2C_NO_STOP */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, + (i2c_flag & I2C_NO_STOP) ? 0 : 1); + /* Write */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); + + /* Record that the bytes were transmitted */ + bytes_sent++; + numbytes--; + + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + + } while (numbytes && REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)); + } + + /* + * We waited too long for the transmission FIFO to become not-full. + * Exit the loop with error. + */ + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + goto Err; + } + } + + ret = smu_v11_0_i2c_poll_tx_status(control); + +Err: + /* Any error, no point in proceeding */ + if (ret != I2C_OK) { + if (ret & I2C_SW_TIMEOUT) + DRM_ERROR("TIMEOUT ERROR !!!"); + + if (ret & I2C_NAK_7B_ADDR_NOACK) + DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!"); + + + if (ret & I2C_NAK_TXDATA_NOACK) + DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!"); + } + + return ret; +} + + +/** + * smu_v11_0_i2c_receive - Receive a block of data over the I2C bus from a slave device. + * + * @address: The I2C address of the slave device. + * @numbytes: The amount of data to transmit. + * @i2c_flag: Flags for transmission + * + * Returns 0 on success or error. + */ +static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, + uint8_t address, uint8_t *data, + uint32_t numbytes, uint8_t i2c_flag) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t bytes_received, ret = I2C_OK; + + bytes_received = 0; + + /* Set the I2C slave address */ + smu_v11_0_i2c_set_address(control, address); + + /* Enable I2C */ + smu_v11_0_i2c_enable(control, true); + + while (numbytes > 0) { + uint32_t reg = 0; + + smu_v11_0_i2c_clear_status(control); + + + /* Prepare transaction */ + + /* Each time we disable I2C, so this is not a restart */ + if (bytes_received == 0) + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, + (i2c_flag & I2C_RESTART) ? 1 : 0); + + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, 0); + /* Read */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 1); + + /* Transmitting last byte */ + if (numbytes == 1) + /* Final transaction, so send stop if requested */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, + (i2c_flag & I2C_NO_STOP) ? 0 : 1); + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); + + ret = smu_v11_0_i2c_poll_rx_status(control); + + /* Any error, no point in proceeding */ + if (ret != I2C_OK) { + if (ret & I2C_SW_TIMEOUT) + DRM_ERROR("TIMEOUT ERROR !!!"); + + if (ret & I2C_NAK_7B_ADDR_NOACK) + DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!"); + + if (ret & I2C_NAK_TXDATA_NOACK) + DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!"); + + break; + } + + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD); + data[bytes_received] = REG_GET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT); + + /* Record that the bytes were received */ + bytes_received++; + numbytes--; + } + + DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :", + (uint16_t)address, bytes_received); + + if (drm_debug_enabled(DRM_UT_DRIVER)) { + print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, + 16, 1, data, bytes_received, false); + } + + return ret; +} + +static void smu_v11_0_i2c_abort(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg = 0; + + /* Enable I2C engine; */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ENABLE, 1); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg); + + /* Abort previous transaction */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ABORT, 1); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg); + + DRM_DEBUG_DRIVER("I2C_Abort() Done."); +} + + +static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + const uint32_t IDLE_TIMEOUT = 1024; + uint32_t timeout_count = 0; + uint32_t reg_ic_enable, reg_ic_enable_status, reg_ic_clr_activity; + + reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); + reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); + + + if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && + (REG_GET_FIELD(reg_ic_enable_status, CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { + /* + * Nobody is using I2C engine, but engine remains active because + * someone missed to send STOP + */ + smu_v11_0_i2c_abort(control); + } else if (REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) { + /* Nobody is using I2C engine */ + return true; + } + + /* Keep reading activity bit until it's cleared */ + do { + reg_ic_clr_activity = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_ACTIVITY); + + if (REG_GET_FIELD(reg_ic_clr_activity, + CKSVII2C_IC_CLR_ACTIVITY, CLR_ACTIVITY) == 0) + return true; + + ++timeout_count; + + } while (timeout_count < IDLE_TIMEOUT); + + return false; +} + +static void smu_v11_0_i2c_init(struct i2c_adapter *control) +{ + /* Disable clock gating */ + smu_v11_0_i2c_set_clock_gating(control, false); + + if (!smu_v11_0_i2c_activity_done(control)) + DRM_WARN("I2C busy !"); + + /* Disable I2C */ + smu_v11_0_i2c_enable(control, false); + + /* Configure I2C to operate as master and in standard mode */ + smu_v11_0_i2c_configure(control); + + /* Initialize the clock to 50 kHz default */ + smu_v11_0_i2c_set_clock(control); + +} + +static void smu_v11_0_i2c_fini(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg_ic_enable_status, reg_ic_enable; + + smu_v11_0_i2c_enable(control, false); + + /* Double check if disabled, else force abort */ + reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); + reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); + + if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && + (REG_GET_FIELD(reg_ic_enable_status, + CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { + /* + * Nobody is using I2C engine, but engine remains active because + * someone missed to send STOP + */ + smu_v11_0_i2c_abort(control); + } + + /* Restore clock gating */ + + /* + * TODO Reenabling clock gating seems to break subsequent SMU operation + * on the I2C bus. My guess is that SMU doesn't disable clock gating like + * we do here before working with the bus. So for now just don't restore + * it but later work with SMU to see if they have this issue and can + * update their code appropriately + */ + /* smu_v11_0_i2c_set_clock_gating(control, true); */ + +} + +static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* Send PPSMC_MSG_RequestI2CBus */ + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access) + goto Fail; + + + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle, true)) + return true; + +Fail: + return false; +} + +static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* Send PPSMC_MSG_RequestI2CBus */ + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access) + goto Fail; + + /* Send PPSMC_MSG_ReleaseI2CBus */ + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle, + false)) + return true; + +Fail: + return false; +} + +/***************************** EEPROM I2C GLUE ****************************/ + +static uint32_t smu_v11_0_i2c_eeprom_read_data(struct i2c_adapter *control, + uint8_t address, + uint8_t *data, + uint32_t numbytes) +{ + uint32_t ret = 0; + + /* First 2 bytes are dummy write to set EEPROM address */ + ret = smu_v11_0_i2c_transmit(control, address, data, 2, I2C_NO_STOP); + if (ret != I2C_OK) + goto Fail; + + /* Now read data starting with that address */ + ret = smu_v11_0_i2c_receive(control, address, data + 2, numbytes - 2, + I2C_RESTART); + +Fail: + if (ret != I2C_OK) + DRM_ERROR("ReadData() - I2C error occurred :%x", ret); + + return ret; +} + +static uint32_t smu_v11_0_i2c_eeprom_write_data(struct i2c_adapter *control, + uint8_t address, + uint8_t *data, + uint32_t numbytes) +{ + uint32_t ret; + + ret = smu_v11_0_i2c_transmit(control, address, data, numbytes, 0); + + if (ret != I2C_OK) + DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret); + else + /* + * According to EEPROM spec there is a MAX of 10 ms required for + * EEPROM to flush internal RX buffer after STOP was issued at the + * end of write transaction. During this time the EEPROM will not be + * responsive to any more commands - so wait a bit more. + * + * TODO Improve to wait for first ACK for slave address after + * internal write cycle done. + */ + msleep(10); + + return ret; + +} + +static void lock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ + struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c); + + if (!smu_v11_0_i2c_bus_lock(i2c)) { + DRM_ERROR("Failed to lock the bus from SMU"); + return; + } + + control->bus_locked = true; +} + +static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ + WARN_ONCE(1, "This operation not supposed to run in atomic context!"); + return false; +} + +static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ + struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c); + + if (!smu_v11_0_i2c_bus_unlock(i2c)) { + DRM_ERROR("Failed to unlock the bus from SMU"); + return; + } + + control->bus_locked = false; +} + +static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { + .lock_bus = lock_bus, + .trylock_bus = trylock_bus, + .unlock_bus = unlock_bus, +}; + +static int smu_v11_0_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg *msgs, int num) +{ + int i, ret; + struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c_adap); + + if (!control->bus_locked) { + DRM_ERROR("I2C bus unlocked, stopping transaction!"); + return -EIO; + } + + smu_v11_0_i2c_init(i2c_adap); + + for (i = 0; i < num; i++) { + if (msgs[i].flags & I2C_M_RD) + ret = smu_v11_0_i2c_eeprom_read_data(i2c_adap, + (uint8_t)msgs[i].addr, + msgs[i].buf, msgs[i].len); + else + ret = smu_v11_0_i2c_eeprom_write_data(i2c_adap, + (uint8_t)msgs[i].addr, + msgs[i].buf, msgs[i].len); + + if (ret != I2C_OK) { + num = -EIO; + break; + } + } + + smu_v11_0_i2c_fini(i2c_adap); + return num; +} + +static u32 smu_v11_0_i2c_eeprom_i2c_func(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + + +static const struct i2c_algorithm smu_v11_0_i2c_eeprom_i2c_algo = { + .master_xfer = smu_v11_0_i2c_eeprom_i2c_xfer, + .functionality = smu_v11_0_i2c_eeprom_i2c_func, +}; + +int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + int res; + + control->owner = THIS_MODULE; + control->class = I2C_CLASS_SPD; + control->dev.parent = &adev->pdev->dev; + control->algo = &smu_v11_0_i2c_eeprom_i2c_algo; + snprintf(control->name, sizeof(control->name), "RAS EEPROM"); + control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops; + + res = i2c_add_adapter(control); + if (res) + DRM_ERROR("Failed to register hw i2c, err: %d\n", res); + + return res; +} + +void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control) +{ + i2c_del_adapter(control); +} + +/* + * Keep this for future unit test if bugs arise + */ +#if 0 +#define I2C_TARGET_ADDR 0xA0 + +bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control) +{ + + uint32_t ret = I2C_OK; + uint8_t data[6] = {0xf, 0, 0xde, 0xad, 0xbe, 0xef}; + + + DRM_INFO("Begin"); + + if (!smu_v11_0_i2c_bus_lock(control)) { + DRM_ERROR("Failed to lock the bus!."); + return false; + } + + smu_v11_0_i2c_init(control); + + /* Write 0xde to address 0x0000 on the EEPROM */ + ret = smu_v11_0_i2c_eeprom_write_data(control, I2C_TARGET_ADDR, data, 6); + + ret = smu_v11_0_i2c_eeprom_read_data(control, I2C_TARGET_ADDR, data, 6); + + smu_v11_0_i2c_fini(control); + + smu_v11_0_i2c_bus_unlock(control); + + + DRM_INFO("End"); + return true; +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h new file mode 100644 index 000000000000..973f28d68e70 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h @@ -0,0 +1,34 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SMU_V11_I2C_CONTROL_H +#define SMU_V11_I2C_CONTROL_H + +#include <linux/types.h> + +struct i2c_adapter; + +int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control); +void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 23265414d448..15f3424a1ff7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -58,11 +58,18 @@ #include "mmhub_v1_0.h" #include "df_v1_7.h" #include "df_v3_6.h" +#include "nbio_v6_1.h" +#include "nbio_v7_0.h" +#include "nbio_v7_4.h" #include "vega10_ih.h" #include "sdma_v4_0.h" #include "uvd_v7_0.h" #include "vce_v4_0.h" #include "vcn_v1_0.h" +#include "vcn_v2_0.h" +#include "jpeg_v2_0.h" +#include "vcn_v2_5.h" +#include "jpeg_v2_5.h" #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" @@ -89,8 +96,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u32 r; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -104,8 +111,8 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags, address, data; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -115,6 +122,49 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } +static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u64 r; + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* read low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + r = RREG32(data); + + /* read high 32 bit*/ + WREG32(address, reg + 4); + (void)RREG32(address); + r |= ((u64)RREG32(data) << 32); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* write low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + WREG32(data, (u32)(v & 0xffffffffULL)); + (void)RREG32(data); + + /* write high 32 bit */ + WREG32(address, reg + 4); + (void)RREG32(address); + WREG32(data, (u32)(v >> 32)); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; @@ -217,7 +267,7 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static u32 soc15_get_config_memsize(struct amdgpu_device *adev) { - return adev->nbio_funcs->get_memsize(adev); + return adev->nbio.funcs->get_memsize(adev); } static u32 soc15_get_xclk(struct amdgpu_device *adev) @@ -291,6 +341,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, @@ -416,7 +467,7 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = adev->nbio_funcs->get_memsize(adev); + u32 memsize = adev->nbio.funcs->get_memsize(adev); if (memsize != 0xffffffff) break; @@ -428,76 +479,96 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) return ret; } -static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) -{ - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } - - return pp_funcs->get_asic_baco_capability(pp_handle, cap); -} - static int soc15_asic_baco_reset(struct amdgpu_device *adev) { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; + /* avoid NBIF got stuck when do RAS recovery in BACO reset */ + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, false); - dev_info(adev->dev, "GPU BACO reset\n"); + ret = amdgpu_dpm_baco_reset(adev); + if (ret) + return ret; - adev->in_baco_reset = 1; + /* re-enable doorbell interrupt after BACO exit */ + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, true); return 0; } -static int soc15_asic_reset(struct amdgpu_device *adev) +static enum amd_reset_method +soc15_asic_reset_method(struct amdgpu_device *adev) { - int ret; - bool baco_reset; + bool baco_reset = false; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (adev->asic_type) { + case CHIP_RAVEN: + case CHIP_RENOIR: + return AMD_RESET_METHOD_MODE2; case CHIP_VEGA10: case CHIP_VEGA12: - soc15_asic_get_baco_capability(adev, &baco_reset); + case CHIP_ARCTURUS: + baco_reset = amdgpu_dpm_is_baco_supported(adev); break; case CHIP_VEGA20: if (adev->psp.sos_fw_version >= 0x80067) - soc15_asic_get_baco_capability(adev, &baco_reset); - else - baco_reset = false; - if (baco_reset) { - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + baco_reset = amdgpu_dpm_is_baco_supported(adev); - if (hive || (ras && ras->supported)) - baco_reset = false; - } + /* + * 1. PMFW version > 0x284300: all cases use baco + * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco + */ + if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400) + baco_reset = false; break; default: - baco_reset = false; break; } if (baco_reset) - ret = soc15_asic_baco_reset(adev); + return AMD_RESET_METHOD_BACO; else - ret = soc15_asic_mode1_reset(adev); + return AMD_RESET_METHOD_MODE1; +} - return ret; +static int soc15_asic_reset(struct amdgpu_device *adev) +{ + /* original raven doesn't have full asic reset */ + if (adev->pdev->device == 0x15dd && adev->rev_id < 0x8) + return 0; + + switch (soc15_asic_reset_method(adev)) { + case AMD_RESET_METHOD_BACO: + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); + return soc15_asic_baco_reset(adev); + case AMD_RESET_METHOD_MODE2: + return amdgpu_dpm_mode2_reset(adev); + default: + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); + return soc15_asic_mode1_reset(adev); + } +} + +static bool soc15_supports_baco(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_ARCTURUS: + return amdgpu_dpm_is_baco_supported(adev); + case CHIP_VEGA20: + if (adev->psp.sos_fw_version >= 0x80067) + return amdgpu_dpm_is_baco_supported(adev); + return false; + default: + return false; + } } /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, @@ -556,8 +627,8 @@ static void soc15_program_aspm(struct amdgpu_device *adev) static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - adev->nbio_funcs->enable_doorbell_aperture(adev, enable); - adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); } static const struct amdgpu_ip_block_version vega10_common_ip_block = @@ -571,7 +642,7 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block = static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) { - return adev->nbio_funcs->get_rev_id(adev); + return adev->nbio.funcs->get_rev_id(adev); } int soc15_set_ip_blocks(struct amdgpu_device *adev) @@ -581,32 +652,41 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_RAVEN: + case CHIP_RENOIR: vega10_reg_base_init(adev); break; case CHIP_VEGA20: vega20_reg_base_init(adev); break; + case CHIP_ARCTURUS: + arct_reg_base_init(adev); + break; default: return -EINVAL; } - if (adev->asic_type == CHIP_VEGA20) + if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->gmc.xgmi.supported = true; - if (adev->flags & AMD_IS_APU) - adev->nbio_funcs = &nbio_v7_0_funcs; - else if (adev->asic_type == CHIP_VEGA20) - adev->nbio_funcs = &nbio_v7_4_funcs; - else - adev->nbio_funcs = &nbio_v6_1_funcs; + if (adev->flags & AMD_IS_APU) { + adev->nbio.funcs = &nbio_v7_0_funcs; + adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg; + } else if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) { + adev->nbio.funcs = &nbio_v7_4_funcs; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; + } else { + adev->nbio.funcs = &nbio_v6_1_funcs; + adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg; + } - if (adev->asic_type == CHIP_VEGA20) - adev->df_funcs = &df_v3_6_funcs; + if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) + adev->df.funcs = &df_v3_6_funcs; else - adev->df_funcs = &df_v1_7_funcs; + adev->df.funcs = &df_v1_7_funcs; adev->rev_id = soc15_get_rev_id(adev); - adev->nbio_funcs->detect_hw_virt(adev); + adev->nbio.funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) adev->virt.ops = &xgpu_ai_virt_ops; @@ -638,11 +718,11 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) } amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - if (!amdgpu_sriov_vf(adev)) { - if (is_support_sw_smu(adev)) + if (is_support_sw_smu(adev)) { + if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - else - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -672,6 +752,53 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) #endif amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); break; + case CHIP_ARCTURUS: + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + + if (amdgpu_sriov_vf(adev)) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + } + + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + + if (amdgpu_sriov_vf(adev)) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + } + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block); + break; + case CHIP_RENOIR: + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); + break; default: return -EINVAL; } @@ -681,14 +808,14 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - adev->nbio_funcs->hdp_flush(adev, ring); + adev->nbio.funcs->hdp_flush(adev, ring); } static void soc15_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); else amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); @@ -714,14 +841,9 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, /* Set the 2 events that we wish to watch, defined above */ /* Reg 40 is # received msgs */ + /* Reg 104 is # of posted requests sent */ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); - /* Pre-VG20, Reg 104 is # of posted requests sent. On VG20 it's 108 */ - if (adev->asic_type == CHIP_VEGA20) - perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, - EVENT1_SEL, 108); - else - perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, - EVENT1_SEL, 104); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); /* Write to enable desired perf counters */ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); @@ -751,6 +873,55 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static void vega20_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1) +{ + uint32_t perfctr = 0; + uint64_t cnt0_of, cnt1_of; + int tmp; + + /* This reports 0 on APUs, so return to avoid writing/reading registers + * that may or may not be different from their GPU counterparts + */ + if (adev->flags & AMD_IS_APU) + return; + + /* Set the 2 events that we wish to watch, defined above */ + /* Reg 40 is # received msgs */ + /* Reg 108 is # of posted requests sent on VG20 */ + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, + EVENT0_SEL, 40); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, + EVENT1_SEL, 108); + + /* Write to enable desired perf counters */ + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctr); + /* Zero out and enable the perf counters + * Write 0x5: + * Bit 0 = Start all counters(1) + * Bit 2 = Global counter reset enable(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); + + msleep(1000); + + /* Load the shadow and disable the perf counters + * Write 0x2: + * Bit 0 = Stop counters(0) + * Bit 1 = Load the shadow counters(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); + + /* Read register values to get any >32bit overflow */ + tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3); + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER0_UPPER); + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER1_UPPER); + + /* Get the values and add the overflow */ + *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | (cnt0_of << 32); + *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK3) | (cnt1_of << 32); +} + static bool soc15_need_reset_on_init(struct amdgpu_device *adev) { u32 sol_reg; @@ -792,6 +963,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .read_bios_from_rom = &soc15_read_bios_from_rom, .read_register = &soc15_read_register, .reset = &soc15_asic_reset, + .reset_method = &soc15_asic_reset_method, .set_vga_state = &soc15_vga_set_state, .get_xclk = &soc15_get_xclk, .set_uvd_clocks = &soc15_set_uvd_clocks, @@ -804,6 +976,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .get_pcie_usage = &soc15_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, .get_pcie_replay_count = &soc15_get_pcie_replay_count, + .supports_baco = &soc15_supports_baco, }; static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -812,6 +985,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .read_bios_from_rom = &soc15_read_bios_from_rom, .read_register = &soc15_read_register, .reset = &soc15_asic_reset, + .reset_method = &soc15_asic_reset_method, .set_vga_state = &soc15_vga_set_state, .get_xclk = &soc15_get_xclk, .set_uvd_clocks = &soc15_set_uvd_clocks, @@ -821,9 +995,10 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .invalidate_hdp = &soc15_invalidate_hdp, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega20_doorbell_index_init, - .get_pcie_usage = &soc15_get_pcie_usage, + .get_pcie_usage = &vega20_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, .get_pcie_replay_count = &soc15_get_pcie_replay_count, + .supports_baco = &soc15_supports_baco, }; static int soc15_common_early_init(void *handle) @@ -837,6 +1012,8 @@ static int soc15_common_early_init(void *handle) adev->smc_wreg = NULL; adev->pcie_rreg = &soc15_pcie_rreg; adev->pcie_wreg = &soc15_pcie_wreg; + adev->pcie_rreg64 = &soc15_pcie_rreg64; + adev->pcie_wreg64 = &soc15_pcie_wreg64; adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg; adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg; adev->didt_rreg = &soc15_didt_rreg; @@ -992,11 +1169,53 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; } - - if (adev->pm.pp_feature & PP_GFXOFF_MASK) - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | - AMD_PG_SUPPORT_CP | - AMD_PG_SUPPORT_RLC_SMU_HS; + break; + case CHIP_ARCTURUS: + adev->asic_funcs = &vega20_asic_funcs; + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x32; + break; + case CHIP_RENOIR: + adev->asic_funcs = &soc15_asic_funcs; + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_DF_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_VCN_DPG; + adev->external_rev_id = adev->rev_id + 0x91; break; default: /* FIXME: not supported yet */ @@ -1014,11 +1233,15 @@ static int soc15_common_early_init(void *handle) static int soc15_common_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r = 0; if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); - return 0; + if (adev->nbio.funcs->ras_late_init) + r = adev->nbio.funcs->ras_late_init(adev); + + return r; } static int soc15_common_sw_init(void *handle) @@ -1028,13 +1251,17 @@ static int soc15_common_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_add_irq_id(adev); - adev->df_funcs->sw_init(adev); + adev->df.funcs->sw_init(adev); return 0; } static int soc15_common_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_nbio_ras_fini(adev); + adev->df.funcs->sw_fini(adev); return 0; } @@ -1043,21 +1270,18 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) int i; struct amdgpu_ring *ring; - /* Two reasons to skip - * 1, Host driver already programmed them - * 2, To avoid registers program violations in SR-IOV - */ - if (!amdgpu_virt_support_skip_setting(adev)) { + /* sdma/ih doorbell range are programed by hypervisor */ + if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - adev->nbio_funcs->sdma_doorbell_range(adev, i, + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index, adev->doorbell_index.sdma_doorbell_range); } - } - adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); + } } static int soc15_common_hw_init(void *handle) @@ -1069,13 +1293,13 @@ static int soc15_common_hw_init(void *handle) /* enable aspm */ soc15_program_aspm(adev); /* setup nbio registers */ - adev->nbio_funcs->init_registers(adev); + adev->nbio.funcs->init_registers(adev); /* remap HDP registers to a hole in mmio space, * for the purpose of expose those registers * to process space */ - if (adev->nbio_funcs->remap_hdp_registers) - adev->nbio_funcs->remap_hdp_registers(adev); + if (adev->nbio.funcs->remap_hdp_registers) + adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); @@ -1098,6 +1322,14 @@ static int soc15_common_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_put_irq(adev); + if (adev->nbio.ras_if && + amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { + if (adev->nbio.funcs->init_ras_controller_interrupt) + amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0); + if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) + amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0); + } + return 0; } @@ -1134,7 +1366,8 @@ static void soc15_update_hdp_light_sleep(struct amdgpu_device *adev, bool enable { uint32_t def, data; - if (adev->asic_type == CHIP_VEGA20) { + if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) { def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL)); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) @@ -1237,34 +1470,39 @@ static int soc15_common_set_clockgating_state(void *handle, case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: - adev->nbio_funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); - adev->nbio_funcs->update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + adev->nbio.funcs->update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + adev->nbio.funcs->update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); soc15_update_hdp_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_rom_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); - adev->df_funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); + adev->df.funcs->update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); break; case CHIP_RAVEN: - adev->nbio_funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); - adev->nbio_funcs->update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + case CHIP_RENOIR: + adev->nbio.funcs->update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + adev->nbio.funcs->update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); soc15_update_hdp_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_rom_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); + break; + case CHIP_ARCTURUS: + soc15_update_hdp_light_sleep(adev, + state == AMD_CG_STATE_GATE); break; default: break; @@ -1280,7 +1518,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio_funcs->get_clockgating_state(adev, flags); + adev->nbio.funcs->get_clockgating_state(adev, flags); /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); @@ -1302,7 +1540,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) *flags |= AMD_CG_SUPPORT_ROM_MGCG; - adev->df_funcs->get_clockgating_state(adev, flags); + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 7a6b2cc6d9f5..d0fb7a67c1a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -28,8 +28,8 @@ #include "nbio_v7_0.h" #include "nbio_v7_4.h" -#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4 -#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1 +#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6 +#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3 extern const struct amd_ip_funcs soc15_common_ip_funcs; @@ -60,6 +60,18 @@ struct soc15_allowed_register_entry { bool grbm_indexed; }; +struct soc15_ras_field_entry { + const char *name; + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t sec_count_mask; + uint32_t sec_count_shift; + uint32_t ded_count_mask; + uint32_t ded_count_shift; +}; + #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) @@ -67,6 +79,8 @@ struct soc15_allowed_register_entry { #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } +#define SOC15_REG_FIELD(reg, field) reg##__##field##_MASK, reg##__##field##__SHIFT + void soc15_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int soc15_set_ip_blocks(struct amdgpu_device *adev); @@ -77,6 +91,7 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, int vega10_reg_base_init(struct amdgpu_device *adev); int vega20_reg_base_init(struct amdgpu_device *adev); +int arct_reg_base_init(struct amdgpu_device *adev); void vega10_doorbell_index_init(struct amdgpu_device *adev); void vega20_doorbell_index_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 47f74dab365d..19e870c79896 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -52,6 +52,7 @@ uint32_t old_ = 0; \ uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ uint32_t loop = adev->usec_timeout; \ + ret = 0; \ while ((tmp_ & (mask)) != (expected_value)) { \ if (old_ != tmp_) { \ loop = adev->usec_timeout; \ @@ -69,9 +70,10 @@ } \ } while (0) +#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a))) #define WREG32_RLC(reg, value) \ do { \ - if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \ uint32_t i = 0; \ uint32_t retries = 50000; \ uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \ @@ -96,7 +98,7 @@ #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ do { \ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ - if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \ uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \ uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \ uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \ diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c new file mode 100644 index 000000000000..0d6b50528d76 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c @@ -0,0 +1,37 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v6_0.h" +#include "amdgpu.h" + +static void umc_v6_0_init_registers(struct amdgpu_device *adev) +{ + unsigned i,j; + + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + WREG32((i*0x100000 + 0x5010c + j*0x2000)/4, 0x1002); +} + +const struct amdgpu_umc_funcs umc_v6_0_funcs = { + .init_registers = umc_v6_0_init_registers, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h new file mode 100644 index 000000000000..109f1a57a46e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V6_0_H__ +#define __UMC_V6_0_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +extern const struct amdgpu_umc_funcs umc_v6_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c new file mode 100644 index 000000000000..793bf70e64b1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -0,0 +1,373 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v6_1.h" +#include "amdgpu_ras.h" +#include "amdgpu.h" + +#include "rsmu/rsmu_0_0_2_offset.h" +#include "rsmu/rsmu_0_0_2_sh_mask.h" +#include "umc/umc_6_1_1_offset.h" +#include "umc/umc_6_1_1_sh_mask.h" +#include "umc/umc_6_1_2_offset.h" + +#define UMC_6_INST_DIST 0x40000 + +/* + * (addr / 256) * 8192, the higher 26 bits in ErrorAddr + * is the index of 8KB block + */ +#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) +/* channel index is the index of 256B block */ +#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) +/* offset in 256B block */ +#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) + +#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++) +#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) +#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) + +const uint32_t + umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { + {2, 18, 11, 27}, {4, 20, 13, 29}, + {1, 17, 8, 24}, {7, 23, 14, 30}, + {10, 26, 3, 19}, {12, 28, 5, 21}, + {9, 25, 0, 16}, {15, 31, 6, 22} +}; + +static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev) +{ + WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 1); +} + +static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) +{ + WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 0); +} + +static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev) +{ + uint32_t rsmu_umc_index; + + rsmu_umc_index = RREG32_SOC15(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + + return REG_GET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN); +} + +static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst; +} + +static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + if (adev->asic_type == CHIP_ARCTURUS) { + /* UMC 6_1_2 registers */ + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); + } else { + /* UMC 6_1_1 registers */ + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + } + + /* select the lower chip and check the error count */ + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - + UMC_V6_1_CE_CNT_INIT); + /* clear the lower chip err count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); + + /* select the higher chip and check the err counter */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - + UMC_V6_1_CE_CNT_INIT); + /* clear the higher chip err count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); + + /* check for SRAM correctable error + MCUMC_STATUS is a 64 bit register */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + if (adev->asic_type == CHIP_ARCTURUS) { + /* UMC 6_1_2 registers */ + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); + } else { + /* UMC 6_1_1 registers */ + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + } + + /* check the MCUMC_STATUS */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v6_1_querry_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); +} + +static void umc_v6_1_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, + uint32_t ch_inst, + uint32_t umc_inst) +{ + uint32_t lsb, mc_umc_status_addr; + uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; + struct eeprom_table_record *err_rec; + uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + if (adev->asic_type == CHIP_ARCTURUS) { + /* UMC 6_1_2 registers */ + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT); + } else { + /* UMC 6_1_1 registers */ + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0); + } + + /* skip error address process if -ENOMEM */ + if (!err_data->err_addr) { + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + return; + } + + err_rec = &err_data->err_addr[err_data->err_addr_cnt]; + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); + /* the lowest lsb bits should be ignored */ + lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + err_addr &= ~((0x1ULL << lsb) - 1); + + /* translate umc channel address to soc pa, 3 parts are included */ + retired_page = ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) + == 1) { + err_rec->address = err_addr; + /* page frame address is saved */ + err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + err_rec->ts = (uint64_t)ktime_get_real_seconds(); + err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; + err_rec->cu = 0; + err_rec->mem_channel = channel_index; + err_rec->mcumc_id = umc_inst; + + err_data->err_addr_cnt++; + } + } + + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); +} + +static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_query_error_address(adev, + err_data, + umc_reg_offset, + ch_inst, + umc_inst); + } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); +} + +static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + + if (adev->asic_type == CHIP_ARCTURUS) { + /* UMC 6_1_2 registers */ + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT); + } else { + /* UMC 6_1_1 registers */ + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + } + + /* select the lower chip and check the error count */ + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrInt, 0x1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); + + /* select the higher chip and check the err counter */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); +} + +static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) +{ + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset); + } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); +} + +const struct amdgpu_umc_funcs umc_v6_1_funcs = { + .err_cnt_init = umc_v6_1_err_cnt_init, + .ras_late_init = amdgpu_umc_ras_late_init, + .query_ras_error_count = umc_v6_1_query_ras_error_count, + .query_ras_error_address = umc_v6_1_query_ras_error_address, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h new file mode 100644 index 000000000000..0ce1d323cfdd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h @@ -0,0 +1,52 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V6_1_H__ +#define __UMC_V6_1_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* HBM Memory Channel Width */ +#define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH 128 +/* number of umc channel instance with memory map register access */ +#define UMC_V6_1_CHANNEL_INSTANCE_NUM 4 +/* number of umc instance with memory map register access */ +#define UMC_V6_1_UMC_INSTANCE_NUM 8 +/* total channel instances in one umc block */ +#define UMC_V6_1_TOTAL_CHANNEL_NUM (UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM) +/* UMC regiser per channel offset */ +#define UMC_V6_1_PER_CHANNEL_OFFSET_VG20 0x800 +#define UMC_V6_1_PER_CHANNEL_OFFSET_ARCT 0x400 + +/* EccErrCnt max value */ +#define UMC_V6_1_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UMC_V6_1_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) + +extern const struct amdgpu_umc_funcs umc_v6_1_funcs; +extern const uint32_t + umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 01e62fb8e6e0..0fa8aae2d78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -763,7 +763,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 670784a78512..e0aadcaf6c8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -206,13 +206,14 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) * Open up a stream for HW test */ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -220,15 +221,15 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00010000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -268,13 +269,14 @@ err: */ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -282,15 +284,15 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00010000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -327,13 +329,20 @@ err: static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; - r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL); if (r) goto error; - r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence); + r = uvd_v6_0_enc_get_destroy_msg(ring, 1, bo, &fence); if (r) goto error; @@ -345,6 +354,8 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } @@ -1410,7 +1421,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index a6bfe7651d07..0995378d8263 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -214,13 +214,14 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) * Open up a stream for HW test */ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -228,15 +229,15 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -275,13 +276,14 @@ err: * Close up a stream for HW test or if userspace failed to do so */ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -289,15 +291,15 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; @@ -334,13 +336,20 @@ err: static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; - r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL); if (r) goto error; - r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence); + r = uvd_v7_0_enc_get_destroy_msg(ring, 1, bo, &fence); if (r) goto error; @@ -352,6 +361,8 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } @@ -1763,7 +1774,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .align_mask = 0xf, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, @@ -1796,7 +1807,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = uvd_v7_0_enc_ring_get_rptr, .get_wptr = uvd_v7_0_enc_ring_get_wptr, .set_wptr = uvd_v7_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 475ae68f38f5..217db187207c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -739,7 +739,7 @@ static int vce_v3_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); int i; if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index eafbe8d8248d..3fd102efb7af 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -887,7 +887,7 @@ static int vce_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); int i; if ((adev->asic_type == CHIP_POLARIS10) || @@ -1070,7 +1070,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index dde22b7d140d..71f61afdc655 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" +#include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" #include "soc15_common.h" @@ -36,21 +37,22 @@ #include "mmhub/mmhub_9_1_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_1_0.h" +#include "jpeg_v1_0.h" -#define mmUVD_RBC_XX_IB_REG_CHECK 0x05ab -#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 -#define mmUVD_REG_XX_MASK 0x05ac -#define mmUVD_REG_XX_MASK_BASE_IDX 1 +#define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab +#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1 +#define mmUVD_REG_XX_MASK_1_0 0x05ac +#define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1 static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); -static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); -static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr); static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); + +static void vcn_v1_0_idle_work_handler(struct work_struct *work); /** * vcn_v1_0_early_init - set function pointers @@ -63,13 +65,15 @@ static int vcn_v1_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->vcn.num_vcn_inst = 1; adev->vcn.num_enc_rings = 2; vcn_v1_0_set_dec_ring_funcs(adev); vcn_v1_0_set_enc_ring_funcs(adev); - vcn_v1_0_set_jpeg_ring_funcs(adev); vcn_v1_0_set_irq_funcs(adev); + jpeg_v1_0_early_init(handle); + return 0; } @@ -87,27 +91,26 @@ static int vcn_v1_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCN DEC TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst->irq); if (r) return r; /* VCN ENC TRAP */ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE, - &adev->vcn.irq); + &adev->vcn.inst->irq); if (r) return r; } - /* VCN JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq); - if (r) - return r; - r = amdgpu_vcn_sw_init(adev); if (r) return r; + /* Override the work func */ + adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler; + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; hdr = (const struct common_firmware_header *)adev->vcn.fw->data; @@ -122,42 +125,36 @@ static int vcn_v1_0_sw_init(void *handle) if (r) return r; - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; sprintf(ring->name, "vcn_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; - adev->vcn.internal.scratch9 = adev->vcn.external.scratch9 = + adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); - adev->vcn.internal.data0 = adev->vcn.external.data0 = + adev->vcn.internal.data0 = adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); - adev->vcn.internal.data1 = adev->vcn.external.data1 = + adev->vcn.internal.data1 = adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); - adev->vcn.internal.cmd = adev->vcn.external.cmd = + adev->vcn.internal.cmd = adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); - adev->vcn.internal.nop = adev->vcn.external.nop = + adev->vcn.internal.nop = adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; sprintf(ring->name, "vcn_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; } - ring = &adev->vcn.ring_jpeg; - sprintf(ring->name, "vcn_jpeg"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); - if (r) - return r; - adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; - adev->vcn.internal.jpeg_pitch = adev->vcn.external.jpeg_pitch = - SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); - return 0; + r = jpeg_v1_0_sw_init(handle); + + return r; } /** @@ -176,6 +173,8 @@ static int vcn_v1_0_sw_fini(void *handle) if (r) return r; + jpeg_v1_0_sw_fini(handle); + r = amdgpu_vcn_sw_fini(adev); return r; @@ -191,7 +190,7 @@ static int vcn_v1_0_sw_fini(void *handle) static int vcn_v1_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; r = amdgpu_ring_test_helper(ring); @@ -199,14 +198,13 @@ static int vcn_v1_0_hw_init(void *handle) goto done; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; - ring->sched.ready = true; + ring = &adev->vcn.inst->ring_enc[i]; r = amdgpu_ring_test_helper(ring); if (r) goto done; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->jpeg.inst->ring_dec; r = amdgpu_ring_test_helper(ring); if (r) goto done; @@ -229,7 +227,7 @@ done: static int vcn_v1_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || RREG32_SOC15(VCN, 0, mmUVD_STATUS)) @@ -304,9 +302,9 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) offset = 0; } else { WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr)); + lower_32_bits(adev->vcn.inst->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr)); + upper_32_bits(adev->vcn.inst->gpu_addr)); offset = size; WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); @@ -316,17 +314,17 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) /* cache window 1: stack */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); /* cache window 2: context */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); @@ -374,9 +372,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) offset = 0; } else { WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); offset = size; WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0); @@ -386,9 +384,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) /* cache window 1: stack */ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0, 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE, @@ -396,10 +394,10 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) /* cache window 2: context */ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE, @@ -779,7 +777,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) */ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; int i, j, r; @@ -837,9 +835,9 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) vcn_v1_0_mc_resume_spg_mode(adev); - WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10); - WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, - RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3); + WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10); + WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0, + RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0) | 0x3); /* enable VCPU clock */ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK); @@ -932,43 +930,28 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); - ring = &adev->vcn.ring_jpeg; - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | - UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - - /* initialize wptr */ - ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); - - /* copy patch commands to the jpeg ring */ - vcn_v1_0_jpeg_ring_set_patch_ring(ring, - (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); + jpeg_v1_0_start(adev, 0); return 0; } static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -1105,13 +1088,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); - /* initialize JPEG wptr */ - ring = &adev->vcn.ring_jpeg; - ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); - - /* copy patch commands to the jpeg ring */ - vcn_v1_0_jpeg_ring_set_patch_ring(ring, - (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); + jpeg_v1_0_start(adev, 1); return 0; } @@ -1222,7 +1199,7 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) } static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) + int inst_idx, struct dpg_pause_state *new_state) { int ret_code; uint32_t reg_data = 0; @@ -1230,9 +1207,10 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, struct amdgpu_ring *ring; /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + adev->vcn.inst[inst_idx].pause_state.fw_based, + adev->vcn.inst[inst_idx].pause_state.jpeg, new_state->fw_based, new_state->jpeg); reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & @@ -1255,21 +1233,21 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); /* Restore */ - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, @@ -1281,13 +1259,14 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); } - adev->vcn.pause_state.fw_based = new_state->fw_based; + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.jpeg != new_state->jpeg) { + if (adev->vcn.inst[inst_idx].pause_state.jpeg != new_state->jpeg) { DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + adev->vcn.inst[inst_idx].pause_state.fw_based, + adev->vcn.inst[inst_idx].pause_state.jpeg, new_state->fw_based, new_state->jpeg); reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & @@ -1315,7 +1294,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); /* Restore */ - ring = &adev->vcn.ring_jpeg; + ring = &adev->jpeg.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | @@ -1329,7 +1308,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, @@ -1341,7 +1320,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); } - adev->vcn.pause_state.jpeg = new_state->jpeg; + adev->vcn.inst[inst_idx].pause_state.jpeg = new_state->jpeg; } return 0; @@ -1369,7 +1348,7 @@ static int vcn_v1_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ @@ -1596,7 +1575,7 @@ static uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); @@ -1613,7 +1592,7 @@ static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); @@ -1630,7 +1609,7 @@ static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else @@ -1715,389 +1694,6 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } - -/** - * vcn_v1_0_jpeg_ring_get_rptr - get read pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware read pointer - */ -static uint64_t vcn_v1_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR); -} - -/** - * vcn_v1_0_jpeg_ring_get_wptr - get write pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware write pointer - */ -static uint64_t vcn_v1_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); -} - -/** - * vcn_v1_0_jpeg_ring_set_wptr - set write pointer - * - * @ring: amdgpu_ring pointer - * - * Commits the write pointer to the hardware - */ -static void vcn_v1_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); -} - -/** - * vcn_v1_0_jpeg_ring_insert_start - insert a start command - * - * @ring: amdgpu_ring pointer - * - * Write a start command to the ring. - */ -static void vcn_v1_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x68e04); - - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x80010000); -} - -/** - * vcn_v1_0_jpeg_ring_insert_end - insert a end command - * - * @ring: amdgpu_ring pointer - * - * Write a end command to the ring. - */ -static void vcn_v1_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x68e04); - - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x00010000); -} - -/** - * vcn_v1_0_jpeg_ring_emit_fence - emit an fence & trap command - * - * @ring: amdgpu_ring pointer - * @fence: fence to emit - * - * Write a fence and a trap command to the ring. - */ -static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) -{ - struct amdgpu_device *adev = ring->adev; - - WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, seq); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, seq); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x8); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); - amdgpu_ring_write(ring, 0); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x01400200); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, seq); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(addr)); - - amdgpu_ring_write(ring, - PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2)); - amdgpu_ring_write(ring, 0xffffffff); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x3fbc); - - amdgpu_ring_write(ring, - PACKETJ(0, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x1); - - /* emit trap */ - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); - amdgpu_ring_write(ring, 0); -} - -/** - * vcn_v1_0_jpeg_ring_emit_ib - execute indirect buffer - * - * @ring: amdgpu_ring pointer - * @ib: indirect buffer to execute - * - * Write ring commands to execute the indirect buffer. - */ -static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) -{ - struct amdgpu_device *adev = ring->adev; - unsigned vmid = AMDGPU_JOB_GET_VMID(job); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, ib->length_dw); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); - - amdgpu_ring_write(ring, - PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); - amdgpu_ring_write(ring, 0); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x01400200); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x2); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); - amdgpu_ring_write(ring, 0x2); -} - -static void vcn_v1_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t reg_offset = (reg << 2); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x01400200); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, val); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); - if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || - ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, - PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); - } else { - amdgpu_ring_write(ring, reg_offset); - amdgpu_ring_write(ring, - PACKETJ(0, 0, 0, PACKETJ_TYPE3)); - } - amdgpu_ring_write(ring, mask); -} - -static void vcn_v1_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) -{ - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t data0, data1, mask; - - pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); - - /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; - data1 = lower_32_bits(pd_addr); - mask = 0xffffffff; - vcn_v1_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); -} - -static void vcn_v1_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t reg_offset = (reg << 2); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); - if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || - ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, - PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); - } else { - amdgpu_ring_write(ring, reg_offset); - amdgpu_ring_write(ring, - PACKETJ(0, 0, 0, PACKETJ_TYPE0)); - } - amdgpu_ring_write(ring, val); -} - -static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) -{ - int i; - - WARN_ON(ring->wptr % 2 || count % 2); - - for (i = 0; i < count / 2; i++) { - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); - amdgpu_ring_write(ring, 0); - } -} - -static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) -{ - struct amdgpu_device *adev = ring->adev; - ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); - if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || - ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { - ring->ring[(*ptr)++] = 0; - ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0); - } else { - ring->ring[(*ptr)++] = reg_offset; - ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0); - } - ring->ring[(*ptr)++] = val; -} - -static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr) -{ - struct amdgpu_device *adev = ring->adev; - - uint32_t reg, reg_offset, val, mask, i; - - // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW); - reg_offset = (reg << 2); - val = lower_32_bits(ring->gpu_addr); - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); - - // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH); - reg_offset = (reg << 2); - val = upper_32_bits(ring->gpu_addr); - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); - - // 3rd to 5th: issue MEM_READ commands - for (i = 0; i <= 2; i++) { - ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2); - ring->ring[ptr++] = 0; - } - - // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); - reg_offset = (reg << 2); - val = 0x13; - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); - - // 7th: program mmUVD_JRBC_RB_REF_DATA - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA); - reg_offset = (reg << 2); - val = 0x1; - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); - - // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); - reg_offset = (reg << 2); - val = 0x1; - mask = 0x1; - - ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0); - ring->ring[ptr++] = 0x01400200; - ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0); - ring->ring[ptr++] = val; - ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); - if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || - ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { - ring->ring[ptr++] = 0; - ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3); - } else { - ring->ring[ptr++] = reg_offset; - ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3); - } - ring->ring[ptr++] = mask; - - //9th to 21st: insert no-op - for (i = 0; i <= 12; i++) { - ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); - ring->ring[ptr++] = 0; - } - - //22nd: reset mmUVD_JRBC_RB_RPTR - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR); - reg_offset = (reg << 2); - val = 0; - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); - - //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); - reg_offset = (reg << 2); - val = 0x12; - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); -} - static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -2114,16 +1710,13 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case 124: - amdgpu_fence_process(&adev->vcn.ring_dec); + amdgpu_fence_process(&adev->vcn.inst->ring_dec); break; case 119: - amdgpu_fence_process(&adev->vcn.ring_enc[0]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]); break; case 120: - amdgpu_fence_process(&adev->vcn.ring_enc[1]); - break; - case 126: - amdgpu_fence_process(&adev->vcn.ring_jpeg); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -2173,6 +1766,86 @@ static int vcn_v1_0_set_powergating_state(void *handle, return ret; } +static void vcn_v1_0_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, vcn.idle_work.work); + unsigned int fences = 0, i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]); + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; + + if (fences) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + + if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + + adev->vcn.pause_dpg_mode(adev, 0, &new_state); + } + + fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec); + fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec); + + if (fences == 0) { + amdgpu_gfx_off_ctrl(adev, true); + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + else + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_GATE); + } else { + schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + } +} + +void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (set_clocks) { + amdgpu_gfx_off_ctrl(adev, false); + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + else + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; + unsigned int fences = 0, i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]); + + if (fences) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + + if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + + adev->vcn.pause_dpg_mode(adev, 0, &new_state); + } +} + static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", .early_init = vcn_v1_0_early_init, @@ -2198,7 +1871,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .align_mask = 0xf, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, @@ -2219,7 +1892,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .insert_start = vcn_v1_0_dec_ring_insert_start, .insert_end = vcn_v1_0_dec_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, + .begin_use = vcn_v1_0_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait, @@ -2232,7 +1905,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .nop = VCN_ENC_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_enc_ring_get_rptr, .get_wptr = vcn_v1_0_enc_ring_get_wptr, .set_wptr = vcn_v1_0_enc_ring_set_wptr, @@ -2251,51 +1924,16 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .insert_nop = amdgpu_ring_insert_nop, .insert_end = vcn_v1_0_enc_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, + .begin_use = vcn_v1_0_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { - .type = AMDGPU_RING_TYPE_VCN_JPEG, - .align_mask = 0xf, - .nop = PACKET0(0x81ff, 0), - .support_64bit_ptrs = false, - .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, - .extra_dw = 64, - .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, - .get_wptr = vcn_v1_0_jpeg_ring_get_wptr, - .set_wptr = vcn_v1_0_jpeg_ring_set_wptr, - .emit_frame_size = - 6 + 6 + /* hdp invalidate / flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + - 8 + /* vcn_v1_0_jpeg_ring_emit_vm_flush */ - 26 + 26 + /* vcn_v1_0_jpeg_ring_emit_fence x2 vm fence */ - 6, - .emit_ib_size = 22, /* vcn_v1_0_jpeg_ring_emit_ib */ - .emit_ib = vcn_v1_0_jpeg_ring_emit_ib, - .emit_fence = vcn_v1_0_jpeg_ring_emit_fence, - .emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_jpeg_ring_test_ring, - .test_ib = amdgpu_vcn_jpeg_ring_test_ib, - .insert_nop = vcn_v1_0_jpeg_ring_nop, - .insert_start = vcn_v1_0_jpeg_ring_insert_start, - .insert_end = vcn_v1_0_jpeg_ring_insert_end, - .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg, - .emit_reg_wait = vcn_v1_0_jpeg_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; + adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; DRM_INFO("VCN decode is enabled in VM mode\n"); } @@ -2304,17 +1942,11 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) - adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; + adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; DRM_INFO("VCN encode is enabled in VM mode\n"); } -static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) -{ - adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; - DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); -} - static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { .set = vcn_v1_0_set_interrupt_state, .process = vcn_v1_0_process_interrupt, @@ -2322,8 +1954,8 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; - adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; + adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs; } const struct amdgpu_ip_block_version vcn_v1_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h index 2a497a7a4840..f67d7391fc21 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h @@ -24,6 +24,8 @@ #ifndef __VCN_V1_0_H__ #define __VCN_V1_0_H__ +void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring); + extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index dfde886cc6bd..c387c81f8695 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -22,7 +22,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_vcn.h" #include "soc15.h" @@ -47,39 +47,13 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 -#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff -#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029 -#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a -#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b -#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea -#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb -#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf -#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1 -#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8 -#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9 -#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082 -#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec -#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed -#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085 -#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 -#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 -#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f - -#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 - -#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b -#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 -#define mmUVD_REG_XX_MASK 0x026c -#define mmUVD_REG_XX_MASK_BASE_IDX 1 - static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); -static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v2_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); /** * vcn_v2_0_early_init - set function pointers @@ -92,11 +66,11 @@ static int vcn_v2_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->vcn.num_vcn_inst = 1; adev->vcn.num_enc_rings = 2; vcn_v2_0_set_dec_ring_funcs(adev); vcn_v2_0_set_enc_ring_funcs(adev); - vcn_v2_0_set_jpeg_ring_funcs(adev); vcn_v2_0_set_irq_funcs(adev); return 0; @@ -118,7 +92,7 @@ static int vcn_v2_0_sw_init(void *handle) /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, - &adev->vcn.irq); + &adev->vcn.inst->irq); if (r) return r; @@ -126,18 +100,11 @@ static int vcn_v2_0_sw_init(void *handle) for (i = 0; i < adev->vcn.num_enc_rings; ++i) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, - &adev->vcn.irq); + &adev->vcn.inst->irq); if (r) return r; } - /* VCN JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_2_0__SRCID__JPEG_DECODE, - &adev->vcn.irq); - if (r) - return r; - r = amdgpu_vcn_sw_init(adev); if (r) return r; @@ -156,50 +123,46 @@ static int vcn_v2_0_sw_init(void *handle) if (r) return r; - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; sprintf(ring->name, "vcn_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; + adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; - adev->vcn.external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); + adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; - adev->vcn.external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; - adev->vcn.external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; - adev->vcn.external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; - adev->vcn.external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); + adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; sprintf(ring->name, "vcn_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; } - ring = &adev->vcn.ring_jpeg; - ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; - sprintf(ring->name, "vcn_jpeg"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); - if (r) - return r; - adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode; - adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->vcn.external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); - return 0; } @@ -234,35 +197,21 @@ static int vcn_v2_0_sw_fini(void *handle) static int vcn_v2_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; - adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ring->doorbell_index); + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index, 0); - ring->sched.ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; - ring->sched.ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + ring = &adev->vcn.inst->ring_enc[i]; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } - } - - ring = &adev->vcn.ring_jpeg; - ring->sched.ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; - goto done; } done: @@ -283,7 +232,7 @@ done: static int vcn_v2_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i; if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || @@ -294,13 +243,10 @@ static int vcn_v2_0_hw_fini(void *handle) ring->sched.ready = false; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; ring->sched.ready = false; } - ring = &adev->vcn.ring_jpeg; - ring->sched.ready = false; - return 0; } @@ -368,9 +314,9 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) offset = 0; } else { WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr)); + lower_32_bits(adev->vcn.inst->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr)); + upper_32_bits(adev->vcn.inst->gpu_addr)); offset = size; WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); @@ -380,22 +326,21 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) /* cache window 1: stack */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); /* cache window 2: context */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); } static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect) @@ -406,88 +351,88 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } offset = 0; } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.gpu_addr), 0, indirect); + upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); offset = size; - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); else - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); /* cache window 1: stack */ if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); /* cache window 2: context */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); /* non-cache window */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } @@ -633,146 +578,23 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, UVD_CGC_CTRL__WCB_MODE_MASK | UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__SCPU_MODE_MASK); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); /* turn off clock gating */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); /* turn on SUVD clock gating */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); /* turn on sw mode in UVD_SUVD_CGC_CTRL */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); } /** - * jpeg_v2_0_start - start JPEG block - * - * @adev: amdgpu_device pointer - * - * Setup and start the JPEG block - */ -static int jpeg_v2_0_start(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = &adev->vcn.ring_jpeg; - uint32_t tmp; - int r = 0; - - /* disable power gating */ - tmp = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp); - - SOC15_WAIT_ON_RREG(VCN, 0, - mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON, - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); - - if (r) { - DRM_ERROR("amdgpu: JPEG disable power gating failed\n"); - return r; - } - - /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */ - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp); - - /* JPEG disable CGC */ - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); - tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; - tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); - - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); - tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK - | JPEG_CGC_GATE__JPEG2_DEC_MASK - | JPEG_CGC_GATE__JPEG_ENC_MASK - | JPEG_CGC_GATE__JMCIF_MASK - | JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); - - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC_MASK, - ~JPEG_SYS_INT_EN__DJRBC_MASK); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); - ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); - - return 0; -} - -/** - * jpeg_v2_0_stop - stop JPEG block - * - * @adev: amdgpu_device pointer - * - * stop the JPEG block - */ -static int jpeg_v2_0_stop(struct amdgpu_device *adev) -{ - uint32_t tmp; - int r = 0; - - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - /* enable JPEG CGC */ - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); - tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; - tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); - - - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); - tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK - |JPEG_CGC_GATE__JPEG2_DEC_MASK - |JPEG_CGC_GATE__JPEG_ENC_MASK - |JPEG_CGC_GATE__JMCIF_MASK - |JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); - - /* enable power gating */ - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)); - tmp &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK; - tmp |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp); - - tmp = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp); - - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, - (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT), - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); - - if (r) { - DRM_ERROR("amdgpu: JPEG enable power gating failed\n"); - return r; - } - - return r; -} - -/** * vcn_v2_0_enable_clock_gating - enable VCN clock gating * * @adev: amdgpu_device pointer @@ -920,7 +742,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; vcn_v2_0_enable_static_power_gating(adev); @@ -932,7 +754,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp); if (indirect) - adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr; + adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr; /* enable clock gating */ vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect); @@ -941,11 +763,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* disable master interupt */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); /* setup mmUVD_LMI_CTRL */ @@ -957,28 +779,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 0x00100000L); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_CNTL), 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUXA0), ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUXB0), ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUX), ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | @@ -986,29 +808,29 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) vcn_v2_0_mc_resume_dpg_mode(adev, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); /* release VCPU reset to boot */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect); /* enable LMI MC and UMC channels */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_CTRL2), 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect); /* enable master interrupt */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr - - (uintptr_t)adev->vcn.dpg_sram_cpu_addr)); + psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr)); /* force RBC into idle state */ rb_bufsz = order_base_2(ring->ring_size); @@ -1046,7 +868,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) static int vcn_v2_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; int i, j, r; @@ -1054,12 +876,8 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, true); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram); - if (r) - return r; - goto jpeg; - } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram); vcn_v2_0_disable_static_power_gating(adev); @@ -1197,24 +1015,21 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); -jpeg: - r = jpeg_v2_0_start(adev); - - return r; + return 0; } static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev) @@ -1233,9 +1048,6 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev) tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); - tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); - tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); @@ -1254,10 +1066,6 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev) uint32_t tmp; int r; - r = jpeg_v2_0_stop(adev); - if (r) - return r; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v2_0_stop_dpg_mode(adev); if (r) @@ -1322,16 +1130,16 @@ power_off: } static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) + int inst_idx, struct dpg_pause_state *new_state) { struct amdgpu_ring *ring; uint32_t reg_data = 0; int ret_code; /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { DRM_DEBUG("dpg pause state changed %d -> %d", - adev->vcn.pause_state.fw_based, new_state->fw_based); + adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); @@ -1351,14 +1159,14 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); /* Restore */ - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); @@ -1377,7 +1185,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); } - adev->vcn.pause_state.fw_based = new_state->fw_based; + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } return 0; @@ -1405,7 +1213,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ @@ -1480,11 +1288,13 @@ static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring) * * Write a start command to the ring. */ -static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) +void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); } @@ -1495,9 +1305,11 @@ static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) * * Write a end command to the ring. */ -static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1)); } @@ -1508,14 +1320,15 @@ static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) * * Write a nop command to the ring. */ -static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { + struct amdgpu_device *adev = ring->adev; int i; WARN_ON(ring->wptr % 2 || count % 2); for (i = 0; i < count / 2; i++) { - amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0)); amdgpu_ring_write(ring, 0); } } @@ -1528,30 +1341,31 @@ static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun * * Write a fence and a trap command to the ring. */ -static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) +void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) { - WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + struct amdgpu_device *adev = ring->adev; - amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID_INTERNAL_OFFSET, 0)); + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0)); amdgpu_ring_write(ring, seq); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, addr & 0xffffffff); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1)); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1)); } @@ -1564,44 +1378,46 @@ static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 * * Write ring commands to execute the indirect buffer */ -static void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) +void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) { + struct amdgpu_device *adev = ring->adev; unsigned vmid = AMDGPU_JOB_GET_VMID(job); - amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0)); amdgpu_ring_write(ring, vmid); - amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0)); amdgpu_ring_write(ring, ib->length_dw); } -static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) +void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, val); - amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0)); amdgpu_ring_write(ring, mask); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1)); } -static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) +void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t data0, data1, mask; @@ -1615,16 +1431,18 @@ static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); } -static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val) +void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, val); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1)); } @@ -1640,7 +1458,7 @@ static uint64_t vcn_v2_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); @@ -1657,7 +1475,7 @@ static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) { + if (ring == &adev->vcn.inst->ring_enc[0]) { if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else @@ -1681,7 +1499,7 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) { + if (ring == &adev->vcn.inst->ring_enc[0]) { if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); @@ -1706,8 +1524,8 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring) * * Write enc a fence and a trap command to the ring. */ -static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, - u64 seq, unsigned flags) +void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) { WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -1718,7 +1536,7 @@ static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP); } -static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, VCN_ENC_CMD_END); } @@ -1731,10 +1549,10 @@ static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) * * Write enc ring commands to execute the indirect buffer */ -static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) +void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1745,9 +1563,8 @@ static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } -static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) +void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) { amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); amdgpu_ring_write(ring, reg << 2); @@ -1755,8 +1572,8 @@ static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } -static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) +void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; @@ -1767,282 +1584,13 @@ static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, lower_32_bits(pd_addr), 0xffffffff); } -static void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val) +void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, val); } -/** - * vcn_v2_0_jpeg_ring_get_rptr - get read pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware read pointer - */ -static uint64_t vcn_v2_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR); -} - -/** - * vcn_v2_0_jpeg_ring_get_wptr - get write pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware write pointer - */ -static uint64_t vcn_v2_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - if (ring->use_doorbell) - return adev->wb.wb[ring->wptr_offs]; - else - return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); -} - -/** - * vcn_v2_0_jpeg_ring_set_wptr - set write pointer - * - * @ring: amdgpu_ring pointer - * - * Commits the write pointer to the hardware - */ -static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - if (ring->use_doorbell) { - adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); - } else { - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); - } -} - -/** - * vcn_v2_0_jpeg_ring_insert_start - insert a start command - * - * @ring: amdgpu_ring pointer - * - * Write a start command to the ring. - */ -static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x68e04); - - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x80010000); -} - -/** - * vcn_v2_0_jpeg_ring_insert_end - insert a end command - * - * @ring: amdgpu_ring pointer - * - * Write a end command to the ring. - */ -static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x68e04); - - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x00010000); -} - -/** - * vcn_v2_0_jpeg_ring_emit_fence - emit an fence & trap command - * - * @ring: amdgpu_ring pointer - * @fence: fence to emit - * - * Write a fence and a trap command to the ring. - */ -static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) -{ - WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, seq); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, seq); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(addr)); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(addr)); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x8); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, - 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); - amdgpu_ring_write(ring, 0); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x3fbc); - - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x1); - - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); - amdgpu_ring_write(ring, 0); -} - -/** - * vcn_v2_0_jpeg_ring_emit_ib - execute indirect buffer - * - * @ring: amdgpu_ring pointer - * @ib: indirect buffer to execute - * - * Write ring commands to execute the indirect buffer. - */ -static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) -{ - unsigned vmid = AMDGPU_JOB_GET_VMID(job); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, ib->length_dw); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); - - amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); - amdgpu_ring_write(ring, 0); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x01400200); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x2); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET, - 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); - amdgpu_ring_write(ring, 0x2); -} - -static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) -{ - uint32_t reg_offset = (reg << 2); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x01400200); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, val); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, - PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); - } else { - amdgpu_ring_write(ring, reg_offset); - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE3)); - } - amdgpu_ring_write(ring, mask); -} - -static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) -{ - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t data0, data1, mask; - - pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); - - /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; - data1 = lower_32_bits(pd_addr); - mask = 0xffffffff; - vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); -} - -static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val) -{ - uint32_t reg_offset = (reg << 2); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, - PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); - } else { - amdgpu_ring_write(ring, reg_offset); - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - } - amdgpu_ring_write(ring, val); -} - -static void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) -{ - int i; - - WARN_ON(ring->wptr % 2 || count % 2); - - for (i = 0; i < count / 2; i++) { - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); - amdgpu_ring_write(ring, 0); - } -} - static int vcn_v2_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -2059,16 +1607,13 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: - amdgpu_fence_process(&adev->vcn.ring_dec); + amdgpu_fence_process(&adev->vcn.inst->ring_dec); break; case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: - amdgpu_fence_process(&adev->vcn.ring_enc[0]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]); break; case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: - amdgpu_fence_process(&adev->vcn.ring_enc[1]); - break; - case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->vcn.ring_jpeg); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -2079,27 +1624,27 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) +int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; uint32_t tmp = 0; unsigned i; int r; - WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 4); if (r) return r; - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.scratch9); + tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -2158,7 +1703,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_dec_ring_get_rptr, .get_wptr = vcn_v2_0_dec_ring_get_wptr, .set_wptr = vcn_v2_0_dec_ring_set_wptr, @@ -2189,7 +1734,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_enc_ring_get_rptr, .get_wptr = vcn_v2_0_enc_ring_get_wptr, .set_wptr = vcn_v2_0_enc_ring_set_wptr, @@ -2215,39 +1760,9 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = { - .type = AMDGPU_RING_TYPE_VCN_JPEG, - .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB, - .get_rptr = vcn_v2_0_jpeg_ring_get_rptr, - .get_wptr = vcn_v2_0_jpeg_ring_get_wptr, - .set_wptr = vcn_v2_0_jpeg_ring_set_wptr, - .emit_frame_size = - SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + - 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */ - 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */ - 8 + 16, - .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */ - .emit_ib = vcn_v2_0_jpeg_ring_emit_ib, - .emit_fence = vcn_v2_0_jpeg_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_jpeg_ring_test_ring, - .test_ib = amdgpu_vcn_jpeg_ring_test_ib, - .insert_nop = vcn_v2_0_jpeg_ring_nop, - .insert_start = vcn_v2_0_jpeg_ring_insert_start, - .insert_end = vcn_v2_0_jpeg_ring_insert_end, - .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; + adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; DRM_INFO("VCN decode is enabled in VM mode\n"); } @@ -2256,17 +1771,11 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) - adev->vcn.ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; + adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; DRM_INFO("VCN encode is enabled in VM mode\n"); } -static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) -{ - adev->vcn.ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; - DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); -} - static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = { .set = vcn_v2_0_set_interrupt_state, .process = vcn_v2_0_process_interrupt, @@ -2274,8 +1783,8 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = { static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; - adev->vcn.irq.funcs = &vcn_v2_0_irq_funcs; + adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs; } const struct amdgpu_ip_block_version vcn_v2_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h index a74227f4663b..6c9de1882428 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h @@ -24,6 +24,32 @@ #ifndef __VCN_V2_0_H__ #define __VCN_V2_0_H__ +extern void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); +extern void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); +extern void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags); +extern void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr); +extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val); +extern int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring); + +extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags); +extern void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr); +extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); + extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block; #endif /* __VCN_V2_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c new file mode 100644 index 000000000000..2d64ba1adf99 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -0,0 +1,1799 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> + +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "vcn_v2_0.h" +#include "mmsch_v1_0.h" + +#include "vcn/vcn_2_5_offset.h" +#include "vcn/vcn_2_5_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 +#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f +#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10 +#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11 +#define mmUVD_NO_OP_INTERNAL_OFFSET 0x29 +#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66 +#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d + +#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 +#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c + +#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 + +static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state); +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); +static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); + +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +/** + * vcn_v2_5_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int vcn_v2_5_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_ARCTURUS) { + u32 harvest; + int i; + + adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING); + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + adev->vcn.harvest_config |= 1 << i; + } + + if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | + AMDGPU_VCN_HARVEST_VCN1)) + /* both instances are harvested, disable the block */ + return -ENOENT; + } else + adev->vcn.num_vcn_inst = 1; + + if (amdgpu_sriov_vf(adev)) { + adev->vcn.num_vcn_inst = 2; + adev->vcn.harvest_config = 0; + adev->vcn.num_enc_rings = 1; + } else { + adev->vcn.num_enc_rings = 2; + } + + vcn_v2_5_set_dec_ring_funcs(adev); + vcn_v2_5_set_enc_ring_funcs(adev); + vcn_v2_5_set_irq_funcs(adev); + + return 0; +} + +/** + * vcn_v2_5_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v2_5_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int i, j, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + /* VCN DEC TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], + VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq); + if (r) + return r; + + /* VCN ENC TRAP */ + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], + i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq); + if (r) + return r; + } + } + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + + if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) { + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + } + DRM_INFO("PSP loading VCN firmware\n"); + } + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9); + adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP); + + ring = &adev->vcn.inst[j].ring_dec; + ring->use_doorbell = true; + + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + (amdgpu_sriov_vf(adev) ? 2*j : 8*j); + sprintf(ring->name, "vcn_dec_%d", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[j].ring_enc[i]; + ring->use_doorbell = true; + + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + (amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j)); + + sprintf(ring->name, "vcn_enc_%d.%d", j, i); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + } + } + + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode; + + return 0; +} + +/** + * vcn_v2_5_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v2_5_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v2_5_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v2_5_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, j, r = 0; + + if (amdgpu_sriov_vf(adev)) + r = vcn_v2_5_sriov_start(adev); + + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + + if (amdgpu_sriov_vf(adev)) { + adev->vcn.inst[j].ring_enc[0].sched.ready = true; + adev->vcn.inst[j].ring_enc[1].sched.ready = false; + adev->vcn.inst[j].ring_enc[2].sched.ready = false; + adev->vcn.inst[j].ring_dec.sched.ready = true; + } else { + + ring = &adev->vcn.inst[j].ring_dec; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index, j); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[j].ring_enc[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } + } + } + +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); + + return r; +} + +/** + * vcn_v2_5_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v2_5_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, j; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + ring = &adev->vcn.inst[i].ring_dec; + + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) + vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + + ring->sched.ready = false; + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->sched.ready = false; + } + } + + return 0; +} + +/** + * vcn_v2_5_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v2_5_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v2_5_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v2_5_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v2_5_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v2_5_hw_init(adev); + + return r; +} + +/** + * vcn_v2_5_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = size; + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + } +} + +static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + +/** + * vcn_v2_5_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * + * Disable clock gating for VCN block + */ +static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data; + int ret = 0; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* UVD disable CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__MMSCH_MASK); + + WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); + + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + /* turn on */ + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__UVD_SC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); + } +} + +static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev, + uint8_t sram_sel, int inst_idx, uint8_t indirect) +{ + uint32_t reg_data = 0; + + /* enable sw clock gating control */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); +} + +/** + * vcn_v2_5_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * + * Enable clock gating for VCN block + */ +static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* enable UVD CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); + } +} + +static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp); + + if (indirect) + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + + /* enable clock gating */ + vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + tmp |= UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interupt */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); + + /* setup mmUVD_LMI_CTRL */ + tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_CNTL), + 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_SET_MUXA0), + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_SET_MUXB0), + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_SET_MUX), + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); + + vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); + + /* enable LMI MC and UMC channels */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_CTRL2), 0, 0, indirect); + + /* unblock VCPU register access */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + + ring = &adev->vcn.inst[inst_idx].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0); + + WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0); + + ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + return 0; +} + +static int vcn_v2_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + int i, j, k, r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + continue; + } + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* set uvd status busy */ + tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return 0; + + /*SW clock gating */ + vcn_v2_5_disable_clock_gating(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* setup mmUVD_LMI_CTRL */ + tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL); + tmp &= ~0xff; + WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8| + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup mmUVD_MPC_CNTL */ + tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup mmUVD_MPC_SET_MUX */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + } + + vcn_v2_5_mc_resume(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* VCN global tiling registers */ + WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (k = 0; k < 10; ++k) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(UVD, i, mmUVD_STATUS); + if (status & 2) + break; + if (amdgpu_emu_mode == 1) + msleep(500); + else + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("VCN decode not responding, giving up!!!\n"); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0); + + ring = &adev->vcn.inst[i].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vcn.inst[i].ring_enc[1]; + WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + } + + return 0; +} + +static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev, + struct amdgpu_mm_table *table) +{ + uint32_t data = 0, loop = 0, size = 0; + uint64_t addr = table->gpu_addr; + struct mmsch_v1_1_init_header *header = NULL;; + + header = (struct mmsch_v1_1_init_header *)table->cpu_addr; + size = header->total_size; + + /* + * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of + * memory descriptor location + */ + WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr)); + WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr)); + + /* 2, update vmid of descriptor */ + data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID); + data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + /* use domain0 for MM scheduler */ + data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data); + + /* 3, notify mmsch about the size of this descriptor */ + WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size); + + /* 4, set resp to zero */ + WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0); + + /* + * 5, kick off the initialization and wait until + * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero + */ + WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001); + + data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP); + loop = 10; + while ((data & 0x10000002) != 0x10000002) { + udelay(100); + data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP); + loop--; + if (!loop) + break; + } + + if (!loop) { + dev_err(adev->dev, + "failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n", + data); + return -EBUSY; + } + + return 0; +} + +static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint32_t offset, size, tmp, i, rb_bufsz; + uint32_t table_size = 0; + struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } }; + struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } }; + struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } }; + struct mmsch_v1_0_cmd_end end = { { 0 } }; + uint32_t *init_table = adev->virt.mm_table.cpu_addr; + struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table; + + direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING; + end.cmd_header.command_type = MMSCH_COMMAND__END; + + header->version = MMSCH_VERSION; + header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2; + init_table += header->total_size; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + header->eng[i].table_offset = header->total_size; + header->eng[i].init_status = 0; + header->eng[i].table_size = 0; + + table_size = 0; + + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + /* mc resume*/ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); + offset = 0; + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), 0); + } else { + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = size; + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), + size); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), + 0); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), + AMDGPU_VCN_STACK_SIZE); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE)); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE)); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), + 0); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2), + AMDGPU_VCN_CONTEXT_SIZE); + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->wptr = 0; + + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), + lower_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), + upper_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), + ring->ring_size / 4); + + ring = &adev->vcn.inst[i].ring_dec; + ring->wptr = 0; + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), + lower_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), + upper_32_bits(ring->gpu_addr)); + + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + MMSCH_V1_0_INSERT_DIRECT_WT( + SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp); + + /* add end packet */ + memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); + table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; + init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4; + + /* refine header */ + header->eng[i].table_size = table_size; + header->total_size += table_size; + } + + return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table); +} + +static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + int ret_code = 0; + uint32_t tmp; + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2); + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return 0; +} + +static int vcn_v2_5_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int i, r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_5_stop_dpg_mode(adev, i); + continue; + } + + /* wait for vcn idle */ + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* block LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); + + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* clear status */ + WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); + + vcn_v2_5_enable_clock_gating(adev); + + /* enable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + } + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state) +{ + struct amdgpu_ring *ring; + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d -> %d", + adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.inst[inst_idx].ring_enc[1]; + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); + + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + +/** + * vcn_v2_5_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); +} + +/** + * vcn_v2_5_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); +} + +/** + * vcn_v2_5_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2, + lower_32_bits(ring->wptr) | 0x80000000); + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_DEC, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = vcn_v2_5_dec_ring_get_rptr, + .get_wptr = vcn_v2_5_dec_ring_get_wptr, + .set_wptr = vcn_v2_5_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ + .emit_ib = vcn_v2_0_dec_ring_emit_ib, + .emit_fence = vcn_v2_0_dec_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, + .test_ring = vcn_v2_0_dec_ring_test_ring, + .test_ib = amdgpu_vcn_dec_ring_test_ib, + .insert_nop = vcn_v2_0_dec_ring_insert_nop, + .insert_start = vcn_v2_0_dec_ring_insert_start, + .insert_end = vcn_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v2_5_enc_ring_get_rptr - get enc read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc read pointer + */ +static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); +} + +/** + * vcn_v2_5_enc_ring_get_wptr - get enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc write pointer + */ +static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); + } else { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); + } +} + +/** + * vcn_v2_5_enc_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } + } else { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + } + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = vcn_v2_5_enc_ring_get_rptr, + .get_wptr = vcn_v2_5_enc_ring_get_wptr, + .set_wptr = vcn_v2_5_enc_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_enc_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; + adev->vcn.inst[i].ring_dec.me = i; + DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); + } +} + +static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) +{ + int i, j; + + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; + adev->vcn.inst[j].ring_enc[i].me = j; + } + DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j); + } +} + +static bool vcn_v2_5_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); + } + + return ret; +} + +static int vcn_v2_5_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE, ret); + if (ret) + return ret; + } + + return ret; +} + +static int vcn_v2_5_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE); + + if (amdgpu_sriov_vf(adev)) + return 0; + + if (enable) { + if (vcn_v2_5_is_idle(handle)) + return -EBUSY; + vcn_v2_5_enable_clock_gating(adev); + } else { + vcn_v2_5_disable_clock_gating(adev); + } + + return 0; +} + +static int vcn_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (amdgpu_sriov_vf(adev)) + return 0; + + if(state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v2_5_stop(adev); + else + ret = vcn_v2_5_start(adev); + + if(!ret) + adev->vcn.cur_state = state; + + return ret; +} + +static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); + break; + case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); + break; + case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = { + .set = vcn_v2_5_set_interrupt_state, + .process = vcn_v2_5_process_interrupt, +}; + +static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs; + } +} + +static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { + .name = "vcn_v2_5", + .early_init = vcn_v2_5_early_init, + .late_init = NULL, + .sw_init = vcn_v2_5_sw_init, + .sw_fini = vcn_v2_5_sw_fini, + .hw_init = vcn_v2_5_hw_init, + .hw_fini = vcn_v2_5_hw_fini, + .suspend = vcn_v2_5_suspend, + .resume = vcn_v2_5_resume, + .is_idle = vcn_v2_5_is_idle, + .wait_for_idle = vcn_v2_5_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v2_5_set_clockgating_state, + .set_powergating_state = vcn_v2_5_set_powergating_state, +}; + +const struct amdgpu_ip_block_version vcn_v2_5_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 2, + .minor = 5, + .rev = 0, + .funcs = &vcn_v2_5_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h new file mode 100644 index 000000000000..8d9c0800b8e0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V2_5_H__ +#define __VCN_V2_5_H__ + +extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block; + +#endif /* __VCN_V2_5_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 22260e6963b8..407c6093c2ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -50,7 +50,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); return; @@ -64,7 +64,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -80,7 +80,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); @@ -106,7 +106,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); return; @@ -125,7 +125,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 0); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -145,7 +145,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 0); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); @@ -219,14 +219,14 @@ static uint32_t vega10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih) static int vega10_ih_irq_init(struct amdgpu_device *adev) { struct amdgpu_ih_ring *ih; - u32 ih_rb_cntl; + u32 ih_rb_cntl, ih_chicken; int ret = 0; u32 tmp; /* disable irqs */ vega10_ih_disable_interrupts(adev); - adev->nbio_funcs->ih_control(adev); + adev->nbio.funcs->ih_control(adev); ih = &adev->irq.ih; /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ @@ -237,8 +237,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); - - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); return -ETIMEDOUT; @@ -247,6 +246,20 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); } + if ((adev->asic_type == CHIP_ARCTURUS && + adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || + adev->asic_type == CHIP_RENOIR) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); + if (adev->irq.ih.use_bus_addr) { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, + MC_SPACE_GPA_ENABLE, 1); + } else { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, + MC_SPACE_FBPA_ENABLE, 1); + } + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); + } + /* set the writeback address whether it's enabled or not */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr)); @@ -272,7 +285,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) WPTR_OVERFLOW_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -299,7 +312,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); @@ -664,10 +677,49 @@ static int vega10_ih_soft_reset(void *handle) return 0; } +static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def, field_val; + + if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { + def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL); + field_val = enable ? 0 : 1; + /** + * Vega10 does not have IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE + * and IH_BUFFER_MEM_CLK_SOFT_OVERRIDE field. + */ + if (adev->asic_type > CHIP_VEGA10) { + data = REG_SET_FIELD(data, IH_CLK_CTRL, + IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val); + } + + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DYN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + REG_CLK_SOFT_OVERRIDE, field_val); + if (def != data) + WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data); + } +} + static int vega10_ih_set_clockgating_state(void *handle, enum amd_clockgating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + vega10_ih_update_clockgating_state(adev, + state == AMD_CG_STATE_GATE); return 0; + } static int vega10_ih_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index a8e92638a2e8..6b52a539d51b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -24,7 +24,6 @@ #include "soc15.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "vega10_ip_offset.h" int vega10_reg_base_init(struct amdgpu_device *adev) @@ -81,6 +80,10 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_DOORBELL64_VCN6_7; adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL64_FIRST_NON_CP; adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 0db84386252a..556f854e3551 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -24,7 +24,6 @@ #include "soc15.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "vega20_ip_offset.h" int vega20_reg_base_init(struct amdgpu_device *adev) @@ -50,6 +49,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i])); + adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i])); } return 0; } @@ -85,6 +86,10 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCN6_7; adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP; adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6575ddcfcf00..78b35901643b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -690,15 +690,15 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev) } /** - * vi_asic_reset - soft reset GPU + * vi_asic_pci_config_reset - soft reset GPU * * @adev: amdgpu_device pointer * - * Look up which blocks are hung and attempt - * to reset them. + * Use PCI Config method to reset the GPU. + * * Returns 0 for success. */ -static int vi_asic_reset(struct amdgpu_device *adev) +static int vi_asic_pci_config_reset(struct amdgpu_device *adev) { int r; @@ -711,6 +711,70 @@ static int vi_asic_reset(struct amdgpu_device *adev) return r; } +static bool vi_asic_supports_baco(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_FIJI: + case CHIP_TONGA: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_TOPAZ: + return amdgpu_dpm_is_baco_supported(adev); + default: + return false; + } +} + +static enum amd_reset_method +vi_asic_reset_method(struct amdgpu_device *adev) +{ + bool baco_reset; + + switch (adev->asic_type) { + case CHIP_FIJI: + case CHIP_TONGA: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_TOPAZ: + baco_reset = amdgpu_dpm_is_baco_supported(adev); + break; + default: + baco_reset = false; + break; + } + + if (baco_reset) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_LEGACY; +} + +/** + * vi_asic_reset - soft reset GPU + * + * @adev: amdgpu_device pointer + * + * Look up which blocks are hung and attempt + * to reset them. + * Returns 0 for success. + */ +static int vi_asic_reset(struct amdgpu_device *adev) +{ + int r; + + if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); + r = amdgpu_dpm_baco_reset(adev); + } else { + r = vi_asic_pci_config_reset(adev); + } + + return r; +} + static u32 vi_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1023,6 +1087,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .read_bios_from_rom = &vi_read_bios_from_rom, .read_register = &vi_read_register, .reset = &vi_asic_reset, + .reset_method = &vi_asic_reset_method, .set_vga_state = &vi_vga_set_state, .get_xclk = &vi_get_xclk, .set_uvd_clocks = &vi_set_uvd_clocks, @@ -1035,6 +1100,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .get_pcie_usage = &vi_get_pcie_usage, .need_reset_on_init = &vi_need_reset_on_init, .get_pcie_replay_count = &vi_get_pcie_replay_count, + .supports_baco = &vi_asic_supports_baco, }; #define CZ_REV_BRISTOL(rev) \ diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index 8de0772f986c..defb4aaf929a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -31,4 +31,5 @@ void vi_srbm_select(struct amdgpu_device *adev, int vi_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); + #endif |