Diffstat (limited to 'drivers/gpu/drm/amd')
200 files changed, 6363 insertions, 2472 deletions
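One user-visible change in this series is worth calling out up front: the lockup_timeout module parameter (amdgpu_drv.c, near the end of this diff) changes from a single integer into a comma-separated string so that GFX, Compute, SDMA and Video jobs can each get their own timeout, parsed by the new amdgpu_device_get_job_timeout_settings(). The stand-alone C sketch below mirrors that parsing logic; it is an illustration only, and the parse_lockup_timeout()/struct job_timeouts names are invented for this example, not part of the patch.

#define _DEFAULT_SOURCE		/* for strsep() on glibc */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct job_timeouts { long gfx, compute, sdma, video; };

/* Mirror of the kernel's parsing: set the defaults first, then apply one
 * comma-separated override per engine in [GFX,Compute,SDMA,Video] order.
 */
static void parse_lockup_timeout(const char *param, struct job_timeouts *t)
{
	char buf[256];
	char *input = buf, *setting;
	int index = 0;

	t->gfx = t->sdma = t->video = 10000;	/* non-compute default, in ms */
	t->compute = -1;			/* no timeout enforced */

	strncpy(buf, param, sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';

	while ((setting = strsep(&input, ",")) != NULL && *setting) {
		long timeout = strtol(setting, NULL, 0);

		/* 0 and negative values keep the default, as in the patch */
		if (timeout > 0) {
			switch (index) {
			case 0: t->gfx = timeout; break;
			case 1: t->compute = timeout; break;
			case 2: t->sdma = timeout; break;
			case 3: t->video = timeout; break;
			}
		}
		index++;
	}

	/* A single value applies to all non-compute engines */
	if (index == 1)
		t->sdma = t->video = t->gfx;
}

int main(void)
{
	struct job_timeouts t;

	parse_lockup_timeout("10000,0,5000,5000", &t);
	printf("gfx=%ld compute=%ld sdma=%ld video=%ld\n",
	       t.gfx, t.compute, t.sdma, t.video);
	return 0;
}

For example, "10000,0,5000,5000" leaves the compute timeout at its "no timeout" default (the 0 is ignored) while setting GFX to 10000 ms and SDMA/Video to 5000 ms, matching the behavior described in the DOC: lockup_timeout comment below.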
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 844f0a162981..a04f2fc7bf37 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -27,10 +27,11 @@ config DRM_AMDGPU_CIK  config DRM_AMDGPU_USERPTR  	bool "Always enable userptr write support"  	depends on DRM_AMDGPU -	select MMU_NOTIFIER +	depends on ARCH_HAS_HMM +	select HMM_MIRROR  	help -	  This option selects CONFIG_MMU_NOTIFIER if it isn't already -	  selected to enabled full userptr support. +	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if they +	  aren't already selected, to enable full userptr support.  config DRM_AMDGPU_GART_DEBUGFS  	bool "Allow GART access through debugfs" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index fdd0ca4b0f0b..57ce44cc3226 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \  	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \  	amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \  	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ -	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ +	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \  	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \  	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \  	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ @@ -173,7 +173,7 @@ endif  amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o  amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o  amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o  include $(FULL_AMD_PATH)/powerplay/Makefile diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 14398f55f602..58f8f132904d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,7 +118,6 @@ extern int amdgpu_disp_priority;  extern int amdgpu_hw_i2c;  extern int amdgpu_pcie_gen2;  extern int amdgpu_msi; -extern int amdgpu_lockup_timeout;  extern int amdgpu_dpm;  extern int amdgpu_fw_load_type;  extern int amdgpu_aspm; @@ -211,6 +210,7 @@ struct amdgpu_irq_src;  struct amdgpu_fpriv;  struct amdgpu_bo_va_mapping;  struct amdgpu_atif; +struct kfd_vm_fault_info;  enum amdgpu_cp_irq {  	AMDGPU_CP_IRQ_GFX_EOP = 0, @@ -415,6 +415,7 @@ struct amdgpu_fpriv {  };  int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); +int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);  int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,  		  unsigned size, struct amdgpu_ib *ib); @@ -558,6 +559,8 @@ struct amdgpu_asic_funcs {  			       uint64_t *count1);  	/* do we need to reset the asic at init time (e.g., kexec) */  	bool (*need_reset_on_init)(struct amdgpu_device *adev); +	/* PCIe replay counter */ +	uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);  };  /* @@ -639,6 +642,11 @@ struct nbio_hdp_flush_reg {  	u32 ref_and_mask_sdma1;  }; +struct amdgpu_mmio_remap { +	u32 reg_offset; +	resource_size_t bus_addr; +}; +  struct amdgpu_nbio_funcs {  	const struct nbio_hdp_flush_reg *hdp_flush_reg;  	u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); @@ -666,6 +674,7 @@ struct amdgpu_nbio_funcs {  	void (*ih_control)(struct amdgpu_device *adev);  	void (*init_registers)(struct amdgpu_device *adev);  	void (*detect_hw_virt)(struct 
amdgpu_device *adev); +	void (*remap_hdp_registers)(struct amdgpu_device *adev);  };  struct amdgpu_df_funcs { @@ -680,6 +689,12 @@ struct amdgpu_df_funcs {  				      u32 *flags);  	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,  					    bool enable); +	int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, +					 int is_enable); +	int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, +					 int is_disable); +	void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, +					 uint64_t *count);  };  /* Define the HW IP blocks will be used in driver , add more if necessary */  enum amd_hw_ip_block_type { @@ -764,6 +779,7 @@ struct amdgpu_device {  	void __iomem			*rmmio;  	/* protects concurrent MM_INDEX/DATA based register access */  	spinlock_t mmio_idx_lock; +	struct amdgpu_mmio_remap        rmmio_remap;  	/* protects concurrent SMC based register access */  	spinlock_t smc_idx_lock;  	amdgpu_rreg_t			smc_rreg; @@ -936,6 +952,13 @@ struct amdgpu_device {  	struct work_struct		xgmi_reset_work;  	bool                            in_baco_reset; + +	long				gfx_timeout; +	long				sdma_timeout; +	long				video_timeout; +	long				compute_timeout; + +	uint64_t			unique_id;  };  static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1065,6 +1088,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);  #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))  #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))  #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) +#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))  /* Common functions */  bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); @@ -1081,6 +1105,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,  					     const u32 array_size);  bool amdgpu_device_is_px(struct drm_device *dev); +bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, +				      struct amdgpu_device *peer_adev); +  /* atpx handler */  #if defined(CONFIG_VGA_SWITCHEROO)  void amdgpu_register_atpx_handler(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index aeead072fa79..4af3989e4a75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -25,8 +25,10 @@  #include <drm/drmP.h>  #include "amdgpu.h"  #include "amdgpu_gfx.h" +#include "amdgpu_dma_buf.h"  #include <linux/module.h>  #include <linux/dma-buf.h> +#include "amdgpu_xgmi.h"  static const unsigned int compute_vmid_bitmap = 0xFF00; @@ -148,7 +150,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)  		};  		/* this is going to have a few of the MSBs set that we need to -		 * clear */ +		 * clear +		 */  		bitmap_complement(gpu_resources.queue_bitmap,  				  adev->gfx.mec.queue_bitmap,  				  KGD_MAX_QUEUES); @@ -162,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)  				  gpu_resources.queue_bitmap);  		/* According to linux/bitmap.h we shouldn't use bitmap_clear if -		 * nbits is not compile time constant */ +		 * nbits is not compile time constant +		 */  		last_valid_bit = 1 /* only first MEC can have compute queues */  				* adev->gfx.mec.num_pipe_per_mec  				* adev->gfx.mec.num_queue_per_pipe; @@ -335,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, 
void *mem_obj)  	amdgpu_bo_unref(&(bo));  } +int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, +				void **mem_obj) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd; +	struct amdgpu_bo *bo = NULL; +	struct amdgpu_bo_param bp; +	int r; + +	memset(&bp, 0, sizeof(bp)); +	bp.size = size; +	bp.byte_align = 1; +	bp.domain = AMDGPU_GEM_DOMAIN_GWS; +	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; +	bp.type = ttm_bo_type_device; +	bp.resv = NULL; + +	r = amdgpu_bo_create(adev, &bp, &bo); +	if (r) { +		dev_err(adev->dev, +			"failed to allocate gws BO for amdkfd (%d)\n", r); +		return r; +	} + +	*mem_obj = bo; +	return 0; +} + +void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj) +{ +	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj; + +	amdgpu_bo_unref(&bo); +} +  uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,  				      enum kgd_engine_type type)  { @@ -518,6 +556,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)  	return adev->gmc.xgmi.hive_id;  } +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src) +{ +	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src; +	struct amdgpu_device *adev = (struct amdgpu_device *)dst; +	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); + +	if (ret < 0) { +		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n", +			adev->gmc.xgmi.physical_node_id, +			peer_adev->gmc.xgmi.physical_node_id, ret); +		ret = 0; +	} +	return (uint8_t)ret; +} + +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + +	return adev->rmmio_remap.bus_addr; +} + +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + +	return adev->gds.gws_size; +}  int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,  				uint32_t vmid, uint64_t gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4e37fa7e85b1..f968bf147c5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -61,7 +61,6 @@ struct kgd_mem {  	atomic_t invalid;  	struct amdkfd_process_info *process_info; -	struct page **user_pages;  	struct amdgpu_sync sync; @@ -154,6 +153,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,  				void **mem_obj, uint64_t *gpu_addr,  				void **cpu_ptr, bool mqd_gfx9);  void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); +int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj); +void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj); +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem); +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);  uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,  				      enum kgd_engine_type type);  void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, @@ -169,6 +172,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,  				  uint32_t *flags);  uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);  uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);  #define read_user_wptr(mmptr, wptr, dst)				\  	
({								\ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index fa09e11a600c..c6abcf72e822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)  	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +  			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; -	pr_debug("kfd: sdma base address: 0x%x\n", retval); +	pr_debug("sdma base address: 0x%x\n", retval);  	return retval;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index fec3a6aa1de6..4e8b4e949926 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)  	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +  		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; -	pr_debug("kfd: sdma base address: 0x%x\n", retval); +	pr_debug("sdma base address: 0x%x\n", retval);  	return retval;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index ef3d93b995b2..d5af41143d12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,  	lock_srbm(kgd, 0, 0, 0, vmid); -	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); -	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); +	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); +	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);  	/* APE1 no longer exists on GFX9 */  	unlock_srbm(kgd); @@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,  		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));  		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,  			((mec << 5) | (pipe << 3) | queue_id | 0x80)); -		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); +		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);  	}  	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ @@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,  	for (reg = hqd_base;  	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) -		WREG32(reg, mqd_hqd[reg - hqd_base]); +		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);  	/* Activate doorbell logic before triggering WPTR poll. 
*/  	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,  			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); -	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); +	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);  	if (wptr) {  		/* Don't read wptr with get_user because the user @@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,  		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);  		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; -		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), +		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),  		       lower_32_bits(guessed_wptr)); -		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), +		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),  		       upper_32_bits(guessed_wptr)); -		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), +		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),  		       lower_32_bits((uintptr_t)wptr)); -		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), +		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),  		       upper_32_bits((uintptr_t)wptr));  		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),  		       get_queue_mask(adev, pipe_id, queue_id));  	}  	/* Start the EOP fetcher */ -	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), +	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),  	       REG_SET_FIELD(m->cp_hqd_eop_rptr,  			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));  	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); -	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); +	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);  	release_queue(kgd); @@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,  	acquire_queue(kgd, pipe_id, queue_id);  	if (m->cp_hqd_vmid == 0) -		WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); +		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);  	switch (reset_type) {  	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: @@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,  		break;  	} -	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); +	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);  	end_jiffies = (utimeout * HZ / 1000) + jiffies;  	while (true) { @@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,  	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;  } -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - -	/* Use legacy mode tlb invalidation. -	 * -	 * Currently on Raven the code below is broken for anything but -	 * legacy mode due to a MMHUB power gating problem. A workaround -	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ -	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack -	 * bit. -	 * -	 * TODO 1: agree on the right set of invalidation registers for -	 * KFD use. Use the last one for now. Invalidate both GC and -	 * MMHUB. 
-	 * -	 * TODO 2: support range-based invalidation, requires kfg2kgd -	 * interface change -	 */ -	amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); -} - -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, +			uint32_t flush_type)  {  	signed long r;  	uint32_t seq; @@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)  			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |  			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |  			PACKET3_INVALIDATE_TLBS_PASID(pasid) | -			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));  	amdgpu_fence_emit_polling(ring, &seq);  	amdgpu_ring_commit(ring);  	spin_unlock(&adev->gfx.kiq.ring_lock); @@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;  	int vmid;  	struct amdgpu_ring *ring = &adev->gfx.kiq.ring; +	uint32_t flush_type = 0;  	if (adev->in_gpu_reset)  		return -EIO; +	if (adev->gmc.xgmi.num_physical_nodes && +		adev->asic_type == CHIP_VEGA20) +		flush_type = 2;  	if (ring->sched.ready) -		return invalidate_tlbs_with_kiq(adev, pasid); +		return invalidate_tlbs_with_kiq(adev, pasid, flush_type);  	for (vmid = 0; vmid < 16; vmid++) {  		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) @@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {  			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)  				== pasid) { -				write_vmid_invalidate_request(kgd, vmid); +				amdgpu_gmc_flush_gpu_tlb(adev, vmid, +							 flush_type);  				break;  			}  		} @@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)  		return 0;  	} -	write_vmid_invalidate_request(kgd, vmid); +	/* Use legacy mode tlb invalidation. +	 * +	 * Currently on Raven the code below is broken for anything but +	 * legacy mode due to a MMHUB power gating problem. A workaround +	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ +	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack +	 * bit. +	 * +	 * TODO 1: agree on the right set of invalidation registers for +	 * KFD use. Use the last one for now. Invalidate both GC and +	 * MMHUB. 
+	 * +	 * TODO 2: support range-based invalidation, requires kfd2kgd +	 * interface change +	 */ +	amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);  	return 0;  } @@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,  	mutex_lock(&adev->grbm_idx_mutex); -	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); +	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);  	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);  	data = REG_SET_FIELD(data, GRBM_GFX_INDEX, @@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,  	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,  		SE_BROADCAST_WRITES, 1); -	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); +	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);  	mutex_unlock(&adev->grbm_idx_mutex);  	return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a6e5184d436c..87177ed37dd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -30,6 +30,7 @@  #include "amdgpu_object.h"  #include "amdgpu_vm.h"  #include "amdgpu_amdkfd.h" +#include "amdgpu_dma_buf.h"  /* Special VM and GART address alignment needed for VI pre-Fiji due to   * a HW bug. @@ -456,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,  	mutex_unlock(&process_info->lock);  } +static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, +		struct amdkfd_process_info *process_info) +{ +	struct ttm_validate_buffer *bo_list_entry; + +	bo_list_entry = &mem->validate_list; +	mutex_lock(&process_info->lock); +	list_del(&bo_list_entry->head); +	mutex_unlock(&process_info->lock); +} +  /* Initializes user pages. It registers the MMU notifier and validates   * the userptr BO in the GTT domain.   
* @@ -491,28 +503,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,  		goto out;  	} -	/* If no restore worker is running concurrently, user_pages -	 * should not be allocated -	 */ -	WARN(mem->user_pages, "Leaking user_pages array"); - -	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, -					   sizeof(struct page *), -					   GFP_KERNEL | __GFP_ZERO); -	if (!mem->user_pages) { -		pr_err("%s: Failed to allocate pages array\n", __func__); -		ret = -ENOMEM; -		goto unregister_out; -	} - -	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); +	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);  	if (ret) {  		pr_err("%s: Failed to get user pages: %d\n", __func__, ret); -		goto free_out; +		goto unregister_out;  	} -	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); -  	ret = amdgpu_bo_reserve(bo, true);  	if (ret) {  		pr_err("%s: Failed to reserve BO\n", __func__); @@ -525,11 +521,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,  	amdgpu_bo_unreserve(bo);  release_out: -	if (ret) -		release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: -	kvfree(mem->user_pages); -	mem->user_pages = NULL; +	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);  unregister_out:  	if (ret)  		amdgpu_mn_unregister(bo); @@ -588,7 +580,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,  	ctx->kfd_bo.priority = 0;  	ctx->kfd_bo.tv.bo = &bo->tbo;  	ctx->kfd_bo.tv.num_shared = 1; -	ctx->kfd_bo.user_pages = NULL;  	list_add(&ctx->kfd_bo.tv.head, &ctx->list);  	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); @@ -652,7 +643,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,  	ctx->kfd_bo.priority = 0;  	ctx->kfd_bo.tv.bo = &bo->tbo;  	ctx->kfd_bo.tv.num_shared = 1; -	ctx->kfd_bo.user_pages = NULL;  	list_add(&ctx->kfd_bo.tv.head, &ctx->list);  	i = 0; @@ -896,6 +886,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,  				  AMDGPU_FENCE_OWNER_KFD, false);  	if (ret)  		goto wait_pd_fail; +	ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); +	if (ret) +		goto reserve_shared_fail;  	amdgpu_bo_fence(vm->root.base.bo,  			&vm->process_info->eviction_fence->base, true);  	amdgpu_bo_unreserve(vm->root.base.bo); @@ -909,6 +902,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,  	return 0; +reserve_shared_fail:  wait_pd_fail:  validate_pd_fail:  	amdgpu_bo_unreserve(vm->root.base.bo); @@ -1109,7 +1103,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(  		if (!offset || !*offset)  			return -EINVAL;  		user_addr = *offset; -	} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { +	} else if (flags & (ALLOC_MEM_FLAGS_DOORBELL | +			ALLOC_MEM_FLAGS_MMIO_REMAP)) {  		domain = AMDGPU_GEM_DOMAIN_GTT;  		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;  		bo_type = ttm_bo_type_sg; @@ -1199,12 +1194,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(  	if (user_addr) {  		ret = init_user_pages(*mem, current->mm, user_addr); -		if (ret) { -			mutex_lock(&avm->process_info->lock); -			list_del(&(*mem)->validate_list.head); -			mutex_unlock(&avm->process_info->lock); +		if (ret)  			goto allocate_init_user_pages_failed; -		}  	}  	if (offset) @@ -1213,6 +1204,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(  	return 0;  allocate_init_user_pages_failed: +	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);  	amdgpu_bo_unref(&bo);  	/* Don't unreserve system mem limit twice */  	goto err_reserve_limit; @@ -1262,15 +1254,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(  	
list_del(&bo_list_entry->head);  	mutex_unlock(&process_info->lock); -	/* Free user pages if necessary */ -	if (mem->user_pages) { -		pr_debug("%s: Freeing user_pages array\n", __func__); -		if (mem->user_pages[0]) -			release_pages(mem->user_pages, -					mem->bo->tbo.ttm->num_pages); -		kvfree(mem->user_pages); -	} -  	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);  	if (unlikely(ret))  		return ret; @@ -1294,8 +1277,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(  	/* Free the sync object */  	amdgpu_sync_free(&mem->sync); -	/* If the SG is not NULL, it's one we created for a doorbell -	 * BO. We need to free it. +	/* If the SG is not NULL, it's one we created for a doorbell or mmio +	 * remap BO. We need to free it.  	 */  	if (mem->bo->tbo.sg) {  		sg_free_table(mem->bo->tbo.sg); @@ -1409,7 +1392,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(  			ret = map_bo_to_gpuvm(adev, entry, ctx.sync,  					      is_invalid_userptr);  			if (ret) { -				pr_err("Failed to map radeon bo to gpuvm\n"); +				pr_err("Failed to map bo to gpuvm\n");  				goto map_bo_to_gpuvm_failed;  			} @@ -1744,25 +1727,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,  		bo = mem->bo; -		if (!mem->user_pages) { -			mem->user_pages = -				kvmalloc_array(bo->tbo.ttm->num_pages, -						 sizeof(struct page *), -						 GFP_KERNEL | __GFP_ZERO); -			if (!mem->user_pages) { -				pr_err("%s: Failed to allocate pages array\n", -				       __func__); -				return -ENOMEM; -			} -		} else if (mem->user_pages[0]) { -			release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -		} -  		/* Get updated user pages */  		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, -						   mem->user_pages); +						   bo->tbo.ttm->pages);  		if (ret) { -			mem->user_pages[0] = NULL; +			bo->tbo.ttm->pages[0] = NULL;  			pr_info("%s: Failed to get user pages: %d\n",  				__func__, ret);  			/* Pretend it succeeded. It will fail later @@ -1771,17 +1740,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,  			 * stalled user mode queues.  			 
*/  	} - -		/* Mark the BO as valid unless it was invalidated -		 * again concurrently -		 */ -		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) -			return -EAGAIN;  	}  	return 0;  } +/* Remove invalid userptr BOs from the HMM track list + * + * Stop HMM from tracking userptr updates + */ +static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info) +{ +	struct kgd_mem *mem, *tmp_mem; +	struct amdgpu_bo *bo; + +	list_for_each_entry_safe(mem, tmp_mem, +				 &process_info->userptr_inval_list, +				 validate_list.head) { +		bo = mem->bo; +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); +	} +} +  /* Validate invalid userptr BOs   *   * Validates BOs on the userptr_inval_list, and moves them back to the @@ -1806,7 +1786,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)  				     GFP_KERNEL);  	if (!pd_bo_list_entries) {  		pr_err("%s: Failed to allocate PD BO list entries\n", __func__); -		return -ENOMEM; +		ret = -ENOMEM; +		goto out_no_mem;  	}  	INIT_LIST_HEAD(&resv_list); @@ -1830,7 +1811,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)  	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);  	WARN(!list_empty(&duplicates), "Duplicates should be empty");  	if (ret) -		goto out; +		goto out_free;  	amdgpu_sync_create(&sync); @@ -1846,10 +1827,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)  		bo = mem->bo; -		/* Copy pages array and validate the BO if we got user pages */ -		if (mem->user_pages[0]) { -			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, -						     mem->user_pages); +		/* Validate the BO if we got user pages */ +		if (bo->tbo.ttm->pages[0]) {  			amdgpu_bo_placement_from_domain(bo, mem->domain);  			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  			if (ret) { @@ -1858,16 +1837,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)  			}  		} -		/* Validate succeeded, now the BO owns the pages, free -		 * our copy of the pointer array. Put this BO back on -		 * the userptr_valid_list. If we need to revalidate -		 * it, we need to start from scratch. -		 */ -		kvfree(mem->user_pages); -		mem->user_pages = NULL;  		list_move_tail(&mem->validate_list.head,  			       &process_info->userptr_valid_list); +		/* Stop HMM from tracking the userptr update. We don't check the return +		 * value for concurrent CPU page table updates because we will +		 * reschedule the restore worker if process_info->evicted_bos +		 * is updated. +		 */ +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); +  		/* Update mapping. If the BO was not validated  		 * (because we couldn't get user pages), this will  		 * clear the page table entries, which will result in @@ -1897,8 +1876,9 @@ unreserve_out:  	ttm_eu_backoff_reservation(&ticket, &resv_list);  	amdgpu_sync_wait(&sync, false);  	amdgpu_sync_free(&sync); -out: +out_free:  	kfree(pd_bo_list_entries); +out_no_mem:  	return ret;  } @@ -1963,7 +1943,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)  		 * hanging. No point trying again.  		 
*/  	} +  unlock_out: +	untrack_invalid_user_pages(process_info);  	mutex_unlock(&process_info->lock);  	mmput(mm);  	put_task_struct(usertask); @@ -2130,3 +2112,88 @@ ttm_reserve_fail:  	kfree(pd_bo_list);  	return ret;  } + +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem) +{ +	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; +	struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws; +	int ret; + +	if (!info || !gws) +		return -EINVAL; + +	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); +	if (!*mem) +		return -ENOMEM; + +	mutex_init(&(*mem)->lock); +	(*mem)->bo = amdgpu_bo_ref(gws_bo); +	(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; +	(*mem)->process_info = process_info; +	add_kgd_mem_to_kfd_bo_list(*mem, process_info, false); +	amdgpu_sync_create(&(*mem)->sync); + + +	/* Validate gws bo the first time it is added to process */ +	mutex_lock(&(*mem)->process_info->lock); +	ret = amdgpu_bo_reserve(gws_bo, false); +	if (unlikely(ret)) { +		pr_err("Reserve gws bo failed %d\n", ret); +		goto bo_reservation_failure; +	} + +	ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true); +	if (ret) { +		pr_err("GWS BO validate failed %d\n", ret); +		goto bo_validation_failure; +	} +	/* The GWS resource is shared between amdgpu and amdkfd. +	 * Add the process eviction fence to the bo so they can +	 * evict each other. +	 */ +	amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); +	amdgpu_bo_unreserve(gws_bo); +	mutex_unlock(&(*mem)->process_info->lock); + +	return ret; + +bo_validation_failure: +	amdgpu_bo_unreserve(gws_bo); +bo_reservation_failure: +	mutex_unlock(&(*mem)->process_info->lock); +	amdgpu_sync_free(&(*mem)->sync); +	remove_kgd_mem_from_kfd_bo_list(*mem, process_info); +	amdgpu_bo_unref(&gws_bo); +	mutex_destroy(&(*mem)->lock); +	kfree(*mem); +	*mem = NULL; +	return ret; +} + +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) +{ +	int ret; +	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; +	struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; +	struct amdgpu_bo *gws_bo = kgd_mem->bo; + +	/* Remove BO from process's validate list so restore worker won't touch +	 * it anymore +	 */ +	remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info); + +	ret = amdgpu_bo_reserve(gws_bo, false); +	if (unlikely(ret)) { +		pr_err("Reserve gws bo failed %d\n", ret); +		/* TODO: add BO back to validate_list? */ 
+		return ret; +	} +	amdgpu_amdkfd_remove_eviction_fence(gws_bo, +			process_info->eviction_fence); +	amdgpu_bo_unreserve(gws_bo); +	amdgpu_sync_free(&kgd_mem->sync); +	amdgpu_bo_unref(&gws_bo); +	mutex_destroy(&kgd_mem->lock); +	kfree(mem); +	return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 5c79da8e1150..d497467b7fc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -81,9 +81,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,  		return -ENOMEM;  	kref_init(&list->refcount); -	list->gds_obj = adev->gds.gds_gfx_bo; -	list->gws_obj = adev->gds.gws_gfx_bo; -	list->oa_obj = adev->gds.oa_gfx_bo; +	list->gds_obj = NULL; +	list->gws_obj = NULL; +	list->oa_obj = NULL;  	array = amdgpu_bo_list_array_entry(list, 0);  	memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 7c5f5d1601e6..a130e766cbdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {  	struct amdgpu_bo_va		*bo_va;  	uint32_t			priority;  	struct page			**user_pages; -	int				user_invalidated; +	bool				user_invalidated;  };  struct amdgpu_bo_list { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2f6239b6be6f..d72cc583ebd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,  	p->uf_entry.tv.bo = &bo->tbo;  	/* One for TTM and one for the CS job */  	p->uf_entry.tv.num_shared = 2; -	p->uf_entry.user_pages = NULL;  	drm_gem_object_put_unlocked(gobj); @@ -542,14 +541,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,  		if (usermm && usermm != current->mm)  			return -EPERM; -		/* Check if we have user pages and nobody bound the BO already */ -		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && -		    lobj->user_pages) { +		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && +		    lobj->user_invalidated && lobj->user_pages) {  			amdgpu_bo_placement_from_domain(bo,  							AMDGPU_GEM_DOMAIN_CPU);  			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  			if (r)  				return r; +  			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,  						     lobj->user_pages);  			binding_userptr = true; @@ -580,7 +579,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,  	struct amdgpu_bo *gds;  	struct amdgpu_bo *gws;  	struct amdgpu_bo *oa; -	unsigned tries = 10;  	int r;  	INIT_LIST_HEAD(&p->validated); @@ -616,79 +614,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,  	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)  		list_add(&p->uf_entry.tv.head, &p->validated); -	while (1) { -		struct list_head need_pages; - -		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, -					   &duplicates); -		if (unlikely(r != 0)) { -			if (r != -ERESTARTSYS) -				DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); -			goto error_free_pages; -		} - -		INIT_LIST_HEAD(&need_pages); -		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { -			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - -			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, -				 &e->user_invalidated) && e->user_pages) { - -				/* We acquired a page array, but somebody -		
		 * invalidated it. Free it and try again -				 */ -				release_pages(e->user_pages, -					      bo->tbo.ttm->num_pages); -				kvfree(e->user_pages); -				e->user_pages = NULL; -			} - -			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && -			    !e->user_pages) { -				list_del(&e->tv.head); -				list_add(&e->tv.head, &need_pages); - -				amdgpu_bo_unreserve(bo); -			} +	/* Get userptr backing pages. If pages are updated after being registered +	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do +	 * amdgpu_ttm_backend_bind() to flush and invalidate the new pages +	 */ +	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {  +		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); +		bool userpage_invalidated = false; +		int i; + +		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, +					sizeof(struct page *), +					GFP_KERNEL | __GFP_ZERO); +		if (!e->user_pages) { +			DRM_ERROR("calloc failure\n"); +			return -ENOMEM;  		} -		if (list_empty(&need_pages)) -			break; - -		/* Unreserve everything again. */ -		ttm_eu_backoff_reservation(&p->ticket, &p->validated); - -		/* We tried too many times, just abort */ -		if (!--tries) { -			r = -EDEADLK; -			DRM_ERROR("deadlock in %s\n", __func__); -			goto error_free_pages; +		r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); +		if (r) { +			kvfree(e->user_pages); +			e->user_pages = NULL; +			return r;  		} -		/* Fill the page arrays for all userptrs. */ -		list_for_each_entry(e, &need_pages, tv.head) { -			struct ttm_tt *ttm = e->tv.bo->ttm; - -			e->user_pages = kvmalloc_array(ttm->num_pages, -							 sizeof(struct page*), -							 GFP_KERNEL | __GFP_ZERO); -			if (!e->user_pages) { -				r = -ENOMEM; -				DRM_ERROR("calloc failure in %s\n", __func__); -				goto error_free_pages; -			} - -			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); -			if (r) { -				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); -				kvfree(e->user_pages); -				e->user_pages = NULL; -				goto error_free_pages +		for (i = 0; i < bo->tbo.ttm->num_pages; i++) { +			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { +				userpage_invalidated = true; +				break;  			}  		} +		e->user_invalidated = userpage_invalidated;  	} -		/* And try again. 
*/ -		list_splice(&need_pages, &p->validated); +	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, +				   &duplicates); +	if (unlikely(r != 0)) { +		if (r != -ERESTARTSYS) +			DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); +		goto out;  	}  	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -757,17 +721,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,  error_validate:  	if (r)  		ttm_eu_backoff_reservation(&p->ticket, &p->validated); - -error_free_pages: - -	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { -		if (!e->user_pages) -			continue; - -		release_pages(e->user_pages, e->tv.bo->ttm->num_pages); -		kvfree(e->user_pages); -	} - +out:  	return r;  } @@ -1054,11 +1008,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,  		j++;  	} -	/* UVD & VCE fw doesn't support user fences */ +	/* MM engine doesn't support user fences */  	ring = to_amdgpu_ring(parser->entity->rq->sched); -	if (parser->job->uf_addr && ( -	    ring->funcs->type == AMDGPU_RING_TYPE_UVD || -	    ring->funcs->type == AMDGPU_RING_TYPE_VCE)) +	if (parser->job->uf_addr && ring->funcs->no_user_fence)  		return -EINVAL;  	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity); @@ -1328,7 +1280,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,  	struct amdgpu_bo_list_entry *e;  	struct amdgpu_job *job;  	uint64_t seq; -  	int r;  	job = p->job; @@ -1338,15 +1289,23 @@  	r = drm_sched_job_init(&job->base, entity, p->filp);  	if (r)  		goto error_unlock; -	/* No memory allocation is allowed while holding the mn lock */ +	/* No memory allocation is allowed while holding the mn lock. +	 * p->mn is held until amdgpu_cs_submit() is finished and the fence is +	 * added to BOs. +	 */  	amdgpu_mn_lock(p->mn); + +	/* If userptrs are invalidated after amdgpu_cs_parser_bos(), return +	 * -EAGAIN; drmIoctl() in libdrm will restart the amdgpu_cs_ioctl. 
+	 */  	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {  		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); -		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { -			r = -ERESTARTSYS; -			goto error_abort; -		} +		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); +	} +	if (r) { +		r = -EAGAIN; +		goto error_abort;  	}  	job->owner = p->filp; @@ -1442,6 +1401,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)  out:  	amdgpu_cs_parser_fini(&parser, r, reserved_buffers); +  	return r;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9f282e971197..0ffa6733f2b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -97,6 +97,28 @@ static const char *amdgpu_asic_name[] = {  	"LAST",  }; +/** + * DOC: pcie_replay_count + * + * The amdgpu driver provides a sysfs API for reporting the total number + * of PCIe replays (NAKs). + * The file pcie_replay_count is used for this and returns the total + * number of replays as a sum of the NAKs generated and NAKs received. + */ + +static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, +		struct device_attribute *attr, char *buf) +{ +	struct drm_device *ddev = dev_get_drvdata(dev); +	struct amdgpu_device *adev = ddev->dev_private; +	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev); + +	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt); +} + +static DEVICE_ATTR(pcie_replay_count, S_IRUGO, +		amdgpu_device_get_pcie_replay_count, NULL); +  static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);  /** @@ -910,8 +932,10 @@ def_value:   * Validates certain module parameters and updates   * the associated values used by the driver (all asics).   
*/ -static void amdgpu_device_check_arguments(struct amdgpu_device *adev) +static int amdgpu_device_check_arguments(struct amdgpu_device *adev)  { +	int ret = 0; +  	if (amdgpu_sched_jobs < 4) {  		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",  			 amdgpu_sched_jobs); @@ -956,12 +980,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)  		amdgpu_vram_page_split = 1024;  	} -	if (amdgpu_lockup_timeout == 0) { -		dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n"); -		amdgpu_lockup_timeout = 10000; +	ret = amdgpu_device_get_job_timeout_settings(adev); +	if (ret) { +		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); +		return ret;  	}  	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + +	return ret;  }  /** @@ -1505,12 +1532,26 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)  		r = amdgpu_virt_request_full_gpu(adev, true);  		if (r)  			return -EAGAIN; + +		/* query the reg access mode at the very beginning */ +		amdgpu_virt_init_reg_access_mode(adev);  	}  	adev->pm.pp_feature = amdgpu_pp_feature_mask;  	if (amdgpu_sriov_vf(adev))  		adev->pm.pp_feature &= ~PP_GFXOFF_MASK; +	/* Read BIOS */ +	if (!amdgpu_get_bios(adev)) +		return -EINVAL; + +	r = amdgpu_atombios_init(adev); +	if (r) { +		dev_err(adev->dev, "amdgpu_atombios_init failed\n"); +		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); +		return r; +	} +  	for (i = 0; i < adev->num_ip_blocks; i++) {  		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {  			DRM_ERROR("disabled ip block: %d <%s>\n", @@ -1550,6 +1591,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)  		if (adev->ip_blocks[i].status.hw)  			continue;  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || +		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {  			r = adev->ip_blocks[i].version->funcs->hw_init(adev);  			if (r) { @@ -2473,7 +2515,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	mutex_init(&adev->lock_reset);  	mutex_init(&adev->virt.dpm_mutex); -	amdgpu_device_check_arguments(adev); +	r = amdgpu_device_check_arguments(adev); +	if (r) +		return r;  	spin_lock_init(&adev->mmio_idx_lock);  	spin_lock_init(&adev->smc_idx_lock); @@ -2558,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,  		goto fence_driver_init;  	} -	/* Read BIOS */ -	if (!amdgpu_get_bios(adev)) { -		r = -EINVAL; -		goto failed; -	} - -	r = amdgpu_atombios_init(adev); -	if (r) { -		dev_err(adev->dev, "amdgpu_atombios_init failed\n"); -		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); -		goto failed; -	} -  	/* detect if we are with an SRIOV vbios */  	amdgpu_device_detect_sriov_bios(adev); @@ -2672,6 +2703,10 @@ fence_driver_init:  	if (r)  		DRM_ERROR("registering pm debugfs failed (%d).\n", r); +	r = amdgpu_ucode_sysfs_init(adev); +	if (r) +		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); +  	r = amdgpu_debugfs_gem_init(adev);  	if (r)  		DRM_ERROR("registering gem debugfs failed (%d).\n", r); @@ -2712,7 +2747,13 @@ fence_driver_init:  	}  	/* must succeed. 
*/ -	amdgpu_ras_post_init(adev); +	amdgpu_ras_resume(adev); + +	r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); +	if (r) { +		dev_err(adev->dev, "Could not create pcie_replay_count\n"); +		return r; +	}  	return 0; @@ -2777,6 +2818,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)  	adev->rmmio = NULL;  	amdgpu_device_doorbell_fini(adev);  	amdgpu_debugfs_regs_cleanup(adev); +	device_remove_file(adev->dev, &dev_attr_pcie_replay_count); +	amdgpu_ucode_sysfs_fini(adev);  } @@ -2857,6 +2900,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)  	amdgpu_amdkfd_suspend(adev); +	amdgpu_ras_suspend(adev); +  	r = amdgpu_device_ip_suspend_phase1(adev);  	/* evict vram memory */ @@ -2977,6 +3022,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)  	drm_kms_helper_poll_enable(dev); +	amdgpu_ras_resume(adev); +  	/*  	 * Most of the connector probing functions try to acquire runtime pm  	 * refs to ensure that the GPU is powered on when connector polling is @@ -3455,6 +3502,13 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,  				if (vram_lost)  					amdgpu_device_fill_reset_magic(tmp_adev); +				r = amdgpu_device_ip_late_init(tmp_adev); +				if (r) +					goto out; + +				/* must succeed. */ +				amdgpu_ras_resume(tmp_adev); +  				/* Update PSP FW topology after reset */  				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)  					r = amdgpu_xgmi_update_topology(hive, tmp_adev); @@ -3695,43 +3749,6 @@ skip_hw_reset:  	return r;  } -static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, -						  enum pci_bus_speed *speed, -						  enum pcie_link_width *width) -{ -	struct pci_dev *pdev = adev->pdev; -	enum pci_bus_speed cur_speed; -	enum pcie_link_width cur_width; -	u32 ret = 1; - -	*speed = PCI_SPEED_UNKNOWN; -	*width = PCIE_LNK_WIDTH_UNKNOWN; - -	while (pdev) { -		cur_speed = pcie_get_speed_cap(pdev); -		cur_width = pcie_get_width_cap(pdev); -		ret = pcie_bandwidth_available(adev->pdev, NULL, -						       NULL, &cur_width); -		if (!ret) -			cur_width = PCIE_LNK_WIDTH_RESRV; - -		if (cur_speed != PCI_SPEED_UNKNOWN) { -			if (*speed == PCI_SPEED_UNKNOWN) -				*speed = cur_speed; -			else if (cur_speed < *speed) -				*speed = cur_speed; -		} - -		if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) { -			if (*width == PCIE_LNK_WIDTH_UNKNOWN) -				*width = cur_width; -			else if (cur_width < *width) -				*width = cur_width; -		} -		pdev = pci_upstream_bridge(pdev); -	} -} -  /**   * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot   * @@ -3765,8 +3782,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)  	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)  		return; -	amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, -					      &platform_link_width); +	pcie_bandwidth_available(adev->pdev, NULL, +				 &platform_speed_cap, &platform_link_width);  	if (adev->pm.pcie_gen_mask == 0) {  		/* asic caps */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b083b219b1a9..30e6ad8a90bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -631,10 +631,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)  					 amdgpu_dither_enum_list, sz);  	if (amdgpu_device_has_dc_support(adev)) { -		adev->mode_info.max_bpc_property = -			drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16); -		if 
(!adev->mode_info.max_bpc_property) -			return -ENOMEM;  		adev->mode_info.abm_level_property =  			drm_property_create_range(adev->ddev, 0,  						"abm level", 0, 4); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index a38e0fb4a6fe..4711cf1b5bd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -1,5 +1,5 @@  /* - * Copyright 2012 Advanced Micro Devices, Inc. + * Copyright 2019 Advanced Micro Devices, Inc.   *   * Permission is hereby granted, free of charge, to any person obtaining a   * copy of this software and associated documentation files (the "Software"), @@ -103,7 +103,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)   * Returns:   * 0 on success or a negative error code on failure.   */ -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, +			  struct vm_area_struct *vma)  {  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -137,57 +138,6 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma  	return ret;  } -/** - * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table - * implementation - * @dev: DRM device - * @attach: DMA-buf attachment - * @sg: Scatter/gather table - * - * Imports shared DMA buffer memory exported by another device. - * - * Returns: - * A new GEM BO of the given DRM device, representing the memory - * described by the given DMA-buf attachment and scatter/gather table. - */ -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, -				 struct dma_buf_attachment *attach, -				 struct sg_table *sg) -{ -	struct reservation_object *resv = attach->dmabuf->resv; -	struct amdgpu_device *adev = dev->dev_private; -	struct amdgpu_bo *bo; -	struct amdgpu_bo_param bp; -	int ret; - -	memset(&bp, 0, sizeof(bp)); -	bp.size = attach->dmabuf->size; -	bp.byte_align = PAGE_SIZE; -	bp.domain = AMDGPU_GEM_DOMAIN_CPU; -	bp.flags = 0; -	bp.type = ttm_bo_type_sg; -	bp.resv = resv; -	ww_mutex_lock(&resv->lock, NULL); -	ret = amdgpu_bo_create(adev, &bp, &bo); -	if (ret) -		goto error; - -	bo->tbo.sg = sg; -	bo->tbo.ttm->sg = sg; -	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; -	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; -	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) -		bo->prime_shared_count = 1; - -	ww_mutex_unlock(&resv->lock); -	return &bo->gem_base; - -error: -	ww_mutex_unlock(&resv->lock); -	return ERR_PTR(ret); -} -  static int  __reservation_object_make_exclusive(struct reservation_object *obj)  { @@ -231,7 +181,7 @@ err_fences_put:  }  /** - * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation + * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation   * @dma_buf: Shared DMA buffer   * @attach: DMA-buf attachment   * @@ -242,8 +192,8 @@ err_fences_put:   * Returns:   * 0 on success or a negative error code on failure.   
*/ -static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, -				 struct dma_buf_attachment *attach) +static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf, +				     struct dma_buf_attachment *attach)  {  	struct drm_gem_object *obj = dma_buf->priv;  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -291,15 +241,15 @@ error_detach:  }  /** - * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation + * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation   * @dma_buf: Shared DMA buffer   * @attach: DMA-buf attachment   *   * This is called when a shared DMA buffer no longer needs to be accessible by   * another device. For now, simply unpins the buffer from GTT.   */ -static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, -				  struct dma_buf_attachment *attach) +static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf, +				      struct dma_buf_attachment *attach)  {  	struct drm_gem_object *obj = dma_buf->priv;  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -334,7 +284,7 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)  }  /** - * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation + * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation   * @dma_buf: Shared DMA buffer   * @direction: Direction of DMA transfer   * @@ -345,8 +295,8 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)   * Returns:   * 0 on success or a negative error code on failure.   */ -static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, -				       enum dma_data_direction direction) +static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, +					   enum dma_data_direction direction)  {  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -374,12 +324,12 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,  }  const struct dma_buf_ops amdgpu_dmabuf_ops = { -	.attach = amdgpu_gem_map_attach, -	.detach = amdgpu_gem_map_detach, +	.attach = amdgpu_dma_buf_map_attach, +	.detach = amdgpu_dma_buf_map_detach,  	.map_dma_buf = drm_gem_map_dma_buf,  	.unmap_dma_buf = drm_gem_unmap_dma_buf,  	.release = drm_gem_dmabuf_release, -	.begin_cpu_access = amdgpu_gem_begin_cpu_access, +	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,  	.mmap = drm_gem_dmabuf_mmap,  	.vmap = drm_gem_dmabuf_vmap,  	.vunmap = drm_gem_dmabuf_vunmap, @@ -418,6 +368,57 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,  }  /** + * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table + * implementation + * @dev: DRM device + * @attach: DMA-buf attachment + * @sg: Scatter/gather table + * + * Imports shared DMA buffer memory exported by another device. + * + * Returns: + * A new GEM BO of the given DRM device, representing the memory + * described by the given DMA-buf attachment and scatter/gather table. 
+ */ +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, +				 struct dma_buf_attachment *attach, +				 struct sg_table *sg) +{ +	struct reservation_object *resv = attach->dmabuf->resv; +	struct amdgpu_device *adev = dev->dev_private; +	struct amdgpu_bo *bo; +	struct amdgpu_bo_param bp; +	int ret; + +	memset(&bp, 0, sizeof(bp)); +	bp.size = attach->dmabuf->size; +	bp.byte_align = PAGE_SIZE; +	bp.domain = AMDGPU_GEM_DOMAIN_CPU; +	bp.flags = 0; +	bp.type = ttm_bo_type_sg; +	bp.resv = resv; +	ww_mutex_lock(&resv->lock, NULL); +	ret = amdgpu_bo_create(adev, &bp, &bo); +	if (ret) +		goto error; + +	bo->tbo.sg = sg; +	bo->tbo.ttm->sg = sg; +	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; +	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; +	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) +		bo->prime_shared_count = 1; + +	ww_mutex_unlock(&resv->lock); +	return &bo->gem_base; + +error: +	ww_mutex_unlock(&resv->lock); +	return ERR_PTR(ret); +} + +/**   * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation   * @dev: DRM device   * @dma_buf: Shared DMA buffer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h new file mode 100644 index 000000000000..c7056cbe8685 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -0,0 +1,46 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_DMA_BUF_H__ +#define __AMDGPU_DMA_BUF_H__ + +#include <drm/drm_gem.h> + +struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, +				 struct dma_buf_attachment *attach, +				 struct sg_table *sg); +struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, +					struct drm_gem_object *gobj, +					int flags); +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, +					    struct dma_buf *dma_buf); +struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); +void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); +void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, +			  struct vm_area_struct *vma); + +extern const struct dma_buf_ops amdgpu_dmabuf_ops; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index dca35407879d..521dbd0d9af8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal {  	int                min_temp;  	/* high temperature threshold */  	int                max_temp; +	/* edge max emergency(shutdown) temp */ +	int                max_edge_emergency_temp; +	/* hotspot low temperature threshold */ +	int                min_hotspot_temp; +	/* hotspot high temperature critical threshold */ +	int                max_hotspot_crit_temp; +	/* hotspot max emergency(shutdown) temp */ +	int                max_hotspot_emergency_temp; +	/* memory low temperature threshold */ +	int                min_mem_temp; +	/* memory high temperature critical threshold */ +	int                max_mem_crit_temp; +	/* memory max emergency(shutdown) temp */ +	int                max_mem_emergency_temp;  	/* was last interrupt low to high or high to low */  	bool               high_to_low;  	/* interrupt source */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1e2cc9d68a05..1f38d6fc1fe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -36,7 +36,7 @@  #include "amdgpu.h"  #include "amdgpu_irq.h" -#include "amdgpu_gem.h" +#include "amdgpu_dma_buf.h"  #include "amdgpu_amdkfd.h" @@ -81,6 +81,8 @@  #define KMS_DRIVER_MINOR	32  #define KMS_DRIVER_PATCHLEVEL	0 +#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH	256 +  int amdgpu_vram_limit = 0;  int amdgpu_vis_vram_limit = 0;  int amdgpu_gart_size = -1; /* auto */ @@ -93,7 +95,7 @@ int amdgpu_disp_priority = 0;  int amdgpu_hw_i2c = 0;  int amdgpu_pcie_gen2 = -1;  int amdgpu_msi = -1; -int amdgpu_lockup_timeout = 10000; +char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];  int amdgpu_dpm = -1;  int amdgpu_fw_load_type = -1;  int amdgpu_aspm = -1; @@ -227,12 +229,21 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");  module_param_named(msi, amdgpu_msi, int, 0444);  /** - * DOC: lockup_timeout (int) - * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000. - * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000. - */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)"); -module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); + * DOC: lockup_timeout (string) + * Set GPU scheduler timeout value in ms. 
+ *
+ * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is, either
+ * one value or multiple values can be specified. Zero and negative values are
+ * invalid and will be adjusted to the default timeout.
+ *  - With one value specified, the setting will apply to all non-compute jobs.
+ *  - With multiple values specified, the first one will be for GFX. The second one is for Compute.
+ *    And the third and fourth ones are for SDMA and Video.
+ * By default (with no lockup_timeout settings), the timeout for all non-compute (GFX, SDMA and Video)
+ * jobs is 10000, and there is no timeout enforced on compute jobs.
+ */
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), "
+		"format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
 
 /**
  * DOC: dpm (int)
@@ -655,6 +666,16 @@ MODULE_PARM_DESC(noretry,
 int halt_if_hws_hang;
 module_param(halt_if_hws_hang, int, 0644);
 MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+
+/**
+ * DOC: hws_gws_support (bool)
+ * Whether HWS supports GWS barriers. Default value: false (not supported).
+ * This will be replaced with a MEC firmware version check once the firmware
+ * is ready.
+ */
+bool hws_gws_support;
+module_param(hws_gws_support, bool, 0444);
+MODULE_PARM_DESC(hws_gws_support, "MEC FW supports gws barriers (false = not supported (default), true = supported)");
 #endif
 
 /**
@@ -1216,6 +1237,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
 	return 0;
 }
 
+int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
+{
+	char *input = amdgpu_lockup_timeout;
+	char *timeout_setting = NULL;
+	int index = 0;
+	long timeout;
+	int ret = 0;
+
+	/*
+	 * By default, the timeout for non-compute jobs is 10000,
+	 * and there is no timeout enforced on compute jobs.
+	 */
+	adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000;
+	adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+
+	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
+		while ((timeout_setting = strsep(&input, ",")) &&
+				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
+			ret = kstrtol(timeout_setting, 0, &timeout);
+			if (ret)
+				return ret;
+
+			/* Invalidate 0 and negative values */
+			if (timeout <= 0) {
+				index++;
+				continue;
+			}
+
+			switch (index++) {
+			case 0:
+				adev->gfx_timeout = timeout;
+				break;
+			case 1:
+				adev->compute_timeout = timeout;
+				break;
+			case 2:
+				adev->sdma_timeout = timeout;
+				break;
+			case 3:
+				adev->video_timeout = timeout;
+				break;
+			default:
+				break;
+			}
+		}
+		/*
+		 * If there is only one value specified, it applies
+		 * to all non-compute jobs.
+		 */
+		if (index == 1)
+			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+	}
+
+	return ret;
+}
+
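For illustration, the parsing above can be mirrored in a few lines of user-space C. This is a hedged sketch only — it is not part of the patch and prints results instead of programming hardware; -1 stands in for MAX_SCHEDULE_TIMEOUT:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	int main(void)
	{
		/* mirrors lockup_timeout=10000,60000 */
		char input[] = "10000,60000";
		char *s = input, *tok;
		/* defaults: 10000 ms for gfx/sdma/video, no timeout for compute */
		long t[4] = { 10000, -1, 10000, 10000 };
		int index = 0;

		while ((tok = strsep(&s, ",")) && *tok) {
			long v = strtol(tok, NULL, 0);

			/* invalid (<= 0) entries keep the default but consume a slot */
			if (v > 0 && index < 4)
				t[index] = v;
			index++;
		}
		/* a single value applies to all non-compute rings */
		if (index == 1)
			t[2] = t[3] = t[0];

		printf("gfx=%ld compute=%ld sdma=%ld video=%ld\n",
		       t[0], t[1], t[2], t[3]);
		return 0;
	}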
 static bool
 amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
 				 bool in_vblank_irq, int *vpos, int *hpos,
@@ -1230,7 +1307,8 @@ static struct drm_driver kms_driver = {
 	.driver_features =
 	    DRIVER_USE_AGP | DRIVER_ATOMIC |
 	    DRIVER_GEM |
-	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
+	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE,
 	.load = amdgpu_driver_load_kms,
 	.open = amdgpu_driver_open_kms,
 	.postclose = amdgpu_driver_postclose_kms,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 4dee2326b29c..3a483f7e89c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -427,9 +427,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 				  unsigned num_hw_submission)
 {
+	struct amdgpu_device *adev = ring->adev;
 	long timeout;
 	int r;
 
+	if (!adev)
+		return -EINVAL;
+
 	/* Check that num_hw_submission is a power of two */
 	if ((num_hw_submission & (num_hw_submission - 1)) != 0)
 		return -EINVAL;
@@ -451,12 +455,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 
 	/* No need to setup the GPU scheduler for KIQ ring */
 	if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
-		/* for non-sriov case, no timeout enforce on compute ring */
-		if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
-				&& !amdgpu_sriov_vf(ring->adev))
-			timeout = MAX_SCHEDULE_TIMEOUT;
-		else
-			timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+		switch (ring->funcs->type) {
+		case AMDGPU_RING_TYPE_GFX:
+			timeout = adev->gfx_timeout;
+			break;
+		case AMDGPU_RING_TYPE_COMPUTE:
+			/*
+			 * For the non-sriov case, no timeout is enforced
+			 * on the compute ring by default, unless the user
+			 * specifies a timeout for the compute ring.
+			 *
+			 * For the sriov case, always use the same timeout
+			 * as the gfx ring.
+			 */
+			if (!amdgpu_sriov_vf(ring->adev))
+				timeout = adev->compute_timeout;
+			else
+				timeout = adev->gfx_timeout;
+			break;
+		case AMDGPU_RING_TYPE_SDMA:
+			timeout = adev->sdma_timeout;
+			break;
+		default:
+			timeout = adev->video_timeout;
+			break;
+		}
 
 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
 				   num_hw_submission, amdgpu_job_hang_limit,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index f89f5734d985..dad2186f4ed5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -27,26 +27,11 @@
 struct amdgpu_ring;
 struct amdgpu_bo;
 
-struct amdgpu_gds_asic_info {
-	uint32_t	total_size;
-	uint32_t	gfx_partition_size;
-	uint32_t	cs_partition_size;
-};
-
 struct amdgpu_gds {
-	struct amdgpu_gds_asic_info	mem;
-	struct amdgpu_gds_asic_info	gws;
-	struct amdgpu_gds_asic_info	oa;
+	uint32_t gds_size;
+	uint32_t gws_size;
+	uint32_t oa_size;
 	uint32_t			gds_compute_max_wave_id;
-
-	/* At present, GDS, GWS and OA resources for gfx (graphics)
-	 * is always pre-allocated and available for graphics operation.
-	 * Such resource is shared between all gfx clients.
-	 * TODO: move this operation to user space -	 * */ -	struct amdgpu_bo*		gds_gfx_bo; -	struct amdgpu_bo*		gws_gfx_bo; -	struct amdgpu_bo*		oa_gfx_bo;  };  struct amdgpu_gds_reg_offset { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d4fcf5475464..7b840367004c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -330,26 +330,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,  		r = amdgpu_bo_reserve(bo, true);  		if (r) -			goto free_pages; +			goto user_pages_done;  		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);  		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  		amdgpu_bo_unreserve(bo);  		if (r) -			goto free_pages; +			goto user_pages_done;  	}  	r = drm_gem_handle_create(filp, gobj, &handle); -	/* drop reference from allocate - handle holds it now */ -	drm_gem_object_put_unlocked(gobj);  	if (r) -		return r; +		goto user_pages_done;  	args->handle = handle; -	return 0; -free_pages: -	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); +user_pages_done: +	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);  release_object:  	drm_gem_object_put_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index f1ddfc50bcc7..b8ba6e27c61f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,  void amdgpu_gem_object_close(struct drm_gem_object *obj,  				struct drm_file *file_priv);  unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, -				 struct dma_buf_attachment *attach, -				 struct sg_table *sg); -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, -					struct drm_gem_object *gobj, -					int flags); -struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, -					    struct dma_buf *dma_buf); -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); -void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); -void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); - -extern const struct dma_buf_ops amdgpu_dmabuf_ops;  /*   * GEM objects. 
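The reworked ioctl above replaces the old pin/release_pages() cycle with an explicit start/stop pair around validation. A hedged sketch of the pattern, using only the helpers this series introduces (example_userptr_populate() is a hypothetical wrapper; reservation and GEM handle plumbing are omitted):

	static int example_userptr_populate(struct amdgpu_bo *bo)
	{
		struct ttm_tt *ttm = bo->tbo.ttm;
		int r;

		/* start HMM tracking and fault the user pages into the ttm */
		r = amdgpu_ttm_tt_get_user_pages(ttm, ttm->pages);
		if (r)
			return r;

		/* ... reserve and validate the BO while the range is tracked ... */

		/* stop tracking exactly once; true means pages are still valid */
		if (!amdgpu_ttm_tt_get_user_pages_done(ttm))
			r = -EAGAIN;	/* CPU mappings changed; retry with fresh pages */

		return r;
	}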
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0a17fb1af204..7ab1241bd9e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)  	if (amdgpu_device_should_recover_gpu(ring->adev))  		amdgpu_device_gpu_recover(ring->adev, job); +	else +		drm_sched_suspend_timeout(&ring->sched);  }  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b17d0545728e..edb675103bd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -590,13 +590,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file  		struct drm_amdgpu_info_gds gds_info;  		memset(&gds_info, 0, sizeof(gds_info)); -		gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size; -		gds_info.compute_partition_size = adev->gds.mem.cs_partition_size; -		gds_info.gds_total_size = adev->gds.mem.total_size; -		gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size; -		gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size; -		gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size; -		gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size; +		gds_info.compute_partition_size = adev->gds.gds_size; +		gds_info.gds_total_size = adev->gds.gds_size; +		gds_info.gws_per_compute_partition = adev->gds.gws_size; +		gds_info.oa_per_compute_partition = adev->gds.oa_size;  		return copy_to_user(out, &gds_info,  				    min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;  	} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 58ed401c5996..41ccee49a224 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -45,7 +45,7 @@  #include <linux/firmware.h>  #include <linux/module.h> -#include <linux/mmu_notifier.h> +#include <linux/hmm.h>  #include <linux/interval_tree.h>  #include <drm/drmP.h>  #include <drm/drm.h> @@ -58,14 +58,12 @@   *   * @adev: amdgpu device pointer   * @mm: process address space - * @mn: MMU notifier structure   * @type: type of MMU notifier   * @work: destruction work item   * @node: hash table node to find structure by adev and mn   * @lock: rw semaphore protecting the notifier nodes   * @objects: interval tree containing amdgpu_mn_nodes - * @read_lock: mutex for recursive locking of @lock - * @recursion: depth of recursion + * @mirror: HMM mirror function support   *   * Data for each amdgpu device and process address space.   
 */
@@ -73,7 +71,6 @@ struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
-	struct mmu_notifier	mn;
 	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
@@ -85,8 +82,9 @@ struct amdgpu_mn {
 	/* objects protected by lock */
 	struct rw_semaphore	lock;
 	struct rb_root_cached	objects;
-	struct mutex		read_lock;
-	atomic_t		recursion;
+
+	/* HMM mirror */
+	struct hmm_mirror	mirror;
 };
 
 /**
@@ -103,7 +101,7 @@ struct amdgpu_mn_node {
 };
 
 /**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
 *
 * @work: previously sheduled work item
 *
@@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	}
 	up_write(&amn->lock);
 	mutex_unlock(&adev->mn_lock);
-	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+	hmm_mirror_unregister(&amn->mirror);
 	kfree(amn);
 }
 
 /**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
 *
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
 *
- * Shedule a work item to lazy destroy our notifier.
+ * Schedule a work item to lazily destroy the HMM mirror.
 */
-static void amdgpu_mn_release(struct mmu_notifier *mn,
-			      struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
 
 	INIT_WORK(&amn->work, amdgpu_mn_destroy);
 	schedule_work(&amn->work);
 }
 
-
 /**
 * amdgpu_mn_lock - take the write side lock for this notifier
 *
@@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
 static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
 {
 	if (blockable)
-		mutex_lock(&amn->read_lock);
-	else if (!mutex_trylock(&amn->read_lock))
+		down_read(&amn->lock);
+	else if (!down_read_trylock(&amn->lock))
 		return -EAGAIN;
 
-	if (atomic_inc_return(&amn->recursion) == 1)
-		down_read_non_owner(&amn->lock);
-	mutex_unlock(&amn->read_lock);
-
 	return 0;
 }
 
@@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
 */
 static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
 {
-	if (atomic_dec_return(&amn->recursion) == 0)
-		up_read_non_owner(&amn->lock);
+	up_read(&amn->lock);
 }
 
 /**
@@ -229,149 +220,132 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 			true, false, MAX_SCHEDULE_TIMEOUT);
 		if (r <= 0)
 			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
-		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 	}
 }
 
 /**
- * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
+ * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
 *
- * @mn: our notifier
- * @range: mmu notifier context
+ * @mirror: the hmm_mirror (mm) that is about to update
+ * @update: the update start and end address
 *
 * Block for operations on BOs to finish and mark pages as accessed and
 * potentially dirty.
 */
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
-			const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
-	unsigned long end;
 
 	/* notification is exclusive, but interval is inclusive */
-	end = range->end - 1;
+	end -= 1;
 
 	/* TODO we should be able to split locking for interval tree and
 	 * amdgpu_mn_invalidate_node
 	 */
-	if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
+	if (amdgpu_mn_read_lock(amn, blockable))
 		return -EAGAIN;
 
-	it = interval_tree_iter_first(&amn->objects, range->start, end);
+	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 
-		if (!mmu_notifier_range_blockable(range)) {
+		if (!blockable) {
 			amdgpu_mn_read_unlock(amn);
 			return -EAGAIN;
 		}
 
 		node = container_of(it, struct amdgpu_mn_node, it);
-		it = interval_tree_iter_next(it, range->start, end);
+		it = interval_tree_iter_next(it, start, end);
 
-		amdgpu_mn_invalidate_node(node, range->start, end);
+		amdgpu_mn_invalidate_node(node, start, end);
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
 /**
- * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
 *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) that is about to update
+ * @update: the update start and end address
 *
 * We temporarily evict all BOs between start and end. This
 * necessitates evicting all user-mode queues of the process. The BOs
 * are restorted in amdgpu_mn_invalidate_range_end_hsa.
*/ -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, -			const struct mmu_notifier_range *range) +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, +			const struct hmm_update *update)  { -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); +	unsigned long start = update->start; +	unsigned long end = update->end; +	bool blockable = update->blockable;  	struct interval_tree_node *it; -	unsigned long end;  	/* notification is exclusive, but interval is inclusive */ -	end = range->end - 1; +	end -= 1; -	if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) +	if (amdgpu_mn_read_lock(amn, blockable))  		return -EAGAIN; -	it = interval_tree_iter_first(&amn->objects, range->start, end); +	it = interval_tree_iter_first(&amn->objects, start, end);  	while (it) {  		struct amdgpu_mn_node *node;  		struct amdgpu_bo *bo; -		if (!mmu_notifier_range_blockable(range)) { +		if (!blockable) {  			amdgpu_mn_read_unlock(amn);  			return -EAGAIN;  		}  		node = container_of(it, struct amdgpu_mn_node, it); -		it = interval_tree_iter_next(it, range->start, end); +		it = interval_tree_iter_next(it, start, end);  		list_for_each_entry(bo, &node->bos, mn_list) {  			struct kgd_mem *mem = bo->kfd_bo;  			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, -							 range->start, -							 end)) -				amdgpu_amdkfd_evict_userptr(mem, range->mm); +							 start, end)) +				amdgpu_amdkfd_evict_userptr(mem, amn->mm);  		}  	} +	amdgpu_mn_read_unlock(amn); +  	return 0;  } -/** - * amdgpu_mn_invalidate_range_end - callback to notify about mm change - * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range - * - * Release the lock again to allow new command submissions. +/* Low bits of any reasonable mm pointer will be unused due to struct + * alignment. Use these bits to make a unique key from the mm pointer + * and notifier type.   */ -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, -			const struct mmu_notifier_range *range) -{ -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - -	amdgpu_mn_read_unlock(amn); -} +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) -static const struct mmu_notifier_ops amdgpu_mn_ops[] = { +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {  	[AMDGPU_MN_TYPE_GFX] = { -		.release = amdgpu_mn_release, -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, -		.invalidate_range_end = amdgpu_mn_invalidate_range_end, +		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, +		.release = amdgpu_hmm_mirror_release  	},  	[AMDGPU_MN_TYPE_HSA] = { -		.release = amdgpu_mn_release, -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, -		.invalidate_range_end = amdgpu_mn_invalidate_range_end, +		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, +		.release = amdgpu_hmm_mirror_release  	},  }; -/* Low bits of any reasonable mm pointer will be unused due to struct - * alignment. Use these bits to make a unique key from the mm pointer - * and notifier type. - */ -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) -  /** - * amdgpu_mn_get - create notifier context + * amdgpu_mn_get - create HMM mirror context   *   * @adev: amdgpu device pointer   * @type: type of MMU notifier context   * - * Creates a notifier context for current->mm. + * Creates a HMM mirror context for current->mm.   
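+ *
+ * For readers new to HMM: a mirror is driven entirely through its ops
+ * table, and the owning context is recovered with container_of(). A
+ * stripped-down sketch of the pattern (illustrative, not amdgpu code):
+ *
+ *     static int my_sync(struct hmm_mirror *m, const struct hmm_update *u)
+ *     {
+ *         struct my_ctx *ctx = container_of(m, struct my_ctx, mirror);
+ *
+ *         // invalidate [u->start, u->end); must not sleep if !u->blockable
+ *         return 0;
+ *     }
+ *
+ *     static const struct hmm_mirror_ops my_ops = {
+ *         .sync_cpu_device_pagetables = my_sync,
+ *         .release = my_release,  // mm teardown
+ *     };
+ *
+ *     ctx->mirror.ops = &my_ops;
+ *     r = hmm_mirror_register(&ctx->mirror, current->mm);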
*/  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,  				enum amdgpu_mn_type type) @@ -401,12 +375,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,  	amn->mm = mm;  	init_rwsem(&amn->lock);  	amn->type = type; -	amn->mn.ops = &amdgpu_mn_ops[type];  	amn->objects = RB_ROOT_CACHED; -	mutex_init(&amn->read_lock); -	atomic_set(&amn->recursion, 0); -	r = __mmu_notifier_register(&amn->mn, mm); +	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; +	r = hmm_mirror_register(&amn->mirror, mm);  	if (r)  		goto free_amn; @@ -432,7 +404,7 @@ free_amn:   * @bo: amdgpu buffer object   * @addr: userptr addr we should monitor   * - * Registers an MMU notifier for the given BO at the specified address. + * Registers an HMM mirror for the given BO at the specified address.   * Returns 0 on success, -ERRNO if anything goes wrong.   */  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) @@ -488,11 +460,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)  }  /** - * amdgpu_mn_unregister - unregister a BO for notifier updates + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates   *   * @bo: amdgpu buffer object   * - * Remove any registration of MMU notifier updates from the buffer object. + * Remove any registration of HMM mirror updates from the buffer object.   */  void amdgpu_mn_unregister(struct amdgpu_bo *bo)  { @@ -528,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)  	mutex_unlock(&adev->mn_lock);  } +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { +		(1 << 0), /* HMM_PFN_VALID */ +		(1 << 1), /* HMM_PFN_WRITE */ +		0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { +		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ +		0, /* HMM_PFN_NONE */ +		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + +void amdgpu_hmm_init_range(struct hmm_range *range) +{ +	if (range) { +		range->flags = hmm_range_flags; +		range->values = hmm_range_values; +		range->pfn_shift = PAGE_SHIFT; +		range->pfns = NULL; +		INIT_LIST_HEAD(&range->list); +	} +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index eb0f432f78fe..f5b67c63ed6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -25,22 +25,24 @@  #define __AMDGPU_MN_H__  /* - * MMU Notifier + * HMM mirror   */  struct amdgpu_mn; +struct hmm_range;  enum amdgpu_mn_type {  	AMDGPU_MN_TYPE_GFX,  	AMDGPU_MN_TYPE_HSA,  }; -#if defined(CONFIG_MMU_NOTIFIER) +#if defined(CONFIG_HMM_MIRROR)  void amdgpu_mn_lock(struct amdgpu_mn *mn);  void amdgpu_mn_unlock(struct amdgpu_mn *mn);  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,  				enum amdgpu_mn_type type);  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);  void amdgpu_mn_unregister(struct amdgpu_bo *bo); +void amdgpu_hmm_init_range(struct hmm_range *range);  #else  static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}  static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} @@ -51,6 +53,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,  }  static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)  { +	DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " +		      "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");  	return -ENODEV;  }  static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 2e9e3db778c6..eb9975f4decb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -331,8 +331,6 @@ struct amdgpu_mode_info {  	struct drm_property *audio_property;  	/* FMT dithering */  	struct drm_property *dither_property; -	/* maximum number of bits per channel for monitor color */ -	struct drm_property *max_bpc_property;  	/* Adaptive Backlight Modulation (power feature) */  	struct drm_property *abm_level_property;  	/* hardcoded DFP edid from BIOS */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 34471dbaa872..a73e1903d29b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -67,6 +67,15 @@ static const struct cg_flag_name clocks[] = {  	{0, NULL},  }; +static const struct hwmon_temp_label { +	enum PP_HWMON_TEMP channel; +	const char *label; +} temp_label[] = { +	{PP_TEMP_EDGE, "edge"}, +	{PP_TEMP_JUNCTION, "junction"}, +	{PP_TEMP_MEM, "mem"}, +}; +  void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)  {  	if (adev->pm.dpm_enabled) { @@ -758,7 +767,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,  	pr_debug("featuremask = 0x%llx\n", featuremask); -	if (adev->powerplay.pp_funcs->set_ppfeature_status) { +	if (is_support_sw_smu(adev)) { +		ret = smu_set_ppfeature_status(&adev->smu, featuremask); +		if (ret) +			return -EINVAL; +	} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {  		ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);  		if (ret)  			return -EINVAL; @@ -774,7 +787,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,  	struct drm_device *ddev = dev_get_drvdata(dev);  	struct amdgpu_device *adev = ddev->dev_private; -	if (adev->powerplay.pp_funcs->get_ppfeature_status) +	if (is_support_sw_smu(adev)) { +		return smu_get_ppfeature_status(&adev->smu, buf); +	} else if (adev->powerplay.pp_funcs->get_ppfeature_status)  		return amdgpu_dpm_get_ppfeature_status(adev, buf);  	return snprintf(buf, PAGE_SIZE, "\n"); @@ -1303,6 +1318,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,  }  /** + * DOC: mem_busy_percent + * + * The amdgpu driver provides a sysfs API for reading how busy the VRAM + * is as a percentage.  The file mem_busy_percent is used for this. + * The SMU firmware computes a percentage of load based on the + * aggregate activity level in the IP cores. + */ +static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, +		struct device_attribute *attr, +		char *buf) +{ +	struct drm_device *ddev = dev_get_drvdata(dev); +	struct amdgpu_device *adev = ddev->dev_private; +	int r, value, size = sizeof(value); + +	/* read the IP busy sensor */ +	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, +				   (void *)&value, &size); + +	if (r) +		return r; + +	return snprintf(buf, PAGE_SIZE, "%d\n", value); +} + +/**   * DOC: pcie_bw   *   * The amdgpu driver provides a sysfs API for estimating how much data @@ -1327,6 +1368,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,  			count0, count1, pcie_get_mps(adev->pdev));  } +/** + * DOC: unique_id + * + * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU + * The file unique_id is used for this. + * This will provide a Unique ID that will persist from machine to machine + * + * NOTE: This will only work for GFX9 and newer. 
This file will be absent + * on unsupported ASICs (GFX8 and older) + */ +static ssize_t amdgpu_get_unique_id(struct device *dev, +		struct device_attribute *attr, +		char *buf) +{ +	struct drm_device *ddev = dev_get_drvdata(dev); +	struct amdgpu_device *adev = ddev->dev_private; + +	if (adev->unique_id) +		return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id); + +	return 0; +} +  static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);  static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,  		   amdgpu_get_dpm_forced_performance_level, @@ -1371,10 +1435,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,  		amdgpu_set_pp_od_clk_voltage);  static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,  		amdgpu_get_busy_percent, NULL); +static DEVICE_ATTR(mem_busy_percent, S_IRUGO, +		amdgpu_get_memory_busy_percent, NULL);  static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);  static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,  		amdgpu_get_ppfeature_status,  		amdgpu_set_ppfeature_status); +static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);  static ssize_t amdgpu_hwmon_show_temp(struct device *dev,  				      struct device_attribute *attr, @@ -1382,6 +1449,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,  {  	struct amdgpu_device *adev = dev_get_drvdata(dev);  	struct drm_device *ddev = adev->ddev; +	int channel = to_sensor_dev_attr(attr)->index;  	int r, temp, size = sizeof(temp);  	/* Can't get temperature when the card is off */ @@ -1389,11 +1457,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,  	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))  		return -EINVAL; -	/* get the temperature */ -	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, -				   (void *)&temp, &size); -	if (r) -		return r; +	if (channel >= PP_TEMP_MAX) +		return -EINVAL; + +	switch (channel) { +	case PP_TEMP_JUNCTION: +		/* get current junction temperature */ +		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, +					   (void *)&temp, &size); +		if (r) +			return r; +		break; +	case PP_TEMP_EDGE: +		/* get current edge temperature */ +		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, +					   (void *)&temp, &size); +		if (r) +			return r; +		break; +	case PP_TEMP_MEM: +		/* get current memory temperature */ +		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, +					   (void *)&temp, &size); +		if (r) +			return r; +		break; +	}  	return snprintf(buf, PAGE_SIZE, "%d\n", temp);  } @@ -1414,6 +1503,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,  	return snprintf(buf, PAGE_SIZE, "%d\n", temp);  } +static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev, +					     struct device_attribute *attr, +					     char *buf) +{ +	struct amdgpu_device *adev = dev_get_drvdata(dev); +	int hyst = to_sensor_dev_attr(attr)->index; +	int temp; + +	if (hyst) +		temp = adev->pm.dpm.thermal.min_hotspot_temp; +	else +		temp = adev->pm.dpm.thermal.max_hotspot_crit_temp; + +	return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + +static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev, +					     struct device_attribute *attr, +					     char *buf) +{ +	struct amdgpu_device *adev = dev_get_drvdata(dev); +	int hyst = to_sensor_dev_attr(attr)->index; +	int temp; + +	if (hyst) +		temp = adev->pm.dpm.thermal.min_mem_temp; +	else +		temp = adev->pm.dpm.thermal.max_mem_crit_temp; + +	return snprintf(buf, PAGE_SIZE, "%d\n", 
temp); +} + +static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev, +					     struct device_attribute *attr, +					     char *buf) +{ +	int channel = to_sensor_dev_attr(attr)->index; + +	if (channel >= PP_TEMP_MAX) +		return -EINVAL; + +	return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label); +} + +static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev, +					     struct device_attribute *attr, +					     char *buf) +{ +	struct amdgpu_device *adev = dev_get_drvdata(dev); +	int channel = to_sensor_dev_attr(attr)->index; +	int temp = 0; + +	if (channel >= PP_TEMP_MAX) +		return -EINVAL; + +	switch (channel) { +	case PP_TEMP_JUNCTION: +		temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp; +		break; +	case PP_TEMP_EDGE: +		temp = adev->pm.dpm.thermal.max_edge_emergency_temp; +		break; +	case PP_TEMP_MEM: +		temp = adev->pm.dpm.thermal.max_mem_emergency_temp; +		break; +	} + +	return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} +  static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,  					    struct device_attribute *attr,  					    char *buf) @@ -1983,11 +2142,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,   *   * hwmon interfaces for GPU temperature:   * - * - temp1_input: the on die GPU temperature in millidegrees Celsius + * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius + *   - temp2_input and temp3_input are supported on SOC15 dGPUs only + * + * - temp[1-3]_label: temperature channel label + *   - temp2_label and temp3_label are supported on SOC15 dGPUs only + * + * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius + *   - temp2_crit and temp3_crit are supported on SOC15 dGPUs only   * - * - temp1_crit: temperature critical max value in millidegrees Celsius + * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + *   - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only   * - * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius + *   - these are supported on SOC15 dGPUs only   *   * hwmon interfaces for GPU voltage:   * @@ -2035,9 +2203,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,   *   */ -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);  static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);  static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); +static 
SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);  static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);  static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);  static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); @@ -2064,6 +2244,18 @@ static struct attribute *hwmon_attributes[] = {  	&sensor_dev_attr_temp1_input.dev_attr.attr,  	&sensor_dev_attr_temp1_crit.dev_attr.attr,  	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, +	&sensor_dev_attr_temp2_input.dev_attr.attr, +	&sensor_dev_attr_temp2_crit.dev_attr.attr, +	&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, +	&sensor_dev_attr_temp3_input.dev_attr.attr, +	&sensor_dev_attr_temp3_crit.dev_attr.attr, +	&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, +	&sensor_dev_attr_temp1_emergency.dev_attr.attr, +	&sensor_dev_attr_temp2_emergency.dev_attr.attr, +	&sensor_dev_attr_temp3_emergency.dev_attr.attr, +	&sensor_dev_attr_temp1_label.dev_attr.attr, +	&sensor_dev_attr_temp2_label.dev_attr.attr, +	&sensor_dev_attr_temp3_label.dev_attr.attr,  	&sensor_dev_attr_pwm1.dev_attr.attr,  	&sensor_dev_attr_pwm1_enable.dev_attr.attr,  	&sensor_dev_attr_pwm1_min.dev_attr.attr, @@ -2186,6 +2378,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,  	     attr == &sensor_dev_attr_freq2_label.dev_attr.attr))  		return 0; +	/* only SOC15 dGPUs support hotspot and mem temperatures */ +	if (((adev->flags & AMD_IS_APU) || +	     adev->asic_type < CHIP_VEGA10) && +	    (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || +	     attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || +	     attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || +	     attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || +	     attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || +	     attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || +	     attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr || +	     attr == &sensor_dev_attr_temp2_input.dev_attr.attr || +	     attr == &sensor_dev_attr_temp3_input.dev_attr.attr || +	     attr == &sensor_dev_attr_temp2_label.dev_attr.attr || +	     attr == &sensor_dev_attr_temp3_label.dev_attr.attr)) +		return 0; +  	return effective_mode;  } @@ -2612,6 +2820,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)  				"gpu_busy_level\n");  		return ret;  	} +	/* APU does not have its own dedicated memory */ +	if (!(adev->flags & AMD_IS_APU)) { +		ret = device_create_file(adev->dev, +				&dev_attr_mem_busy_percent); +		if (ret) { +			DRM_ERROR("failed to create device file	" +					"mem_busy_percent\n"); +			return ret; +		} +	}  	/* PCIe Perf counters won't work on APU nodes */  	if (!(adev->flags & AMD_IS_APU)) {  		ret = device_create_file(adev->dev, &dev_attr_pcie_bw); @@ -2620,6 +2838,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)  			return ret;  		}  	} +	if (adev->unique_id) +		ret = device_create_file(adev->dev, &dev_attr_unique_id); +	if (ret) { +		DRM_ERROR("failed to create device file unique_id\n"); +		return ret; +	}  	ret = amdgpu_debugfs_pm_init(adev);  	if (ret) {  		DRM_ERROR("Failed to register 
debugfs file for dpm!\n");
@@ -2678,7 +2902,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 				&dev_attr_pp_od_clk_voltage);
 	device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
 	if (!(adev->flags & AMD_IS_APU))
+		device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
+	if (!(adev->flags & AMD_IS_APU))
 		device_remove_file(adev->dev, &dev_attr_pcie_bw);
+	if (adev->unique_id)
+		device_remove_file(adev->dev, &dev_attr_unique_id);
 	if ((adev->asic_type >= CHIP_VEGA10) &&
 	    !(adev->flags & AMD_IS_APU))
 		device_remove_file(adev->dev, &dev_attr_ppfeatures);
@@ -2775,6 +3003,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 	/* GPU Load */
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
 		seq_printf(m, "GPU Load: %u %%\n", value);
+	/* MEM Load */
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size))
+		seq_printf(m, "MEM Load: %u %%\n", value);
+
 	seq_printf(m, "\n");
 
 	/* SMC feature mask */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 86cc24b2e0aa..af9835c8395d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -289,6 +289,34 @@ static int psp_asd_load(struct psp_context *psp)
 	return ret;
 }
 
+static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+		uint32_t id, uint32_t value)
+{
+	cmd->cmd_id = GFX_CMD_ID_PROG_REG;
+	cmd->cmd.cmd_setup_reg_prog.reg_value = value;
+	cmd->cmd.cmd_setup_reg_prog.reg_id = id;
+}
+
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+		uint32_t value)
+{
+	struct psp_gfx_cmd_resp *cmd = NULL;
+	int ret = 0;
+
+	if (reg >= PSP_REG_LAST)
+		return -EINVAL;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	psp_prep_reg_prog_cmd_buf(cmd, reg, value);
+	ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+	kfree(cmd);
+	return ret;
+}
+
 static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
 					  uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
 					  uint32_t xgmi_ta_size, uint32_t shared_size)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index cde113f07c96..cf49539b0b07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -62,6 +62,14 @@ struct psp_ring
 	uint32_t			ring_size;
 };
 
+/* More registers may be supported */
+enum psp_reg_prog_id {
+	PSP_REG_IH_RB_CNTL        = 0,  /* register IH_RB_CNTL */
+	PSP_REG_IH_RB_CNTL_RING1  = 1,  /* register IH_RB_CNTL_RING1 */
+	PSP_REG_IH_RB_CNTL_RING2  = 2,  /* register IH_RB_CNTL_RING2 */
+	PSP_REG_LAST
+};
+
 struct psp_funcs
 {
 	int (*init_microcode)(struct psp_context *psp);
@@ -95,12 +103,26 @@ struct psp_funcs
 	int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
 };
 
+#define AMDGPU_XGMI_MAX_CONNECTED_NODES		64
+struct psp_xgmi_node_info {
+	uint64_t				node_id;
+	uint8_t					num_hops;
+	uint8_t					is_sharing_enabled;
+	enum ta_xgmi_assigned_sdma_engine	sdma_engine;
+};
+
+struct psp_xgmi_topology_info {
+	uint32_t			num_nodes;
+	struct psp_xgmi_node_info	nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
+};
+
 struct psp_xgmi_context {
 	uint8_t				initialized;
 	uint32_t			session_id;
 	struct amdgpu_bo                *xgmi_shared_bo;
 	uint64_t                        xgmi_shared_mc_addr;
 	void                            *xgmi_shared_buf;
+	struct psp_xgmi_topology_info	top_info;
 };
 
 struct psp_ras_context {
@@ -181,18 +203,6 @@ struct amdgpu_psp_funcs {
 					enum AMDGPU_UCODE_ID);
 };
 
-#define AMDGPU_XGMI_MAX_CONNECTED_NODES		64
-struct psp_xgmi_node_info {
-	uint64_t				node_id;
-	uint8_t					num_hops;
-	uint8_t					is_sharing_enabled;
-	enum ta_xgmi_assigned_sdma_engine	sdma_engine;
-};
-
-struct psp_xgmi_topology_info {
-	uint32_t			num_nodes;
-	struct psp_xgmi_node_info	nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
-};
 
 #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
 #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
@@ -250,5 +260,6 @@ int psp_ras_enable_features(struct psp_context *psp,
 		union ta_ras_cmd_input *info, bool enable);
 
 extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
-
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+		uint32_t value);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 22bd21efe6b1..7c8a4aedf07c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -90,6 +90,12 @@ struct ras_manager {
 	struct ras_err_data err_data;
 };
 
+struct ras_badpage {
+	unsigned int bp;
+	unsigned int size;
+	unsigned int flags;
+};
+
 const char *ras_error_string[] = {
 	"none",
 	"parity",
@@ -118,7 +124,8 @@ const char *ras_block_string[] = {
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 #define ras_block_str(i) (ras_block_string[i])
 
-#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS		1
+#define AMDGPU_RAS_FLAG_INIT_NEED_RESET		2
 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
 
 static void amdgpu_ras_self_test(struct amdgpu_device *adev)
@@ -237,8 +244,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
 	return 0;
 }
 
-/*
- * DOC: ras debugfs control interface
+/**
+ * DOC: AMDGPU RAS debugfs control interface
 *
 * It accepts struct ras_debug_if who has two members.
 *
@@ -521,6 +528,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 				enable ? "enable":"disable",
 				ras_block_str(head->block),
 				ret);
+		if (ret == TA_RAS_STATUS__RESET_NEEDED)
+			return -EAGAIN;
 		return -EINVAL;
 	}
 
@@ -541,16 +550,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
 		return -EINVAL;
 
 	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
-		/* If ras is enabled by vbios, we set up ras object first in
-		 * both case. For enable, that is all what we need do. For
-		 * disable, we need perform a ras TA disable cmd after that.
-		 */
-		ret = __amdgpu_ras_feature_enable(adev, head, 1);
-		if (ret)
-			return ret;
+		if (enable) {
+			/* There is no harm in issuing a ras TA cmd regardless
+			 * of the current ras state.
+			 * If the current state == target state, it will do nothing.
+			 * But sometimes it requests the driver to reset and
+			 * repost with error code -EAGAIN.
+			 */
+			ret = amdgpu_ras_feature_enable(adev, head, 1);
+			/* With an old ras TA, we might fail to enable ras.
+			 * Log it and just set up the object.
+			 * TODO: remove this WA in the future.
+			 */
+			if (ret == -EINVAL) {
+				ret = __amdgpu_ras_feature_enable(adev, head, 1);
+				if (!ret)
+					DRM_INFO("RAS INFO: %s setup object\n",
+						ras_block_str(head->block));
+			}
+		} else {
+			/* setup the object then issue a ras TA disable cmd. */
+			ret = __amdgpu_ras_feature_enable(adev, head, 1);
+			if (ret)
+				return ret;
 
-		if (!enable)
 			ret = amdgpu_ras_feature_enable(adev, head, 0);
+		}
 	} else
 		ret = amdgpu_ras_feature_enable(adev, head, enable);
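The -EAGAIN path above is meant to be consumed by IP-block late-init code together with amdgpu_ras_request_reset_on_boot(), added further down in this file. A hedged sketch of the expected caller-side pattern (the function name and surrounding code are illustrative assumptions, not part of this patch):

	static int example_ecc_late_init(struct amdgpu_device *adev,
					 struct ras_common_if *head)
	{
		int r = amdgpu_ras_feature_enable_on_boot(adev, head, 1);

		/* the TA wants a full reset/repost before the state change sticks */
		if (r == -EAGAIN && !amdgpu_ras_request_reset_on_boot(adev, head->block))
			return 0;	/* reset is issued later, from amdgpu_ras_resume() */

		return r;
	}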
@@ -691,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 
 /* sysfs begin */
 
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+		struct ras_badpage **bps, unsigned int *count);
+
+static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
+{
+	switch (flags) {
+	case 0:
+		return "R";
+	case 1:
+		return "P";
+	case 2:
+	default:
+		return "F";
+	};
+}
+
+/*
+ * DOC: ras sysfs gpu_vram_bad_pages interface
+ *
+ * It allows the user to read the bad pages of VRAM on the GPU through
+ * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
+ *
+ * It outputs multiple lines, and each line stands for one gpu page.
+ *
+ * The format of one line is below,
+ * gpu pfn : gpu page size : flags
+ *
+ * gpu pfn and gpu page size are printed in hex format.
+ * flags can be one of the characters below,
+ * R: reserved, this gpu page is reserved and not able to use.
+ * P: pending for reserve, this gpu page is marked as bad and will be
+ *    reserved in the next window of page_reserve.
+ * F: unable to reserve. This gpu page cannot be reserved for some reason.
+ *
+ * examples:
+ * 0x00000001 : 0x00001000 : R
+ * 0x00000002 : 0x00001000 : P
+ */
+
+static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
+		struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t ppos, size_t count)
+{
+	struct amdgpu_ras *con =
+		container_of(attr, struct amdgpu_ras, badpages_attr);
+	struct amdgpu_device *adev = con->adev;
+	const unsigned int element_size =
+		sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
+	unsigned int start = div64_ul(ppos + element_size - 1, element_size);
+	unsigned int end = div64_ul(ppos + count - 1, element_size);
+	ssize_t s = 0;
+	struct ras_badpage *bps = NULL;
+	unsigned int bps_count = 0;
+
+	memset(buf, 0, count);
+
+	if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+		return 0;
+
+	for (; start < end && start < bps_count; start++)
+		s += scnprintf(&buf[s], element_size + 1,
+				"0x%08x : 0x%08x : %1s\n",
+				bps[start].bp,
+				bps[start].size,
+				amdgpu_ras_badpage_flags_str(bps[start].flags));
+
+	kfree(bps);
+
+	return s;
+}
+
 static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -731,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
 		&con->features_attr.attr,
 		NULL
 	};
+	struct bin_attribute *bin_attrs[] = {
+		&con->badpages_attr,
+		NULL
+	};
 	struct attribute_group group = {
 		.name = "ras",
 		.attrs = attrs,
+		.bin_attrs = bin_attrs,
 	};
 
 	con->features_attr = (struct device_attribute) {
@@ -743,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
 		},
 			.show = amdgpu_ras_sysfs_features_read,
 	};
+
+	con->badpages_attr = (struct bin_attribute) {
+		.attr = {
+			.name = "gpu_vram_bad_pages",
+			.mode = S_IRUGO,
+		},
+		.size = 0,
+		.private = NULL,
+		.read = amdgpu_ras_sysfs_badpages_read,
+	};
+
 	sysfs_attr_init(attrs[0]);
+	sysfs_bin_attr_init(bin_attrs[0]);
 
 	return sysfs_create_group(&adev->dev->kobj, &group);
 }
 
@@ -755,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
 		&con->features_attr.attr,
 		NULL
 	};
+	struct bin_attribute *bin_attrs[] = {
+		&con->badpages_attr,
+		NULL
+	};
 	struct attribute_group group = {
 		.name = "ras",
 		.attrs = attrs,
+		.bin_attrs = bin_attrs,
 	};
 
 	sysfs_remove_group(&adev->dev->kobj, &group);
@@ -1089,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
 /* ih end */
 
 /* recovery begin */
+
+/* return 0 on success.
+ * caller must free bps.
+ */
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+		struct ras_badpage **bps, unsigned int *count)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_err_handler_data *data;
+	int i = 0;
+	int ret = 0;
+
+	if (!con || !con->eh_data || !bps || !count)
+		return -EINVAL;
+
+	mutex_lock(&con->recovery_lock);
+	data = con->eh_data;
+	if (!data || data->count == 0) {
+		*bps = NULL;
+		goto out;
+	}
+
+	*bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
+	if (!*bps) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (; i < data->count; i++) {
+		(*bps)[i] = (struct ras_badpage){
+			.bp = data->bps[i].bp,
+			.size = AMDGPU_GPU_PAGE_SIZE,
+			.flags = 0,
+		};
+
+		if (data->last_reserved <= i)
+			(*bps)[i].flags = 1;
+		else if (data->bps[i].bo == NULL)
+			(*bps)[i].flags = 2;
+	}
+
+	*count = data->count;
+out:
+	mutex_unlock(&con->recovery_lock);
+	return ret;
+}
+
 static void amdgpu_ras_do_recovery(struct work_struct *work)
 {
 	struct amdgpu_ras *ras =
@@ -1340,6 +1505,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
 }
 /* recovery end */
 
+/* return 0 if ras will reset gpu and repost. */
+int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
+		unsigned int block)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+	if (!ras)
+		return -EINVAL;
+
+	ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
+	return 0;
+}
+
 /*
 * check hardware's ras ability which will be saved in hw_supported.
 * if hardware does not support ras, we can skip some ras initializtion and
@@ -1415,8 +1593,10 @@ recovery_out:
 	return -EINVAL;
 }
 
-/* do some init work after IP late init as dependence */
-void amdgpu_ras_post_init(struct amdgpu_device *adev)
+/* Do some init work after IP late init as dependence.
+ * It runs in the resume/gpu reset/boot-up cases.
+ */
+void amdgpu_ras_resume(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_manager *obj, *tmp;
@@ -1444,6 +1624,32 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
 			}
 		}
 	}
+
+	if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
+		con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
+		/* Set up the ras obj state as disabled.
+		 * This is only for the init_by_vbios case.
+		 * If we want to enable ras, just enable it in the normal way.
+		 * If we want to disable it, we need to set up the ras obj as
+		 * enabled first, then issue another TA disable cmd.
+		 * See feature_enable_on_boot.
+		 */
+		amdgpu_ras_disable_all_features(adev, 1);
+		amdgpu_ras_reset_gpu(adev, 0);
+	}
+}
+
+void amdgpu_ras_suspend(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (!con)
+		return;
+
+	amdgpu_ras_disable_all_features(adev, 0);
+	/* Make sure all ras objects are disabled. */
+	if (con->features)
+		amdgpu_ras_disable_all_features(adev, 1);
 }
 
 /* do some fini work before IP fini as dependence */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index eaef5edefc34..c6b34fbd695f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -93,6 +93,7 @@ struct amdgpu_ras {
 	struct dentry *ent;
 	/* sysfs */
 	struct device_attribute features_attr;
+	struct bin_attribute badpages_attr;
 	/* block array */
 	struct ras_manager *objs;
 
@@ -175,6 +176,12 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 	return ras && (ras->supported & (1 << block));
 }
 
+int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
+		unsigned int block);
+
+void amdgpu_ras_resume(struct amdgpu_device *adev);
+void amdgpu_ras_suspend(struct amdgpu_device *adev);
+
 int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 		bool is_ce);
 
@@ -187,13 +194,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
 static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
 		bool is_baco)
 {
-	/* remove me when gpu reset works on vega20 A1. */
-#if 0
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
 	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
 		schedule_work(&ras->recovery_work);
-#endif
 	return 0;
 }
 
@@ -255,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
 
 /* called in ip_init and ip_fini */
 int amdgpu_ras_init(struct amdgpu_device *adev);
-void amdgpu_ras_post_init(struct amdgpu_device *adev);
 int amdgpu_ras_fini(struct amdgpu_device *adev);
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index d7fae2676269..cdddce938bf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -114,6 +114,7 @@ struct amdgpu_ring_funcs {
 	uint32_t		align_mask;
 	u32			nop;
 	bool			support_64bit_ptrs;
+	bool			no_user_fence;
 	unsigned		vmhub;
 	unsigned		extra_dw;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0c52d1f9fe0f..7138dc1dd1f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
+#include <linux/hmm.h>
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
@@ -703,143 +704,191 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 /*
 * TTM backend functions.
 */
-struct amdgpu_ttm_gup_task_list {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	u64			offset;
 	uint64_t		userptr;
 	struct task_struct	*usertask;
 	uint32_t		userflags;
-	spinlock_t              guptasklock;
-	struct list_head        guptasks;
-	atomic_t		mmu_invalidations;
-	uint32_t		last_set_pages;
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+	struct hmm_range	*ranges;
+	int			nr_ranges;
+#endif
};
 
 /**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking of CPU page table updates
 *
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide - * device accessible pages that back user memory. + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only + * once afterwards to stop HMM tracking   */ +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + +/* Support Userptr pages cross max 16 vmas */ +#define MAX_NR_VMAS	(16) +  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)  {  	struct amdgpu_ttm_tt *gtt = (void *)ttm;  	struct mm_struct *mm = gtt->usertask->mm; -	unsigned int flags = 0; -	unsigned pinned = 0; -	int r; +	unsigned long start = gtt->userptr; +	unsigned long end = start + ttm->num_pages * PAGE_SIZE; +	struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS]; +	struct hmm_range *ranges; +	unsigned long nr_pages, i; +	uint64_t *pfns, f; +	int r = 0;  	if (!mm) /* Happens during process shutdown */  		return -ESRCH; -	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) -		flags |= FOLL_WRITE; -  	down_read(&mm->mmap_sem); -	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { -		/* -		 * check that we only use anonymous memory to prevent problems -		 * with writeback -		 */ -		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; -		struct vm_area_struct *vma; +	/* user pages may cross multiple VMAs */ +	gtt->nr_ranges = 0; +	do { +		unsigned long vm_start; -		vma = find_vma(mm, gtt->userptr); -		if (!vma || vma->vm_file || vma->vm_end < end) { -			up_read(&mm->mmap_sem); -			return -EPERM; +		if (gtt->nr_ranges >= MAX_NR_VMAS) { +			DRM_ERROR("Too many VMAs in userptr range\n"); +			r = -EFAULT; +			goto out;  		} + +		vm_start = vma ? vma->vm_end : start; +		vma = find_vma(mm, vm_start); +		if (unlikely(!vma || vm_start < vma->vm_start)) { +			r = -EFAULT; +			goto out; +		} +		vmas[gtt->nr_ranges++] = vma; +	} while (end > vma->vm_end); + +	DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n", +		start, gtt->nr_ranges, ttm->num_pages); + +	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && +		vmas[0]->vm_file)) { +		r = -EPERM; +		goto out;  	} -	/* loop enough times using contiguous pages of memory */ -	do { -		unsigned num_pages = ttm->num_pages - pinned; -		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; -		struct page **p = pages + pinned; -		struct amdgpu_ttm_gup_task_list guptask; +	ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL); +	if (unlikely(!ranges)) { +		r = -ENOMEM; +		goto out; +	} -		guptask.task = current; -		spin_lock(>t->guptasklock); -		list_add(&guptask.list, >t->guptasks); -		spin_unlock(>t->guptasklock); +	pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); +	if (unlikely(!pfns)) { +		r = -ENOMEM; +		goto out_free_ranges; +	} -		if (mm == current->mm) -			r = get_user_pages(userptr, num_pages, flags, p, NULL); -		else -			r = get_user_pages_remote(gtt->usertask, -					mm, userptr, num_pages, -					flags, p, NULL, NULL); +	for (i = 0; i < gtt->nr_ranges; i++) +		amdgpu_hmm_init_range(&ranges[i]); -		spin_lock(>t->guptasklock); -		list_del(&guptask.list); -		spin_unlock(>t->guptasklock); +	f = ranges[0].flags[HMM_PFN_VALID]; +	f |= amdgpu_ttm_tt_is_readonly(ttm) ? 
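[Editor's note: because a userptr range may span several VMAs, the new code first collects every VMA covering [start, end), bounded by MAX_NR_VMAS, and then sets up one hmm_range per VMA. A condensed sketch of that walk, assuming mmap_sem is held for read as in the function above:

/* Condensed sketch of the multi-VMA collection loop; MAX_NR_VMAS
 * bounds the walk exactly as in the patch, error paths trimmed. */
#include <linux/mm.h>

static int collect_vmas(struct mm_struct *mm, unsigned long start,
			unsigned long end, struct vm_area_struct **vmas)
{
	struct vm_area_struct *vma = NULL;
	int n = 0;

	do {
		unsigned long addr = vma ? vma->vm_end : start;

		if (n >= MAX_NR_VMAS)
			return -EFAULT;
		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
			return -EFAULT;	/* hole in the mapping */
		vmas[n++] = vma;
	} while (end > vma->vm_end);

	return n;	/* number of VMAs covering the range */
}
]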
+				0 : ranges[0].flags[HMM_PFN_WRITE]; +	memset64(pfns, f, ttm->num_pages); -		if (r < 0) -			goto release_pages; +	for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) { +		ranges[i].vma = vmas[i]; +		ranges[i].start = max(start, vmas[i]->vm_start); +		ranges[i].end = min(end, vmas[i]->vm_end); +		ranges[i].pfns = pfns + nr_pages; +		nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE; -		pinned += r; +		r = hmm_vma_fault(&ranges[i], true); +		if (unlikely(r)) +			break; +	} +	if (unlikely(r)) { +		while (i--) +			hmm_vma_range_done(&ranges[i]); -	} while (pinned < ttm->num_pages); +		goto out_free_pfns; +	}  	up_read(&mm->mmap_sem); + +	for (i = 0; i < ttm->num_pages; i++) { +		pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]); +		if (!pages[i]) { +			pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", +			       i, pfns[i]); +			goto out_invalid_pfn; +		} +	} +	gtt->ranges = ranges; +  	return 0; -release_pages: -	release_pages(pages, pinned); +out_free_pfns: +	kvfree(pfns); +out_free_ranges: +	kvfree(ranges); +out:  	up_read(&mm->mmap_sem); +  	return r; + +out_invalid_pfn: +	for (i = 0; i < gtt->nr_ranges; i++) +		hmm_vma_range_done(&ranges[i]); +	kvfree(pfns); +	kvfree(ranges); +	return -ENOMEM;  }  /** - * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change + * Check if the pages backing this ttm range have been invalidated   * - * Called by amdgpu_cs_list_validate(). This creates the page list - * that backs user memory and will ultimately be mapped into the device - * address space. + * Returns: true if pages are still valid   */ -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)  {  	struct amdgpu_ttm_tt *gtt = (void *)ttm; -	unsigned i; +	bool r = false; +	int i; -	gtt->last_set_pages = atomic_read(>t->mmu_invalidations); -	for (i = 0; i < ttm->num_pages; ++i) { -		if (ttm->pages[i]) -			put_page(ttm->pages[i]); +	if (!gtt || !gtt->userptr) +		return false; -		ttm->pages[i] = pages ? pages[i] : NULL; +	DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n", +		gtt->userptr, gtt->nr_ranges, ttm->num_pages); + +	WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns, +		"No user pages to check\n"); + +	if (gtt->ranges) { +		for (i = 0; i < gtt->nr_ranges; i++) +			r |= hmm_vma_range_done(>t->ranges[i]); +		kvfree(gtt->ranges[0].pfns); +		kvfree(gtt->ranges); +		gtt->ranges = NULL;  	} + +	return r;  } +#endif  /** - * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty + * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.   * - * Called while unpinning userptr pages + * Called by amdgpu_cs_list_validate(). This creates the page list + * that backs user memory and will ultimately be mapped into the device + * address space.   */ -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)  { -	struct amdgpu_ttm_tt *gtt = (void *)ttm; -	unsigned i; - -	for (i = 0; i < ttm->num_pages; ++i) { -		struct page *page = ttm->pages[i]; +	unsigned long i; -		if (!page) -			continue; - -		if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) -			set_page_dirty(page); - -		mark_page_accessed(page); -	} +	for (i = 0; i < ttm->num_pages; ++i) +		ttm->pages[i] = pages ? 
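[Editor's note: amdgpu_ttm_tt_get_user_pages() and amdgpu_ttm_tt_get_user_pages_done() form a snapshot/validate pair: the first faults the pages and starts HMM tracking, the second stops tracking and, per the kernel-doc below, returns true when the pages are still valid. Callers are expected to retry the whole sequence when validation fails. Roughly, for a hypothetical caller with locking elided (the real users are the GEM userptr and CS paths):

/* Hypothetical caller sketch of the snapshot/validate retry loop. */
static int pin_userptr_pages(struct ttm_tt *ttm, struct page **pages)
{
	int r;

	do {
		r = amdgpu_ttm_tt_get_user_pages(ttm, pages);
		if (r)
			return r;
		/* ... bind the pages, set up the GART mapping ... */

		/* returns true if the CPU page tables did not change
		 * underneath us; otherwise start over */
	} while (!amdgpu_ttm_tt_get_user_pages_done(ttm));

	return 0;
}
]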
pages[i] : NULL;  }  /** @@ -901,10 +950,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)  	/* unmap the pages mapped to the device */  	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); -	/* mark the pages as dirty */ -	amdgpu_ttm_tt_mark_user_pages(ttm); -  	sg_free_table(ttm->sg); + +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) +	if (gtt->ranges && +	    ttm->pages[0] == hmm_pfn_to_page(>t->ranges[0], +					     gtt->ranges[0].pfns[0])) +		WARN_ONCE(1, "Missing get_user_page_done\n"); +#endif  }  int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, @@ -1254,11 +1307,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,  	gtt->usertask = current->group_leader;  	get_task_struct(gtt->usertask); -	spin_lock_init(>t->guptasklock); -	INIT_LIST_HEAD(>t->guptasks); -	atomic_set(>t->mmu_invalidations, 0); -	gtt->last_set_pages = 0; -  	return 0;  } @@ -1287,7 +1335,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,  				  unsigned long end)  {  	struct amdgpu_ttm_tt *gtt = (void *)ttm; -	struct amdgpu_ttm_gup_task_list *entry;  	unsigned long size;  	if (gtt == NULL || !gtt->userptr) @@ -1300,48 +1347,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,  	if (gtt->userptr > end || gtt->userptr + size <= start)  		return false; -	/* Search the lists of tasks that hold this mapping and see -	 * if current is one of them.  If it is return false. -	 */ -	spin_lock(>t->guptasklock); -	list_for_each_entry(entry, >t->guptasks, list) { -		if (entry->task == current) { -			spin_unlock(>t->guptasklock); -			return false; -		} -	} -	spin_unlock(>t->guptasklock); - -	atomic_inc(>t->mmu_invalidations); -  	return true;  }  /** - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? - */ -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, -				       int *last_invalidated) -{ -	struct amdgpu_ttm_tt *gtt = (void *)ttm; -	int prev_invalidated = *last_invalidated; - -	*last_invalidated = atomic_read(>t->mmu_invalidations); -	return prev_invalidated != *last_invalidated; -} - -/** - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object - * been invalidated since the last time they've been set? + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?   
*/ -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)  {  	struct amdgpu_ttm_tt *gtt = (void *)ttm;  	if (gtt == NULL || !gtt->userptr)  		return false; -	return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; +	return true;  }  /** @@ -1753,44 +1772,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	/* Initialize various on-chip memory pools */  	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, -			   adev->gds.mem.total_size); +			   adev->gds.gds_size);  	if (r) {  		DRM_ERROR("Failed initializing GDS heap.\n");  		return r;  	} -	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, -				    4, AMDGPU_GEM_DOMAIN_GDS, -				    &adev->gds.gds_gfx_bo, NULL, NULL); -	if (r) -		return r; -  	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, -			   adev->gds.gws.total_size); +			   adev->gds.gws_size);  	if (r) {  		DRM_ERROR("Failed initializing gws heap.\n");  		return r;  	} -	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, -				    1, AMDGPU_GEM_DOMAIN_GWS, -				    &adev->gds.gws_gfx_bo, NULL, NULL); -	if (r) -		return r; -  	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, -			   adev->gds.oa.total_size); +			   adev->gds.oa_size);  	if (r) {  		DRM_ERROR("Failed initializing oa heap.\n");  		return r;  	} -	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, -				    1, AMDGPU_GEM_DOMAIN_OA, -				    &adev->gds.oa_gfx_bo, NULL, NULL); -	if (r) -		return r; -  	/* Register debugfs entries for amdgpu_ttm */  	r = amdgpu_ttm_debugfs_init(adev);  	if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index b5b2d101f7db..c2b7669004ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -101,9 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);  int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);  int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); +#else +static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) +{ +	return -EPERM; +} +static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +{ +	return false; +} +#endif +  void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);  int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,  				     uint32_t flags);  bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); @@ -112,7 +124,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,  				  unsigned long end);  bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,  				       int *last_invalidated); -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);  bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);  uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);  uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 7b33867036e7..33c1eb76c076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -313,6 +313,69 @@ 
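[Editor's note: the header change just above adds inline stubs for the userptr entry points when CONFIG_DRM_AMDGPU_USERPTR is disabled, so callers compile unchanged in both configurations instead of sprinkling #ifdefs. Illustrative caller:

/* Illustrative caller: with the stubs in amdgpu_ttm.h this builds
 * whether or not CONFIG_DRM_AMDGPU_USERPTR is enabled; the stub
 * simply returns -EPERM. */
static int validate_userptr(struct ttm_tt *ttm, struct page **pages)
{
	int r = amdgpu_ttm_tt_get_user_pages(ttm, pages);

	if (r)
		return r;
	amdgpu_ttm_tt_set_user_pages(ttm, pages);
	return 0;
}
]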
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)  	return AMDGPU_FW_LOAD_DIRECT;  } +#define FW_VERSION_ATTR(name, mode, field)				\ +static ssize_t show_##name(struct device *dev,				\ +			  struct device_attribute *attr,		\ +			  char *buf)					\ +{									\ +	struct drm_device *ddev = dev_get_drvdata(dev);			\ +	struct amdgpu_device *adev = ddev->dev_private;			\ +									\ +	return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field);	\ +}									\ +static DEVICE_ATTR(name, mode, show_##name, NULL) + +FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version); +FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version); +FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version); +FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version); +FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version); +FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version); +FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version); +FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version); +FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version); +FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); +FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); +FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); +FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version); +FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version); +FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version); +FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version); +FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version); +FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); +FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version); +FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version); +FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version); + +static struct attribute *fw_attrs[] = { +	&dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr, +	&dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr, +	&dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr, +	&dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr, +	&dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr, +	&dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr, +	&dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr, +	&dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr, +	&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr, +	&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr, +	&dev_attr_dmcu_fw_version.attr, NULL +}; + +static const struct attribute_group fw_attr_group = { +	.name = "fw_version", +	.attrs = fw_attrs +}; + +int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev) +{ +	return sysfs_create_group(&adev->dev->kobj, &fw_attr_group); +} + +void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev) +{ +	sysfs_remove_group(&adev->dev->kobj, &fw_attr_group); +} +  static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,  				       struct amdgpu_firmware_info *ucode,  				       uint64_t mc_addr, void *kptr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 7ac25a1c7853..ec4c2ea1f05a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -291,7 +291,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,  int amdgpu_ucode_init_bo(struct amdgpu_device *adev);  int amdgpu_ucode_create_bo(struct amdgpu_device *adev); +int 
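[Editor's note: each generated attribute prints one firmware version as "0x%08x", grouped under the "fw_version" directory from fw_attr_group.name, i.e. /sys/class/drm/card${cardno}/device/fw_version/. A small userspace reader, with the path layout assumed from the group and attribute names above:

/* Userspace sketch: read one firmware version attribute, e.g.
 * read_fw_version(0, "mec_fw_version", &v). */
#include <stdio.h>

static int read_fw_version(int card, const char *name, unsigned *ver)
{
	char path[128];
	FILE *f;
	int r;

	snprintf(path, sizeof(path),
		 "/sys/class/drm/card%d/device/fw_version/%s", card, name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	r = fscanf(f, "0x%x", ver) == 1 ? 0 : -1;
	fclose(f);
	return r;
}
]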
amdgpu_ucode_sysfs_init(struct amdgpu_device *adev);  void amdgpu_ucode_free_bo(struct amdgpu_device *adev); +void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);  enum amdgpu_firmware_load_type  amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ecf6f96df2ad..118451f5e3aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -212,132 +212,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)  	return 0;  } -static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev, -				     struct dpg_pause_state *new_state) -{ -	int ret_code; -	uint32_t reg_data = 0; -	uint32_t reg_data2 = 0; -	struct amdgpu_ring *ring; - -	/* pause/unpause if state is changed */ -	if (adev->vcn.pause_state.fw_based != new_state->fw_based) { -		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", -			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, -			new_state->fw_based, new_state->jpeg); - -		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & -			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); - -		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { -			ret_code = 0; - -			if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) -				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, -						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, -						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - -			if (!ret_code) { -				/* pause DPG non-jpeg */ -				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; -				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); -				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, -						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, -						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); - -				/* Restore */ -				ring = &adev->vcn.ring_enc[0]; -				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); -				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); -				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); -				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); -				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - -				ring = &adev->vcn.ring_enc[1]; -				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); -				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); -				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); -				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); -				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - -				ring = &adev->vcn.ring_dec; -				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, -						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); -				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, -						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, -						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); -			} -		} else { -			/* unpause dpg non-jpeg, no need to wait */ -			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; -			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); -		} -		adev->vcn.pause_state.fw_based = new_state->fw_based; -	} - -	/* pause/unpause if state is changed */ -	if (adev->vcn.pause_state.jpeg != new_state->jpeg) { -		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", -			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, -			new_state->fw_based, new_state->jpeg); - -		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & -			(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); - -		if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { -			ret_code = 0; - -			if 
(!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) -				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, -						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, -						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - -			if (!ret_code) { -				/* Make sure JPRG Snoop is disabled before sending the pause */ -				reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); -				reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; -				WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); - -				/* pause DPG jpeg */ -				reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; -				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); -				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, -							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, -							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); - -				/* Restore */ -				ring = &adev->vcn.ring_jpeg; -				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); -				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, -							UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | -							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); -				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, -							lower_32_bits(ring->gpu_addr)); -				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, -							upper_32_bits(ring->gpu_addr)); -				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); -				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); -				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, -							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - -				ring = &adev->vcn.ring_dec; -				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, -						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); -				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, -						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, -						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); -			} -		} else { -			/* unpause dpg jpeg, no need to wait */ -			reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; -			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); -		} -		adev->vcn.pause_state.jpeg = new_state->jpeg; -	} - -	return 0; -} -  static void amdgpu_vcn_idle_work_handler(struct work_struct *work)  {  	struct amdgpu_device *adev = @@ -362,7 +236,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)  		else  			new_state.jpeg = VCN_DPG_STATE__UNPAUSE; -		amdgpu_vcn_pause_dpg_mode(adev, &new_state); +		adev->vcn.pause_dpg_mode(adev, &new_state);  	}  	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); @@ -417,7 +291,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)  		else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)  			new_state.jpeg = VCN_DPG_STATE__PAUSE; -		amdgpu_vcn_pause_dpg_mode(adev, &new_state); +		adev->vcn.pause_dpg_mode(adev, &new_state);  	}  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index a0ad19af9080..a1ee19251aae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -45,6 +45,27 @@  #define VCN_ENC_CMD_REG_WRITE		0x0000000b  #define VCN_ENC_CMD_REG_WAIT		0x0000000c +#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) 				\ +	({	WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); 			\ +		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, 				\ +			UVD_DPG_LMA_CTL__MASK_EN_MASK | 				\ +			((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) 	\ +			<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | 			\ +			(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); 		\ +		RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); 				\ +	}) + +#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) 			\ +	do { 							
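[Editor's note: the DPG pause sequence deleted above moves out of the shared file; amdgpu_vcn.c now calls through the new adev->vcn.pause_dpg_mode pointer (see the call-site changes below), so each VCN hardware generation can supply its own pause logic. The assignment site is not part of this hunk; a sketch of how the hookup presumably looks in a version-specific file:

/* Sketch of the per-ASIC wiring of the DPG pause callback.  The
 * function and init-phase names are assumptions; only the pointer
 * and its call sites appear in this patch. */
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
				   struct dpg_pause_state *new_state)
{
	/* ASIC-specific UVD_DPG_PAUSE programming lives here now */
	return 0;
}

static int vcn_v1_0_setup(struct amdgpu_device *adev)
{
	adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
	return 0;
}
]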
			\ +		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); 			\ +		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); 			\ +		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, 				\ +			UVD_DPG_LMA_CTL__READ_WRITE_MASK | 				\ +			((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) 	\ +			<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | 			\ +			(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); 		\ +	} while (0) +  enum engine_status_constants {  	UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,  	UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, @@ -81,6 +102,8 @@ struct amdgpu_vcn {  	unsigned		num_enc_rings;  	enum amd_powergating_state cur_state;  	struct dpg_pause_state pause_state; +	int (*pause_dpg_mode)(struct amdgpu_device *adev, +		struct dpg_pause_state *new_state);  };  int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 7d484fad3909..1f0bd4d16475 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -426,3 +426,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)  	return clk;  } +void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev) +{ +	struct amdgpu_virt *virt = &adev->virt; + +	if (virt->ops && virt->ops->init_reg_access_mode) +		virt->ops->init_reg_access_mode(adev); +} + +bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev) +{ +	bool ret = false; +	struct amdgpu_virt *virt = &adev->virt; + +	if (amdgpu_sriov_vf(adev) +		&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH)) +		ret = true; + +	return ret; +} + +bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev) +{ +	bool ret = false; +	struct amdgpu_virt *virt = &adev->virt; + +	if (amdgpu_sriov_vf(adev) +		&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC) +		&& !(amdgpu_sriov_runtime(adev))) +		ret = true; + +	return ret; +} + +bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev) +{ +	bool ret = false; +	struct amdgpu_virt *virt = &adev->virt; + +	if (amdgpu_sriov_vf(adev) +		&& (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING)) +		ret = true; + +	return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 584947b7ccf3..dca25deee75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer {  	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];  }; +/* According to the fw feature, some new reg access modes are supported */ +#define AMDGPU_VIRT_REG_ACCESS_LEGACY          (1 << 0) /* directly mmio */ +#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH      (1 << 1) /* by PSP */ +#define AMDGPU_VIRT_REG_ACCESS_RLC             (1 << 2) /* by RLC */ +#define AMDGPU_VIRT_REG_SKIP_SEETING           (1 << 3) /* Skip setting reg */ +  /**   * struct amdgpu_virt_ops - amdgpu device virt operations   */ @@ -59,6 +65,7 @@ struct amdgpu_virt_ops {  	void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);  	int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);  	int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); +	void (*init_reg_access_mode)(struct amdgpu_device *adev);  };  /* @@ -258,6 +265,7 @@ struct amdgpu_virt {  	uint32_t gim_feature;  	/* protect DPM events to GIM */  	struct mutex                    dpm_mutex; +	uint32_t reg_access_mode;  };  #define amdgpu_sriov_enabled(adev) \ @@ -307,4 +315,9 @@ 
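[Editor's note: reg_access_mode is a bitmask advertised by the host firmware; the new amdgpu_virt_support_*() helpers below turn it into simple yes/no questions for the register programming paths. An illustrative consumer, with write_via_rlc() an assumed helper that is not part of this patch:

/* Illustrative consumer: pick a programming path from the mode bits.
 * amdgpu_virt_support_rlc_prg_reg() additionally checks that we are
 * an SR-IOV VF and not in SR-IOV runtime, as defined below. */
static void write_reg_smart(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (amdgpu_virt_support_rlc_prg_reg(adev))
		write_via_rlc(adev, reg, v);	/* assumed helper */
	else
		WREG32(reg, v);			/* direct MMIO */
}
]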
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);  uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);  uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); +void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev); +bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev); +bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev); +bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev); +  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index a48c84c51775..d11eba09eadd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)  	return &hive->device_list;  } +/** + * DOC: AMDGPU XGMI Support + * + * XGMI is a high speed interconnect that joins multiple GPU cards + * into a homogeneous memory space that is organized by a collective + * hive ID and individual node IDs, both of which are 64-bit numbers. + * + * The file xgmi_device_id contains the unique per GPU device ID and + * is stored in the /sys/class/drm/card${cardno}/device/ directory. + * + * Inside the device directory a sub-directory 'xgmi_hive_info' is + * created which contains the hive ID and the list of nodes. + * + * The hive ID is stored in: + *   /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id + * + * The node information is stored in numbered directories: + *   /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id + * + * Each device has its own xgmi_hive_info directory with a mirrored + * set of node sub-directories. + * + * The XGMI memory space is built by contiguously adding the power of + * two padded VRAM space from each node to each other. 
+ * + */ + +  static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,  		struct device_attribute *attr, char *buf)  { @@ -238,7 +266,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev  	/* Each psp need to set the latest topology */  	ret = psp_xgmi_set_topology_info(&adev->psp,  					 hive->number_devices, -					 &hive->topology_info); +					 &adev->psp.xgmi_context.top_info);  	if (ret)  		dev_err(adev->dev,  			"XGMI: Set topology failure on device %llx, hive %llx, ret %d", @@ -248,9 +276,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev  	return ret;  } + +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, +		struct amdgpu_device *peer_adev) +{ +	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; +	int i; + +	for (i = 0 ; i < top->num_nodes; ++i) +		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) +			return top->nodes[i].num_hops; +	return	-EINVAL; +} +  int amdgpu_xgmi_add_device(struct amdgpu_device *adev)  { -	struct psp_xgmi_topology_info *hive_topology; +	struct psp_xgmi_topology_info *top_info;  	struct amdgpu_hive_info *hive;  	struct amdgpu_xgmi	*entry;  	struct amdgpu_device *tmp_adev = NULL; @@ -283,35 +324,46 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)  		goto exit;  	} -	hive_topology = &hive->topology_info; +	top_info = &adev->psp.xgmi_context.top_info;  	list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);  	list_for_each_entry(entry, &hive->device_list, head) -		hive_topology->nodes[count++].node_id = entry->node_id; +		top_info->nodes[count++].node_id = entry->node_id; +	top_info->num_nodes = count;  	hive->number_devices = count; -	/* Each psp need to get the latest topology */  	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { -		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); +		/* update node list for other device in the hive */ +		if (tmp_adev != adev) { +			top_info = &tmp_adev->psp.xgmi_context.top_info; +			top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id; +			top_info->num_nodes = count; +		} +		ret = amdgpu_xgmi_update_topology(hive, tmp_adev); +		if (ret) +			goto exit; +	} + +	/* get latest topology info for each device from psp */ +	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { +		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, +				&tmp_adev->psp.xgmi_context.top_info);  		if (ret) {  			dev_err(tmp_adev->dev,  				"XGMI: Get topology failure on device %llx, hive %llx, ret %d",  				tmp_adev->gmc.xgmi.node_id,  				tmp_adev->gmc.xgmi.hive_id, ret);  			/* To do : continue with some node failed or disable the whole hive */ -			break; +			goto exit;  		}  	} -	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { -		ret = amdgpu_xgmi_update_topology(hive, tmp_adev); -		if (ret) -			break; -	} -  	if (!ret)  		ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive); + +	mutex_unlock(&hive->hive_lock); +exit:  	if (!ret)  		dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",  			 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); @@ -320,9 +372,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)  			adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,  			ret); - -	mutex_unlock(&hive->hive_lock); -exit:  	return ret;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 3e9c91e9a4bf..fbcee31788c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ 
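[Editor's note: amdgpu_xgmi_get_hops_count() above walks the PSP-provided topology looking for the peer's node ID and returns its hop distance, so callers can weigh P2P paths. A hypothetical caller:

/* Hypothetical caller: prefer the peer that is fewest XGMI hops
 * away; INT_MAX comes from <linux/kernel.h>. */
static struct amdgpu_device *closest_peer(struct amdgpu_device *adev,
					  struct amdgpu_device **peers,
					  int n)
{
	struct amdgpu_device *best = NULL;
	int i, hops, best_hops = INT_MAX;

	for (i = 0; i < n; i++) {
		hops = amdgpu_xgmi_get_hops_count(adev, peers[i]);
		if (hops >= 0 && hops < best_hops) {
			best_hops = hops;
			best = peers[i];
		}
	}
	return best;	/* NULL if no peer is in the topology */
}
]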
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -27,7 +27,6 @@  struct amdgpu_hive_info {  	uint64_t		hive_id;  	struct list_head	device_list; -	struct psp_xgmi_topology_info	topology_info;  	int number_devices;  	struct mutex hive_lock, reset_lock;  	struct kobject *kobj; @@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev  int amdgpu_xgmi_add_device(struct amdgpu_device *adev);  void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);  int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, +		struct amdgpu_device *peer_adev);  static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,  		struct amdgpu_device *bo_adev) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 07c1f239e9c3..3a4f20766a39 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1804,6 +1804,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev)  	return false;  } +static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev) +{ +	uint64_t nak_r, nak_g; + +	/* Get the number of NAKs received and generated */ +	nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); +	nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + +	/* Add the total number of NAKs, i.e the number of replays */ +	return (nak_r + nak_g); +} +  static const struct amdgpu_asic_funcs cik_asic_funcs =  {  	.read_disabled_bios = &cik_read_disabled_bios, @@ -1821,6 +1833,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =  	.init_doorbell_index = &legacy_doorbell_index_init,  	.get_pcie_usage = &cik_get_pcie_usage,  	.need_reset_on_init = &cik_need_reset_on_init, +	.get_pcie_replay_count = &cik_get_pcie_replay_count,  };  static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index d5ebe566809b..8c09bf994acd 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -105,6 +105,431 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,  		*flags |= AMD_CG_SUPPORT_DF_MGCG;  } +/* hold counter assignment per gpu struct */ +struct df_v3_6_event_mask { +		struct amdgpu_device gpu; +		uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS]; +}; + +/* get assigned df perfmon ctr as int */ +static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, +				      uint64_t config, +				      int *counter) +{ +	struct df_v3_6_event_mask *mask; +	int i; + +	mask = container_of(adev, struct df_v3_6_event_mask, gpu); + +	for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { +		if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) { +			*counter = i; +			return; +		} +	} +} + +/* get address based on counter assignment */ +static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, +				 uint64_t config, +				 int is_ctrl, +				 uint32_t *lo_base_addr, +				 uint32_t *hi_base_addr) +{ + +	int target_cntr = -1; + +	df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + +	if (target_cntr < 0) +		return; + +	switch (target_cntr) { + +	case 0: +		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0; +		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0; +		break; +	case 1: +		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1; +		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1; +		break; +	case 2: +		*lo_base_addr = is_ctrl ? 
smnPerfMonCtlLo2 : smnPerfMonCtrLo2; +		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2; +		break; +	case 3: +		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3; +		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3; +		break; + +	} + +} + +/* get read counter address */ +static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev, +					  uint64_t config, +					  uint32_t *lo_base_addr, +					  uint32_t *hi_base_addr) +{ +	df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr); +} + +/* get control counter settings i.e. address and values to set */ +static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, +					  uint64_t config, +					  uint32_t *lo_base_addr, +					  uint32_t *hi_base_addr, +					  uint32_t *lo_val, +					  uint32_t *hi_val) +{ + +	uint32_t eventsel, instance, unitmask; +	uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0; + +	df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); + +	if (lo_val == NULL || hi_val == NULL) +		return; + +	if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { +		DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x", +				*lo_base_addr, *hi_base_addr); +		return; +	} + +	eventsel = GET_EVENT(config); +	instance = GET_INSTANCE(config); +	unitmask = GET_UNITMASK(config); + +	es_5_0 = eventsel & 0x3FUL; +	es_13_6 = instance; +	es_13_0 = (es_13_6 << 6) + es_5_0; +	es_13_12 = (es_13_0 & 0x03000UL) >> 12; +	es_11_8 = (es_13_0 & 0x0F00UL) >> 8; +	es_7_0 = es_13_0 & 0x0FFUL; +	*lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8); +	*hi_val = (es_11_8 | ((es_13_12)<<(29))); +} + +/* assign df performance counters for read */ +static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, +				   uint64_t config, +				   int *is_assigned) +{ + +	struct df_v3_6_event_mask *mask; +	int i, target_cntr; + +	target_cntr = -1; + +	*is_assigned = 0; + +	df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + +	if (target_cntr >= 0) { +		*is_assigned = 1; +		return 0; +	} + +	mask = container_of(adev, struct df_v3_6_event_mask, gpu); + +	for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { +		if (mask->config_assign_mask[i] == 0ULL) { +			mask->config_assign_mask[i] = config & 0x0FFFFFFUL; +			return 0; +		} +	} + +	return -ENOSPC; +} + +/* release performance counter */ +static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, +				     uint64_t config) +{ + +	struct df_v3_6_event_mask *mask; +	int target_cntr; + +	target_cntr = -1; + +	df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + +	mask = container_of(adev, struct df_v3_6_event_mask, gpu); + +	if (target_cntr >= 0) +		mask->config_assign_mask[target_cntr] = 0ULL; + +} + +/* + * get xgmi link counters via programmable data fabric (df) counters (max 4) + * using cake tx event. 
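[Editor's note: for the vega20 instance-0 XGMI TX event (event 0x7, instance 0x46, unitmask 0x2), the bit packing in df_v3_6_pmc_get_ctrl_settings() works out to es_13_0 = (0x46 << 6) + 0x7 = 0x1187, hence es_7_0 = 0x87, es_11_8 = 0x1, es_13_12 = 0x1, giving lo_val = 0x287 and hi_val = 0x20000001. A standalone recomputation as a sanity check (userspace, not part of the patch):

/* Standalone recomputation of the PerfMonCtl packing for the
 * instance-0 XGMI TX event; mirrors df_v3_6_pmc_get_ctrl_settings(). */
#include <stdint.h>
#include <assert.h>

int main(void)
{
	uint32_t eventsel = 0x7, instance = 0x46, unitmask = 0x2;
	uint32_t es_13_0 = (instance << 6) + (eventsel & 0x3F);
	uint32_t lo_val = (es_13_0 & 0xFF) | ((unitmask & 0xF) << 8);
	uint32_t hi_val = ((es_13_0 & 0x0F00) >> 8) |
			  (((es_13_0 & 0x3000) >> 12) << 29);

	assert(lo_val == 0x287 && hi_val == 0x20000001);
	return 0;
}
]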
+ * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @count -> counters to pass + * + */ + +static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev, +				       int instance, +				       uint64_t *count) +{ +	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; +	uint64_t config; + +	config = GET_INSTANCE_CONFIG(instance); + +	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, +				      &hi_base_addr); + +	if ((lo_base_addr == 0) || (hi_base_addr == 0)) +		return; + +	lo_val = RREG32_PCIE(lo_base_addr); +	hi_val = RREG32_PCIE(hi_base_addr); + +	*count  = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); +} + +/* + * reset xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * + */ +static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev, +					 int instance) +{ +	uint32_t lo_base_addr, hi_base_addr; +	uint64_t config; + +	config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16); + +	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, +				      &hi_base_addr); + +	if ((lo_base_addr == 0) || (hi_base_addr == 0)) +		return; + +	WREG32_PCIE(lo_base_addr, 0UL); +	WREG32_PCIE(hi_base_addr, 0UL); +} + +/* + * add xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * + */ + +static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev, +				      int instance) +{ +	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; +	uint64_t config; +	int ret, is_assigned; + +	if (instance < 0 || instance > 1) +		return -EINVAL; + +	config = GET_INSTANCE_CONFIG(instance); + +	ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); + +	if (ret || is_assigned) +		return ret; + +	df_v3_6_pmc_get_ctrl_settings(adev, +			config, +			&lo_base_addr, +			&hi_base_addr, +			&lo_val, +			&hi_val); + +	WREG32_PCIE(lo_base_addr, lo_val); +	WREG32_PCIE(hi_base_addr, hi_val); + +	return ret; +} + + +/* + * start xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @is_enable -> either resume or assign event via df perfmon + * + */ + +static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev, +					int instance, +					int is_enable) +{ +	uint32_t lo_base_addr, hi_base_addr, lo_val; +	uint64_t config; +	int ret; + +	if (instance < 0 || instance > 1) +		return -EINVAL; + +	if (is_enable) { + +		ret = df_v3_6_add_xgmi_link_cntr(adev, instance); + +		if (ret) +			return ret; + +	} else { + +		config = GET_INSTANCE_CONFIG(instance); + +		df_v3_6_pmc_get_ctrl_settings(adev, +				config, +				&lo_base_addr, +				&hi_base_addr, +				NULL, +				NULL); + +		if (lo_base_addr == 0) +			return -EINVAL; + +		lo_val = RREG32_PCIE(lo_base_addr); + +		WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); + +		ret = 0; +	} + +	return ret; + +} + +/* + * start xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @is_enable -> either pause or unassign event via df perfmon + * + */ + +static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev, +				       int instance, +				       int is_disable) +{ + +	uint32_t lo_base_addr, hi_base_addr, lo_val; +	uint64_t config; + +	config = GET_INSTANCE_CONFIG(instance); + +	if (is_disable) { +		df_v3_6_reset_xgmi_link_cntr(adev, instance); +		df_v3_6_pmc_release_cntr(adev, config); +	} else { + +		df_v3_6_pmc_get_ctrl_settings(adev, +				config, +				
&lo_base_addr, +				&hi_base_addr, +				NULL, +				NULL); + +		if ((lo_base_addr == 0) || (hi_base_addr == 0)) +			return -EINVAL; + +		lo_val = RREG32_PCIE(lo_base_addr); + +		WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); +	} + +	return 0; +} + +static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, +			     int is_enable) +{ +	int xgmi_tx_link, ret = 0; + +	switch (adev->asic_type) { +	case CHIP_VEGA20: +		xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 +					: (IS_DF_XGMI_1_TX(config) ? 1 : -1); + +		if (xgmi_tx_link >= 0) +			ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link, +						      is_enable); + +		if (ret) +			return ret; + +		ret = 0; +		break; +	default: +		break; +	} + +	return ret; +} + +static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, +			    int is_disable) +{ +	int xgmi_tx_link, ret = 0; + +	switch (adev->asic_type) { +	case CHIP_VEGA20: +			xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 +				: (IS_DF_XGMI_1_TX(config) ? 1 : -1); + +			if (xgmi_tx_link >= 0) { +				ret = df_v3_6_stop_xgmi_link_cntr(adev, +								  xgmi_tx_link, +								  is_disable); +				if (ret) +					return ret; +			} + +			ret = 0; +			break; +	default: +		break; +	} + +	return ret; +} + +static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, +				  uint64_t config, +				  uint64_t *count) +{ + +	int xgmi_tx_link; + +	switch (adev->asic_type) { +	case CHIP_VEGA20: +		xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 +					: (IS_DF_XGMI_1_TX(config) ? 1 : -1); + +		if (xgmi_tx_link >= 0) { +			df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link); +			df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count); +		} + +		break; +	default: +		break; +	} + +} +  const struct amdgpu_df_funcs df_v3_6_funcs = {  	.init = df_v3_6_init,  	.enable_broadcast_mode = df_v3_6_enable_broadcast_mode, @@ -113,4 +538,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {  	.update_medium_grain_clock_gating =  			df_v3_6_update_medium_grain_clock_gating,  	.get_clockgating_state = df_v3_6_get_clockgating_state, +	.pmc_start = df_v3_6_pmc_start, +	.pmc_stop = df_v3_6_pmc_stop, +	.pmc_get_count = df_v3_6_pmc_get_count  }; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h index e79c58e5efcb..fcffd807764d 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -35,6 +35,23 @@ enum DF_V3_6_MGCG {  	DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15  }; +/* Defined in global_features.h as FTI_PERFMON_VISIBLE */ +#define AMDGPU_DF_MAX_COUNTERS		4 + +/* get flags from df perfmon config */ +#define GET_EVENT(x)			(x & 0xFFUL) +#define GET_INSTANCE(x)			((x >> 8) & 0xFFUL) +#define GET_UNITMASK(x)			((x >> 16) & 0xFFUL) +#define GET_INSTANCE_CONFIG(x)		(0ULL | (0x07ULL) \ +					| ((0x046ULL + x) << 8) \ +					| (0x02 << 16)) + +/* df event conf macros */ +#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \ +		&& GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2) +#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \ +		&& GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2) +  extern const struct amdgpu_df_funcs df_v3_6_funcs;  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index a59e0fdf5a97..4cd1731d62fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4493,12 +4493,8 @@ static int gfx_v7_0_sw_init(void *handle)  static int gfx_v7_0_sw_fini(void *handle)  { -	int i;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; - -	
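[Editor's note: the three new hooks wired into amdgpu_df_funcs below compose into an assign/start/read/stop lifecycle for the data-fabric counters. A hypothetical consumer, assuming the function table is reachable as adev->df_funcs and using the instance-0 XGMI TX encoding from df_v3_6.h; note that pmc_get_count() resets the counter before reading, so each read is a fresh sample:

/* Hypothetical consumer of the new DF perfmon hooks. */
static int sample_xgmi_tx(struct amdgpu_device *adev, uint64_t *count)
{
	uint64_t config = GET_INSTANCE_CONFIG(0);	/* event 0x7/0x46/0x2 */
	int r;

	r = adev->df_funcs->pmc_start(adev, config, 1);	/* assign + enable */
	if (r)
		return r;
	/* ... let XGMI traffic run for the sampling window ... */
	adev->df_funcs->pmc_get_count(adev, config, count);
	return adev->df_funcs->pmc_stop(adev, config, 1); /* release cntr */
}
]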
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); -	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); -	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); +	int i;  	for (i = 0; i < adev->gfx.num_gfx_rings; i++)  		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -5070,30 +5066,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)  static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)  {  	/* init asci gds info */ -	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); -	adev->gds.gws.total_size = 64; -	adev->gds.oa.total_size = 16; +	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); +	adev->gds.gws_size = 64; +	adev->gds.oa_size = 16;  	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); - -	if (adev->gds.mem.total_size == 64 * 1024) { -		adev->gds.mem.gfx_partition_size = 4096; -		adev->gds.mem.cs_partition_size = 4096; - -		adev->gds.gws.gfx_partition_size = 4; -		adev->gds.gws.cs_partition_size = 4; - -		adev->gds.oa.gfx_partition_size = 4; -		adev->gds.oa.cs_partition_size = 1; -	} else { -		adev->gds.mem.gfx_partition_size = 1024; -		adev->gds.mem.cs_partition_size = 1024; - -		adev->gds.gws.gfx_partition_size = 16; -		adev->gds.gws.cs_partition_size = 16; - -		adev->gds.oa.gfx_partition_size = 4; -		adev->gds.oa.cs_partition_size = 4; -	}  } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 02955e6e9dd9..25400b708722 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2057,12 +2057,8 @@ static int gfx_v8_0_sw_init(void *handle)  static int gfx_v8_0_sw_fini(void *handle)  { -	int i;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; - -	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); -	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); -	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); +	int i;  	for (i = 0; i < adev->gfx.num_gfx_rings; i++)  		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -7010,30 +7006,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)  static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)  {  	/* init asci gds info */ -	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); -	adev->gds.gws.total_size = 64; -	adev->gds.oa.total_size = 16; +	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); +	adev->gds.gws_size = 64; +	adev->gds.oa_size = 16;  	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); - -	if (adev->gds.mem.total_size == 64 * 1024) { -		adev->gds.mem.gfx_partition_size = 4096; -		adev->gds.mem.cs_partition_size = 4096; - -		adev->gds.gws.gfx_partition_size = 4; -		adev->gds.gws.cs_partition_size = 4; - -		adev->gds.oa.gfx_partition_size = 4; -		adev->gds.oa.cs_partition_size = 1; -	} else { -		adev->gds.mem.gfx_partition_size = 1024; -		adev->gds.mem.cs_partition_size = 1024; - -		adev->gds.gws.gfx_partition_size = 16; -		adev->gds.gws.cs_partition_size = 16; - -		adev->gds.oa.gfx_partition_size = 4; -		adev->gds.oa.cs_partition_size = 4; -	}  }  static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ba67d1023264..c763733619fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -34,6 +34,7 @@  #include "vega10_enum.h"  #include "hdp/hdp_4_0_offset.h" +#include "soc15.h"  #include "soc15_common.h"  #include "clearstate_gfx9.h"  #include "v9_structs.h" @@ -307,12 +308,14 @@ 
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)  {  	switch (adev->asic_type) {  	case CHIP_VEGA10: -		soc15_program_register_sequence(adev, -						 golden_settings_gc_9_0, -						 ARRAY_SIZE(golden_settings_gc_9_0)); -		soc15_program_register_sequence(adev, -						 golden_settings_gc_9_0_vg10, -						 ARRAY_SIZE(golden_settings_gc_9_0_vg10)); +		if (!amdgpu_virt_support_skip_setting(adev)) { +			soc15_program_register_sequence(adev, +							 golden_settings_gc_9_0, +							 ARRAY_SIZE(golden_settings_gc_9_0)); +			soc15_program_register_sequence(adev, +							 golden_settings_gc_9_0_vg10, +							 ARRAY_SIZE(golden_settings_gc_9_0_vg10)); +		}  		break;  	case CHIP_VEGA12:  		soc15_program_register_sequence(adev, @@ -1458,8 +1461,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)  	/* GDS reserve memory: 64 bytes alignment */  	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); -	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; -	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; +	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;  	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);  	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); @@ -1567,7 +1569,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)  	gfx_v9_0_write_data_to_reg(ring, 0, false,  				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), -			           (adev->gds.mem.total_size + +			           (adev->gds.gds_size +  				    adev->gfx.ngg.gds_reserve_size));  	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); @@ -1781,10 +1783,6 @@ static int gfx_v9_0_sw_fini(void *handle)  		kfree(ras_if);  	} -	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); -	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); -	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); -  	for (i = 0; i < adev->gfx.num_gfx_rings; i++)  		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);  	for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -1834,7 +1832,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh  	else  		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); -	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); +	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);  }  static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) @@ -1902,8 +1900,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)  	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {  		soc15_grbm_select(adev, 0, 0, 0, i);  		/* CP and shaders */ -		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); -		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); +		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); +		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);  	}  	soc15_grbm_select(adev, 0, 0, 0, 0);  	mutex_unlock(&adev->srbm_mutex); @@ -1914,7 +1912,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)  	u32 tmp;  	int i; -	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); +	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);  	gfx_v9_0_tiling_mode_table_init(adev); @@ -1957,7 +1955,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)  	 */  	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); -	WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, +	WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,  		   (adev->gfx.config.sc_prim_fifo_size_frontend <<  			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |  		   
(adev->gfx.config.sc_prim_fifo_size_backend << @@ -2024,11 +2022,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,  static void gfx_v9_0_init_csb(struct amdgpu_device *adev)  {  	/* csib */ -	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), +	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),  			adev->gfx.rlc.clear_state_gpu_addr >> 32); -	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), +	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),  			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); -	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), +	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),  			adev->gfx.rlc.clear_state_size);  } @@ -2498,7 +2496,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)  		for (i = 0; i < adev->gfx.num_gfx_rings; i++)  			adev->gfx.gfx_ring[i].sched.ready = false;  	} -	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); +	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);  	udelay(50);  } @@ -2696,9 +2694,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)  	int i;  	if (enable) { -		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); +		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);  	} else { -		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, +		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,  			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));  		for (i = 0; i < adev->gfx.num_compute_rings; i++)  			adev->gfx.compute_ring[i].sched.ready = false; @@ -2759,9 +2757,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)  	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);  	tmp &= 0xffffff00;  	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); -	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); +	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);  	tmp |= 0x80; -	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); +	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);  }  static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) @@ -2979,67 +2977,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)  	/* disable wptr polling */  	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); -	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,  	       mqd->cp_hqd_eop_base_addr_lo); -	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,  	       mqd->cp_hqd_eop_base_addr_hi);  	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ -	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,  	       mqd->cp_hqd_eop_control);  	/* enable doorbell? 
*/ -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,  	       mqd->cp_hqd_pq_doorbell_control);  	/* disable the queue if it's active */  	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { -		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); +		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);  		for (j = 0; j < adev->usec_timeout; j++) {  			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))  				break;  			udelay(1);  		} -		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, +		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,  		       mqd->cp_hqd_dequeue_request); -		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, +		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,  		       mqd->cp_hqd_pq_rptr); -		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, +		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,  		       mqd->cp_hqd_pq_wptr_lo); -		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, +		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,  		       mqd->cp_hqd_pq_wptr_hi);  	}  	/* set the pointer to the MQD */ -	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, +	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,  	       mqd->cp_mqd_base_addr_lo); -	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, +	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,  	       mqd->cp_mqd_base_addr_hi);  	/* set MQD vmid to 0 */ -	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, +	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,  	       mqd->cp_mqd_control);  	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,  	       mqd->cp_hqd_pq_base_lo); -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,  	       mqd->cp_hqd_pq_base_hi);  	/* set up the HQD, this is similar to CP_RB0_CNTL */ -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,  	       mqd->cp_hqd_pq_control);  	/* set the wb address whether it's enabled or not */ -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,  				mqd->cp_hqd_pq_rptr_report_addr_lo); -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,  				mqd->cp_hqd_pq_rptr_report_addr_hi);  	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,  	       mqd->cp_hqd_pq_wptr_poll_addr_lo); -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,  	       mqd->cp_hqd_pq_wptr_poll_addr_hi);  	/* enable the doorbell if requested */ @@ -3054,19 +3052,19 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)  	       mqd->cp_hqd_pq_doorbell_control);  	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,  	       mqd->cp_hqd_pq_wptr_lo); -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,  	       mqd->cp_hqd_pq_wptr_hi);  	/* set the vmid for the queue */ -	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); -	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,  	       mqd->cp_hqd_persistent_state);  	/* activate the queue */ -	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,  	   
    mqd->cp_hqd_active);  	if (ring->use_doorbell) @@ -3083,7 +3081,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)  	/* disable the queue if it's active */  	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { -		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); +		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);  		for (j = 0; j < adev->usec_timeout; j++) {  			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) @@ -3095,21 +3093,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)  			DRM_DEBUG("KIQ dequeue request failed.\n");  			/* Manual disable if dequeue request times out */ -			WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0); +			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);  		} -		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, +		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,  		      0);  	} -	WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0); -	WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0); -	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0); -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); -	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);  	return 0;  } @@ -3529,6 +3527,241 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,  				   (1 << (oa_size + oa_base)) - (1 << oa_base));  } +static const u32 vgpr_init_compute_shader[] = +{ +	0xb07c0000, 0xbe8000ff, +	0x000000f8, 0xbf110800, +	0x7e000280, 0x7e020280, +	0x7e040280, 0x7e060280, +	0x7e080280, 0x7e0a0280, +	0x7e0c0280, 0x7e0e0280, +	0x80808800, 0xbe803200, +	0xbf84fff5, 0xbf9c0000, +	0xd28c0001, 0x0001007f, +	0xd28d0001, 0x0002027e, +	0x10020288, 0xb8810904, +	0xb7814000, 0xd1196a01, +	0x00000301, 0xbe800087, +	0xbefc00c1, 0xd89c4000, +	0x00020201, 0xd89cc080, +	0x00040401, 0x320202ff, +	0x00000800, 0x80808100, +	0xbf84fff8, 0x7e020280, +	0xbf810000, 0x00000000, +}; + +static const u32 sgpr_init_compute_shader[] = +{ +	0xb07c0000, 0xbe8000ff, +	0x0000005f, 0xbee50080, +	0xbe812c65, 0xbe822c65, +	0xbe832c65, 0xbe842c65, +	0xbe852c65, 0xb77c0005, +	0x80808500, 0xbf84fff8, +	0xbe800080, 0xbf810000, +}; + +static const struct soc15_reg_entry vgpr_init_regs[] = { +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */ +}; + +static const struct soc15_reg_entry sgpr_init_regs[] = { +   
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ +   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, +}; + +static const struct soc15_reg_entry sec_ded_counter_registers[] = { +   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) }, +   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) }, +   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) }, +   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) }, +   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) }, +   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) }, +   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) }, +}; + +static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) +{ +	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; +	struct amdgpu_ib ib; +	struct dma_fence *f = NULL; +	int r, i, j; +	unsigned total_size, vgpr_offset, sgpr_offset; +	u64 gpu_addr; + +	/* only support when RAS is enabled */ +	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) +		return 0; + +	/* bail if the compute ring is not ready */ +	if (!ring->sched.ready) +		return 0; + +	total_size = +		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; +	total_size += +		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; +	total_size = ALIGN(total_size, 256); +	vgpr_offset = total_size; +	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); +	sgpr_offset = total_size; +	total_size += sizeof(sgpr_init_compute_shader); + +	/* allocate an indirect buffer to put the commands in */ +	memset(&ib, 0, sizeof(ib)); +	r = amdgpu_ib_get(adev, NULL, total_size, &ib); +	if (r) { +		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); +		return r; +	} + +	/* load the compute shaders */ +	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) +		ib.ptr[i + (vgpr_offset / 4)] = 
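The total_size arithmetic above is easier to audit once each PM4 packet is priced in dwords: a SET_SH_REG of one register is 3 dwords (header, register offset, value), the two-register shader-address write is 4, DISPATCH_DIRECT is 5 (header plus x/y/z plus dispatch initiator), and the EVENT_WRITE partial flush is 2. The trailing * 4 converts dwords to bytes, and the ALIGN(..., 256) keeps the shader binaries that follow 256-byte aligned, as required since COMPUTE_PGM_LO takes the address shifted right by 8. A commented restatement of the same math (a sketch only, not a helper in this patch):

/* dword cost of one init-dispatch stream, per the expressions above */
enum {
	DW_PER_REG_WRITE = 3,	/* PACKET3_SET_SH_REG: hdr + offset + value */
	DW_SHADER_ADDR   = 4,	/* hdr + offset + COMPUTE_PGM_LO + _HI */
	DW_DISPATCH      = 5,	/* hdr + x + y + z + dispatch initiator */
	DW_PARTIAL_FLUSH = 2,	/* EVENT_WRITE: hdr + event dword */
};

static unsigned int edc_stream_bytes(unsigned int num_regs)
{
	return (num_regs * DW_PER_REG_WRITE + DW_SHADER_ADDR +
		DW_DISPATCH + DW_PARTIAL_FLUSH) * 4;	/* dwords to bytes */
}

After both dispatches retire, the loop at the end of the function reads every entry of sec_ded_counter_registers once per GRBM index selection; these EDC counters are read-on-clear, so the sweep leaves them zeroed for RAS.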
vgpr_init_compute_shader[i];
+
+	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
+		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
+
+	/* init the ib length to 0 */
+	ib.length_dw = 0;
+
+	/* VGPR */
+	/* write the register state for the compute dispatch */
+	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
+		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
+								- PACKET3_SET_SH_REG_START;
+		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
+	}
+	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+							- PACKET3_SET_SH_REG_START;
+	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+	/* write dispatch packet */
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+	ib.ptr[ib.length_dw++] = 128; /* x */
+	ib.ptr[ib.length_dw++] = 1; /* y */
+	ib.ptr[ib.length_dw++] = 1; /* z */
+	ib.ptr[ib.length_dw++] =
+		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+	/* write CS partial flush packet */
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+	/* SGPR */
+	/* write the register state for the compute dispatch */
+	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
+		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
+								- PACKET3_SET_SH_REG_START;
+		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
+	}
+	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+							- PACKET3_SET_SH_REG_START;
+	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+	/* write dispatch packet */
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+	ib.ptr[ib.length_dw++] = 128; /* x */
+	ib.ptr[ib.length_dw++] = 1; /* y */
+	ib.ptr[ib.length_dw++] = 1; /* z */
+	ib.ptr[ib.length_dw++] =
+		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+	/* write CS partial flush packet */
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+	/* schedule the ib on the ring */
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	if (r) {
+		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
+		goto fail;
+	}
+
+	/* wait for the GPU to finish processing the IB */
+	r = dma_fence_wait(f, false);
+	if (r) {
+		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+		goto fail;
+	}
+
+	/* read back registers to clear the counters */
+	mutex_lock(&adev->grbm_idx_mutex);
+	for (j = 0; j < 16; j++) {
+		gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
+		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+		gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
+		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+		gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
+		for (i = 0; i < 
ARRAY_SIZE(sec_ded_counter_registers); i++) +			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); +		gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j); +		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) +			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); +	} +	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); +	mutex_unlock(&adev->grbm_idx_mutex); + +fail: +	amdgpu_ib_free(adev, &ib, NULL); +	dma_fence_put(f); + +	return r; +} +  static int gfx_v9_0_early_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3570,8 +3803,31 @@ static int gfx_v9_0_ecc_late_init(void *handle)  		return 0;  	} -	if (*ras_if) +	/* requires IBs so do in late init after IB pool is initialized */ +	r = gfx_v9_0_do_edc_gpr_workarounds(adev); +	if (r) +		return r; + +	/* handle resume path. */ +	if (*ras_if) { +		/* resend ras TA enable cmd during resume. +		 * prepare to handle failure. +		 */ +		ih_info.head = **ras_if; +		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); +		if (r) { +			if (r == -EAGAIN) { +				/* request a gpu reset. will run again. */ +				amdgpu_ras_request_reset_on_boot(adev, +						AMDGPU_RAS_BLOCK__GFX); +				return 0; +			} +			/* fail to enable ras, cleanup all. */ +			goto irq; +		} +		/* enable successfully. continue. */  		goto resume; +	}  	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);  	if (!*ras_if) @@ -3580,8 +3836,14 @@ static int gfx_v9_0_ecc_late_init(void *handle)  	**ras_if = ras_block;  	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); -	if (r) +	if (r) { +		if (r == -EAGAIN) { +			amdgpu_ras_request_reset_on_boot(adev, +					AMDGPU_RAS_BLOCK__GFX); +			r = 0; +		}  		goto feature; +	}  	ih_info.head = **ras_if;  	fs_info.head = **ras_if; @@ -3614,7 +3876,7 @@ interrupt:  feature:  	kfree(*ras_if);  	*ras_if = NULL; -	return -EINVAL; +	return r;  }  static int gfx_v9_0_late_init(void *handle) @@ -4319,8 +4581,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,  	mutex_lock(&adev->srbm_mutex);  	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); -	WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); -	WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); +	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);  	soc15_grbm_select(adev, 0, 0, 0, 0);  	mutex_unlock(&adev->srbm_mutex); @@ -5056,13 +5318,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)  	case CHIP_VEGA10:  	case CHIP_VEGA12:  	case CHIP_VEGA20: -		adev->gds.mem.total_size = 0x10000; +		adev->gds.gds_size = 0x10000;  		break;  	case CHIP_RAVEN: -		adev->gds.mem.total_size = 0x1000; +		adev->gds.gds_size = 0x1000;  		break;  	default: -		adev->gds.mem.total_size = 0x10000; +		adev->gds.gds_size = 0x10000;  		break;  	} @@ -5086,28 +5348,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)  		break;  	} -	adev->gds.gws.total_size = 64; -	adev->gds.oa.total_size = 16; - -	if (adev->gds.mem.total_size == 64 * 1024) { -		adev->gds.mem.gfx_partition_size = 4096; -		adev->gds.mem.cs_partition_size = 4096; - -		adev->gds.gws.gfx_partition_size = 4; -		adev->gds.gws.cs_partition_size = 4; - -		adev->gds.oa.gfx_partition_size = 4; -		adev->gds.oa.cs_partition_size = 1; -	} else { -		adev->gds.mem.gfx_partition_size = 1024; -		adev->gds.mem.cs_partition_size = 1024; - -		adev->gds.gws.gfx_partition_size = 16; -		adev->gds.gws.cs_partition_size = 16; - -		adev->gds.oa.gfx_partition_size = 
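This resume-path shape is added identically to gfx_v9_0, gmc_v9_0 and sdma_v4_0 in this diff: the RAS TA enable command is resent after suspend, and -EAGAIN is treated as "TA not ready, schedule a reset and retry" rather than as a fatal error (note the error paths now return r instead of a hard -EINVAL). The shared control flow, reduced to a sketch (this helper does not exist in the patch; each IP open-codes it):

static int ras_reenable_on_resume(struct amdgpu_device *adev,
				  struct ras_common_if *head,
				  enum amdgpu_ras_block block)
{
	int r = amdgpu_ras_feature_enable_on_boot(adev, head, 1);

	if (r == -EAGAIN) {
		/* TA not ready yet: queue a GPU reset and run again after it */
		amdgpu_ras_request_reset_on_boot(adev, block);
		return 0;
	}
	return r;	/* 0: enabled; other errors unwind the ras_if */
}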
4; -		adev->gds.oa.cs_partition_size = 4; -	} +	adev->gds.gws_size = 64; +	adev->gds.oa_size = 16;  }  static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 7bb5359d0bbd..0dc8926111e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)  	uint64_t value;  	/* Program the AGP BAR */ -	WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); -	WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); -	WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); +	WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0); +	WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); +	WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);  	/* Program the system aperture low logical page number. */ -	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, +	WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,  		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);  	if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) @@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)  		 * workaround that increase system aperture high address (add 1)  		 * to get rid of the VM fault and hardware hang.  		 */ -		WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, +		WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,  			     max((adev->gmc.fb_end >> 18) + 0x1,  				 adev->gmc.agp_end >> 18));  	else -		WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, +		WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,  			     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);  	/* Set default page address. */ @@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)  			    MTYPE, MTYPE_UC);/* XXX for emulation. */  	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); -	WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); +	WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);  }  static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) @@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)  		 * VF copy registers so vbios post doesn't program them, for  		 * SRIOV driver need to program them  		 */ -		WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, +		WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE,  			     adev->gmc.vram_start >> 24); -		WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, +		WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP,  			     adev->gmc.vram_end >> 24);  	} @@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)  				MC_VM_MX_L1_TLB_CNTL,  				ENABLE_ADVANCED_DRIVER_MODEL,  				0); -	WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); +	WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);  	/* Setup L2 cache */  	WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 8a3b5e6fc6c9..8bf2ba310fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -289,7 +289,7 @@ out:   *   * @adev: amdgpu_device pointer   * - * Load the GDDR MC ucode into the hw (CIK). + * Load the GDDR MC ucode into the hw (VI).   * Returns 0 on success, error on failure.   
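The gfxhub hunk above gets the same RLC-proxy conversion, and it is also where the system aperture bounds are derived: the LOW/HIGH registers hold 256KB-granular page numbers (hence the >> 18), and the window must span both the framebuffer and the AGP aperture, which is what the min()/max() pairing guarantees; Raven parts with rev_id >= 0x8 get the high bound bumped by one unit to dodge a VM-fault hang, per the comment. A worked example with made-up addresses (illustrative values only, not from this patch):

/* hypothetical layout: FB at 256GB, AGP window directly behind it */
u64 fb_start  = 0x4000000000ULL, fb_end  = 0x40ffffffffULL;
u64 agp_start = 0x4100000000ULL, agp_end = 0x413fffffffULL;

u32 low  = min(fb_start, agp_start) >> 18;	/* 0x4000000000 >> 18 */
u32 high = max(fb_end, agp_end) >> 18;		/* covers both windows */
/* written via WREG32_SOC15_RLC to MC_VM_SYSTEM_APERTURE_{LOW,HIGH}_ADDR */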
*/  static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev) @@ -443,7 +443,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,   * @adev: amdgpu_device pointer   *   * Set the location of vram, gart, and AGP in the GPU's - * physical address space (CIK). + * physical address space (VI).   */  static void gmc_v8_0_mc_program(struct amdgpu_device *adev)  { @@ -515,7 +515,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)   * @adev: amdgpu_device pointer   *   * Look up the amount of vram, vram width, and decide how to place - * vram and gart within the GPU's physical address space (CIK). + * vram and gart within the GPU's physical address space (VI).   * Returns 0 for success.   */  static int gmc_v8_0_mc_init(struct amdgpu_device *adev) @@ -630,7 +630,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)   * @adev: amdgpu_device pointer   * @vmid: vm instance to flush   * - * Flush the TLB for the requested page table (CIK). + * Flush the TLB for the requested page table (VI).   */  static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,  				uint32_t vmid, uint32_t flush_type) @@ -800,7 +800,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)   * This sets up the TLBs, programs the page tables for VMID0,   * sets up the hw for VMIDs 1-15 which are allocated on   * demand, and sets up the global locations for the LDS, GDS, - * and GPUVM for FSA64 clients (CIK). + * and GPUVM for FSA64 clients (VI).   * Returns 0 for success, errors for failure.   */  static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) @@ -948,7 +948,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)   *   * @adev: amdgpu_device pointer   * - * This disables all VM page table (CIK). + * This disables all VM page table (VI).   */  static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)  { @@ -978,7 +978,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)   * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value   * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value   * - * Print human readable fault information (CIK). + * Print human readable fault information (VI).   */  static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,  				     u32 addr, u32 mc_client, unsigned pasid) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3b7370d914a5..602593bab7a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -687,8 +687,25 @@ static int gmc_v9_0_ecc_late_init(void *handle)  		return 0;  	}  	/* handle resume path. */ -	if (*ras_if) +	if (*ras_if) { +		/* resend ras TA enable cmd during resume. +		 * prepare to handle failure. +		 */ +		ih_info.head = **ras_if; +		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); +		if (r) { +			if (r == -EAGAIN) { +				/* request a gpu reset. will run again. */ +				amdgpu_ras_request_reset_on_boot(adev, +						AMDGPU_RAS_BLOCK__UMC); +				return 0; +			} +			/* fail to enable ras, cleanup all. */ +			goto irq; +		} +		/* enable successfully. continue. 
*/  		goto resume; +	}  	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);  	if (!*ras_if) @@ -697,8 +714,14 @@ static int gmc_v9_0_ecc_late_init(void *handle)  	**ras_if = ras_block;  	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); -	if (r) +	if (r) { +		if (r == -EAGAIN) { +			amdgpu_ras_request_reset_on_boot(adev, +					AMDGPU_RAS_BLOCK__UMC); +			r = 0; +		}  		goto feature; +	}  	ih_info.head = **ras_if;  	fs_info.head = **ras_if; @@ -731,7 +754,7 @@ interrupt:  feature:  	kfree(*ras_if);  	*ras_if = NULL; -	return -EINVAL; +	return r;  } @@ -1100,6 +1123,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_VEGA10: +		if (amdgpu_virt_support_skip_setting(adev)) +			break; +		/* fall through */  	case CHIP_VEGA20:  		soc15_program_register_sequence(adev,  						golden_settings_mmhub_1_0_0, @@ -1164,6 +1190,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)  	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);  	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); +	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); +	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); +  	/* After HDP is initialized, flush HDP.*/  	adev->nbio_funcs->hdp_flush(adev, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 41a9a5779623..05d1d448c8f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)  		WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,  			     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); +	if (amdgpu_virt_support_skip_setting(adev)) +		return; +  	/* Set default page address. 
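The early returns being added to mmhub_v1_0 here and just below (and the golden-register and doorbell-range skips elsewhere in this diff) all key off amdgpu_virt_support_skip_setting(): when the host runs new enough PSP SOS firmware, it has already programmed these MC/MMHUB registers on the guest's behalf, and guest writes would only risk register-protection violations. The gating pattern, as a sketch:

static void program_host_owned_regs(struct amdgpu_device *adev)
{
	if (amdgpu_virt_support_skip_setting(adev))
		return;	/* host driver already programmed these */

	/* ... bare-metal MC/MMHUB register setup ... */
}

The HDP_NONSURFACE_BASE writes added above point HDP's non-surface aperture at the start of VRAM, with the address split across the two registers as the >> 8 and >> 40 shifts suggest.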
*/  	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +  		adev->vm_manager.vram_base_offset; @@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)  {  	uint32_t tmp; +	if (amdgpu_virt_support_skip_setting(adev)) +		return; +  	/* Setup L2 cache */  	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);  	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); @@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)  static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)  { +	if (amdgpu_virt_support_skip_setting(adev)) +		return; +  	WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,  		     0XFFFFFFFF);  	WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, @@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)  				0);  	WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); -	/* Setup L2 cache */ -	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); -	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); -	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); -	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); +	if (!amdgpu_virt_support_skip_setting(adev)) { +		/* Setup L2 cache */ +		tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); +		WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); +		WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); +	}  }  /** @@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)  void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)  {  	u32 tmp; + +	if (amdgpu_virt_support_skip_setting(adev)) +		return; +  	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);  	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,  			RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 2471e7cf75ea..31030f86be86 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -26,6 +26,7 @@  #include "nbio/nbio_6_1_sh_mask.h"  #include "gc/gc_9_0_offset.h"  #include "gc/gc_9_0_sh_mask.h" +#include "mp/mp_9_0_offset.h"  #include "soc15.h"  #include "vega10_ih.h"  #include "soc15_common.h" @@ -343,7 +344,7 @@ flr_done:  	/* Trigger recovery for world switch failure if no TDR */  	if (amdgpu_device_should_recover_gpu(adev) -		&& amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT) +		&& adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)  		amdgpu_device_gpu_recover(adev, NULL);  } @@ -448,6 +449,23 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)  	amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);  } +static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev) +{ +	uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); +	uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + +	adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY; + +	if (rlc_fw_ver >= 0x5d) +		adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; + +	if (sos_fw_ver >= 0x80455) +		adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; + +	if (sos_fw_ver >= 0x8045b) +		adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; +} +  const struct amdgpu_virt_ops xgpu_ai_virt_ops = {  	.req_full_gpu	= xgpu_ai_request_full_gpu_access,  	.rel_full_gpu	= xgpu_ai_release_full_gpu_access, @@ -456,4 +474,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {  	.trans_msg = xgpu_ai_mailbox_trans_msg,  	.get_pp_clk = 
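xgpu_ai_init_reg_access_mode, added above, derives the guest's register-access capabilities from firmware versions read back from the hardware: mmRLC_GPM_GENERAL_6 reports the RLC feature level (RLC-proxied writes from version 0x5d) and mmMP0_SMN_C2PMSG_58 the PSP SOS version (PSP-programmed IH from 0x80455, host-side skip-setting from 0x8045b). Consumers then test the accumulated bitmask, for example (a sketch; the helper name is hypothetical):

static bool can_use_rlc_writes(struct amdgpu_device *adev)
{
	return adev->virt.reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC;
}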
xgpu_ai_get_pp_clk,  	.force_dpm_level = xgpu_ai_force_dpm_level, +	.init_reg_access_mode = xgpu_ai_init_reg_access_mode,  }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 1cdb98ad2db3..73419fa38159 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -29,9 +29,18 @@  #include "nbio/nbio_7_0_sh_mask.h"  #include "nbio/nbio_7_0_smn.h"  #include "vega10_enum.h" +#include <uapi/linux/kfd_ioctl.h>  #define smnNBIF_MGCG_CTRL_LCLK	0x1013a05c +static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev) +{ +	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, +		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); +	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, +		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} +  static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)  {          u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,  				struct amdgpu_ring *ring)  {  	if (!ring || !ring->funcs->emit_wreg) -		WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); +		WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);  	else -		amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( -			NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); +		amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);  }  static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) @@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {  	.ih_control = nbio_v7_0_ih_control,  	.init_registers = nbio_v7_0_init_registers,  	.detect_hw_virt = nbio_v7_0_detect_hw_virt, +	.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,  }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index c69d51598cfe..bfaaa327ae3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -27,9 +27,18 @@  #include "nbio/nbio_7_4_offset.h"  #include "nbio/nbio_7_4_sh_mask.h"  #include "nbio/nbio_7_4_0_smn.h" +#include <uapi/linux/kfd_ioctl.h>  #define smnNBIF_MGCG_CTRL_LCLK	0x1013a21c +static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) +{ +	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, +		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); +	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, +		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} +  static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)  {  	u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,  				struct amdgpu_ring *ring)  {  	if (!ring || !ring->funcs->emit_wreg) -		WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); +		WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);  	else -		amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( -			NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); +		amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);  }  static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) @@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {  	.ih_control = nbio_v7_4_ih_control,  	.init_registers = nbio_v7_4_init_registers,  	.detect_hw_virt = nbio_v7_4_detect_hw_virt, +	.remap_hdp_registers = 
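Both NBIO hunks here follow the same scheme: remap_hdp_registers() aliases the two HDP flush registers into the MMIO hole reserved at adev->rmmio_remap.reg_offset, and hdp_flush() then writes through the alias instead of mmHDP_MEM_COHERENCY_FLUSH_CNTL, so the same page can later be mapped into process space for user-mode HDP flushes (the offsets come from uapi/linux/kfd_ioctl.h). The >> 2 converts a byte offset into the dword register index WREG32 expects; as a sketch:

/* byte offset inside the remapped hole -> dword index for WREG32 */
static u32 hdp_mem_flush_index(struct amdgpu_device *adev)
{
	return (adev->rmmio_remap.reg_offset +
		KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2;
}

/* usage, as in the flush paths above: WREG32_NO_KIQ(hdp_mem_flush_index(adev), 0); */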
nbio_v7_4_remap_hdp_registers,  }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 2f79765b4bdb..7f8edc66ddff 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -94,6 +94,7 @@ enum psp_gfx_cmd_id      GFX_CMD_ID_SAVE_RESTORE = 0x00000008,   /* save/restore HW IP FW */      GFX_CMD_ID_SETUP_VMR    = 0x00000009,   /* setup VMR region */      GFX_CMD_ID_DESTROY_VMR  = 0x0000000A,   /* destroy VMR region */ +    GFX_CMD_ID_PROG_REG     = 0x0000000B,   /* program regs */  }; @@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw      enum psp_gfx_fw_type    fw_type;              /* FW type */  }; +/* Command to setup register program */ +struct psp_gfx_cmd_reg_prog { +	uint32_t	reg_value; +	uint32_t	reg_id; +}; +  /* All GFX ring buffer commands. */  union psp_gfx_commands  { @@ -226,6 +233,7 @@ union psp_gfx_commands      struct psp_gfx_cmd_setup_tmr        cmd_setup_tmr;      struct psp_gfx_cmd_load_ip_fw       cmd_load_ip_fw;      struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; +    struct psp_gfx_cmd_reg_prog       cmd_setup_reg_prog;  }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 143f0fae69d5..3f5827764df0 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -50,6 +50,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");  static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554}; +static bool psp_v3_1_support_vmr_ring(struct psp_context *psp); +static int psp_v3_1_ring_stop(struct psp_context *psp, +			      enum psp_ring_type ring_type); +  static int psp_v3_1_init_microcode(struct psp_context *psp)  {  	struct amdgpu_device *adev = psp->adev; @@ -296,27 +300,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp,  	psp_v3_1_reroute_ih(psp); -	/* Write low address of the ring to C2PMSG_69 */ -	psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); -	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); -	/* Write high address of the ring to C2PMSG_70 */ -	psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); -	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); -	/* Write size of ring to C2PMSG_71 */ -	psp_ring_reg = ring->ring_size; -	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); -	/* Write the ring initialization command to C2PMSG_64 */ -	psp_ring_reg = ring_type; -	psp_ring_reg = psp_ring_reg << 16; -	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - -	/* there might be handshake issue with hardware which needs delay */ -	mdelay(20); - -	/* Wait for response flag (bit 31) in C2PMSG_64 */ -	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), -			   0x80000000, 0x8000FFFF, false); +	if (psp_v3_1_support_vmr_ring(psp)) { +		ret = psp_v3_1_ring_stop(psp, ring_type); +		if (ret) { +			DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n"); +			return ret; +		} + +		/* Write low address of the ring to C2PMSG_102 */ +		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); +		/* Write high address of the ring to C2PMSG_103 */ +		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); +		/* No size initialization for sriov  */ +		/* Write the ring initialization command to C2PMSG_101 */ +		psp_ring_reg = ring_type; +		psp_ring_reg = psp_ring_reg << 16; +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, 
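For SOS firmware that supports a VMR ring (see psp_v3_1_support_vmr_ring below: SR-IOV guests on SOS >= 0x80455), ring creation moves to a second mailbox bank: C2PMSG_102/103 carry the ring address, no size word is written, and the command goes to C2PMSG_101 instead of C2PMSG_64. Both branches share the same submit-and-wait shape, roughly (a sketch of the create path; the stop path writes an unshifted GPCOM command and waits with mask 0x80000000):

static int psp_ring_cmd_and_wait(struct psp_context *psp, u32 cmd_reg, u32 ring_type)
{
	struct amdgpu_device *adev = psp->adev;	/* used implicitly by WREG32 */

	WREG32(cmd_reg, ring_type << 16);	/* command in the high half */
	mdelay(20);				/* PSP handshake settling time */
	/* done when bit 31 rises; the low bits carry the status */
	return psp_wait_for(psp, cmd_reg, 0x80000000, 0x8000FFFF, false);
}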
psp_ring_reg); + +		/* there might be hardware handshake issue which needs delay */ +		mdelay(20); + +		/* Wait for response flag (bit 31) in C2PMSG_101 */ +		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, +					mmMP0_SMN_C2PMSG_101), 0x80000000, +					0x8000FFFF, false); +	} else { + +		/* Write low address of the ring to C2PMSG_69 */ +		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); +		/* Write high address of the ring to C2PMSG_70 */ +		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); +		/* Write size of ring to C2PMSG_71 */ +		psp_ring_reg = ring->ring_size; +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); +		/* Write the ring initialization command to C2PMSG_64 */ +		psp_ring_reg = ring_type; +		psp_ring_reg = psp_ring_reg << 16; +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + +		/* there might be hardware handshake issue which needs delay */ +		mdelay(20); + +		/* Wait for response flag (bit 31) in C2PMSG_64 */ +		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, +					mmMP0_SMN_C2PMSG_64), 0x80000000, +					0x8000FFFF, false); +	}  	return ret;  } @@ -327,16 +361,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,  	unsigned int psp_ring_reg = 0;  	struct amdgpu_device *adev = psp->adev; -	/* Write the ring destroy command to C2PMSG_64 */ -	psp_ring_reg = 3 << 16; -	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - -	/* there might be handshake issue with hardware which needs delay */ -	mdelay(20); - -	/* Wait for response flag (bit 31) in C2PMSG_64 */ -	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), -			   0x80000000, 0x80000000, false); +	if (psp_v3_1_support_vmr_ring(psp)) { +		/* Write the Destroy GPCOM ring command to C2PMSG_101 */ +		psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING; +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg); + +		/* there might be handshake issue which needs delay */ +		mdelay(20); + +		/* Wait for response flag (bit 31) in C2PMSG_101 */ +		ret = psp_wait_for(psp, +				SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), +				0x80000000, 0x80000000, false); +	} else { +		/* Write the ring destroy command to C2PMSG_64 */ +		psp_ring_reg = 3 << 16; +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + +		/* there might be handshake issue which needs delay */ +		mdelay(20); + +		/* Wait for response flag (bit 31) in C2PMSG_64 */ +		ret = psp_wait_for(psp, +				SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), +				0x80000000, 0x80000000, false); +	}  	return ret;  } @@ -375,7 +424,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,  	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;  	/* KM (GPCOM) prepare write pointer */ -	psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); +	if (psp_v3_1_support_vmr_ring(psp)) +		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); +	else +		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);  	/* Update KM RB frame pointer to new frame */  	/* write_frame ptr increments by size of rb_frame in bytes */ @@ -404,7 +456,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,  	/* Update the write Pointer in DWORDs */  	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; -	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); +	if (psp_v3_1_support_vmr_ring(psp)) { +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, 
psp_write_ptr_reg); +		/* send interrupt to PSP for SRIOV ring write pointer update */ +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, +					GFX_CTRL_CMD_ID_CONSUME_CMD); +	} else +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);  	return 0;  } @@ -574,6 +632,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)  	return 0;  } +static bool psp_v3_1_support_vmr_ring(struct psp_context *psp) +{ +	if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455) +		return true; + +	return false; +} +  static const struct psp_funcs psp_v3_1_funcs = {  	.init_microcode = psp_v3_1_init_microcode,  	.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv, @@ -586,6 +652,7 @@ static const struct psp_funcs psp_v3_1_funcs = {  	.compare_sram_data = psp_v3_1_compare_sram_data,  	.smu_reload_quirk = psp_v3_1_smu_reload_quirk,  	.mode1_reset = psp_v3_1_mode1_reset, +	.support_vmr_ring = psp_v3_1_support_vmr_ring,  };  void psp_v3_1_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 9c88ce513d78..7a259c5b6c62 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -210,12 +210,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)  {  	switch (adev->asic_type) {  	case CHIP_VEGA10: -		soc15_program_register_sequence(adev, -						 golden_settings_sdma_4, -						 ARRAY_SIZE(golden_settings_sdma_4)); -		soc15_program_register_sequence(adev, -						 golden_settings_sdma_vg10, -						 ARRAY_SIZE(golden_settings_sdma_vg10)); +		if (!amdgpu_virt_support_skip_setting(adev)) { +			soc15_program_register_sequence(adev, +							 golden_settings_sdma_4, +							 ARRAY_SIZE(golden_settings_sdma_4)); +			soc15_program_register_sequence(adev, +							 golden_settings_sdma_vg10, +							 ARRAY_SIZE(golden_settings_sdma_vg10)); +		}  		break;  	case CHIP_VEGA12:  		soc15_program_register_sequence(adev, @@ -1521,8 +1523,25 @@ static int sdma_v4_0_late_init(void *handle)  	}  	/* handle resume path. */ -	if (*ras_if) +	if (*ras_if) { +		/* resend ras TA enable cmd during resume. +		 * prepare to handle failure. +		 */ +		ih_info.head = **ras_if; +		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); +		if (r) { +			if (r == -EAGAIN) { +				/* request a gpu reset. will run again. */ +				amdgpu_ras_request_reset_on_boot(adev, +						AMDGPU_RAS_BLOCK__SDMA); +				return 0; +			} +			/* fail to enable ras, cleanup all. */ +			goto irq; +		} +		/* enable successfully. continue. 
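psp_v3_1_cmd_submit keeps the KM write pointer in dwords and wraps it modulo the ring size; on the VMR path the pointer lives in C2PMSG_102 and, since there is no doorbell, the PSP is additionally poked with GFX_CTRL_CMD_ID_CONSUME_CMD through C2PMSG_101 to consume the new frame. The wrap arithmetic in isolation (sketch):

/* advance a dword-granular write pointer around the ring */
static u32 psp_ring_advance(u32 wptr_dw, u32 frame_dw, u32 ring_dw)
{
	/* frame_dw = sizeof(struct psp_gfx_rb_frame) / 4, as above */
	return (wptr_dw + frame_dw) % ring_dw;
}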
*/  		goto resume; +	}  	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);  	if (!*ras_if) @@ -1531,8 +1550,14 @@ static int sdma_v4_0_late_init(void *handle)  	**ras_if = ras_block;  	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); -	if (r) +	if (r) { +		if (r == -EAGAIN) { +			amdgpu_ras_request_reset_on_boot(adev, +					AMDGPU_RAS_BLOCK__SDMA); +			r = 0; +		}  		goto feature; +	}  	ih_info.head = **ras_if;  	fs_info.head = **ras_if; @@ -1571,7 +1596,7 @@ interrupt:  feature:  	kfree(*ras_if);  	*ras_if = NULL; -	return -EINVAL; +	return r;  }  static int sdma_v4_0_sw_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 9d8df68893b9..4ff930a47e10 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1375,6 +1375,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,  	*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);  } +static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev) +{ +	uint64_t nak_r, nak_g; + +	/* Get the number of NAKs received and generated */ +	nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); +	nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + +	/* Add the total number of NAKs, i.e the number of replays */ +	return (nak_r + nak_g); +} +  static const struct amdgpu_asic_funcs si_asic_funcs =  {  	.read_disabled_bios = &si_read_disabled_bios, @@ -1393,6 +1405,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =  	.need_full_reset = &si_need_full_reset,  	.get_pcie_usage = &si_get_pcie_usage,  	.need_reset_on_init = &si_need_reset_on_init, +	.get_pcie_replay_count = &si_get_pcie_replay_count,  };  static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b7e594c2bfb4..d9fdd95fd6e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -44,6 +44,7 @@  #include "smuio/smuio_9_0_offset.h"  #include "smuio/smuio_9_0_sh_mask.h"  #include "nbio/nbio_7_0_default.h" +#include "nbio/nbio_7_0_offset.h"  #include "nbio/nbio_7_0_sh_mask.h"  #include "nbio/nbio_7_0_smn.h"  #include "mp/mp_9_0_offset.h" @@ -64,6 +65,9 @@  #include "dce_virtual.h"  #include "mxgpu_ai.h"  #include "amdgpu_smu.h" +#include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" +#include <uapi/linux/kfd_ioctl.h>  #define mmMP0_MISC_CGTT_CTRL0                                                                   0x01b9  #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX                                                          0 @@ -230,7 +234,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,  	grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);  	grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); -	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl); +	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);  }  static void soc15_vga_set_state(struct amdgpu_device *adev, bool state) @@ -385,7 +389,15 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,  			tmp &= ~(entry->and_mask);  			tmp |= entry->or_mask;  		} -		WREG32(reg, tmp); + +		if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) || +			reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) || +			reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) || +			reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG)) +			WREG32_RLC(reg, tmp); +		else +			WREG32(reg, tmp); +  	}  } @@ -475,6 +487,13 @@ static int 
soc15_asic_reset(struct amdgpu_device *adev)  			soc15_asic_get_baco_capability(adev, &baco_reset);  		else  			baco_reset = false; +		if (baco_reset) { +			struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); +			struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + +			if (hive || (ras && ras->supported)) +				baco_reset = false; +		}  		break;  	default:  		baco_reset = false; @@ -606,12 +625,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)  	case CHIP_VEGA20:  		amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);  		amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); -		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); -		if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { -			if (adev->asic_type == CHIP_VEGA20) -				amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); -			else -				amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + +		/* For Vega10 SR-IOV, PSP need to be initialized before IH */ +		if (amdgpu_sriov_vf(adev)) { +			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { +				if (adev->asic_type == CHIP_VEGA20) +					amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); +				else +					amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); +			} +			amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); +		} else { +			amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); +			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { +				if (adev->asic_type == CHIP_VEGA20) +					amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); +				else +					amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); +			}  		}  		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);  		amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); @@ -733,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)  	/* Just return false for soc15 GPUs.  Reset does not seem to  	 * be necessary.  	 
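soc15_asic_reset now vetoes BACO when the GPU belongs to an XGMI hive or has RAS enabled, presumably because a BACO cycle on one hive member would disturb shared link state and discard pending error telemetry; those configurations fall back to the conventional reset. The added guard, pulled out for readability (a sketch; the patch open-codes it):

/* BACO is allowed only on a lone, non-RAS device */
static bool soc15_baco_allowed(struct amdgpu_device *adev, bool baco_capable)
{
	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	return baco_capable && !hive && !(ras && ras->supported);
}

The same file reorders the Vega SR-IOV IP blocks so PSP initializes before IH, matching the comment: on such hosts the interrupt-handler setup (see psp_v3_1_reroute_ih above) has to go through the PSP.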
*/ -	return false; +	if (!amdgpu_passthrough(adev)) +		return false;  	if (adev->flags & AMD_IS_APU)  		return false; @@ -748,6 +780,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)  	return false;  } +static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev) +{ +	uint64_t nak_r, nak_g; + +	/* Get the number of NAKs received and generated */ +	nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK); +	nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED); + +	/* Add the total number of NAKs, i.e the number of replays */ +	return (nak_r + nak_g); +} +  static const struct amdgpu_asic_funcs soc15_asic_funcs =  {  	.read_disabled_bios = &soc15_read_disabled_bios, @@ -765,6 +809,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =  	.init_doorbell_index = &vega10_doorbell_index_init,  	.get_pcie_usage = &soc15_get_pcie_usage,  	.need_reset_on_init = &soc15_need_reset_on_init, +	.get_pcie_replay_count = &soc15_get_pcie_replay_count,  };  static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -784,12 +829,16 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =  	.init_doorbell_index = &vega20_doorbell_index_init,  	.get_pcie_usage = &soc15_get_pcie_usage,  	.need_reset_on_init = &soc15_need_reset_on_init, +	.get_pcie_replay_count = &soc15_get_pcie_replay_count,  };  static int soc15_common_early_init(void *handle)  { +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; +	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;  	adev->smc_rreg = NULL;  	adev->smc_wreg = NULL;  	adev->pcie_rreg = &soc15_pcie_rreg; @@ -998,11 +1047,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)  	int i;  	struct amdgpu_ring *ring; -	for (i = 0; i < adev->sdma.num_instances; i++) { -		ring = &adev->sdma.instance[i].ring; -		adev->nbio_funcs->sdma_doorbell_range(adev, i, -			ring->use_doorbell, ring->doorbell_index, -			adev->doorbell_index.sdma_doorbell_range); +	/*  Two reasons to skip +	*		1, Host driver already programmed them +	*		2, To avoid registers program violations in SR-IOV +	*/ +	if (!amdgpu_virt_support_skip_setting(adev)) { +		for (i = 0; i < adev->sdma.num_instances; i++) { +			ring = &adev->sdma.instance[i].ring; +			adev->nbio_funcs->sdma_doorbell_range(adev, i, +				ring->use_doorbell, ring->doorbell_index, +				adev->doorbell_index.sdma_doorbell_range); +		}  	}  	adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, @@ -1019,6 +1074,12 @@ static int soc15_common_hw_init(void *handle)  	soc15_program_aspm(adev);  	/* setup nbio registers */  	adev->nbio_funcs->init_registers(adev); +	/* remap HDP registers to a hole in mmio space, +	 * for the purpose of expose those registers +	 * to process space +	 */ +	if (adev->nbio_funcs->remap_hdp_registers) +		adev->nbio_funcs->remap_hdp_registers(adev);  	/* enable the doorbell aperture */  	soc15_enable_doorbell_aperture(adev, true);  	/* HW doorbell routing policy: doorbell writing not diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index a66c8bfbbaa6..06f39f5bbf76 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -42,8 +42,18 @@ struct soc15_reg_golden {  	u32	or_mask;  }; +struct soc15_reg_entry { +	uint32_t hwip; +	uint32_t inst; +	uint32_t seg; +	uint32_t reg_offset; +	uint32_t reg_value; +}; +  #define SOC15_REG_ENTRY(ip, inst, reg)	ip##_HWIP, inst, reg##_BASE_IDX, 
reg +#define SOC15_REG_ENTRY_OFFSET(entry)	(adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) +  #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \  	{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 49c262540940..47f74dab365d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,26 +69,60 @@  		}						\  	} while (0) -#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) 	\ -		({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ -			WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL,	\ -				UVD_DPG_LMA_CTL__MASK_EN_MASK |				\ -				((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ -				<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ -				(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT));	\ -			RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); }) +#define WREG32_RLC(reg, value) \ +	do {							\ +		if (amdgpu_virt_support_rlc_prg_reg(adev)) {    \ +			uint32_t i = 0;	\ +			uint32_t retries = 50000;	\ +			uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0;	\ +			uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1;	\ +			uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT;	\ +			WREG32(r0, value);	\ +			WREG32(r1, (reg | 0x80000000));	\ +			WREG32(spare_int, 0x1);	\ +			for (i = 0; i < retries; i++) {	\ +				u32 tmp = RREG32(r1);	\ +				if (!(tmp & 0x80000000))	\ +					break;	\ +				udelay(10);	\ +			}	\ +			if (i >= retries)	\ +				pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg);	\ +		} else {	\ +			WREG32(reg, value); \ +		}	\ +	} while (0) -#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel)	\ +#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \  	do {							\ -		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value);	\ -		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask);		\ -		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL,	\ -			UVD_DPG_LMA_CTL__READ_WRITE_MASK |	\ -			((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ -			<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) |	\ -			(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ +		uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ +		if (amdgpu_virt_support_rlc_prg_reg(adev)) {    \ +			uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2;	\ +			uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3;	\ +			uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;   \ +			uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;   \ +			if (target_reg == grbm_cntl) \ +				WREG32(r2, value);	\ +			else if (target_reg == grbm_idx) \ +				WREG32(r3, value);	\ +			WREG32(target_reg, value);	\ +		} else {	\ +			WREG32(target_reg, value); \ +		}	\  	} while (0) -#endif +#define WREG32_SOC15_RLC(ip, inst, reg, value) \ +	do {							\ +			uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\ +			WREG32_RLC(target_reg, value); \ +	} while (0) + +#define WREG32_FIELD15_RLC(ip, idx, reg, field, val)   \ +    WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \ +    (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \ +    & ~REG_FIELD_MASK(reg, field)) | (val) << 
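WREG32_RLC above is the mechanism behind every *_RLC write in this diff: the payload goes into SCRATCH_REG0, the destination offset with bit 31 set (a pending flag) into SCRATCH_REG1, and a write to RLC_SPARE_INT interrupts the RLC firmware, which performs the privileged write and clears bit 31; the driver polls up to 50000 times at 10us intervals for that acknowledgment. The same handshake in function form (a readability restatement of the macro, not a replacement for it):

static void rlcg_wreg(struct amdgpu_device *adev, u32 reg, u32 value)
{
	u32 r0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	u32 r1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
	u32 spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
	u32 i;

	WREG32(r0, value);			/* payload */
	WREG32(r1, reg | 0x80000000);		/* destination + pending bit */
	WREG32(spare_int, 0x1);			/* kick the RLC firmware */
	for (i = 0; i < 50000; i++) {
		if (!(RREG32(r1) & 0x80000000))	/* RLC clears the bit */
			return;
		udelay(10);
	}
	pr_err("timeout: rlcg program reg:0x%05x failed!\n", reg);
}

WREG32_SOC15_RLC_SHADOW additionally mirrors GRBM_GFX_CNTL and GRBM_GFX_INDEX values into SCRATCH_REG2/3, evidently so the RLC can track the current GRBM selection under SR-IOV.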
REG_FIELD_SHIFT(reg, field)) +#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \ +    WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value) +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index c4fb58667fd4..bf3385280d3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {  	.type = AMDGPU_RING_TYPE_UVD,  	.align_mask = 0xf,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.get_rptr = uvd_v4_2_ring_get_rptr,  	.get_wptr = uvd_v4_2_ring_get_wptr,  	.set_wptr = uvd_v4_2_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 52bd8a654734..3210a7bd9a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -849,6 +849,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {  	.type = AMDGPU_RING_TYPE_UVD,  	.align_mask = 0xf,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.get_rptr = uvd_v5_0_ring_get_rptr,  	.get_wptr = uvd_v5_0_ring_get_wptr,  	.set_wptr = uvd_v5_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index c9edddf9f88a..c61a314c56cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1502,6 +1502,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {  	.type = AMDGPU_RING_TYPE_UVD,  	.align_mask = 0xf,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.get_rptr = uvd_v6_0_ring_get_rptr,  	.get_wptr = uvd_v6_0_ring_get_wptr,  	.set_wptr = uvd_v6_0_ring_set_wptr, @@ -1527,6 +1528,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {  	.type = AMDGPU_RING_TYPE_UVD,  	.align_mask = 0xf,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.get_rptr = uvd_v6_0_ring_get_rptr,  	.get_wptr = uvd_v6_0_ring_get_wptr,  	.set_wptr = uvd_v6_0_ring_set_wptr, @@ -1555,6 +1557,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {  	.align_mask = 0x3f,  	.nop = HEVC_ENC_CMD_NO_OP,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.get_rptr = uvd_v6_0_enc_ring_get_rptr,  	.get_wptr = uvd_v6_0_enc_ring_get_wptr,  	.set_wptr = uvd_v6_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 2191d3d0a219..cdb96d4cb424 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1759,6 +1759,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {  	.type = AMDGPU_RING_TYPE_UVD,  	.align_mask = 0xf,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.vmhub = AMDGPU_MMHUB,  	.get_rptr = uvd_v7_0_ring_get_rptr,  	.get_wptr = uvd_v7_0_ring_get_wptr, @@ -1791,6 +1792,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {  	.align_mask = 0x3f,  	.nop = HEVC_ENC_CMD_NO_OP,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.vmhub = AMDGPU_MMHUB,  	.get_rptr = uvd_v7_0_enc_ring_get_rptr,  	.get_wptr = uvd_v7_0_enc_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 40363ca6c5f1..ab0cb8325796 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {  	.align_mask = 0xf,  	
.nop = VCE_CMD_NO_OP,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.get_rptr = vce_v2_0_ring_get_rptr,  	.get_wptr = vce_v2_0_ring_get_wptr,  	.set_wptr = vce_v2_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6ec65cf11112..36902ec16dcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {  	.align_mask = 0xf,  	.nop = VCE_CMD_NO_OP,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.get_rptr = vce_v3_0_ring_get_rptr,  	.get_wptr = vce_v3_0_ring_get_wptr,  	.set_wptr = vce_v3_0_ring_set_wptr, @@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {  	.align_mask = 0xf,  	.nop = VCE_CMD_NO_OP,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.get_rptr = vce_v3_0_ring_get_rptr,  	.get_wptr = vce_v3_0_ring_get_wptr,  	.set_wptr = vce_v3_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index c0ec27991c22..e267b073f525 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {  	.align_mask = 0x3f,  	.nop = VCE_CMD_NO_OP,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.vmhub = AMDGPU_MMHUB,  	.get_rptr = vce_v4_0_ring_get_rptr,  	.get_wptr = vce_v4_0_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 3dbc51f9d3b9..bb47f5b24be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);  static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);  static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);  static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, +				struct dpg_pause_state *new_state);  /**   * vcn_v1_0_early_init - set function pointers @@ -140,7 +142,9 @@ static int vcn_v1_0_sw_init(void *handle)  	if (r)  		return r; -	return r; +	adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; + +	return 0;  }  /** @@ -1204,6 +1208,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)  	return r;  } +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, +				struct dpg_pause_state *new_state) +{ +	int ret_code; +	uint32_t reg_data = 0; +	uint32_t reg_data2 = 0; +	struct amdgpu_ring *ring; + +	/* pause/unpause if state is changed */ +	if (adev->vcn.pause_state.fw_based != new_state->fw_based) { +		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", +			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, +			new_state->fw_based, new_state->jpeg); + +		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & +			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + +		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { +			ret_code = 0; + +			if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) +				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, +						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, +						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + +			if (!ret_code) { +				/* pause DPG non-jpeg */ +				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; +				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); +				
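(The .no_user_fence = true additions across the UVD/VCE ring funcs above evidently flag rings that cannot take user-space fences on command submission.) vcn_v1_0_pause_dpg_mode, begun above, implements a request/acknowledge protocol against UVD_DPG_PAUSE: set the *_PAUSE_DPG_REQ bit, poll until the matching *_PAUSE_DPG_ACK bit rises, reprogram the ring registers that DPG power gating may have lost, and clear the request bit to unpause (no wait is needed on unpause). The handshake skeleton that both the fw_based and jpeg branches follow (a sketch; callers still restore ring state afterwards):

static int vcn_dpg_pause_req(struct amdgpu_device *adev, u32 req_mask, u32 ack_mask)
{
	int ret_code = 0;
	u32 reg = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & ~ack_mask;

	reg |= req_mask;				/* request the pause */
	WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg);
	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,	/* wait for the ack */
			   ack_mask, ack_mask, ret_code);
	return ret_code;
}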
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, +						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, +						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + +				/* Restore */ +				ring = &adev->vcn.ring_enc[0]; +				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); +				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); +				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); +				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); +				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + +				ring = &adev->vcn.ring_enc[1]; +				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); +				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); +				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); +				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); +				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + +				ring = &adev->vcn.ring_dec; +				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, +						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); +				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, +						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, +						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); +			} +		} else { +			/* unpause dpg non-jpeg, no need to wait */ +			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; +			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); +		} +		adev->vcn.pause_state.fw_based = new_state->fw_based; +	} + +	/* pause/unpause if state is changed */ +	if (adev->vcn.pause_state.jpeg != new_state->jpeg) { +		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", +			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, +			new_state->fw_based, new_state->jpeg); + +		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & +			(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); + +		if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { +			ret_code = 0; + +			if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) +				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, +						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, +						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + +			if (!ret_code) { +				/* Make sure JPRG Snoop is disabled before sending the pause */ +				reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); +				reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; +				WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); + +				/* pause DPG jpeg */ +				reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; +				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); +				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, +							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, +							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); + +				/* Restore */ +				ring = &adev->vcn.ring_jpeg; +				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); +				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, +							UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | +							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); +				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, +							lower_32_bits(ring->gpu_addr)); +				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, +							upper_32_bits(ring->gpu_addr)); +				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); +				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); +				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, +							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + +				ring = &adev->vcn.ring_dec; +				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, +						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); +				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, +						 
  UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, +						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); +			} +		} else { +			/* unpause dpg jpeg, no need to wait */ +			reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; +			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); +		} +		adev->vcn.pause_state.jpeg = new_state->jpeg; +	} + +	return 0; +} +  static bool vcn_v1_0_is_idle(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2054,6 +2184,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {  	.type = AMDGPU_RING_TYPE_VCN_DEC,  	.align_mask = 0xf,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.vmhub = AMDGPU_MMHUB,  	.get_rptr = vcn_v1_0_dec_ring_get_rptr,  	.get_wptr = vcn_v1_0_dec_ring_get_wptr, @@ -2087,6 +2218,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {  	.align_mask = 0x3f,  	.nop = VCN_ENC_CMD_NO_OP,  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.vmhub = AMDGPU_MMHUB,  	.get_rptr = vcn_v1_0_enc_ring_get_rptr,  	.get_wptr = vcn_v1_0_enc_ring_get_wptr, @@ -2118,6 +2250,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {  	.align_mask = 0xf,  	.nop = PACKET0(0x81ff, 0),  	.support_64bit_ptrs = false, +	.no_user_fence = true,  	.vmhub = AMDGPU_MMHUB,  	.extra_dw = 64,  	.get_rptr = vcn_v1_0_jpeg_ring_get_rptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 8d89ab7f0ae8..5f54acc70fec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -48,14 +48,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); -	WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); +	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { +			DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); +			return; +		} +	} else { +		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); +	}  	adev->irq.ih.enabled = true;  	if (adev->irq.ih1.ring_size) {  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,  					   RB_ENABLE, 1); -		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); +		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, +						ih_rb_cntl)) { +				DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); +				return; +			} +		} else { +			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); +		}  		adev->irq.ih1.enabled = true;  	} @@ -63,7 +78,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,  					   RB_ENABLE, 1); -		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); +		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, +						ih_rb_cntl)) { +				DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); +				return; +			} +		} else { +			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); +		}  		adev->irq.ih2.enabled = true;  	}  } @@ -81,7 +104,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 
0); -	WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); +	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { +			DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); +			return; +		} +	} else { +		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); +	} +  	/* set rptr, wptr to 0 */  	WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);  	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); @@ -92,7 +123,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,  					   RB_ENABLE, 0); -		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); +		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, +						ih_rb_cntl)) { +				DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); +				return; +			} +		} else { +			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); +		}  		/* set rptr, wptr to 0 */  		WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);  		WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); @@ -104,7 +143,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,  					   RB_ENABLE, 0); -		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); +		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, +						ih_rb_cntl)) { +				DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); +				return; +			} +		} else { +			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); +		} +  		/* set rptr, wptr to 0 */  		WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);  		WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); @@ -187,7 +235,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)  	ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,  				   !!adev->irq.msi_enabled); -	WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + +	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { +			DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); +			return -ETIMEDOUT; +		} +	} else { +		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); +	}  	/* set the writeback address whether it's enabled or not */  	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, @@ -214,7 +270,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)  					   WPTR_OVERFLOW_ENABLE, 0);  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,  					   RB_FULL_DRAIN_ENABLE, 1); -		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); +		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, +						ih_rb_cntl)) { +				DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); +				return -ETIMEDOUT; +			} +		} else { +			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); +		}  		/* set rptr, wptr to 0 */  		WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); @@ -232,7 +296,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);  		ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); -		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + +		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, +						ih_rb_cntl)) { +				
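/* Editor's note: this try-PSP-then-fall-back-to-MMIO shape repeats for
 * IH_RB_CNTL and both ring variants across the enable, disable and init
 * paths in this file.  A sketch of how the copies could be consolidated;
 * the helper name is hypothetical, the called functions and PSP_REG_* ids
 * are the ones used in these hunks, and the enum parameter type assumes
 * psp_reg_program() takes the id type behind the PSP_REG_* constants:
 */
static int vega10_ih_prog_rb_cntl(struct amdgpu_device *adev,
				  enum psp_reg_prog_id psp_id,
				  u32 reg, u32 ih_rb_cntl)
{
	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
		/* under SR-IOV the PSP firmware owns these registers */
		if (psp_reg_program(&adev->psp, psp_id, ih_rb_cntl))
			return -ETIMEDOUT;
	} else {
		WREG32(reg, ih_rb_cntl);
	}
	return 0;
}
/* Note the callers differ: the enable/disable paths above simply bail out
 * on failure, while the irq_init hunks below propagate -ETIMEDOUT.
 */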
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); +				return -ETIMEDOUT; +			} +		} else { +			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); +		}  		/* set rptr, wptr to 0 */  		WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 5e5b42a0744a..b8adf3808de2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -987,6 +987,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,  	*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);  } +static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev) +{ +	uint64_t nak_r, nak_g; + +	/* Get the number of NAKs received and generated */ +	nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); +	nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + +	/* Add the total number of NAKs, i.e the number of replays */ +	return (nak_r + nak_g); +} +  static bool vi_need_reset_on_init(struct amdgpu_device *adev)  {  	u32 clock_cntl, pc; @@ -1021,6 +1033,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =  	.init_doorbell_index = &legacy_doorbell_index_init,  	.get_pcie_usage = &vi_get_pcie_usage,  	.need_reset_on_init = &vi_need_reset_on_init, +	.get_pcie_replay_count = &vi_get_pcie_replay_count,  };  #define CZ_REV_BRISTOL(rev)	 \ diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 3621efbd5759..e413d4a71fa3 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -21,7 +21,7 @@   */  static const uint32_t cwsr_trap_gfx8_hex[] = { -	0xbf820001, 0xbf82012b, +	0xbf820001, 0xbf820121,  	0xb8f4f802, 0x89748674,  	0xb8f5f803, 0x8675ff75,  	0x00000400, 0xbf850017, @@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {  	0x8671ff71, 0x0000ffff,  	0x8f728374, 0xb972e0c2,  	0xbf800002, 0xb9740002, -	0xbe801f70, 0xb8f5f803, -	0x8675ff75, 0x00000100, -	0xbf840006, 0xbefa0080, -	0xb97a0203, 0x8671ff71, -	0x0000ffff, 0x80f08870, -	0x82f18071, 0xbefa0080, +	0xbe801f70, 0xbefa0080,  	0xb97a0283, 0xbef60068,  	0xbef70069, 0xb8fa1c07,  	0x8e7a9c7a, 0x87717a71, @@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {  static const uint32_t cwsr_trap_gfx9_hex[] = { -	0xbf820001, 0xbf82015d, +	0xbf820001, 0xbf82015e,  	0xb8f8f802, 0x89788678, -	0xb8f1f803, 0x866eff71, -	0x00000400, 0xbf850037, -	0x866eff71, 0x00000800, -	0xbf850003, 0x866eff71, -	0x00000100, 0xbf840008, +	0xb8fbf803, 0x866eff7b, +	0x00000400, 0xbf85003b, +	0x866eff7b, 0x00000800, +	0xbf850003, 0x866eff7b, +	0x00000100, 0xbf84000c,  	0x866eff78, 0x00002000, -	0xbf840001, 0xbf810000, +	0xbf840005, 0xbf8e0010, +	0xb8eef803, 0x866eff6e, +	0x00000400, 0xbf84fffb,  	0x8778ff78, 0x00002000,  	0x80ec886c, 0x82ed806d,  	0xb8eef807, 0x866fff6e, @@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {  	0x8977ff77, 0xfc000000,  	0x87776f77, 0x896eff6e,  	0x001f8000, 0xb96ef807, -	0xb8f0f812, 0xb8f1f813, -	0x8ef08870, 0xc0071bb8, +	0xb8faf812, 0xb8fbf813, +	0x8efa887a, 0xc0071bbd,  	0x00000000, 0xbf8cc07f, -	0xc0071c38, 0x00000008, +	0xc0071ebd, 0x00000008,  	0xbf8cc07f, 0x86ee6e6e,  	0xbf840001, 0xbe801d6e, -	0xb8f1f803, 0x8671ff71, +	0xb8fbf803, 0x867bff7b,  	0x000001ff, 0xbf850002,  	0x806c846c, 0x826d806d,  	0x866dff6d, 0x0000ffff, @@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {  	0x8f6e8378, 0xb96ee0c2,  	0xbf800002, 0xb9780002,  	0xbe801f6c, 0x866dff6d, -	0x0000ffff, 0xbef00080, -	
0xb9700283, 0xb8f02407, -	0x8e709c70, 0x876d706d, -	0xb8f003c7, 0x8e709b70, -	0x876d706d, 0xb8f0f807, -	0x8670ff70, 0x00007fff, -	0xb970f807, 0xbeee007e, +	0x0000ffff, 0xbefa0080, +	0xb97a0283, 0xb8fa2407, +	0x8e7a9b7a, 0x876d7a6d, +	0xb8fa03c7, 0x8e7a9a7a, +	0x876d7a6d, 0xb8faf807, +	0x867aff7a, 0x00007fff, +	0xb97af807, 0xbeee007e,  	0xbeef007f, 0xbefe0180, -	0xbf900004, 0x87708478, -	0xb970f802, 0xbf8e0002, -	0xbf88fffe, 0xb8f02a05, +	0xbf900004, 0x877a8478, +	0xb97af802, 0xbf8e0002, +	0xbf88fffe, 0xb8fa2a05, +	0x807a817a, 0x8e7a8a7a, +	0xb8fb1605, 0x807b817b, +	0x8e7b867b, 0x807a7b7a, +	0x807a7e7a, 0x827b807f, +	0x867bff7b, 0x0000ffff, +	0xc04b1c3d, 0x00000050, +	0xbf8cc07f, 0xc04b1d3d, +	0x00000060, 0xbf8cc07f, +	0xc0431e7d, 0x00000074, +	0xbf8cc07f, 0xbef4007e, +	0x8675ff7f, 0x0000ffff, +	0x8775ff75, 0x00040000, +	0xbef60080, 0xbef700ff, +	0x00807fac, 0x867aff7f, +	0x08000000, 0x8f7a837a, +	0x87777a77, 0x867aff7f, +	0x70000000, 0x8f7a817a, +	0x87777a77, 0xbef1007c, +	0xbef00080, 0xb8f02a05,  	0x80708170, 0x8e708a70, -	0xb8f11605, 0x80718171, -	0x8e718671, 0x80707170, -	0x80707e70, 0x8271807f, -	0x8671ff71, 0x0000ffff, -	0xc0471cb8, 0x00000040, -	0xbf8cc07f, 0xc04b1d38, -	0x00000048, 0xbf8cc07f, -	0xc0431e78, 0x00000058, -	0xbf8cc07f, 0xc0471eb8, -	0x0000005c, 0xbf8cc07f, -	0xbef4007e, 0x8675ff7f, -	0x0000ffff, 0x8775ff75, -	0x00040000, 0xbef60080, -	0xbef700ff, 0x00807fac, -	0x8670ff7f, 0x08000000, -	0x8f708370, 0x87777077, -	0x8670ff7f, 0x70000000, -	0x8f708170, 0x87777077, -	0xbefb007c, 0xbefa0080, -	0xb8fa2a05, 0x807a817a, -	0x8e7a8a7a, 0xb8f01605, -	0x80708170, 0x8e708670, -	0x807a707a, 0xbef60084, -	0xbef600ff, 0x01000000, -	0xbefe007c, 0xbefc007a, -	0xc0611efa, 0x0000007c, -	0xbf8cc07f, 0x807a847a, -	0xbefc007e, 0xbefe007c, -	0xbefc007a, 0xc0611b3a, +	0xb8fa1605, 0x807a817a, +	0x8e7a867a, 0x80707a70, +	0xbef60084, 0xbef600ff, +	0x01000000, 0xbefe007c, +	0xbefc0070, 0xc0611c7a,  	0x0000007c, 0xbf8cc07f, -	0x807a847a, 0xbefc007e, -	0xbefe007c, 0xbefc007a, -	0xc0611b7a, 0x0000007c, -	0xbf8cc07f, 0x807a847a, +	0x80708470, 0xbefc007e, +	0xbefe007c, 0xbefc0070, +	0xc0611b3a, 0x0000007c, +	0xbf8cc07f, 0x80708470,  	0xbefc007e, 0xbefe007c, -	0xbefc007a, 0xc0611bba, +	0xbefc0070, 0xc0611b7a,  	0x0000007c, 0xbf8cc07f, -	0x807a847a, 0xbefc007e, -	0xbefe007c, 0xbefc007a, -	0xc0611bfa, 0x0000007c, -	0xbf8cc07f, 0x807a847a, +	0x80708470, 0xbefc007e, +	0xbefe007c, 0xbefc0070, +	0xc0611bba, 0x0000007c, +	0xbf8cc07f, 0x80708470,  	0xbefc007e, 0xbefe007c, -	0xbefc007a, 0xc0611e3a, +	0xbefc0070, 0xc0611bfa,  	0x0000007c, 0xbf8cc07f, -	0x807a847a, 0xbefc007e, -	0xb8f1f803, 0xbefe007c, -	0xbefc007a, 0xc0611c7a, -	0x0000007c, 0xbf8cc07f, -	0x807a847a, 0xbefc007e, -	0xbefe007c, 0xbefc007a, -	0xc0611a3a, 0x0000007c, -	0xbf8cc07f, 0x807a847a, +	0x80708470, 0xbefc007e, +	0xbefe007c, 0xbefc0070, +	0xc0611e3a, 0x0000007c, +	0xbf8cc07f, 0x80708470, +	0xbefc007e, 0xb8fbf803, +	0xbefe007c, 0xbefc0070, +	0xc0611efa, 0x0000007c, +	0xbf8cc07f, 0x80708470,  	0xbefc007e, 0xbefe007c, -	0xbefc007a, 0xc0611a7a, -	0x0000007c, 0xbf8cc07f, -	0x807a847a, 0xbefc007e, -	0xb8fbf801, 0xbefe007c, -	0xbefc007a, 0xc0611efa, +	0xbefc0070, 0xc0611a3a,  	0x0000007c, 0xbf8cc07f, -	0x807a847a, 0xbefc007e, -	0x8670ff7f, 0x04000000, -	0xbeef0080, 0x876f6f70, -	0xb8fa2a05, 0x807a817a, -	0x8e7a8a7a, 0xb8f11605, -	0x80718171, 0x8e718471, -	0x8e768271, 0xbef600ff, -	0x01000000, 0xbef20174, -	0x80747a74, 0x82758075, -	0xbefc0080, 0xbf800000, -	0xbe802b00, 0xbe822b02, -	0xbe842b04, 0xbe862b06, -	0xbe882b08, 0xbe8a2b0a, -	0xbe8c2b0c, 
0xbe8e2b0e, -	0xc06b003a, 0x00000000, -	0xbf8cc07f, 0xc06b013a, -	0x00000010, 0xbf8cc07f, -	0xc06b023a, 0x00000020, -	0xbf8cc07f, 0xc06b033a, -	0x00000030, 0xbf8cc07f, -	0x8074c074, 0x82758075, -	0x807c907c, 0xbf0a717c, -	0xbf85ffe7, 0xbef40172, -	0xbefa0080, 0xbefe00c1, -	0xbeff00c1, 0xbee80080, -	0xbee90080, 0xbef600ff, -	0x01000000, 0xe0724000, -	0x7a1d0000, 0xe0724100, -	0x7a1d0100, 0xe0724200, -	0x7a1d0200, 0xe0724300, -	0x7a1d0300, 0xbefe00c1, -	0xbeff00c1, 0xb8f14306, -	0x8671c171, 0xbf84002c, -	0xbf8a0000, 0x8670ff6f, -	0x04000000, 0xbf840028, -	0x8e718671, 0x8e718271, -	0xbef60071, 0xb8fa2a05, -	0x807a817a, 0x8e7a8a7a, -	0xb8f01605, 0x80708170, -	0x8e708670, 0x807a707a, -	0x807aff7a, 0x00000080, +	0x80708470, 0xbefc007e, +	0xbefe007c, 0xbefc0070, +	0xc0611a7a, 0x0000007c, +	0xbf8cc07f, 0x80708470, +	0xbefc007e, 0xb8f1f801, +	0xbefe007c, 0xbefc0070, +	0xc0611c7a, 0x0000007c, +	0xbf8cc07f, 0x80708470, +	0xbefc007e, 0x867aff7f, +	0x04000000, 0xbeef0080, +	0x876f6f7a, 0xb8f02a05, +	0x80708170, 0x8e708a70, +	0xb8fb1605, 0x807b817b, +	0x8e7b847b, 0x8e76827b,  	0xbef600ff, 0x01000000, -	0xbefc0080, 0xd28c0002, -	0x000100c1, 0xd28d0003, -	0x000204c1, 0xd1060002, -	0x00011103, 0x7e0602ff, -	0x00000200, 0xbefc00ff, -	0x00010000, 0xbe800077, -	0x8677ff77, 0xff7fffff, -	0x8777ff77, 0x00058000, -	0xd8ec0000, 0x00000002, -	0xbf8cc07f, 0xe0765000, -	0x7a1d0002, 0x68040702, -	0xd0c9006a, 0x0000e302, -	0xbf87fff7, 0xbef70000, -	0xbefa00ff, 0x00000400, +	0xbef20174, 0x80747074, +	0x82758075, 0xbefc0080, +	0xbf800000, 0xbe802b00, +	0xbe822b02, 0xbe842b04, +	0xbe862b06, 0xbe882b08, +	0xbe8a2b0a, 0xbe8c2b0c, +	0xbe8e2b0e, 0xc06b003a, +	0x00000000, 0xbf8cc07f, +	0xc06b013a, 0x00000010, +	0xbf8cc07f, 0xc06b023a, +	0x00000020, 0xbf8cc07f, +	0xc06b033a, 0x00000030, +	0xbf8cc07f, 0x8074c074, +	0x82758075, 0x807c907c, +	0xbf0a7b7c, 0xbf85ffe7, +	0xbef40172, 0xbef00080,  	0xbefe00c1, 0xbeff00c1, -	0xb8f12a05, 0x80718171, -	0x8e718271, 0x8e768871, +	0xbee80080, 0xbee90080,  	0xbef600ff, 0x01000000, -	0xbefc0084, 0xbf0a717c, -	0xbf840015, 0xbf11017c, -	0x8071ff71, 0x00001000, -	0x7e000300, 0x7e020301, -	0x7e040302, 0x7e060303, -	0xe0724000, 0x7a1d0000, -	0xe0724100, 0x7a1d0100, -	0xe0724200, 0x7a1d0200, -	0xe0724300, 0x7a1d0300, -	0x807c847c, 0x807aff7a, -	0x00000400, 0xbf0a717c, -	0xbf85ffef, 0xbf9c0000, -	0xbf8200dc, 0xbef4007e, -	0x8675ff7f, 0x0000ffff, -	0x8775ff75, 0x00040000, -	0xbef60080, 0xbef700ff, -	0x00807fac, 0x866eff7f, -	0x08000000, 0x8f6e836e, -	0x87776e77, 0x866eff7f, -	0x70000000, 0x8f6e816e, -	0x87776e77, 0x866eff7f, -	0x04000000, 0xbf84001e, +	0xe0724000, 0x701d0000, +	0xe0724100, 0x701d0100, +	0xe0724200, 0x701d0200, +	0xe0724300, 0x701d0300,  	0xbefe00c1, 0xbeff00c1, -	0xb8ef4306, 0x866fc16f, -	0xbf840019, 0x8e6f866f, -	0x8e6f826f, 0xbef6006f, -	0xb8f82a05, 0x80788178, -	0x8e788a78, 0xb8ee1605, -	0x806e816e, 0x8e6e866e, -	0x80786e78, 0x8078ff78, +	0xb8fb4306, 0x867bc17b, +	0xbf84002c, 0xbf8a0000, +	0x867aff6f, 0x04000000, +	0xbf840028, 0x8e7b867b, +	0x8e7b827b, 0xbef6007b, +	0xb8f02a05, 0x80708170, +	0x8e708a70, 0xb8fa1605, +	0x807a817a, 0x8e7a867a, +	0x80707a70, 0x8070ff70,  	0x00000080, 0xbef600ff,  	0x01000000, 0xbefc0080, -	0xe0510000, 0x781d0000, -	0xe0510100, 0x781d0000, -	0x807cff7c, 0x00000200, -	0x8078ff78, 0x00000200, -	0xbf0a6f7c, 0xbf85fff6, -	0xbef80080, 0xbefe00c1, -	0xbeff00c1, 0xb8ef2a05, -	0x806f816f, 0x8e6f826f, -	0x8e76886f, 0xbef600ff, -	0x01000000, 0xbeee0078, -	0x8078ff78, 0x00000400, -	0xbefc0084, 0xbf11087c, -	0x806fff6f, 0x00008000, -	0xe0524000, 0x781d0000, -	
0xe0524100, 0x781d0100, -	0xe0524200, 0x781d0200, -	0xe0524300, 0x781d0300, -	0xbf8c0f70, 0x7e000300, +	0xd28c0002, 0x000100c1, +	0xd28d0003, 0x000204c1, +	0xd1060002, 0x00011103, +	0x7e0602ff, 0x00000200, +	0xbefc00ff, 0x00010000, +	0xbe800077, 0x8677ff77, +	0xff7fffff, 0x8777ff77, +	0x00058000, 0xd8ec0000, +	0x00000002, 0xbf8cc07f, +	0xe0765000, 0x701d0002, +	0x68040702, 0xd0c9006a, +	0x0000f702, 0xbf87fff7, +	0xbef70000, 0xbef000ff, +	0x00000400, 0xbefe00c1, +	0xbeff00c1, 0xb8fb2a05, +	0x807b817b, 0x8e7b827b, +	0x8e76887b, 0xbef600ff, +	0x01000000, 0xbefc0084, +	0xbf0a7b7c, 0xbf840015, +	0xbf11017c, 0x807bff7b, +	0x00001000, 0x7e000300,  	0x7e020301, 0x7e040302, -	0x7e060303, 0x807c847c, -	0x8078ff78, 0x00000400, -	0xbf0a6f7c, 0xbf85ffee, -	0xbf9c0000, 0xe0524000, -	0x6e1d0000, 0xe0524100, -	0x6e1d0100, 0xe0524200, -	0x6e1d0200, 0xe0524300, -	0x6e1d0300, 0xb8f82a05, +	0x7e060303, 0xe0724000, +	0x701d0000, 0xe0724100, +	0x701d0100, 0xe0724200, +	0x701d0200, 0xe0724300, +	0x701d0300, 0x807c847c, +	0x8070ff70, 0x00000400, +	0xbf0a7b7c, 0xbf85ffef, +	0xbf9c0000, 0xbf8200da, +	0xbef4007e, 0x8675ff7f, +	0x0000ffff, 0x8775ff75, +	0x00040000, 0xbef60080, +	0xbef700ff, 0x00807fac, +	0x866eff7f, 0x08000000, +	0x8f6e836e, 0x87776e77, +	0x866eff7f, 0x70000000, +	0x8f6e816e, 0x87776e77, +	0x866eff7f, 0x04000000, +	0xbf84001e, 0xbefe00c1, +	0xbeff00c1, 0xb8ef4306, +	0x866fc16f, 0xbf840019, +	0x8e6f866f, 0x8e6f826f, +	0xbef6006f, 0xb8f82a05,  	0x80788178, 0x8e788a78,  	0xb8ee1605, 0x806e816e,  	0x8e6e866e, 0x80786e78, -	0x80f8c078, 0xb8ef1605, -	0x806f816f, 0x8e6f846f, -	0x8e76826f, 0xbef600ff, -	0x01000000, 0xbefc006f, -	0xc031003a, 0x00000078, -	0x80f8c078, 0xbf8cc07f, -	0x80fc907c, 0xbf800000, -	0xbe802d00, 0xbe822d02, -	0xbe842d04, 0xbe862d06, -	0xbe882d08, 0xbe8a2d0a, -	0xbe8c2d0c, 0xbe8e2d0e, -	0xbf06807c, 0xbf84fff0, +	0x8078ff78, 0x00000080, +	0xbef600ff, 0x01000000, +	0xbefc0080, 0xe0510000, +	0x781d0000, 0xe0510100, +	0x781d0000, 0x807cff7c, +	0x00000200, 0x8078ff78, +	0x00000200, 0xbf0a6f7c, +	0xbf85fff6, 0xbef80080, +	0xbefe00c1, 0xbeff00c1, +	0xb8ef2a05, 0x806f816f, +	0x8e6f826f, 0x8e76886f, +	0xbef600ff, 0x01000000, +	0xbeee0078, 0x8078ff78, +	0x00000400, 0xbefc0084, +	0xbf11087c, 0x806fff6f, +	0x00008000, 0xe0524000, +	0x781d0000, 0xe0524100, +	0x781d0100, 0xe0524200, +	0x781d0200, 0xe0524300, +	0x781d0300, 0xbf8c0f70, +	0x7e000300, 0x7e020301, +	0x7e040302, 0x7e060303, +	0x807c847c, 0x8078ff78, +	0x00000400, 0xbf0a6f7c, +	0xbf85ffee, 0xbf9c0000, +	0xe0524000, 0x6e1d0000, +	0xe0524100, 0x6e1d0100, +	0xe0524200, 0x6e1d0200, +	0xe0524300, 0x6e1d0300,  	0xb8f82a05, 0x80788178,  	0x8e788a78, 0xb8ee1605,  	0x806e816e, 0x8e6e866e, -	0x80786e78, 0xbef60084, +	0x80786e78, 0x80f8c078, +	0xb8ef1605, 0x806f816f, +	0x8e6f846f, 0x8e76826f,  	0xbef600ff, 0x01000000, -	0xc0211bfa, 0x00000078, -	0x80788478, 0xc0211b3a, +	0xbefc006f, 0xc031003a, +	0x00000078, 0x80f8c078, +	0xbf8cc07f, 0x80fc907c, +	0xbf800000, 0xbe802d00, +	0xbe822d02, 0xbe842d04, +	0xbe862d06, 0xbe882d08, +	0xbe8a2d0a, 0xbe8c2d0c, +	0xbe8e2d0e, 0xbf06807c, +	0xbf84fff0, 0xb8f82a05, +	0x80788178, 0x8e788a78, +	0xb8ee1605, 0x806e816e, +	0x8e6e866e, 0x80786e78, +	0xbef60084, 0xbef600ff, +	0x01000000, 0xc0211bfa,  	0x00000078, 0x80788478, -	0xc0211b7a, 0x00000078, -	0x80788478, 0xc0211eba, +	0xc0211b3a, 0x00000078, +	0x80788478, 0xc0211b7a,  	0x00000078, 0x80788478, -	0xc0211efa, 0x00000078, -	0x80788478, 0xc0211c3a, +	0xc0211c3a, 0x00000078, +	0x80788478, 0xc0211c7a,  	0x00000078, 0x80788478, -	0xc0211c7a, 0x00000078, -	0x80788478, 
0xc0211a3a, +	0xc0211eba, 0x00000078, +	0x80788478, 0xc0211efa,  	0x00000078, 0x80788478, -	0xc0211a7a, 0x00000078, -	0x80788478, 0xc0211cfa, +	0xc0211a3a, 0x00000078, +	0x80788478, 0xc0211a7a,  	0x00000078, 0x80788478, -	0xbf8cc07f, 0xbefc006f, -	0xbefe007a, 0xbeff007b, -	0x866f71ff, 0x000003ff, -	0xb96f4803, 0x866f71ff, -	0xfffff800, 0x8f6f8b6f, -	0xb96fa2c3, 0xb973f801, -	0xb8ee2a05, 0x806e816e, -	0x8e6e8a6e, 0xb8ef1605, -	0x806f816f, 0x8e6f866f, -	0x806e6f6e, 0x806e746e, -	0x826f8075, 0x866fff6f, -	0x0000ffff, 0xc0071cb7, -	0x00000040, 0xc00b1d37, -	0x00000048, 0xc0031e77, -	0x00000058, 0xc0071eb7, -	0x0000005c, 0xbf8cc07f, -	0x866fff6d, 0xf0000000, -	0x8f6f9c6f, 0x8e6f906f, -	0xbeee0080, 0x876e6f6e, -	0x866fff6d, 0x08000000, -	0x8f6f9b6f, 0x8e6f8f6f, -	0x876e6f6e, 0x866fff70, -	0x00800000, 0x8f6f976f, -	0xb96ef807, 0x866dff6d, -	0x0000ffff, 0x86fe7e7e, -	0x86ea6a6a, 0x8f6e8370, -	0xb96ee0c2, 0xbf800002, -	0xb9700002, 0xbf8a0000, -	0x95806f6c, 0xbf810000, +	0xc0211cfa, 0x00000078, +	0x80788478, 0xbf8cc07f, +	0xbefc006f, 0xbefe0070, +	0xbeff0071, 0x866f7bff, +	0x000003ff, 0xb96f4803, +	0x866f7bff, 0xfffff800, +	0x8f6f8b6f, 0xb96fa2c3, +	0xb973f801, 0xb8ee2a05, +	0x806e816e, 0x8e6e8a6e, +	0xb8ef1605, 0x806f816f, +	0x8e6f866f, 0x806e6f6e, +	0x806e746e, 0x826f8075, +	0x866fff6f, 0x0000ffff, +	0xc00b1c37, 0x00000050, +	0xc00b1d37, 0x00000060, +	0xc0031e77, 0x00000074, +	0xbf8cc07f, 0x866fff6d, +	0xf8000000, 0x8f6f9b6f, +	0x8e6f906f, 0xbeee0080, +	0x876e6f6e, 0x866fff6d, +	0x04000000, 0x8f6f9a6f, +	0x8e6f8f6f, 0x876e6f6e, +	0x866fff7a, 0x00800000, +	0x8f6f976f, 0xb96ef807, +	0x866dff6d, 0x0000ffff, +	0x86fe7e7e, 0x86ea6a6a, +	0x8f6e837a, 0xb96ee0c2, +	0xbf800002, 0xb97a0002, +	0xbf8a0000, 0x95806f6c, +	0xbf810000, 0x00000000,  }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index abe1a5da29fb..a47f5b933120 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP          s_waitcnt lgkmcnt(0)         //FIXME, will cause xnack??  
end -    //check whether there is mem_viol -    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS) -    s_and_b32   s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK -    s_cbranch_scc0  L_NO_PC_REWIND - -    //if so, need rewind PC assuming GDS operation gets NACKed -    s_mov_b32       s_save_tmp, 0                                                           //clear mem_viol bit -    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp    //clear mem_viol bit -    s_and_b32       s_save_pc_hi, s_save_pc_hi, 0x0000ffff    //pc[47:32] -    s_sub_u32       s_save_pc_lo, s_save_pc_lo, 8             //pc[31:0]-8 -    s_subb_u32      s_save_pc_hi, s_save_pc_hi, 0x0           // -scc - -L_NO_PC_REWIND:      s_mov_b32       s_save_tmp, 0                                                           //clear saveCtx bit      s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp     //clear saveCtx bit diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 0bb9c577b3a2..6bae2e022c6e 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT		=   28  var S_SAVE_SPI_INIT_FIRST_WAVE_MASK	=   0x04000000		//bit[26]: FirstWaveInTG  var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT	=   26 -var S_SAVE_PC_HI_RCNT_SHIFT		=   28			//FIXME	 check with Brian to ensure all fields other than PC[47:0] can be used -var S_SAVE_PC_HI_RCNT_MASK		=   0xF0000000		//FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT	=   27			//FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_MASK	=   0x08000000		//FIXME +var S_SAVE_PC_HI_RCNT_SHIFT		=   27			//FIXME	 check with Brian to ensure all fields other than PC[47:0] can be used +var S_SAVE_PC_HI_RCNT_MASK		=   0xF8000000		//FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT	=   26			//FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_MASK	=   0x04000000		//FIXME  var s_save_spi_init_lo		    =	exec_lo  var s_save_spi_init_hi		    =	exec_hi @@ -162,8 +162,8 @@ var s_save_pc_lo	    =	ttmp0		//{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra  var s_save_pc_hi	    =	ttmp1  var s_save_exec_lo	    =	ttmp2  var s_save_exec_hi	    =	ttmp3 -var s_save_tmp		    =	ttmp4 -var s_save_trapsts	    =	ttmp5		//not really used until the end of the SAVE routine +var s_save_tmp		    =	ttmp14 +var s_save_trapsts	    =	ttmp15		//not really used until the end of the SAVE routine  var s_save_xnack_mask_lo    =	ttmp6  var s_save_xnack_mask_hi    =	ttmp7  var s_save_buf_rsrc0	    =	ttmp8 @@ -171,9 +171,9 @@ var s_save_buf_rsrc1	    =	ttmp9  var s_save_buf_rsrc2	    =	ttmp10  var s_save_buf_rsrc3	    =	ttmp11  var s_save_status	    =	ttmp12 -var s_save_mem_offset	    =	ttmp14 +var s_save_mem_offset	    =	ttmp4  var s_save_alloc_size	    =	s_save_trapsts		//conflict -var s_save_m0		    =	ttmp15 +var s_save_m0		    =	ttmp5  var s_save_ttmps_lo	    =	s_save_tmp		//no conflict  var s_save_ttmps_hi	    =	s_save_trapsts		//no conflict @@ -207,10 +207,10 @@ var s_restore_mode	    =	ttmp7  var s_restore_pc_lo	    =	ttmp0  var s_restore_pc_hi	    =	ttmp1 -var s_restore_exec_lo	    =	ttmp14 -var s_restore_exec_hi	    = 	ttmp15 -var s_restore_status	    =	ttmp4 -var s_restore_trapsts	    =	ttmp5 +var s_restore_exec_lo	    =	ttmp4 +var s_restore_exec_hi	    = 	ttmp5 +var s_restore_status	    =	ttmp14 +var s_restore_trapsts	    =	ttmp15  var s_restore_xnack_mask_lo =	xnack_mask_lo  var s_restore_xnack_mask_hi =	
xnack_mask_hi  var s_restore_buf_rsrc0	    =	ttmp8 @@ -266,10 +266,16 @@ if (!EMU_RUN_HACK)  L_HALT_WAVE:      // If STATUS.HALT is set then this fault must come from SQC instruction fetch. -    // We cannot prevent further faults so just terminate the wavefront. +    // We cannot prevent further faults. Spin wait until context saved.      s_and_b32       ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK      s_cbranch_scc0  L_NOT_ALREADY_HALTED -    s_endpgm + +L_WAIT_CTX_SAVE: +    s_sleep         0x10 +    s_getreg_b32    ttmp2, hwreg(HW_REG_TRAPSTS) +    s_and_b32       ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK +    s_cbranch_scc0  L_WAIT_CTX_SAVE +  L_NOT_ALREADY_HALTED:      s_or_b32        s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK @@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP:      // Read second-level TBA/TMA from first-level TMA and jump if available.      // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)      // ttmp12 holds SQ_WAVE_STATUS -    s_getreg_b32    ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) -    s_getreg_b32    ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) -    s_lshl_b64      [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 -    s_load_dwordx2  [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA +    s_getreg_b32    ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO) +    s_getreg_b32    ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI) +    s_lshl_b64      [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 +    s_load_dwordx2  [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA      s_waitcnt       lgkmcnt(0) -    s_load_dwordx2  [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA +    s_load_dwordx2  [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA      s_waitcnt       lgkmcnt(0)      s_and_b64       [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]      s_cbranch_scc0  L_NO_NEXT_TRAP // second-level trap handler not been set @@ -405,7 +411,7 @@ end      else      end -    // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic +    // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic      // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40      get_vgpr_size_bytes(s_save_ttmps_lo)      get_sgpr_size_bytes(s_save_ttmps_hi) @@ -413,13 +419,11 @@ end      s_add_u32	    s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo      s_addc_u32	    s_save_ttmps_hi, s_save_spi_init_hi, 0x0      s_and_b32	    s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF -    s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1 -    ack_sqc_store_workaround() -    s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1 +    s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1      ack_sqc_store_workaround() -    s_store_dword   ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1 +    s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1      ack_sqc_store_workaround() -    s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1 +    s_store_dword   ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1      ack_sqc_store_workaround()      /*	    setup Resource Contants    */ @@ -1093,7 +1097,7 @@ end      //s_setreg_b32  hwreg(HW_REG_TRAPSTS),  s_restore_trapsts	   //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore      s_setreg_b32    hwreg(HW_REG_MODE),	    s_restore_mode -    // Restore trap temporaries 6-11, 13-15 initialized by 
SPI debug dispatch logic +    // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic      // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40      get_vgpr_size_bytes(s_restore_ttmps_lo)      get_sgpr_size_bytes(s_restore_ttmps_hi) @@ -1101,10 +1105,9 @@ end      s_add_u32	    s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0      s_addc_u32	    s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0      s_and_b32	    s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF -    s_load_dwordx2  [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 -    s_load_dwordx4  [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 -    s_load_dword    ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 -    s_load_dwordx2  [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1 +    s_load_dwordx4  [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1 +    s_load_dwordx4  [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1 +    s_load_dword    ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1      s_waitcnt	    lgkmcnt(0)      //reuse s_restore_m0 as a temp register diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 083bd8114db1..ea82828fdc76 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,  		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;  	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)  		q_properties->type = KFD_QUEUE_TYPE_SDMA; +	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) +		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;  	else  		return -ENOTSUPP; @@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,  	struct kfd_process_device *pdd;  	dev = kfd_device_by_id(args->gpu_id); -	if (dev == NULL) +	if (!dev)  		return -EINVAL;  	mutex_lock(&p->mutex); @@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,  		if (args->size != kfd_doorbell_process_slice(dev))  			return -EINVAL;  		offset = kfd_get_process_doorbells(dev, p); +	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { +		if (args->size != PAGE_SIZE) +			return -EINVAL; +		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); +		if (!offset) +			return -ENOMEM;  	}  	mutex_lock(&p->mutex); @@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,  	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);  	args->mmap_offset = offset; +	/* MMIO is mapped through kfd device +	 * Generate a kfd mmap offset +	 */ +	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { +		args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id); +		args->mmap_offset <<= PAGE_SHIFT; +	} +  	return 0;  err_free: @@ -1551,6 +1567,32 @@ copy_from_user_failed:  	return err;  } +static int kfd_ioctl_alloc_queue_gws(struct file *filep, +		struct kfd_process *p, void *data) +{ +	int retval; +	struct kfd_ioctl_alloc_queue_gws_args *args = data; +	struct kfd_dev *dev; + +	if (!hws_gws_support) +		return -EINVAL; + +	dev = kfd_device_by_id(args->gpu_id); +	if (!dev) { +		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); +		return -EINVAL; +	} +	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) +		return -EINVAL; + +	mutex_lock(&p->mutex); +	retval = pqm_set_gws(&p->pqm, args->queue_id, 
args->num_gws ? dev->gws : NULL); +	mutex_unlock(&p->mutex); + +	args->first_gws = 0; +	return retval; +} +  static int kfd_ioctl_get_dmabuf_info(struct file *filep,  		struct kfd_process *p, void *data)  { @@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {  	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,  				kfd_ioctl_import_dmabuf, 0), +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, +			kfd_ioctl_alloc_queue_gws, 0),  };  #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls) @@ -1845,6 +1889,39 @@ err_i1:  	return retcode;  } +static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, +		      struct vm_area_struct *vma) +{ +	phys_addr_t address; +	int ret; + +	if (vma->vm_end - vma->vm_start != PAGE_SIZE) +		return -EINVAL; + +	address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + +	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | +				VM_DONTDUMP | VM_PFNMAP; + +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + +	pr_debug("Process %d mapping mmio page\n" +		 "     target user address == 0x%08llX\n" +		 "     physical address    == 0x%08llX\n" +		 "     vm_flags            == 0x%04lX\n" +		 "     size                == 0x%04lX\n", +		 process->pasid, (unsigned long long) vma->vm_start, +		 address, vma->vm_flags, PAGE_SIZE); + +	ret = io_remap_pfn_range(vma, +				vma->vm_start, +				address >> PAGE_SHIFT, +				PAGE_SIZE, +				vma->vm_page_prot); +	return ret; +} + +  static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)  {  	struct kfd_process *process; @@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)  		if (!dev)  			return -ENODEV;  		return kfd_reserved_mem_mmap(dev, process, vma); +	case KFD_MMAP_TYPE_MMIO: +		if (!dev) +			return -ENODEV; +		return kfd_mmio_mmap(dev, process, vma);  	}  	return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 2e7c44955f43..59f8ca4297db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {  #define polaris10_cache_info carrizo_cache_info  #define polaris11_cache_info carrizo_cache_info  #define polaris12_cache_info carrizo_cache_info +#define vegam_cache_info carrizo_cache_info  /* TODO - check & update Vega10 cache details */  #define vega10_cache_info carrizo_cache_info  #define raven_cache_info carrizo_cache_info @@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,  			if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)  				props->weight = 20;  			else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) -				props->weight = 15; +				props->weight = 15 * iolink->num_hops_xgmi;  			else  				props->weight = node_distance(id_from, id_to); @@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,  		pcache_info = polaris12_cache_info;  		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);  		break; +	case CHIP_VEGAM: +		pcache_info = vegam_cache_info; +		num_of_cache_types = ARRAY_SIZE(vegam_cache_info); +		break;  	case CHIP_VEGA10:  	case CHIP_VEGA12:  	case CHIP_VEGA20: @@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,  static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,  			struct kfd_dev *kdev, +			struct kfd_dev *peer_kdev,  			struct crat_subtype_iolink *sub_type_hdr,  			uint32_t proximity_domain_from,  			uint32_t 
proximity_domain_to) @@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,  	sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;  	sub_type_hdr->proximity_domain_from = proximity_domain_from;  	sub_type_hdr->proximity_domain_to = proximity_domain_to; +	sub_type_hdr->num_hops_xgmi = +		amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);  	return 0;  } @@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,  				(char *)sub_type_hdr +  				sizeof(struct crat_subtype_iolink));  			ret = kfd_fill_gpu_xgmi_link_to_gpu( -				&avail_size, kdev, +				&avail_size, kdev, peer_dev->gpu,  				(struct crat_subtype_iolink *)sub_type_hdr,  				proximity_domain, nid);  			if (ret < 0) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 7c3f192fe25f..d54ceebd346b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -274,7 +274,8 @@ struct crat_subtype_iolink {  	uint32_t	minimum_bandwidth_mbs;  	uint32_t	maximum_bandwidth_mbs;  	uint32_t	recommended_transfer_size; -	uint8_t		reserved2[CRAT_IOLINK_RESERVED_LENGTH]; +	uint8_t		reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1]; +	uint8_t		num_hops_xgmi;  };  /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 765b58a17dc7..9d1b026e29e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {  	.needs_iommu_device = true,  	.needs_pci_atomics = false,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {  	.needs_iommu_device = true,  	.needs_pci_atomics = false,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {  	.needs_iommu_device = true,  	.needs_pci_atomics = true,  	.num_sdma_engines = 1, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  };  #endif @@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = false,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = true,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = true,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = false,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = true,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = false,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	
.num_sdma_queues_per_engine = 2,  }; @@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = true,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = true,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0, +	.num_sdma_queues_per_engine = 2, +}; + +static const struct kfd_device_info vegam_device_info = { +	.asic_family = CHIP_VEGAM, +	.max_pasid_bits = 16, +	.max_no_of_hqd  = 24, +	.doorbell_size  = 4, +	.ih_ring_entry_size = 4 * sizeof(uint32_t), +	.event_interrupt_class = &event_interrupt_class_cik, +	.num_of_watch_points = 4, +	.mqd_size_aligned = MQD_SIZE_ALIGNED, +	.supports_cwsr = true, +	.needs_iommu_device = false, +	.needs_pci_atomics = true, +	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = false,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = false,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = false,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 2,  }; @@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = {  	.needs_iommu_device = false,  	.needs_pci_atomics = false,  	.num_sdma_engines = 2, +	.num_xgmi_sdma_engines = 0,  	.num_sdma_queues_per_engine = 8,  }; @@ -373,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = {  	{ 0x6995, &polaris12_device_info },	/* Polaris12 */  	{ 0x6997, &polaris12_device_info },	/* Polaris12 */  	{ 0x699F, &polaris12_device_info },	/* Polaris12 */ +	{ 0x694C, &vegam_device_info },		/* VegaM */ +	{ 0x694E, &vegam_device_info },		/* VegaM */ +	{ 0x694F, &vegam_device_info },		/* VegaM */  	{ 0x6860, &vega10_device_info },	/* Vega10 */  	{ 0x6861, &vega10_device_info },	/* Vega10 */  	{ 0x6862, &vega10_device_info },	/* Vega10 */ @@ -518,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  	} else  		kfd->max_proc_per_quantum = hws_max_conc_proc; +	/* Allocate global GWS that is shared by all KFD processes */ +	if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd, +			amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) { +		dev_err(kfd_device, "Could not allocate %d gws\n", +			amdgpu_amdkfd_get_num_gws(kfd->kgd)); +		goto out; +	}  	/* calculate max size of mqds needed for queues */  	size = max_num_of_queues_per_device *  			kfd->device_info->mqd_size_aligned; @@ -541,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,  			false)) {  		dev_err(kfd_device, "Could not allocate %d bytes\n", size); -		goto out; +		goto alloc_gtt_mem_failure;  	}  	dev_info(kfd_device, "Allocated %d bytes on gart\n", size); @@ -611,6 +653,9 @@ kfd_doorbell_error:  	kfd_gtt_sa_fini(kfd);  kfd_gtt_sa_init_error:  	amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); +alloc_gtt_mem_failure: +	if (hws_gws_support) +		
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);  	dev_err(kfd_device,  		"device %x:%x NOT added due to errors\n",  		kfd->pdev->vendor, kfd->pdev->device); @@ -628,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)  		kfd_doorbell_fini(kfd);  		kfd_gtt_sa_fini(kfd);  		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); +		if (hws_gws_support) +			amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);  	}  	kfree(kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ae381450601c..ece35c7a77b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,  					struct qcm_process_device *qpd);  static void deallocate_sdma_queue(struct device_queue_manager *dqm, -				unsigned int sdma_queue_id); +				struct queue *q);  static void kfd_process_hw_exception(struct work_struct *work);  static inline  enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)  { -	if (type == KFD_QUEUE_TYPE_SDMA) +	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)  		return KFD_MQD_TYPE_SDMA;  	return KFD_MQD_TYPE_CP;  } @@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)  	return dqm->dev->device_info->num_sdma_engines;  } +static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) +{ +	return dqm->dev->device_info->num_xgmi_sdma_engines; +} +  unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)  {  	return dqm->dev->device_info->num_sdma_engines  			* dqm->dev->device_info->num_sdma_queues_per_engine;  } +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) +{ +	return dqm->dev->device_info->num_xgmi_sdma_engines +			* dqm->dev->device_info->num_sdma_queues_per_engine; +} +  void program_sh_mem_settings(struct device_queue_manager *dqm,  					struct qcm_process_device *qpd)  { @@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)  		 * preserve the user mode ABI.  		 */  		q->doorbell_id = q->properties.queue_id; -	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { +	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || +			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {  		/* For SDMA queues on SOC15 with 8-byte doorbell, use static  		 * doorbell assignments based on the engine and queue id.  		 
* The doobell index distance between RLC (2*i) and (2*i+1) @@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,  	struct kfd_dev *dev = qpd->dqm->dev;  	if (!KFD_IS_SOC15(dev->device_info->asic_family) || -	    q->properties.type == KFD_QUEUE_TYPE_SDMA) +	    q->properties.type == KFD_QUEUE_TYPE_SDMA || +	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)  		return;  	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); @@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,  	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)  		retval = create_compute_queue_nocpsch(dqm, q, qpd); -	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) +	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || +			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)  		retval = create_sdma_queue_nocpsch(dqm, q, qpd);  	else  		retval = -EINVAL; @@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,  	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)  		dqm->sdma_queue_count++; +	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) +		dqm->xgmi_sdma_queue_count++;  	/*  	 * Unconditionally increment this counter, regardless of the queue's @@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,  	struct mqd_manager *mqd_mgr;  	int retval; -	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); -	if (!mqd_mgr) -		return -ENOMEM; +	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];  	retval = allocate_hqd(dqm, q);  	if (retval) @@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,  	int retval;  	struct mqd_manager *mqd_mgr; -	mqd_mgr = dqm->ops.get_mqd_manager(dqm, -		get_mqd_type_from_queue_type(q->properties.type)); -	if (!mqd_mgr) -		return -ENOMEM; +	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( +			q->properties.type)];  	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {  		deallocate_hqd(dqm, q);  	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {  		dqm->sdma_queue_count--; -		deallocate_sdma_queue(dqm, q->sdma_id); +		deallocate_sdma_queue(dqm, q); +	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { +		dqm->xgmi_sdma_queue_count--; +		deallocate_sdma_queue(dqm, q);  	} else {  		pr_debug("q->properties.type %d is invalid\n",  				q->properties.type); @@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)  		retval = -ENODEV;  		goto out_unlock;  	} -	mqd_mgr = dqm->ops.get_mqd_manager(dqm, -			get_mqd_type_from_queue_type(q->properties.type)); -	if (!mqd_mgr) { -		retval = -ENOMEM; -		goto out_unlock; -	} +	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( +			q->properties.type)];  	/*  	 * Eviction state logic: we only mark active queues as evicted  	 * to avoid the overhead of restoring inactive queues later @@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)  		}  	} else if (prev_active &&  		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || -		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) { +		    q->properties.type == KFD_QUEUE_TYPE_SDMA || +		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {  		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,  				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,  				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)  		retval = map_queues_cpsch(dqm);  	else if (q->properties.is_active &&  		 
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE || -		  q->properties.type == KFD_QUEUE_TYPE_SDMA)) { +		  q->properties.type == KFD_QUEUE_TYPE_SDMA || +		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {  		if (WARN(q->process->mm != current->mm,  			 "should only run in user thread"))  			retval = -EFAULT; @@ -571,27 +584,6 @@ out_unlock:  	return retval;  } -static struct mqd_manager *get_mqd_manager( -		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) -{ -	struct mqd_manager *mqd_mgr; - -	if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) -		return NULL; - -	pr_debug("mqd type %d\n", type); - -	mqd_mgr = dqm->mqd_mgrs[type]; -	if (!mqd_mgr) { -		mqd_mgr = mqd_manager_init(type, dqm->dev); -		if (!mqd_mgr) -			pr_err("mqd manager is NULL"); -		dqm->mqd_mgrs[type] = mqd_mgr; -	} - -	return mqd_mgr; -} -  static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,  					struct qcm_process_device *qpd)  { @@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,  	list_for_each_entry(q, &qpd->queues_list, list) {  		if (!q->properties.is_active)  			continue; -		mqd_mgr = dqm->ops.get_mqd_manager(dqm, -			get_mqd_type_from_queue_type(q->properties.type)); -		if (!mqd_mgr) { /* should not be here */ -			pr_err("Cannot evict queue, mqd mgr is NULL\n"); -			retval = -ENOMEM; -			goto out; -		} +		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( +				q->properties.type)];  		q->properties.is_evicted = true;  		q->properties.is_active = false;  		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, @@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,  	list_for_each_entry(q, &qpd->queues_list, list) {  		if (!q->properties.is_evicted)  			continue; -		mqd_mgr = dqm->ops.get_mqd_manager(dqm, -			get_mqd_type_from_queue_type(q->properties.type)); -		if (!mqd_mgr) { /* should not be here */ -			pr_err("Cannot restore queue, mqd mgr is NULL\n"); -			retval = -ENOMEM; -			goto out; -		} +		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( +				q->properties.type)];  		q->properties.is_evicted = false;  		q->properties.is_active = true;  		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, @@ -812,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm,  	retval = dqm->asic_ops.update_qpd(dqm, qpd);  	dqm->processes_count++; -	kfd_inc_compute_active(dqm->dev);  	dqm_unlock(dqm); +	/* Outside the DQM lock because under the DQM lock we can't do +	 * reclaim or take other locks that others hold while reclaiming. +	 */ +	kfd_inc_compute_active(dqm->dev); +  	return retval;  } @@ -836,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm,  			list_del(&cur->list);  			kfree(cur);  			dqm->processes_count--; -			kfd_dec_compute_active(dqm->dev);  			goto out;  		}  	} @@ -844,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm,  	retval = 1;  out:  	dqm_unlock(dqm); + +	/* Outside the DQM lock because under the DQM lock we can't do +	 * reclaim or take other locks that others hold while reclaiming. 
+	 */ +	if (!retval) +		kfd_dec_compute_active(dqm->dev); +  	return retval;  } @@ -879,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)  	INIT_LIST_HEAD(&dqm->queues);  	dqm->queue_count = dqm->next_pipe_to_allocate = 0;  	dqm->sdma_queue_count = 0; +	dqm->xgmi_sdma_queue_count = 0;  	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {  		int pipe_offset = pipe * get_queues_per_pipe(dqm); @@ -890,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)  	}  	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; -	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; +	dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; +	dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;  	return 0;  } @@ -921,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm)  }  static int allocate_sdma_queue(struct device_queue_manager *dqm, -				unsigned int *sdma_queue_id) +				struct queue *q)  {  	int bit; -	if (dqm->sdma_bitmap == 0) -		return -ENOMEM; +	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { +		if (dqm->sdma_bitmap == 0) +			return -ENOMEM; +		bit = __ffs64(dqm->sdma_bitmap); +		dqm->sdma_bitmap &= ~(1ULL << bit); +		q->sdma_id = bit; +		q->properties.sdma_engine_id = q->sdma_id % +				get_num_sdma_engines(dqm); +		q->properties.sdma_queue_id = q->sdma_id / +				get_num_sdma_engines(dqm); +	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { +		if (dqm->xgmi_sdma_bitmap == 0) +			return -ENOMEM; +		bit = __ffs64(dqm->xgmi_sdma_bitmap); +		dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); +		q->sdma_id = bit; +		/* sdma_engine_id is sdma id including +		 * both PCIe-optimized SDMAs and XGMI- +		 * optimized SDMAs. The calculation below +		 * assumes the first N engines are always +		 * PCIe-optimized ones +		 */ +		q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + +				q->sdma_id % get_num_xgmi_sdma_engines(dqm); +		q->properties.sdma_queue_id = q->sdma_id / +				get_num_xgmi_sdma_engines(dqm); +	} -	bit = ffs(dqm->sdma_bitmap) - 1; -	dqm->sdma_bitmap &= ~(1 << bit); -	*sdma_queue_id = bit; +	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); +	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);  	return 0;  }  static void deallocate_sdma_queue(struct device_queue_manager *dqm, -				unsigned int sdma_queue_id) +				struct queue *q)  { -	if (sdma_queue_id >= get_num_sdma_queues(dqm)) -		return; -	dqm->sdma_bitmap |= (1 << sdma_queue_id); +	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { +		if (q->sdma_id >= get_num_sdma_queues(dqm)) +			return; +		dqm->sdma_bitmap |= (1ULL << q->sdma_id); +	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { +		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) +			return; +		dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); +	}  }  static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, @@ -950,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,  	struct mqd_manager *mqd_mgr;  	int retval; -	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); -	if (!mqd_mgr) -		return -ENOMEM; +	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]; -	retval = allocate_sdma_queue(dqm, &q->sdma_id); +	retval = allocate_sdma_queue(dqm, q);  	if (retval)  		return retval; -	q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); -	q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); -  	retval = allocate_doorbell(qpd, q);  	if (retval)  		goto out_deallocate_sdma_queue; -	
pr_debug("SDMA id is:    %d\n", q->sdma_id); -	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); -	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); -  	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);  	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,  				&q->gart_mqd_addr, &q->properties); @@ -987,7 +1002,7 @@ out_uninit_mqd:  out_deallocate_doorbell:  	deallocate_doorbell(qpd, q);  out_deallocate_sdma_queue: -	deallocate_sdma_queue(dqm, q->sdma_id); +	deallocate_sdma_queue(dqm, q);  	return retval;  } @@ -1045,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)  	INIT_LIST_HEAD(&dqm->queues);  	dqm->queue_count = dqm->processes_count = 0;  	dqm->sdma_queue_count = 0; +	dqm->xgmi_sdma_queue_count = 0;  	dqm->active_runlist = false; -	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; +	dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; +	dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;  	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); @@ -1161,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,  	int retval;  	struct mqd_manager *mqd_mgr; -	retval = 0; - -	dqm_lock(dqm); -  	if (dqm->total_queue_count >= max_num_of_queues_per_device) {  		pr_warn("Can't create new usermode queue because %d queues were already created\n",  				dqm->total_queue_count);  		retval = -EPERM; -		goto out_unlock; +		goto out;  	} -	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { -		retval = allocate_sdma_queue(dqm, &q->sdma_id); +	if (q->properties.type == KFD_QUEUE_TYPE_SDMA || +		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { +		retval = allocate_sdma_queue(dqm, q);  		if (retval) -			goto out_unlock; -		q->properties.sdma_queue_id = -			q->sdma_id / get_num_sdma_engines(dqm); -		q->properties.sdma_engine_id = -			q->sdma_id % get_num_sdma_engines(dqm); +			goto out;  	}  	retval = allocate_doorbell(qpd, q);  	if (retval)  		goto out_deallocate_sdma_queue; -	mqd_mgr = dqm->ops.get_mqd_manager(dqm, -			get_mqd_type_from_queue_type(q->properties.type)); - -	if (!mqd_mgr) { -		retval = -ENOMEM; -		goto out_deallocate_doorbell; -	} +	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( +			q->properties.type)];  	/*  	 * Eviction state logic: we only mark active queues as evicted  	 * to avoid the overhead of restoring inactive queues later @@ -1201,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,  		q->properties.is_evicted = (q->properties.queue_size > 0 &&  					    q->properties.queue_percent > 0 &&  					    q->properties.queue_address != 0); -  	dqm->asic_ops.init_sdma_vm(dqm, q, qpd); -  	q->properties.tba_addr = qpd->tba_addr;  	q->properties.tma_addr = qpd->tma_addr;  	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, @@ -1211,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,  	if (retval)  		goto out_deallocate_doorbell; +	dqm_lock(dqm); +  	list_add(&q->list, &qpd->queues_list);  	qpd->queue_count++;  	if (q->properties.is_active) { @@ -1221,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,  	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)  		dqm->sdma_queue_count++; +	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) +		dqm->xgmi_sdma_queue_count++;  	/*  	 * Unconditionally increment this counter, regardless of the queue's  	 * type or whether the queue is active. 
@@ -1236,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,  out_deallocate_doorbell:  	deallocate_doorbell(qpd, q);  out_deallocate_sdma_queue: -	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) -		deallocate_sdma_queue(dqm, q->sdma_id); -out_unlock: -	dqm_unlock(dqm); - +	if (q->properties.type == KFD_QUEUE_TYPE_SDMA || +		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) +		deallocate_sdma_queue(dqm, q); +out:  	return retval;  } @@ -1268,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,  	return 0;  } -static int unmap_sdma_queues(struct device_queue_manager *dqm, -				unsigned int sdma_engine) +static int unmap_sdma_queues(struct device_queue_manager *dqm)  { -	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, -			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, -			sdma_engine); +	int i, retval = 0; + +	for (i = 0; i < dqm->dev->device_info->num_sdma_engines + +		dqm->dev->device_info->num_xgmi_sdma_engines; i++) { +		retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, +			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i); +		if (retval) +			return retval; +	} +	return retval;  }  /* dqm->lock mutex has to be locked before calling this function */ @@ -1309,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,  	if (!dqm->active_runlist)  		return retval; -	pr_debug("Before destroying queues, sdma queue count is : %u\n", -		dqm->sdma_queue_count); +	pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n", +		dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count); -	if (dqm->sdma_queue_count > 0) { -		unmap_sdma_queues(dqm, 0); -		unmap_sdma_queues(dqm, 1); -	} +	if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count) +		unmap_sdma_queues(dqm);  	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,  			filter, filter_param, false, 0); @@ -1379,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,  	} -	mqd_mgr = dqm->ops.get_mqd_manager(dqm, -			get_mqd_type_from_queue_type(q->properties.type)); -	if (!mqd_mgr) { -		retval = -ENOMEM; -		goto failed; -	} +	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( +			q->properties.type)];  	deallocate_doorbell(qpd, q);  	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {  		dqm->sdma_queue_count--; -		deallocate_sdma_queue(dqm, q->sdma_id); +		deallocate_sdma_queue(dqm, q); +	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { +		dqm->xgmi_sdma_queue_count--; +		deallocate_sdma_queue(dqm, q);  	}  	list_del(&q->list); @@ -1403,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,  			qpd->reset_wavefronts = true;  	} -	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); -  	/*  	 * Unconditionally decrement this counter, regardless of the queue's  	 * type @@ -1415,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,  	dqm_unlock(dqm); +	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ +	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); +  	return retval; -failed:  failed_try_destroy_debugged_queue:  	dqm_unlock(dqm); @@ -1520,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,  	struct queue *q, *next;  	struct device_process_node *cur, *next_dpn;  	int retval = 0; +	bool found = false;  	dqm_lock(dqm); @@ -1538,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,  			list_del(&cur->list);  			
kfree(cur);  			dqm->processes_count--; -			kfd_dec_compute_active(dqm->dev); +			found = true;  			break;  		}  	}  	dqm_unlock(dqm); + +	/* Outside the DQM lock because under the DQM lock we can't do +	 * reclaim or take other locks that others hold while reclaiming. +	 */ +	if (found) +		kfd_dec_compute_active(dqm->dev); +  	return retval;  } @@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm,  		goto dqm_unlock;  	} -	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); -	if (!mqd_mgr) { -		r = -ENOMEM; -		goto dqm_unlock; -	} +	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];  	if (!mqd_mgr->get_wave_state) {  		r = -EINVAL; @@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,  	struct device_process_node *cur, *next_dpn;  	enum kfd_unmap_queues_filter filter =  		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; +	bool found = false;  	retval = 0; @@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,  	list_for_each_entry(q, &qpd->queues_list, list) {  		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {  			dqm->sdma_queue_count--; -			deallocate_sdma_queue(dqm, q->sdma_id); +			deallocate_sdma_queue(dqm, q); +		} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { +			dqm->xgmi_sdma_queue_count--; +			deallocate_sdma_queue(dqm, q);  		}  		if (q->properties.is_active) @@ -1626,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,  			list_del(&cur->list);  			kfree(cur);  			dqm->processes_count--; -			kfd_dec_compute_active(dqm->dev); +			found = true;  			break;  		}  	} @@ -1638,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,  		qpd->reset_wavefronts = false;  	} -	/* lastly, free mqd resources */ +	dqm_unlock(dqm); + +	/* Outside the DQM lock because under the DQM lock we can't do +	 * reclaim or take other locks that others hold while reclaiming. +	 */ +	if (found) +		kfd_dec_compute_active(dqm->dev); + +	/* Lastly, free mqd resources. +	 * Do uninit_mqd() after dqm_unlock to avoid circular locking. 
+	 */  	list_for_each_entry_safe(q, next, &qpd->queues_list, list) { -		mqd_mgr = dqm->ops.get_mqd_manager(dqm, -			get_mqd_type_from_queue_type(q->properties.type)); -		if (!mqd_mgr) { -			retval = -ENOMEM; -			goto out; -		} +		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( +				q->properties.type)];  		list_del(&q->list);  		qpd->queue_count--;  		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);  	} -out: -	dqm_unlock(dqm); +	return retval; +} + +static int init_mqd_managers(struct device_queue_manager *dqm) +{ +	int i, j; +	struct mqd_manager *mqd_mgr; + +	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { +		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); +		if (!mqd_mgr) { +			pr_err("mqd manager [%d] initialization failed\n", i); +			goto out_free; +		} +		dqm->mqd_mgrs[i] = mqd_mgr; +	} + +	return 0; + +out_free: +	for (j = 0; j < i; j++) { +		kfree(dqm->mqd_mgrs[j]); +		dqm->mqd_mgrs[j] = NULL; +	} + +	return -ENOMEM; +} + +/* Allocate one hiq mqd (HWS) and all SDMA mqd in a contiguous chunk */ +static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) +{ +	int retval; +	struct kfd_dev *dev = dqm->dev; +	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; +	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * +		dev->device_info->num_sdma_engines * +		dev->device_info->num_sdma_queues_per_engine + +		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; + +	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size, +		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr), +		(void *)&(mem_obj->cpu_ptr), true); +  	return retval;  } @@ -1693,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)  		dqm->ops.stop = stop_cpsch;  		dqm->ops.destroy_queue = destroy_queue_cpsch;  		dqm->ops.update_queue = update_queue; -		dqm->ops.get_mqd_manager = get_mqd_manager;  		dqm->ops.register_process = register_process;  		dqm->ops.unregister_process = unregister_process;  		dqm->ops.uninitialize = uninitialize; @@ -1713,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)  		dqm->ops.create_queue = create_queue_nocpsch;  		dqm->ops.destroy_queue = destroy_queue_nocpsch;  		dqm->ops.update_queue = update_queue; -		dqm->ops.get_mqd_manager = get_mqd_manager;  		dqm->ops.register_process = register_process;  		dqm->ops.unregister_process = unregister_process;  		dqm->ops.initialize = initialize_nocpsch; @@ -1749,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)  	case CHIP_POLARIS10:  	case CHIP_POLARIS11:  	case CHIP_POLARIS12: +	case CHIP_VEGAM:  		device_queue_manager_init_vi_tonga(&dqm->asic_ops);  		break; @@ -1764,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)  		goto out_free;  	} +	if (init_mqd_managers(dqm)) +		goto out_free; + +	if (allocate_hiq_sdma_mqd(dqm)) { +		pr_err("Failed to allocate hiq sdma mqd chunk buffer\n"); +		goto out_free; +	} +  	if (!dqm->ops.initialize(dqm))  		return dqm; @@ -1772,9 +1843,17 @@ out_free:  	return NULL;  } +void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd) +{ +	WARN(!mqd, "No hiq sdma mqd chunk to free"); + +	amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem); +} +  void device_queue_manager_uninit(struct device_queue_manager *dqm)  {  	dqm->ops.uninitialize(dqm); +	deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);  	kfree(dqm);  } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index
70e38a2e23b9..88b4c007696e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -48,8 +48,6 @@ struct device_process_node {   *   * @update_queue: Queue update routine.   * - * @get_mqd_manager: Returns the mqd manager according to the mqd type. - *   * @execute_queues: Dispatches the queues list to the H/W.   *   * @register_process: This routine associates a specific process with device. @@ -97,10 +95,6 @@ struct device_queue_manager_ops {  	int	(*update_queue)(struct device_queue_manager *dqm,  				struct queue *q); -	struct mqd_manager * (*get_mqd_manager) -					(struct device_queue_manager *dqm, -					enum KFD_MQD_TYPE type); -  	int	(*register_process)(struct device_queue_manager *dqm,  					struct qcm_process_device *qpd); @@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops {  	void	(*init_sdma_vm)(struct device_queue_manager *dqm,  				struct queue *q,  				struct qcm_process_device *qpd); +	struct mqd_manager *	(*mqd_manager_init)(enum KFD_MQD_TYPE type, +				 struct kfd_dev *dev);  };  /** @@ -185,10 +181,12 @@ struct device_queue_manager {  	unsigned int		processes_count;  	unsigned int		queue_count;  	unsigned int		sdma_queue_count; +	unsigned int		xgmi_sdma_queue_count;  	unsigned int		total_queue_count;  	unsigned int		next_pipe_to_allocate;  	unsigned int		*allocated_queues; -	unsigned int		sdma_bitmap; +	uint64_t		sdma_bitmap; +	uint64_t		xgmi_sdma_bitmap;  	unsigned int		vmid_bitmap;  	uint64_t		pipelines_addr;  	struct kfd_mem_obj	*pipeline_mem; @@ -201,6 +199,7 @@ struct device_queue_manager {  	/* hw exception  */  	bool			is_hws_hang;  	struct work_struct	hw_exception_work; +	struct kfd_mem_obj	hiq_sdma_mqd;  };  void device_queue_manager_init_cik( @@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);  unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);  unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);  unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);  static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)  { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index aed4c21417bf..0d26506798cf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -48,6 +48,7 @@ void device_queue_manager_init_cik(  	asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;  	asic_ops->update_qpd = update_qpd_cik;  	asic_ops->init_sdma_vm = init_sdma_vm; +	asic_ops->mqd_manager_init = mqd_manager_init_cik;  }  void device_queue_manager_init_cik_hawaii( @@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii(  	asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;  	asic_ops->update_qpd = update_qpd_cik_hawaii;  	asic_ops->init_sdma_vm = init_sdma_vm_hawaii; +	asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;  }  static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 417515332c35..e9fe39382371 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -37,6 +37,7 @@ void device_queue_manager_init_v9(  {
asic_ops->update_qpd = update_qpd_v9;  	asic_ops->init_sdma_vm = init_sdma_vm_v9; +	asic_ops->mqd_manager_init = mqd_manager_init_v9;  }  static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index c3a5dcfe877a..3a7cb2f88366 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -54,6 +54,7 @@ void device_queue_manager_init_vi(  	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;  	asic_ops->update_qpd = update_qpd_vi;  	asic_ops->init_sdma_vm = init_sdma_vm; +	asic_ops->mqd_manager_init = mqd_manager_init_vi;  }  void device_queue_manager_init_vi_tonga( @@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga(  	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;  	asic_ops->update_qpd = update_qpd_vi_tonga;  	asic_ops->init_sdma_vm = init_sdma_vm_tonga; +	asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;  }  static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 6e1d41c5bf86..d674d4b3340f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,  		return; /* Presumably process exited. */  	memset(&memory_exception_data, 0, sizeof(memory_exception_data));  	memory_exception_data.gpu_id = dev->id; -	memory_exception_data.failure.imprecise = 1; +	memory_exception_data.failure.imprecise = true;  	/* Set failure reason */  	if (info) {  		memory_exception_data.va = (info->page_addr) << PAGE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 213ea5454d11..22a8e88b6a67 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process)  			case CHIP_POLARIS10:  			case CHIP_POLARIS11:  			case CHIP_POLARIS12: +			case CHIP_VEGAM:  				kfd_init_apertures_vi(pdd, id);  				break;  			case CHIP_VEGA10: @@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process)  	return 0;  } - - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index f1596881f20a..1cc03b3ddbb9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,  	kq->nop_packet = nop.u32all;  	switch (type) {  	case KFD_QUEUE_TYPE_DIQ: +		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ]; +		break;  	case KFD_QUEUE_TYPE_HIQ: -		kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, -						KFD_MQD_TYPE_HIQ); +		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];  		break;  	default:  		pr_err("Invalid queue type %d\n", type); @@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,  	case CHIP_POLARIS10:  	case CHIP_POLARIS11:  	case CHIP_POLARIS12: +	case CHIP_VEGAM:  		kernel_queue_init_vi(&kq->ops_asic_specific);  		break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c index 33830b1a5a54..07f02f8e4fe4 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,  	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,  					sizeof(struct pm4_mes_map_queues)); -	packet->bitfields2.alloc_format = -		alloc_format__mes_map_queues__one_per_pipe_vi;  	packet->bitfields2.num_queues = 1;  	packet->bitfields2.queue_sel =  		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;  	packet->bitfields2.engine_sel =  		engine_sel__mes_map_queues__compute_vi; +	packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;  	packet->bitfields2.queue_type =  		queue_type__mes_map_queues__normal_compute_vi; @@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,  			queue_type__mes_map_queues__debug_interface_queue_vi;  		break;  	case KFD_QUEUE_TYPE_SDMA: +	case KFD_QUEUE_TYPE_SDMA_XGMI:  		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +  				engine_sel__mes_map_queues__sdma0_vi;  		use_static = false; /* no static queues under SDMA */ @@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,  			engine_sel__mes_unmap_queues__compute;  		break;  	case KFD_QUEUE_TYPE_SDMA: +	case KFD_QUEUE_TYPE_SDMA_XGMI:  		packet->bitfields2.engine_sel =  			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;  		break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index bf20c6d32ef3..2adaf40027eb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,  	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,  					sizeof(struct pm4_mes_map_queues)); -	packet->bitfields2.alloc_format = -		alloc_format__mes_map_queues__one_per_pipe_vi;  	packet->bitfields2.num_queues = 1;  	packet->bitfields2.queue_sel =  		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; @@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,  			queue_type__mes_map_queues__debug_interface_queue_vi;  		break;  	case KFD_QUEUE_TYPE_SDMA: +	case KFD_QUEUE_TYPE_SDMA_XGMI:  		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +  				engine_sel__mes_map_queues__sdma0_vi;  		use_static = false; /* no static queues under SDMA */ @@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,  			engine_sel__mes_unmap_queues__compute;  		break;  	case KFD_QUEUE_TYPE_SDMA: +	case KFD_QUEUE_TYPE_SDMA_XGMI:  		packet->bitfields2.engine_sel =  			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;  		break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index aed9b9b82213..9307811bc427 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -23,34 +23,54 @@  #include "kfd_mqd_manager.h"  #include "amdgpu_amdkfd.h" +#include "kfd_device_queue_manager.h" -struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, -					struct kfd_dev *dev) +struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev)  { -	switch (dev->device_info->asic_family) { -	case CHIP_KAVERI: -		return mqd_manager_init_cik(type, dev); -	case CHIP_HAWAII: -		return mqd_manager_init_cik_hawaii(type, dev); -	case CHIP_CARRIZO: -		return 
mqd_manager_init_vi(type, dev); -	case CHIP_TONGA: -	case CHIP_FIJI: -	case CHIP_POLARIS10: -	case CHIP_POLARIS11: -	case CHIP_POLARIS12: -		return mqd_manager_init_vi_tonga(type, dev); -	case CHIP_VEGA10: -	case CHIP_VEGA12: -	case CHIP_VEGA20: -	case CHIP_RAVEN: -		return mqd_manager_init_v9(type, dev); -	default: -		WARN(1, "Unexpected ASIC family %u", -		     dev->device_info->asic_family); -	} +	struct kfd_mem_obj *mqd_mem_obj = NULL; + +	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); +	if (!mqd_mem_obj) +		return NULL; + +	mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem; +	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr; +	mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr; + +	return mqd_mem_obj; +} + +struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, +					struct queue_properties *q) +{ +	struct kfd_mem_obj *mqd_mem_obj = NULL; +	uint64_t offset; -	return NULL; +	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); +	if (!mqd_mem_obj) +		return NULL; + +	offset = (q->sdma_engine_id * +		dev->device_info->num_sdma_queues_per_engine + +		q->sdma_queue_id) * +		dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size; + +	offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; + +	mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem +				+ offset); +	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset; +	mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t) +				dev->dqm->hiq_sdma_mqd.cpu_ptr + offset); + +	return mqd_mem_obj; +} + +void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, +			struct kfd_mem_obj *mqd_mem_obj) +{ +	WARN_ON(!mqd_mem_obj->gtt_mem); +	kfree(mqd_mem_obj);  }  void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index f8261313ae7b..56af256a191b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -99,8 +99,16 @@ struct mqd_manager {  	struct mutex	mqd_mutex;  	struct kfd_dev	*dev; +	uint32_t mqd_size;  }; +struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev); + +struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, +					struct queue_properties *q); +void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, +				struct kfd_mem_obj *mqd_mem_obj); +  void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,  		const uint32_t *cu_mask, uint32_t cu_mask_count,  		uint32_t *se_mask); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index ae90a99909ef..6e8509ec29d9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,  		m->compute_static_thread_mgmt_se3);  } +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, +					struct queue_properties *q) +{ +	struct kfd_mem_obj *mqd_mem_obj; + +	if (q->type == KFD_QUEUE_TYPE_HIQ) +		return allocate_hiq_mqd(kfd); + +	if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd), +			&mqd_mem_obj)) +		return NULL; + +	return mqd_mem_obj; +} + +  static int init_mqd(struct mqd_manager *mm, void **mqd,  		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,  		struct queue_properties *q) @@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,  	uint64_t addr;  	struct cik_mqd *m;  	int retval; +	struct kfd_dev *kfd = mm->dev; -	retval = 
kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), -					mqd_mem_obj); - -	if (retval != 0) +	*mqd_mem_obj = allocate_mqd(kfd, q); +	if (!*mqd_mem_obj)  		return -ENOMEM;  	m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,  {  	int retval;  	struct cik_sdma_rlc_registers *m; +	struct kfd_dev *dev = mm->dev; -	retval = kfd_gtt_sa_allocate(mm->dev, -					sizeof(struct cik_sdma_rlc_registers), -					mqd_mem_obj); - -	if (retval != 0) +	*mqd_mem_obj = allocate_sdma_mqd(dev, q); +	if (!*mqd_mem_obj)  		return -ENOMEM;  	m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr; @@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,  	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);  } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, -				struct kfd_mem_obj *mqd_mem_obj) -{ -	kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -}  static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,  		    uint32_t queue_id, struct queue_properties *p, @@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,  		mqd->update_mqd = update_mqd;  		mqd->destroy_mqd = destroy_mqd;  		mqd->is_occupied = is_occupied; +		mqd->mqd_size = sizeof(struct cik_mqd);  #if defined(CONFIG_DEBUG_FS)  		mqd->debugfs_show_mqd = debugfs_show_mqd;  #endif  		break;  	case KFD_MQD_TYPE_HIQ:  		mqd->init_mqd = init_mqd_hiq; +		mqd->uninit_mqd = uninit_mqd_hiq_sdma; +		mqd->load_mqd = load_mqd; +		mqd->update_mqd = update_mqd_hiq; +		mqd->destroy_mqd = destroy_mqd; +		mqd->is_occupied = is_occupied; +		mqd->mqd_size = sizeof(struct cik_mqd); +#if defined(CONFIG_DEBUG_FS) +		mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif +		break; +	case KFD_MQD_TYPE_DIQ: +		mqd->init_mqd = init_mqd_hiq;  		mqd->uninit_mqd = uninit_mqd;  		mqd->load_mqd = load_mqd;  		mqd->update_mqd = update_mqd_hiq;  		mqd->destroy_mqd = destroy_mqd;  		mqd->is_occupied = is_occupied; +		mqd->mqd_size = sizeof(struct cik_mqd);  #if defined(CONFIG_DEBUG_FS)  		mqd->debugfs_show_mqd = debugfs_show_mqd;  #endif  		break;  	case KFD_MQD_TYPE_SDMA:  		mqd->init_mqd = init_mqd_sdma; -		mqd->uninit_mqd = uninit_mqd_sdma; +		mqd->uninit_mqd = uninit_mqd_hiq_sdma;  		mqd->load_mqd = load_mqd_sdma;  		mqd->update_mqd = update_mqd_sdma;  		mqd->destroy_mqd = destroy_mqd_sdma;  		mqd->is_occupied = is_occupied_sdma; +		mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);  #if defined(CONFIG_DEBUG_FS)  		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;  #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 9dbba609450e..4750338199b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -67,33 +67,54 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,  		m->compute_static_thread_mgmt_se3);  } -static int init_mqd(struct mqd_manager *mm, void **mqd, -			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, -			struct queue_properties *q) +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, +		struct queue_properties *q)  {  	int retval; -	uint64_t addr; -	struct v9_mqd *m; -	struct kfd_dev *kfd = mm->dev; +	struct kfd_mem_obj *mqd_mem_obj = NULL; + +	if (q->type == KFD_QUEUE_TYPE_HIQ) +		return allocate_hiq_mqd(kfd);  	/* From V9, for CWSR, the control stack is located on the next page  	 * boundary after the mqd, so we use the gtt allocation function  	 * instead of the
sub-allocation function.  	 */  	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { -		*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); -		if (!*mqd_mem_obj) -			return -ENOMEM; +		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); +		if (!mqd_mem_obj) +			return NULL;  		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,  			ALIGN(q->ctl_stack_size, PAGE_SIZE) +  				ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), -			&((*mqd_mem_obj)->gtt_mem), -			&((*mqd_mem_obj)->gpu_addr), -			(void *)&((*mqd_mem_obj)->cpu_ptr), true); -	} else -		retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), -				mqd_mem_obj); -	if (retval != 0) +			&(mqd_mem_obj->gtt_mem), +			&(mqd_mem_obj->gpu_addr), +			(void *)&(mqd_mem_obj->cpu_ptr), true); +	} else { +		retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd), +				&mqd_mem_obj); +	} + +	if (retval) { +		kfree(mqd_mem_obj); +		return NULL; +	} + +	return mqd_mem_obj; + +} + +static int init_mqd(struct mqd_manager *mm, void **mqd, +			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, +			struct queue_properties *q) +{ +	int retval; +	uint64_t addr; +	struct v9_mqd *m; +	struct kfd_dev *kfd = mm->dev; + +	*mqd_mem_obj = allocate_mqd(kfd, q); +	if (!*mqd_mem_obj)  		return -ENOMEM;  	m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,  {  	int retval;  	struct v9_sdma_mqd *m; +	struct kfd_dev *dev = mm->dev; - -	retval = kfd_gtt_sa_allocate(mm->dev, -			sizeof(struct v9_sdma_mqd), -			mqd_mem_obj); - -	if (retval != 0) +	*mqd_mem_obj = allocate_sdma_mqd(dev, q); +	if (!*mqd_mem_obj)  		return -ENOMEM;  	m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,  	return retval;  } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, -		struct kfd_mem_obj *mqd_mem_obj) -{ -	kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} -  static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,  		uint32_t pipe_id, uint32_t queue_id,  		struct queue_properties *p, struct mm_struct *mms) @@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,  		mqd->destroy_mqd = destroy_mqd;  		mqd->is_occupied = is_occupied;  		mqd->get_wave_state = get_wave_state; +		mqd->mqd_size = sizeof(struct v9_mqd);  #if defined(CONFIG_DEBUG_FS)  		mqd->debugfs_show_mqd = debugfs_show_mqd;  #endif  		break;  	case KFD_MQD_TYPE_HIQ:  		mqd->init_mqd = init_mqd_hiq; +		mqd->uninit_mqd = uninit_mqd_hiq_sdma; +		mqd->load_mqd = load_mqd; +		mqd->update_mqd = update_mqd_hiq; +		mqd->destroy_mqd = destroy_mqd; +		mqd->is_occupied = is_occupied; +		mqd->mqd_size = sizeof(struct v9_mqd); +#if defined(CONFIG_DEBUG_FS) +		mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif +		break; +	case KFD_MQD_TYPE_DIQ: +		mqd->init_mqd = init_mqd_hiq;  		mqd->uninit_mqd = uninit_mqd;  		mqd->load_mqd = load_mqd;  		mqd->update_mqd = update_mqd_hiq;  		mqd->destroy_mqd = destroy_mqd;  		mqd->is_occupied = is_occupied; +		mqd->mqd_size = sizeof(struct v9_mqd);  #if defined(CONFIG_DEBUG_FS)  		mqd->debugfs_show_mqd = debugfs_show_mqd;  #endif  		break;  	case KFD_MQD_TYPE_SDMA:  		mqd->init_mqd = init_mqd_sdma; -		mqd->uninit_mqd = uninit_mqd_sdma; +		mqd->uninit_mqd = uninit_mqd_hiq_sdma;  		mqd->load_mqd = load_mqd_sdma;  		mqd->update_mqd = update_mqd_sdma;  		mqd->destroy_mqd = destroy_mqd_sdma;  		mqd->is_occupied = is_occupied_sdma; +		mqd->mqd_size = sizeof(struct v9_sdma_mqd);  #if 
defined(CONFIG_DEBUG_FS)  		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;  #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 6469b3456f00..b550dea9b10a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,  		m->compute_static_thread_mgmt_se3);  } +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, +					struct queue_properties *q) +{ +	struct kfd_mem_obj *mqd_mem_obj; + +	if (q->type == KFD_QUEUE_TYPE_HIQ) +		return allocate_hiq_mqd(kfd); + +	if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd), +			&mqd_mem_obj)) +		return NULL; + +	return mqd_mem_obj; +} +  static int init_mqd(struct mqd_manager *mm, void **mqd,  			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,  			struct queue_properties *q) @@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,  	int retval;  	uint64_t addr;  	struct vi_mqd *m; +	struct kfd_dev *kfd = mm->dev; -	retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd), -			mqd_mem_obj); -	if (retval != 0) +	*mqd_mem_obj = allocate_mqd(kfd, q); +	if (!*mqd_mem_obj)  		return -ENOMEM;  	m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,  {  	int retval;  	struct vi_sdma_mqd *m; +	struct kfd_dev *dev = mm->dev; - -	retval = kfd_gtt_sa_allocate(mm->dev, -			sizeof(struct vi_sdma_mqd), -			mqd_mem_obj); - -	if (retval != 0) +	*mqd_mem_obj = allocate_sdma_mqd(dev, q); +	if (!*mqd_mem_obj)  		return -ENOMEM;  	m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,  	memset(m, 0, sizeof(struct vi_sdma_mqd));  	*mqd = m; -	if (gart_addr != NULL) +	if (gart_addr)  		*gart_addr = (*mqd_mem_obj)->gpu_addr;  	retval = mm->update_mqd(mm, m, q); @@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,  	return retval;  } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, -		struct kfd_mem_obj *mqd_mem_obj) -{ -	kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} -  static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,  		uint32_t pipe_id, uint32_t queue_id,  		struct queue_properties *p, struct mm_struct *mms) @@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,  		mqd->destroy_mqd = destroy_mqd;  		mqd->is_occupied = is_occupied;  		mqd->get_wave_state = get_wave_state; +		mqd->mqd_size = sizeof(struct vi_mqd);  #if defined(CONFIG_DEBUG_FS)  		mqd->debugfs_show_mqd = debugfs_show_mqd;  #endif  		break;  	case KFD_MQD_TYPE_HIQ:  		mqd->init_mqd = init_mqd_hiq; +		mqd->uninit_mqd = uninit_mqd_hiq_sdma; +		mqd->load_mqd = load_mqd; +		mqd->update_mqd = update_mqd_hiq; +		mqd->destroy_mqd = destroy_mqd; +		mqd->is_occupied = is_occupied; +		mqd->mqd_size = sizeof(struct vi_mqd); +#if defined(CONFIG_DEBUG_FS) +		mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif +		break; +	case KFD_MQD_TYPE_DIQ: +		mqd->init_mqd = init_mqd_hiq;  		mqd->uninit_mqd = uninit_mqd;  		mqd->load_mqd = load_mqd;  		mqd->update_mqd = update_mqd_hiq;  		mqd->destroy_mqd = destroy_mqd;  		mqd->is_occupied = is_occupied; +		mqd->mqd_size = sizeof(struct vi_mqd);  #if defined(CONFIG_DEBUG_FS)  		mqd->debugfs_show_mqd = debugfs_show_mqd;  #endif  		break;  	case KFD_MQD_TYPE_SDMA:  		mqd->init_mqd = init_mqd_sdma; -		mqd->uninit_mqd = 
uninit_mqd_sdma; +		mqd->uninit_mqd = uninit_mqd_hiq_sdma;  		mqd->load_mqd = load_mqd_sdma;  		mqd->update_mqd = update_mqd_sdma;  		mqd->destroy_mqd = destroy_mqd_sdma;  		mqd->is_occupied = is_occupied_sdma; +		mqd->mqd_size = sizeof(struct vi_sdma_mqd);  #if defined(CONFIG_DEBUG_FS)  		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;  #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 045a229436a0..808194663a7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,  	process_count = pm->dqm->processes_count;  	queue_count = pm->dqm->queue_count; -	compute_queue_count = queue_count - pm->dqm->sdma_queue_count; +	compute_queue_count = queue_count - pm->dqm->sdma_queue_count - +				pm->dqm->xgmi_sdma_queue_count;  	/* check if there is over subscription  	 * Note: the arbitration between the number of VMIDs and @@ -227,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)  	case CHIP_POLARIS10:  	case CHIP_POLARIS11:  	case CHIP_POLARIS12: +	case CHIP_VEGAM:  		pm->pmf = &kfd_vi_pm_funcs;  		break;  	case CHIP_VEGA10: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index f2bcf5c092ea..49ab66b703fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -176,8 +176,7 @@ struct pm4_mes_map_process {  	union {  		struct { -			uint32_t num_gws:6; -			uint32_t reserved7:1; +			uint32_t num_gws:7;  			uint32_t sdma_enable:1;  			uint32_t num_oac:4;  			uint32_t reserved8:4; @@ -255,11 +254,6 @@ enum mes_map_queues_queue_type_enum {  queue_type__mes_map_queues__low_latency_static_queue_vi = 3  }; -enum mes_map_queues_alloc_format_enum { -	alloc_format__mes_map_queues__one_per_pipe_vi = 0, -alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 -}; -  enum mes_map_queues_engine_sel_enum {  	engine_sel__mes_map_queues__compute_vi = 0,  	engine_sel__mes_map_queues__sdma0_vi = 2, @@ -277,9 +271,11 @@ struct pm4_mes_map_queues {  		struct {  			uint32_t reserved1:4;  			enum mes_map_queues_queue_sel_enum queue_sel:2; -			uint32_t reserved2:15; +			uint32_t reserved5:6; +			uint32_t gws_control_queue:1; +			uint32_t reserved2:8;  			enum mes_map_queues_queue_type_enum queue_type:3; -			enum mes_map_queues_alloc_format_enum alloc_format:2; +			uint32_t reserved3:2;  			enum mes_map_queues_engine_sel_enum engine_sel:3;  			uint32_t num_queues:3;  		} bitfields2; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h index 7c8d9b357749..5466cfe1c3cc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h @@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum {  queue_type__mes_map_queues__low_latency_static_queue_vi = 3  }; -enum mes_map_queues_alloc_format_vi_enum { -	alloc_format__mes_map_queues__one_per_pipe_vi = 0, -alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 -}; -  enum mes_map_queues_engine_sel_vi_enum {  	engine_sel__mes_map_queues__compute_vi = 0,  	engine_sel__mes_map_queues__sdma0_vi = 2, @@ -240,7 +235,7 @@ struct pm4_mes_map_queues {  			enum mes_map_queues_queue_sel_vi_enum queue_sel:2;  			uint32_t reserved2:15;  			enum mes_map_queues_queue_type_vi_enum queue_type:3; -			enum 
mes_map_queues_alloc_format_vi_enum alloc_format:2; +			uint32_t reserved3:2;  			enum mes_map_queues_engine_sel_vi_enum engine_sel:3;  			uint32_t num_queues:3;  		} bitfields2; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 487d5da337c1..b61dc53f42d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -59,6 +59,7 @@  #define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)  #define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)  #define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_MMIO	(0x0ULL << KFD_MMAP_TYPE_SHIFT)  #define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)  #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ @@ -160,6 +161,11 @@ extern int noretry;   */  extern int halt_if_hws_hang; +/* + * Whether MEC FW support GWS barriers + */ +extern bool hws_gws_support; +  enum cache_policy {  	cache_policy_coherent,  	cache_policy_noncoherent @@ -188,6 +194,7 @@ struct kfd_device_info {  	bool needs_iommu_device;  	bool needs_pci_atomics;  	unsigned int num_sdma_engines; +	unsigned int num_xgmi_sdma_engines;  	unsigned int num_sdma_queues_per_engine;  }; @@ -258,7 +265,7 @@ struct kfd_dev {  	bool interrupts_active;  	/* Debug manager */ -	struct kfd_dbgmgr           *dbgmgr; +	struct kfd_dbgmgr *dbgmgr;  	/* Firmware versions */  	uint16_t mec_fw_version; @@ -282,6 +289,9 @@ struct kfd_dev {  	/* Compute Profile ref. count */  	atomic_t compute_profile; + +	/* Global GWS resource shared b/t processes*/ +	void *gws;  };  enum kfd_mempool { @@ -329,7 +339,8 @@ enum kfd_queue_type  {  	KFD_QUEUE_TYPE_COMPUTE,  	KFD_QUEUE_TYPE_SDMA,  	KFD_QUEUE_TYPE_HIQ, -	KFD_QUEUE_TYPE_DIQ +	KFD_QUEUE_TYPE_DIQ, +	KFD_QUEUE_TYPE_SDMA_XGMI  };  enum kfd_queue_format { @@ -444,6 +455,9 @@ struct queue_properties {   *   * @device: The kfd device that created this queue.   * + * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL + * otherwise. + *   * This structure represents user mode compute queues.   * It contains all the necessary data to handle such queues.   
* @@ -465,6 +479,7 @@ struct queue {  	struct kfd_process	*process;  	struct kfd_dev		*device; +	void *gws;  };  /* @@ -475,6 +490,7 @@ enum KFD_MQD_TYPE {  	KFD_MQD_TYPE_HIQ,		/* for hiq */  	KFD_MQD_TYPE_CP,		/* for cp queues and diq */  	KFD_MQD_TYPE_SDMA,		/* for sdma queues */ +	KFD_MQD_TYPE_DIQ,		/* for diq */  	KFD_MQD_TYPE_MAX  }; @@ -819,8 +835,6 @@ void uninit_queue(struct queue *q);  void print_queue_properties(struct queue_properties *q);  void print_queue(struct queue *q); -struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, -					struct kfd_dev *dev);  struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,  		struct kfd_dev *dev);  struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, @@ -859,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,  			struct queue_properties *p);  int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,  			struct queue_properties *p); +int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, +			void *gws);  struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,  						unsigned int qid);  int pqm_get_wave_state(struct process_queue_manager *pqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index fcaaf93681ac..c2c570e6e54f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -26,6 +26,7 @@  #include "kfd_device_queue_manager.h"  #include "kfd_priv.h"  #include "kfd_kernel_queue.h" +#include "amdgpu_amdkfd.h"  static inline struct process_queue_node *get_queue_by_qid(  			struct process_queue_manager *pqm, unsigned int qid) @@ -74,6 +75,55 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)  	pdd->already_dequeued = true;  } +int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, +			void *gws) +{ +	struct kfd_dev *dev = NULL; +	struct process_queue_node *pqn; +	struct kfd_process_device *pdd; +	struct kgd_mem *mem = NULL; +	int ret; + +	pqn = get_queue_by_qid(pqm, qid); +	if (!pqn) { +		pr_err("Queue id does not match any known queue\n"); +		return -EINVAL; +	} + +	if (pqn->q) +		dev = pqn->q->device; +	if (WARN_ON(!dev)) +		return -ENODEV; + +	pdd = kfd_get_process_device_data(dev, pqm->process); +	if (!pdd) { +		pr_err("Process device data doesn't exist\n"); +		return -EINVAL; +	} + +	/* Only allow one queue per process to have GWS assigned */ +	if (gws && pdd->qpd.num_gws) +		return -EINVAL; + +	if (!gws && pdd->qpd.num_gws == 0) +		return -EINVAL; + +	if (gws) +		ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, +			gws, &mem); +	else +		ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, +			pqn->q->gws); +	if (unlikely(ret)) +		return ret; + +	pqn->q->gws = mem; +	pdd->qpd.num_gws = gws ?
amdgpu_amdkfd_get_num_gws(dev->kgd) : 0; + +	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, +							pqn->q); +} +  void kfd_process_dequeue_from_all_devices(struct kfd_process *p)  {  	struct kfd_process_device *pdd; @@ -186,8 +236,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,  	switch (type) {  	case KFD_QUEUE_TYPE_SDMA: -		if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) { -			pr_err("Over-subscription is not allowed for SDMA.\n"); +	case KFD_QUEUE_TYPE_SDMA_XGMI: +		if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count +			>= get_num_sdma_queues(dev->dqm)) || +			(type == KFD_QUEUE_TYPE_SDMA_XGMI && +			dev->dqm->xgmi_sdma_queue_count +			>= get_num_xgmi_sdma_queues(dev->dqm))) { +			pr_debug("Over-subscription is not allowed for SDMA.\n");  			retval = -EPERM;  			goto err_create_queue;  		} @@ -325,6 +380,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)  			if (retval != -ETIME)  				goto err_destroy_queue;  		} + +		if (pqn->q->gws) { +			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, +				pqn->q->gws); +			pdd->qpd.num_gws = 0; +		} +  		kfree(pqn->q->properties.cu_mask);  		pqn->q->properties.cu_mask = NULL;  		uninit_queue(pqn->q); @@ -446,6 +508,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)  			q = pqn->q;  			switch (q->properties.type) {  			case KFD_QUEUE_TYPE_SDMA: +			case KFD_QUEUE_TYPE_SDMA_XGMI:  				seq_printf(m, "  SDMA queue on device %x\n",  					   q->device->id);  				mqd_type = KFD_MQD_TYPE_SDMA; @@ -461,8 +524,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)  					   q->properties.type, q->device->id);  				continue;  			} -			mqd_mgr = q->device->dqm->ops.get_mqd_manager( -				q->device->dqm, mqd_type); +			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];  		} else if (pqn->kq) {  			q = pqn->kq->queue;  			mqd_mgr = pqn->kq->mqd_mgr; @@ -470,7 +532,6 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)  			case KFD_QUEUE_TYPE_DIQ:  				seq_printf(m, "  DIQ on device %x\n",  					   pqn->kq->dev->id); -				mqd_type = KFD_MQD_TYPE_HIQ;  				break;  			default:  				seq_printf(m, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 769dbc7be8cb..d241a8672599 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,  			dev->node_props.lds_size_in_kb);  	sysfs_show_32bit_prop(buffer, "gds_size_in_kb",  			dev->node_props.gds_size_in_kb); +	sysfs_show_32bit_prop(buffer, "num_gws", +			dev->node_props.num_gws);  	sysfs_show_32bit_prop(buffer, "wave_front_size",  			dev->node_props.wave_front_size);  	sysfs_show_32bit_prop(buffer, "array_count", @@ -476,6 +478,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,  			dev->node_props.drm_render_minor);  	sysfs_show_64bit_prop(buffer, "hive_id",  			dev->node_props.hive_id); +	sysfs_show_32bit_prop(buffer, "num_sdma_engines", +			dev->node_props.num_sdma_engines); +	sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines", +			dev->node_props.num_sdma_xgmi_engines);  	if (dev->gpu) {  		log_max_watch_addr = @@ -1078,8 +1084,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)  			local_mem_info.local_mem_size_public;  	buf[0] = gpu->pdev->devfn; -	buf[1] = gpu->pdev->subsystem_vendor; -	buf[2] = gpu->pdev->subsystem_device; +	buf[1] = gpu->pdev->subsystem_vendor | +		
(gpu->pdev->subsystem_device << 16); +	buf[2] = pci_domain_nr(gpu->pdev->bus);  	buf[3] = gpu->pdev->device;  	buf[4] = gpu->pdev->bus->number;  	buf[5] = lower_32_bits(local_mem_size); @@ -1281,6 +1288,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)  		gpu->shared_resources.drm_render_minor;  	dev->node_props.hive_id = gpu->hive_id; +	dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines; +	dev->node_props.num_sdma_xgmi_engines = +				gpu->device_info->num_xgmi_sdma_engines; +	dev->node_props.num_gws = (hws_gws_support && +		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? +		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;  	kfd_fill_mem_clk_max_info(dev);  	kfd_fill_iolink_non_crat_info(dev); @@ -1298,6 +1311,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)  	case CHIP_POLARIS10:  	case CHIP_POLARIS11:  	case CHIP_POLARIS12: +	case CHIP_VEGAM:  		pr_debug("Adding doorbell packet type capability\n");  		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<  			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 84710cfd23c2..276354aa0fcc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -65,6 +65,7 @@ struct kfd_node_properties {  	uint32_t max_waves_per_simd;  	uint32_t lds_size_in_kb;  	uint32_t gds_size_in_kb; +	uint32_t num_gws;  	uint32_t wave_front_size;  	uint32_t array_count;  	uint32_t simd_arrays_per_engine; @@ -78,6 +79,8 @@ struct kfd_node_properties {  	uint32_t max_engine_clk_fcompute;  	uint32_t max_engine_clk_ccompute;  	int32_t  drm_render_minor; +	uint32_t num_sdma_engines; +	uint32_t num_sdma_xgmi_engines;  	uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];  }; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 0c25baded852..5c826faae240 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,6 @@ config DRM_AMD_DC  	bool "AMD DC - Enable new display engine"  	default y  	select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) -	select DRM_AMD_DC_DCN1_01 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)  	help  	  Choose this option if you want to use the new display engine  	  support for AMDGPU. 
This adds required support for Vega and @@ -17,11 +16,6 @@ config DRM_AMD_DC_DCN1_0  	help  	  RV family support for display engine -config DRM_AMD_DC_DCN1_01 -	def_bool n -	help -	  RV2 family for display engine -  config DEBUG_KERNEL_DC  	bool "Enable kgdb break in DC"  	depends on DRM_AMD_DC diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 443b13ec268d..d52efe1da02e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -29,6 +29,7 @@  #include "dm_services_types.h"  #include "dc.h"  #include "dc/inc/core_types.h" +#include "dal_asic_id.h"  #include "vid.h"  #include "amdgpu.h" @@ -615,6 +616,10 @@ error:  static void amdgpu_dm_fini(struct amdgpu_device *adev)  {  	amdgpu_dm_destroy_drm_device(&adev->dm); + +	/* DC Destroy TODO: Replace destroy DAL */ +	if (adev->dm.dc) +		dc_destroy(&adev->dm.dc);  	/*  	 * TODO: pageflip, vlank interrupt  	 * @@ -629,9 +634,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)  		mod_freesync_destroy(adev->dm.freesync_module);  		adev->dm.freesync_module = NULL;  	} -	/* DC Destroy TODO: Replace destroy DAL */ -	if (adev->dm.dc) -		dc_destroy(&adev->dm.dc);  	mutex_destroy(&adev->dm.dc_lock); @@ -640,7 +642,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)  static int load_dmcu_fw(struct amdgpu_device *adev)  { -	const char *fw_name_dmcu; +	const char *fw_name_dmcu = NULL;  	int r;  	const struct dmcu_firmware_header_v1_0 *hdr; @@ -663,7 +665,14 @@ static int load_dmcu_fw(struct amdgpu_device *adev)  	case CHIP_VEGA20:  		return 0;  	case CHIP_RAVEN: -		fw_name_dmcu = FIRMWARE_RAVEN_DMCU; +#if defined(CONFIG_DRM_AMD_DC_DCN1_01) +		if (ASICREV_IS_PICASSO(adev->external_rev_id)) +			fw_name_dmcu = FIRMWARE_RAVEN_DMCU; +		else if (ASICREV_IS_RAVEN2(adev->external_rev_id)) +			fw_name_dmcu = FIRMWARE_RAVEN_DMCU; +		else +#endif +			return 0;  		break;  	default:  		DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); @@ -2584,7 +2593,7 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,  		address->type = PLN_ADDR_TYPE_GRAPHICS;  		address->grph.addr.low_part = lower_32_bits(afb->address);  		address->grph.addr.high_part = upper_32_bits(afb->address); -	} else { +	} else if (format < SURFACE_PIXEL_FORMAT_INVALID) {  		uint64_t chroma_addr = afb->address + fb->offsets[1];  		plane_size->video.luma_size.x = 0; @@ -2959,16 +2968,16 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode,  }  static enum dc_color_depth -convert_color_depth_from_display_info(const struct drm_connector *connector) +convert_color_depth_from_display_info(const struct drm_connector *connector, +				      const struct drm_connector_state *state)  { -	struct dm_connector_state *dm_conn_state = -		to_dm_connector_state(connector->state);  	uint32_t bpc = connector->display_info.bpc; -	/* TODO: Remove this when there's support for max_bpc in drm */ -	if (dm_conn_state && bpc > dm_conn_state->max_bpc) -		/* Round down to nearest even number. */ -		bpc = dm_conn_state->max_bpc - (dm_conn_state->max_bpc & 1); +	if (state) { +		bpc = state->max_bpc; +		/* Round down to the nearest even number. 
*/ +		bpc = bpc - (bpc & 1); +	}  	switch (bpc) {  	case 0: @@ -3086,11 +3095,12 @@ static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_  } -static void -fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, -					     const struct drm_display_mode *mode_in, -					     const struct drm_connector *connector, -					     const struct dc_stream_state *old_stream) +static void fill_stream_properties_from_drm_display_mode( +	struct dc_stream_state *stream, +	const struct drm_display_mode *mode_in, +	const struct drm_connector *connector, +	const struct drm_connector_state *connector_state, +	const struct dc_stream_state *old_stream)  {  	struct dc_crtc_timing *timing_out = &stream->timing;  	const struct drm_display_info *info = &connector->display_info; @@ -3113,7 +3123,7 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,  	timing_out->timing_3d_format = TIMING_3D_FORMAT_NONE;  	timing_out->display_color_depth = convert_color_depth_from_display_info( -			connector); +		connector, connector_state);  	timing_out->scan_type = SCANNING_TYPE_NODATA;  	timing_out->hdmi_vic = 0; @@ -3310,6 +3320,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,  {  	struct drm_display_mode *preferred_mode = NULL;  	struct drm_connector *drm_connector; +	const struct drm_connector_state *con_state = +		dm_state ? &dm_state->base : NULL;  	struct dc_stream_state *stream = NULL;  	struct drm_display_mode mode = *drm_mode;  	bool native_mode_found = false; @@ -3382,10 +3394,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,  	*/  	if (!scale || mode_refresh != preferred_refresh)  		fill_stream_properties_from_drm_display_mode(stream, -			&mode, &aconnector->base, NULL); +			&mode, &aconnector->base, con_state, NULL);  	else  		fill_stream_properties_from_drm_display_mode(stream, -			&mode, &aconnector->base, old_stream); +			&mode, &aconnector->base, con_state, old_stream);  	update_stream_scaling_settings(&mode, dm_state, stream); @@ -3610,9 +3622,6 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,  	} else if (property == adev->mode_info.underscan_property) {  		dm_new_state->underscan_enable = val;  		ret = 0; -	} else if (property == adev->mode_info.max_bpc_property) { -		dm_new_state->max_bpc = val; -		ret = 0;  	} else if (property == adev->mode_info.abm_level_property) {  		dm_new_state->abm_level = val;  		ret = 0; @@ -3658,9 +3667,6 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,  	} else if (property == adev->mode_info.underscan_property) {  		*val = dm_state->underscan_enable;  		ret = 0; -	} else if (property == adev->mode_info.max_bpc_property) { -		*val = dm_state->max_bpc; -		ret = 0;  	} else if (property == adev->mode_info.abm_level_property) {  		*val = dm_state->abm_level;  		ret = 0; @@ -3717,7 +3723,6 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)  		state->underscan_enable = false;  		state->underscan_hborder = 0;  		state->underscan_vborder = 0; -		state->max_bpc = 8;  		__drm_atomic_helper_connector_reset(connector, &state->base);  	} @@ -3743,7 +3748,6 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector)  	new_state->underscan_enable = state->underscan_enable;  	new_state->underscan_hborder = state->underscan_hborder;  	new_state->underscan_vborder = state->underscan_vborder; -	new_state->max_bpc = state->max_bpc;  	return &new_state->base;  } @@ -4708,6 +4712,15 @@ 
static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,  		amdgpu_dm_connector->num_modes =  				drm_add_edid_modes(connector, edid); +		/* Sort the probed modes before calling +		 * amdgpu_dm_get_native_mode(), since an EDID can have +		 * more than one preferred mode. Modes that appear +		 * later in the probed mode list could be of higher +		 * resolution and also marked preferred. For example, a +		 * 3840x2160 preferred timing in the base EDID and a +		 * 4096x2160 preferred resolution in a DID extension block later. +		 */ +		drm_mode_sort(&connector->probed_modes);  		amdgpu_dm_get_native_mode(connector);  	} else {  		amdgpu_dm_connector->num_modes = 0; @@ -4787,9 +4800,12 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,  	drm_object_attach_property(&aconnector->base.base,  				adev->mode_info.underscan_vborder_property,  				0); -	drm_object_attach_property(&aconnector->base.base, -				adev->mode_info.max_bpc_property, -				0); + +	drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16); + +	/* This defaults to the max in the range, but we want 8bpc. */ +	aconnector->base.state->max_bpc = 8; +	aconnector->base.state->max_requested_bpc = 8;  	if (connector_type == DRM_MODE_CONNECTOR_eDP &&  	    dc_is_dmcu_initialized(adev->dm.dc)) { @@ -5072,12 +5088,12 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,  	int x, y;  	int xorigin = 0, yorigin = 0; -	if (!crtc || !plane->state->fb) { -		position->enable = false; -		position->x = 0; -		position->y = 0; +	position->enable = false; +	position->x = 0; +	position->y = 0; + +	if (!crtc || !plane->state->fb)  		return 0; -	}  	if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) ||  	    (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) { @@ -5091,6 +5107,10 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,  	x = plane->state->crtc_x;  	y = plane->state->crtc_y; +	if (x <= -amdgpu_crtc->max_cursor_width || +	    y <= -amdgpu_crtc->max_cursor_height) +		return 0; +  	if (crtc->primary->state) {  		/* avivo cursor are offset into the total surface */  		x += crtc->primary->state->src_x >> 16; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 978ff14a7d45..b0ce44422e90 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -304,7 +304,6 @@ struct dm_connector_state {  	enum amdgpu_rmx_type scaling;  	uint8_t underscan_vborder;  	uint8_t underscan_hborder; -	uint8_t max_bpc;  	bool underscan_enable;  	bool freesync_capable;  	uint8_t abm_level; diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index fd5266a58297..12bc7ee66b18 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1313,6 +1313,8 @@ static enum bp_result bios_parser_get_encoder_cap_info(  			ATOM_ENCODER_CAP_RECORD_HBR3_EN) ? 1 : 0;  	info->HDMI_6GB_EN = (record->encodercaps &  			ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN) ? 1 : 0; +	info->DP_IS_USB_C = (record->encodercaps & +			ATOM_ENCODER_CAP_RECORD_USB_C_TYPE) ? 
1 : 0;  	return BP_RESULT_OK;  } diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c index 8196f3bb10c7..53deba42007a 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c @@ -57,11 +57,6 @@ bool dal_bios_parser_init_cmd_tbl_helper2(  		return true;  #if defined(CONFIG_DRM_AMD_DC_DCN1_0)  	case DCN_VERSION_1_0: -		*h = dal_cmd_tbl_helper_dce112_get_table2(); -		return true; -#endif - -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  	case DCN_VERSION_1_01:  		*h = dal_cmd_tbl_helper_dce112_get_table2();  		return true; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 18c775a950cc..4e17af2b63dc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -169,9 +169,14 @@ static bool create_links(  		link = link_create(&link_init_params);  		if (link) { -			dc->links[dc->link_count] = link; -			link->dc = dc; -			++dc->link_count; +			if (dc->config.edp_not_connected && +					link->connector_signal == SIGNAL_TYPE_EDP) { +				link_destroy(&link); +			} else { +				dc->links[dc->link_count] = link; +				link->dc = dc; +				++dc->link_count; +			}  		}  	} @@ -1136,10 +1141,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c  	/* Program all planes within new context*/  	for (i = 0; i < context->stream_count; i++) {  		const struct dc_link *link = context->streams[i]->link; -		struct dc_stream_status *status; - -		if (context->streams[i]->apply_seamless_boot_optimization) -			context->streams[i]->apply_seamless_boot_optimization = false;  		if (!context->streams[i]->mode_changed)  			continue; @@ -1164,9 +1165,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c  			}  		} -		status = dc_stream_get_status_from_state(context, context->streams[i]); -		context->streams[i]->out.otg_offset = status->primary_otg_inst; -  		CONN_MSG_MODE(link, "{%dx%d, %dx%d@%dKhz}",  				context->streams[i]->timing.h_addressable,  				context->streams[i]->timing.v_addressable, @@ -1331,71 +1329,94 @@ static bool is_surface_in_context(  static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u)  {  	union surface_update_flags *update_flags = &u->surface->update_flags; +	enum surface_update_type update_type = UPDATE_TYPE_FAST;  	if (!u->plane_info)  		return UPDATE_TYPE_FAST; -	if (u->plane_info->color_space != u->surface->color_space) +	if (u->plane_info->color_space != u->surface->color_space) {  		update_flags->bits.color_space_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_MED); +	} -	if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) +	if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) {  		update_flags->bits.horizontal_mirror_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_MED); +	} -	if (u->plane_info->rotation != u->surface->rotation) +	if (u->plane_info->rotation != u->surface->rotation) {  		update_flags->bits.rotation_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_FULL); +	} -	if (u->plane_info->format != u->surface->format) +	if (u->plane_info->format != u->surface->format) {  		update_flags->bits.pixel_format_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_FULL); +	} -	if (u->plane_info->stereo_format != u->surface->stereo_format) +	if 
(u->plane_info->stereo_format != u->surface->stereo_format) {  		update_flags->bits.stereo_format_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_FULL); +	} -	if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) +	if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) {  		update_flags->bits.per_pixel_alpha_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_MED); +	} -	if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) +	if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) {  		update_flags->bits.global_alpha_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_MED); +	} + +	if (u->plane_info->sdr_white_level != u->surface->sdr_white_level) { +		update_flags->bits.sdr_white_level = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_MED); +	}  	if (u->plane_info->dcc.enable != u->surface->dcc.enable  			|| u->plane_info->dcc.grph.independent_64b_blks != u->surface->dcc.grph.independent_64b_blks -			|| u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) +			|| u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) {  		update_flags->bits.dcc_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_MED); +	}  	if (resource_pixel_format_to_bpp(u->plane_info->format) != -			resource_pixel_format_to_bpp(u->surface->format)) +			resource_pixel_format_to_bpp(u->surface->format)) {  		/* different bytes per element will require full bandwidth  		 * and DML calculation  		 */  		update_flags->bits.bpp_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_FULL); +	}  	if (u->plane_info->plane_size.grph.surface_pitch != u->surface->plane_size.grph.surface_pitch  			|| u->plane_info->plane_size.video.luma_pitch != u->surface->plane_size.video.luma_pitch -			|| u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) +			|| u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) {  		update_flags->bits.plane_size_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_MED); +	}  	if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info,  			sizeof(union dc_tiling_info)) != 0) {  		update_flags->bits.swizzle_change = 1; +		elevate_update_type(&update_type, UPDATE_TYPE_MED); +  		/* todo: below are HW dependent, we should add a hook to  		 * DCE/N resource and validated there.  		 */ -		if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) +		if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) {  			/* swizzled mode requires RQ to be setup properly,  			 * thus need to run DML to calculate RQ settings  			 */  			update_flags->bits.bandwidth_change = 1; +			elevate_update_type(&update_type, UPDATE_TYPE_FULL); +		}  	} -	if (update_flags->bits.rotation_change -			|| update_flags->bits.stereo_format_change -			|| update_flags->bits.pixel_format_change -			|| update_flags->bits.bpp_change -			|| update_flags->bits.bandwidth_change -			|| update_flags->bits.output_tf_change) -		return UPDATE_TYPE_FULL; - -	return update_flags->raw ? UPDATE_TYPE_MED : UPDATE_TYPE_FAST; +	/* This should be UPDATE_TYPE_FAST if nothing has changed. 
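+	 *
+	 * (Editorial aside, not part of the patch: elevate_update_type(), used
+	 * throughout this function, is just a monotonic max over the enum,
+	 * which dc.h orders as UPDATE_TYPE_FAST < UPDATE_TYPE_MED <
+	 * UPDATE_TYPE_FULL. A minimal sketch of that helper:
+	 *
+	 *	static void elevate_update_type(enum surface_update_type *original,
+	 *					enum surface_update_type new)
+	 *	{
+	 *		if (new > *original)
+	 *			*original = new;
+	 *	}
+	 *
+	 * so the value returned below is the most severe type any single
+	 * plane-info change demanded.)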
*/ +	return update_type;  }  static enum surface_update_type get_scaling_info_update_type( @@ -1475,6 +1496,9 @@ static enum surface_update_type det_surface_update(const struct dc *dc,  	type = get_scaling_info_update_type(u);  	elevate_update_type(&overall_type, type); +	if (u->flip_addr) +		update_flags->bits.addr_update = 1; +  	if (u->in_transfer_func)  		update_flags->bits.in_transfer_func_change = 1; @@ -1792,10 +1816,15 @@ static void commit_planes_for_stream(struct dc *dc,  	if (dc->optimize_seamless_boot && surface_count > 0) {  		/* Optimize seamless boot flag keeps clocks and watermarks high until  		 * first flip. After first flip, optimization is required to lower -		 * bandwidth. +		 * bandwidth. Important to note that it is expected UEFI will +		 * only light up a single display on POST, therefore we only expect +		 * one stream with seamless boot flag set.  		 */ -		dc->optimize_seamless_boot = false; -		dc->optimized_required = true; +		if (stream->apply_seamless_boot_optimization) { +			stream->apply_seamless_boot_optimization = false; +			dc->optimize_seamless_boot = false; +			dc->optimized_required = true; +		}  	}  	if (update_type == UPDATE_TYPE_FULL && !dc->optimize_seamless_boot) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 83d121510ef5..ca50ede37183 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -45,8 +45,10 @@ enum dc_color_space_type {  	COLOR_SPACE_RGB_LIMITED_TYPE,  	COLOR_SPACE_YCBCR601_TYPE,  	COLOR_SPACE_YCBCR709_TYPE, +	COLOR_SPACE_YCBCR2020_TYPE,  	COLOR_SPACE_YCBCR601_LIMITED_TYPE, -	COLOR_SPACE_YCBCR709_LIMITED_TYPE +	COLOR_SPACE_YCBCR709_LIMITED_TYPE, +	COLOR_SPACE_YCBCR709_BLACK_TYPE,  };  static const struct tg_color black_color_format[] = { @@ -80,7 +82,6 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = {  	{ COLOR_SPACE_YCBCR709_TYPE,  		{ 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA,  				0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} }, -  	/* TODO: correct values below */  	{ COLOR_SPACE_YCBCR601_LIMITED_TYPE,  		{ 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991, @@ -88,6 +89,12 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = {  	{ COLOR_SPACE_YCBCR709_LIMITED_TYPE,  		{ 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3,  				0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} }, +	{ COLOR_SPACE_YCBCR2020_TYPE, +		{ 0x1000, 0xF149, 0xFEB7, 0x0000, 0x0868, 0x15B2, +				0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} }, +	{ COLOR_SPACE_YCBCR709_BLACK_TYPE, +		{ 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, +				0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} },  };  static bool is_rgb_type( @@ -149,6 +156,16 @@ static bool is_ycbcr709_type(  	return ret;  } +static bool is_ycbcr2020_type( +	enum dc_color_space color_space) +{ +	bool ret = false; + +	if (color_space == COLOR_SPACE_2020_YCBCR) +		ret = true; +	return ret; +} +  static bool is_ycbcr709_limited_type(  		enum dc_color_space color_space)  { @@ -174,7 +191,12 @@ enum dc_color_space_type get_color_space_type(enum dc_color_space color_space)  		type = COLOR_SPACE_YCBCR601_LIMITED_TYPE;  	else if (is_ycbcr709_limited_type(color_space))  		type = COLOR_SPACE_YCBCR709_LIMITED_TYPE; - +	else if (is_ycbcr2020_type(color_space)) +		type = COLOR_SPACE_YCBCR2020_TYPE; +	else if (color_space == COLOR_SPACE_YCBCR709) +		type = COLOR_SPACE_YCBCR709_BLACK_TYPE; +	else if (color_space == COLOR_SPACE_YCBCR709_BLACK) 
+	type = COLOR_SPACE_YCBCR709_BLACK_TYPE;  	return type;  } @@ -206,6 +228,7 @@ void color_space_to_black_color(  	switch (colorspace) {  	case COLOR_SPACE_YCBCR601:  	case COLOR_SPACE_YCBCR709: +	case COLOR_SPACE_YCBCR709_BLACK:  	case COLOR_SPACE_YCBCR601_LIMITED:  	case COLOR_SPACE_YCBCR709_LIMITED:  	case COLOR_SPACE_2020_YCBCR: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index b37ecc3ede61..e7236539f867 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -704,6 +704,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)  	if (new_connection_type != dc_connection_none) {  		link->type = new_connection_type; +		link->link_state_valid = false;  		/* From Disconnected-to-Connected. */  		switch (link->connector_signal) { @@ -906,10 +907,10 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)  			sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;  		/* Connectivity log: detection */ -		for (i = 0; i < sink->dc_edid.length / EDID_BLOCK_SIZE; i++) { +		for (i = 0; i < sink->dc_edid.length / DC_EDID_BLOCK_SIZE; i++) {  			CONN_DATA_DETECT(link, -					&sink->dc_edid.raw_edid[i * EDID_BLOCK_SIZE], -					EDID_BLOCK_SIZE, +					&sink->dc_edid.raw_edid[i * DC_EDID_BLOCK_SIZE], +					DC_EDID_BLOCK_SIZE,  					"%s: [Block %d] ", sink->edid_caps.display_name, i);  		} @@ -2631,6 +2632,8 @@ void core_link_enable_stream(  			stream->phy_pix_clk,  			pipe_ctx->stream_res.audio != NULL); +	pipe_ctx->stream->link->link_state_valid = true; +  	if (dc_is_dvi_signal(pipe_ctx->stream->signal))  		pipe_ctx->stream_res.stream_enc->funcs->dvi_set_stream_attribute(  			pipe_ctx->stream_res.stream_enc, @@ -2713,17 +2716,37 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option)  {  	struct dc  *core_dc = pipe_ctx->stream->ctx->dc;  	struct dc_stream_state *stream = pipe_ctx->stream; +	struct dc_link *link = stream->sink->link;  	core_dc->hwss.blank_stream(pipe_ctx);  	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)  		deallocate_mst_payload(pipe_ctx); -	if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) -		dal_ddc_service_write_scdc_data( -			stream->link->ddc, 0, -			stream->timing.flags.LTE_340MCSC_SCRAMBLE); +	if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) { +		struct ext_hdmi_settings settings = {0}; +		enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id; +		unsigned short masked_chip_caps = link->chip_caps & +				EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; +		// Need to inform the sink that it is going to be driven in legacy HDMI mode. +		dal_ddc_service_write_scdc_data( +			link->ddc, +			165000, // VBIOS only handles 165 MHz.
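+			// (Editorial aside, not part of the patch: the rule this call
+			// encodes is roughly the following -- a sketch, not the actual
+			// dal_ddc implementation, with 340 MHz being the HDMI 2.0
+			// scrambling threshold:
+			//
+			//	static bool wants_scrambling(int pix_clk_khz,
+			//				     bool lte_340mcsc_scramble)
+			//	{
+			//		return pix_clk_khz > 340000 || lte_340mcsc_scramble;
+			//	}
+			//
+			// Passing 165 MHz with scrambling off therefore parks the sink
+			// in legacy HDMI 1.4 signalling before the retimer/redriver
+			// writes below.)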
+			false); +		if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { +			/* DP159, Retimer settings */ +			if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings)) +				write_i2c_retimer_setting(pipe_ctx, +						false, false, &settings); +			else +				write_i2c_default_retimer_setting(pipe_ctx, +						false, false); +		} else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { +			/* PI3EQX1204, Redriver settings */ +			write_i2c_redriver_setting(pipe_ctx, false); +		} +	}  	core_dc->hwss.disable_stream(pipe_ctx, option);  	disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index eac7186e4f08..b7952f39f3fc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -93,10 +93,8 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)  #if defined(CONFIG_DRM_AMD_DC_DCN1_0)  	case FAMILY_RV:  		dc_version = DCN_VERSION_1_0; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  		if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev))  			dc_version = DCN_VERSION_1_01; -#endif  		break;  #endif  	default: @@ -147,9 +145,7 @@ struct resource_pool *dc_create_resource_pool(struct dc  *dc,  #if defined(CONFIG_DRM_AMD_DC_DCN1_0)  	case DCN_VERSION_1_0: -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  	case DCN_VERSION_1_01: -#endif  		res_pool = dcn10_create_resource_pool(init_data, dc);  		break;  #endif @@ -1184,24 +1180,27 @@ static int acquire_first_split_pipe(  	int i;  	for (i = 0; i < pool->pipe_count; i++) { -		struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - -		if (pipe_ctx->top_pipe && -				pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) { -			pipe_ctx->top_pipe->bottom_pipe = pipe_ctx->bottom_pipe; -			if (pipe_ctx->bottom_pipe) -				pipe_ctx->bottom_pipe->top_pipe = pipe_ctx->top_pipe; - -			memset(pipe_ctx, 0, sizeof(*pipe_ctx)); -			pipe_ctx->stream_res.tg = pool->timing_generators[i]; -			pipe_ctx->plane_res.hubp = pool->hubps[i]; -			pipe_ctx->plane_res.ipp = pool->ipps[i]; -			pipe_ctx->plane_res.dpp = pool->dpps[i]; -			pipe_ctx->stream_res.opp = pool->opps[i]; -			pipe_ctx->plane_res.mpcc_inst = pool->dpps[i]->inst; -			pipe_ctx->pipe_idx = i; - -			pipe_ctx->stream = stream; +		struct pipe_ctx *split_pipe = &res_ctx->pipe_ctx[i]; + +		if (split_pipe->top_pipe && !dc_res_is_odm_head_pipe(split_pipe) && +				split_pipe->top_pipe->plane_state == split_pipe->plane_state) { +			split_pipe->top_pipe->bottom_pipe = split_pipe->bottom_pipe; +			if (split_pipe->bottom_pipe) +				split_pipe->bottom_pipe->top_pipe = split_pipe->top_pipe; + +			if (split_pipe->top_pipe->plane_state) +				resource_build_scaling_params(split_pipe->top_pipe); + +			memset(split_pipe, 0, sizeof(*split_pipe)); +			split_pipe->stream_res.tg = pool->timing_generators[i]; +			split_pipe->plane_res.hubp = pool->hubps[i]; +			split_pipe->plane_res.ipp = pool->ipps[i]; +			split_pipe->plane_res.dpp = pool->dpps[i]; +			split_pipe->stream_res.opp = pool->opps[i]; +			split_pipe->plane_res.mpcc_inst = pool->dpps[i]->inst; +			split_pipe->pipe_idx = i; + +			split_pipe->stream = stream;  			return i;  		}  	} @@ -1647,46 +1646,6 @@ static int acquire_first_free_pipe(  	return -1;  } -static struct stream_encoder *find_first_free_match_stream_enc_for_link( -		struct resource_context *res_ctx, -		const struct resource_pool *pool, -		struct dc_stream_state *stream) -{ -	int i; -	int j = -1; -	struct 
dc_link *link = stream->link; - -	for (i = 0; i < pool->stream_enc_count; i++) { -		if (!res_ctx->is_stream_enc_acquired[i] && -				pool->stream_enc[i]) { -			/* Store first available for MST second display -			 * in daisy chain use case */ -			j = i; -			if (pool->stream_enc[i]->id == -					link->link_enc->preferred_engine) -				return pool->stream_enc[i]; -		} -	} - -	/* -	 * below can happen in cases when stream encoder is acquired: -	 * 1) for second MST display in chain, so preferred engine already -	 * acquired; -	 * 2) for another link, which preferred engine already acquired by any -	 * MST configuration. -	 * -	 * If signal is of DP type and preferred engine not found, return last available -	 * -	 * TODO - This is just a patch up and a generic solution is -	 * required for non DP connectors. -	 */ - -	if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT) -		return pool->stream_enc[j]; - -	return NULL; -} -  static struct audio *find_first_free_audio(  		struct resource_context *res_ctx,  		const struct resource_pool *pool, @@ -1998,7 +1957,7 @@ enum dc_status resource_map_pool_resources(  	pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];  	pipe_ctx->stream_res.stream_enc = -		find_first_free_match_stream_enc_for_link( +		dc->res_pool->funcs->find_first_free_match_stream_enc_for_link(  			&context->res_ctx, pool, stream);  	if (!pipe_ctx->stream_res.stream_enc) @@ -2354,7 +2313,18 @@ static void set_avi_info_frame(  			break;  		}  	} +	/* If VIC >= 128, the Source shall use AVI InfoFrame Version 3*/  	hdmi_info.bits.VIC0_VIC7 = vic; +	if (vic >= 128) +		hdmi_info.bits.header.version = 3; +	/* If (C1, C0)=(1, 1) and (EC2, EC1, EC0)=(1, 1, 1), +	 * the Source shall use AVI InfoFrame Version 4 +	 */ +	if (hdmi_info.bits.C0_C1 == COLORIMETRY_EXTENDED && +			hdmi_info.bits.EC0_EC2 == COLORIMETRYEX_RESERVED) { +		hdmi_info.bits.header.version = 4; +		hdmi_info.bits.header.length = 14; +	}  	/* pixel repetition  	 * PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel @@ -2373,12 +2343,19 @@ static void set_avi_info_frame(  	hdmi_info.bits.bar_right = (stream->timing.h_total  			- stream->timing.h_border_right + 1); +    /* Additional Colorimetry Extension +     * Used in conjunction with C0-C1 and EC0-EC2 +     * 0 = DCI-P3 RGB (D65) +     * 1 = DCI-P3 RGB (theater) +     */ +	hdmi_info.bits.ACE0_ACE3 = 0; +  	/* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */  	check_sum = &hdmi_info.packet_raw_data.sb[0]; -	*check_sum = HDMI_INFOFRAME_TYPE_AVI + HDMI_AVI_INFOFRAME_SIZE + 2; +	*check_sum = HDMI_INFOFRAME_TYPE_AVI + hdmi_info.bits.header.length + hdmi_info.bits.header.version; -	for (byte_index = 1; byte_index <= HDMI_AVI_INFOFRAME_SIZE; byte_index++) +	for (byte_index = 1; byte_index <= hdmi_info.bits.header.length; byte_index++)  		*check_sum += hdmi_info.packet_raw_data.sb[byte_index];  	/* one byte complement */ @@ -2425,21 +2402,6 @@ static void set_avi_info_frame(  			break;  		}  	} -static void set_dp_sdp_info_packet( -		struct dc_info_packet *info_packet, -		struct dc_stream_state *stream) -{ -	/* SPD info packet for custom sdp message */ - -	/* Return if false. 
If true, -	 * set the corresponding bit in the info packet -	 */ -	if (!stream->dpsdp_infopacket.valid) -		return; - -	*info_packet = stream->dpsdp_infopacket; -} -  static void set_hdr_static_info_packet(  		struct dc_info_packet *info_packet,  		struct dc_stream_state *stream) @@ -2495,7 +2457,6 @@ void dc_resource_state_copy_construct(  		if (cur_pipe->bottom_pipe)  			cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; -  	}  	for (i = 0; i < dst_ctx->stream_count; i++) { @@ -2536,7 +2497,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)  	info->spd.valid = false;  	info->hdrsmd.valid = false;  	info->vsc.valid = false; -	info->dpsdp.valid = false;  	signal = pipe_ctx->stream->signal; @@ -2556,8 +2516,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)  		set_spd_info_packet(&info->spd, pipe_ctx->stream);  		set_hdr_static_info_packet(&info->hdrsmd, pipe_ctx->stream); - -		set_dp_sdp_info_packet(&info->dpsdp, pipe_ctx->stream);  	}  	patch_gamut_packet_checksum(&info->gamut); @@ -2644,6 +2602,10 @@ bool pipe_need_reprogram(  	if (is_vsc_info_packet_changed(pipe_ctx_old->stream, pipe_ctx->stream))  		return true; +	if (false == pipe_ctx_old->stream->link->link_state_valid && +		false == pipe_ctx_old->stream->dpms_off) +		return true; +  	return false;  } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 96e97d25d639..b723ffc8ea25 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -47,8 +47,8 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink)  	if (dc_is_dvi_signal(stream->signal)) {  		if (stream->ctx->dc->caps.dual_link_dvi && -		    (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK && -		    sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) +			(stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK && +			sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)  			stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK;  		else  			stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK; @@ -371,42 +371,12 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream)  	return 0;  } -static void build_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx, -		const uint8_t  *custom_sdp_message, -		unsigned int sdp_message_size) -{ -	uint8_t i; -	struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame; - -	/* set valid info */ -	info->dpsdp.valid = true; - -	/* set sdp message header */ -	info->dpsdp.hb0 = custom_sdp_message[0]; /* package id */ -	info->dpsdp.hb1 = custom_sdp_message[1]; /* package type */ -	info->dpsdp.hb2 = custom_sdp_message[2]; /* package specific byte 0 any data */ -	info->dpsdp.hb3 = custom_sdp_message[3]; /* package specific byte 0 any data */ - -	/* set sdp message data */ -	for (i = 0; i < 32; i++) -		info->dpsdp.sb[i] = (custom_sdp_message[i+4]); - -} - -static void invalid_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx) -{ -	struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame; - -	/* in-valid info */ -	info->dpsdp.valid = false; -} -  bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,  		const uint8_t *custom_sdp_message,  		unsigned int sdp_message_size)  {  	int i; -	struct dc  *core_dc; +	struct dc  *dc;  	struct resource_context *res_ctx;  	if (stream == NULL) { @@ -414,8 +384,8 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,  		return false;  	} -	core_dc = stream->ctx->dc; 
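/*
 * (Editorial aside, not part of the patch: the AVI infoframe checksum
 * computed in set_avi_info_frame() a little earlier (dc_resource.c) is the
 * usual CEA-861 one-byte complement -- type, version, length and payload
 * must sum to 0 mod 256. A self-contained sketch of the same arithmetic;
 * infoframe_checksum() is illustrative, not a DC helper:
 *
 *	static uint8_t infoframe_checksum(uint8_t type, uint8_t version,
 *					  uint8_t length, const uint8_t *sb)
 *	{
 *		uint8_t sum = type + version + length;
 *		int i;
 *
 *		for (i = 1; i <= length; i++)
 *			sum += sb[i];	// sb[0] holds the checksum itself
 *		return 0x100 - sum;	// one-byte complement
 *	}
 *
 * e.g. an all-zero 13-byte payload with type 0x82, version 2, length 13
 * checksums to 0x100 - 0x91 = 0x6F.)
 */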
-	res_ctx = &core_dc->current_state->res_ctx; +	dc = stream->ctx->dc; +	res_ctx = &dc->current_state->res_ctx;  	for (i = 0; i < MAX_PIPES; i++) {  		struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; @@ -423,11 +393,14 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,  		if (pipe_ctx->stream != stream)  			continue; -		build_dp_sdp_info_frame(pipe_ctx, custom_sdp_message, sdp_message_size); - -		core_dc->hwss.update_info_frame(pipe_ctx); +		if (dc->hwss.send_immediate_sdp_message != NULL) +			dc->hwss.send_immediate_sdp_message(pipe_ctx, +								custom_sdp_message, +								sdp_message_size); +		else +			DC_LOG_WARNING("%s:send_immediate_sdp_message not implemented on this ASIC\n", +			__func__); -		invalid_dp_sdp_info_frame(pipe_ctx);  	}  	return true; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 44e4b0465587..566111ff463e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@  #include "inc/hw/dmcu.h"  #include "dml/display_mode_lib.h" -#define DC_VER "3.2.27" +#define DC_VER "3.2.31"  #define MAX_SURFACES 3  #define MAX_PLANES 6 @@ -205,6 +205,7 @@ struct dc_config {  	bool disable_fractional_pwm;  	bool allow_seamless_boot_optimization;  	bool power_down_display_on_boot; +	bool edp_not_connected;  };  enum visual_confirm { @@ -540,12 +541,14 @@ struct dc_plane_status {  union surface_update_flags {  	struct { +		uint32_t addr_update:1;  		/* Medium updates */  		uint32_t dcc_change:1;  		uint32_t color_space_change:1;  		uint32_t horizontal_mirror_change:1;  		uint32_t per_pixel_alpha_change:1;  		uint32_t global_alpha_change:1; +		uint32_t sdr_white_level:1;  		uint32_t rotation_change:1;  		uint32_t swizzle_change:1;  		uint32_t scaling_change:1; diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 5e6c5eff49cf..2d0acf109360 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -297,7 +297,7 @@ void generic_reg_wait(const struct dc_context *ctx,  	int i;  	/* something is terribly wrong if time out is > 200ms. (5Hz) */ -	ASSERT(delay_between_poll_us * time_out_num_tries <= 200000); +	ASSERT(delay_between_poll_us * time_out_num_tries <= 3000000);  	for (i = 0; i <= time_out_num_tries; i++) {  		if (i) { diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index da55d623647a..c91b8aad78c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -534,6 +534,7 @@ enum dc_color_space {  	COLOR_SPACE_DOLBYVISION,  	COLOR_SPACE_APPCTRL,  	COLOR_SPACE_CUSTOMPOINTS, +	COLOR_SPACE_YCBCR709_BLACK,  };  enum dc_dither_option { diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 7b9429e30d82..094009127e25 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -75,6 +75,7 @@ struct dc_link {  	enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse  */  	bool is_hpd_filter_disabled;  	bool dp_ss_off; +	bool link_state_valid;  	/* caps is the same as reported_link_cap. link_training uses  	 * reported_link_cap. Will clean up.  
TODO diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 6c2a3d9a4c2e..92a670894c05 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -104,7 +104,7 @@ struct dc_context {  #define DC_MAX_EDID_BUFFER_SIZE 1024 -#define EDID_BLOCK_SIZE 128 +#define DC_EDID_BLOCK_SIZE 128  #define MAX_SURFACE_NUM 4  #define NUM_PIXEL_FORMATS 10 diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index da96229db53a..2959c3c9390b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -473,6 +473,8 @@ void dce_abm_destroy(struct abm **abm)  {  	struct dce_abm *abm_dce = TO_DCE_ABM(*abm); +	abm_dce->base.funcs->set_abm_immediate_disable(*abm); +  	kfree(abm_dce);  	*abm = NULL;  } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index 963686380738..6b2e207777f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -241,6 +241,7 @@ static enum dm_pp_clocks_state dce_get_required_clocks_state(  	return low_req_clk;  } +/* TODO: remove this; use the two broken-down functions below instead */  static int dce_set_clock(  	struct clk_mgr *clk_mgr,  	int requested_clk_khz) @@ -336,6 +337,75 @@ int dce112_set_clock(struct clk_mgr *clk_mgr, int requested_clk_khz)  	return actual_clock;  } +int dce112_set_dispclk(struct clk_mgr *clk_mgr, int requested_clk_khz) +{ +	struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); +	struct bp_set_dce_clock_parameters dce_clk_params; +	struct dc_bios *bp = clk_mgr->ctx->dc_bios; +	struct dc *core_dc = clk_mgr->ctx->dc; +	struct dmcu *dmcu = core_dc->res_pool->dmcu; +	int actual_clock = requested_clk_khz; +	/* Prepare to program display clock*/ +	memset(&dce_clk_params, 0, sizeof(dce_clk_params)); + +	/* Make sure requested clock isn't lower than minimum threshold*/ +	if (requested_clk_khz > 0) +		requested_clk_khz = max(requested_clk_khz, +				clk_mgr_dce->dentist_vco_freq_khz / 62); + +	dce_clk_params.target_clock_frequency = requested_clk_khz; +	dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; +	dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK; + +	bp->funcs->set_dce_clock(bp, &dce_clk_params); +	actual_clock = dce_clk_params.target_clock_frequency; + +	/* +	 * Coming out of power down (HWReset), we need to mark the clock state +	 * as ClocksStateNominal, so that on resume we will call the pplib +	 * voltage regulator. 
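+	 *
+	 * (Editorial aside, not part of the patch: the max() clamp above keeps
+	 * DISPCLK at or above dentist_vco_freq_khz / 62, 62 apparently being
+	 * the largest divider the DENTIST DFS will program here. As a worked
+	 * example, assuming a 3.6 GHz VCO:
+	 *
+	 *	3600000 kHz / 62 = 58064 kHz
+	 *
+	 * so no request can pull DISPCLK below roughly 58 MHz.)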
+	 */ +	if (requested_clk_khz == 0) +		clk_mgr_dce->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; + + +	if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) { +		if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) { +			if (clk_mgr_dce->dfs_bypass_disp_clk != actual_clock) +				dmcu->funcs->set_psr_wait_loop(dmcu, +						actual_clock / 1000 / 7); +		} +	} + +	clk_mgr_dce->dfs_bypass_disp_clk = actual_clock; +	return actual_clock; + +} + +int dce112_set_dprefclk(struct clk_mgr *clk_mgr) +{ +	struct bp_set_dce_clock_parameters dce_clk_params; +	struct dc_bios *bp = clk_mgr->ctx->dc_bios; + +	memset(&dce_clk_params, 0, sizeof(dce_clk_params)); + +	/*Program DP ref Clock*/ +	/*VBIOS will determine DPREFCLK frequency, so we don't set it*/ +	dce_clk_params.target_clock_frequency = 0; +	dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; +	dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK; +	if (!ASICREV_IS_VEGA20_P(clk_mgr->ctx->asic_id.hw_internal_rev)) +		dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = +			(dce_clk_params.pll_id == +					CLOCK_SOURCE_COMBO_DISPLAY_PLL0); +	else +		dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false; + +	bp->funcs->set_dce_clock(bp, &dce_clk_params); + +	/* Returns the dp_refclk that was set */ +	return dce_clk_params.target_clock_frequency; +} +  static void dce_clock_read_integrated_info(struct dce_clk_mgr *clk_mgr_dce)  {  	struct dc_debug_options *debug = &clk_mgr_dce->base.ctx->dc->debug; @@ -782,22 +852,22 @@ static void dce12_update_clocks(struct clk_mgr *clk_mgr,  	dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context);  } -static const struct clk_mgr_funcs dce120_funcs = { +static struct clk_mgr_funcs dce120_funcs = {  	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,  	.update_clocks = dce12_update_clocks  }; -static const struct clk_mgr_funcs dce112_funcs = { +static struct clk_mgr_funcs dce112_funcs = {  	.get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,  	.update_clocks = dce112_update_clocks  }; -static const struct clk_mgr_funcs dce110_funcs = { +static struct clk_mgr_funcs dce110_funcs = {  	.get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,  	.update_clocks = dce11_update_clocks,  }; -static const struct clk_mgr_funcs dce_funcs = { +static struct clk_mgr_funcs dce_funcs = {  	.get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,  	.update_clocks = dce_update_clocks  }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h index c8f8c442142a..cca0c95d8cc8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h @@ -39,6 +39,11 @@  #define CLK_COMMON_REG_LIST_DCN_BASE() \  	SR(DENTIST_DISPCLK_CNTL) +#define VBIOS_SMU_MSG_BOX_REG_LIST_RV() \ +	.MP1_SMN_C2PMSG_91 = mmMP1_SMN_C2PMSG_91, \ +	.MP1_SMN_C2PMSG_83 = mmMP1_SMN_C2PMSG_83, \ +	.MP1_SMN_C2PMSG_67 = mmMP1_SMN_C2PMSG_67 +  #define CLK_SF(reg_name, field_name, post_fix)\  	.field_name = reg_name ## __ ## field_name ## post_fix @@ -50,23 +55,39 @@  	CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh),\  	CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh) +#define CLK_MASK_SH_LIST_RV1(mask_sh) \ +	CLK_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\ +	CLK_SF(MP1_SMN_C2PMSG_67, CONTENT, mask_sh),\ +	CLK_SF(MP1_SMN_C2PMSG_83, CONTENT, mask_sh),\ +	CLK_SF(MP1_SMN_C2PMSG_91, CONTENT, mask_sh), + +  #define CLK_REG_FIELD_LIST(type) \  	type DPREFCLK_SRC_SEL; \  	type DENTIST_DPREFCLK_WDIVIDER; \  	type 
DENTIST_DISPCLK_WDIVIDER; \  	type DENTIST_DISPCLK_CHG_DONE; +#define VBIOS_SMU_REG_FIELD_LIST(type) \ +	type CONTENT; +  struct clk_mgr_shift {  	CLK_REG_FIELD_LIST(uint8_t) +	VBIOS_SMU_REG_FIELD_LIST(uint32_t)  };  struct clk_mgr_mask {  	CLK_REG_FIELD_LIST(uint32_t) +	VBIOS_SMU_REG_FIELD_LIST(uint32_t)  };  struct clk_mgr_registers {  	uint32_t DPREFCLK_CNTL;  	uint32_t DENTIST_DISPCLK_CNTL; + +	uint32_t MP1_SMN_C2PMSG_67; +	uint32_t MP1_SMN_C2PMSG_83; +	uint32_t MP1_SMN_C2PMSG_91;  };  struct state_dependent_clocks { @@ -168,6 +189,8 @@ void dce110_fill_display_configs(  	struct dm_pp_display_configuration *pp_display_cfg);  int dce112_set_clock(struct clk_mgr *dccg, int requested_clk_khz); +int dce112_set_dispclk(struct clk_mgr *clk_mgr, int requested_clk_khz); +int dce112_set_dprefclk(struct clk_mgr *clk_mgr);  struct clk_mgr *dce_clk_mgr_create(  	struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index f70437aae8e0..df422440845b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -183,8 +183,8 @@ static bool calculate_fb_and_fractional_fb_divider(  *RETURNS:  * It fills the PLLSettings structure with PLL Dividers values  * if calculated values are within required tolerance -* It returns	- true if eror is within tolerance -*		- false if eror is not within tolerance +* It returns	- true if error is within tolerance +*		- false if error is not within tolerance  */  static bool calc_fb_divider_checking_tolerance(  		struct calc_pll_clock_source *calc_pll_cs, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index cd26161bcc4d..526aab438374 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -268,6 +268,8 @@ static bool setup_engine(  	struct dce_i2c_hw *dce_i2c_hw)  {  	uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE; +	/* We have checked that I2C is not in use by DMCU; set DC_I2C_SW_USE_I2C_REG_REQ to 1 to indicate SW is using it */ +	REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1);  	if (dce_i2c_hw->setup_limit != 0)  		i2c_setup_limit = dce_i2c_hw->setup_limit; @@ -322,8 +324,6 @@ static void release_engine(  	set_speed(dce_i2c_hw, dce_i2c_hw->original_speed); -	/* Release I2C */ -	REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1);  	/* Reset HW engine */  	{ @@ -343,6 +343,9 @@ static void release_engine(  	/* HW I2c engine - clock gating feature */  	if (!dce_i2c_hw->engine_keep_power_up_count)  		REG_UPDATE_N(SETUP, 1, FN(SETUP, DC_I2C_DDC1_ENABLE), 0); +	/* Release I2C after reset, so HW or DMCU can use it */ +	REG_UPDATE_2(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1, +		DC_I2C_SW_USE_I2C_REG_REQ, 0); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h index 575500755b2e..f718e3d396f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h @@ -105,6 +105,7 @@ enum {  	I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_SEL, mask_sh),\  	I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_TRANSACTION_DELAY, mask_sh),\  	I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_BYTE_DELAY, mask_sh),\ +	I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, mask_sh),\  	I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, mask_sh),\  	I2C_SF(DC_I2C_ARBITRATION, 
DC_I2C_NO_QUEUED_SW_GO, mask_sh),\  	I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_PRIORITY, mask_sh),\ @@ -146,6 +147,7 @@ struct dce_i2c_shift {  	uint8_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY;  	uint8_t DC_I2C_DDC1_INTRA_BYTE_DELAY;  	uint8_t DC_I2C_SW_DONE_USING_I2C_REG; +	uint8_t DC_I2C_SW_USE_I2C_REG_REQ;  	uint8_t DC_I2C_NO_QUEUED_SW_GO;  	uint8_t DC_I2C_SW_PRIORITY;  	uint8_t DC_I2C_SOFT_RESET; @@ -184,6 +186,7 @@ struct dce_i2c_mask {  	uint32_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY;  	uint32_t DC_I2C_DDC1_INTRA_BYTE_DELAY;  	uint32_t DC_I2C_SW_DONE_USING_I2C_REG; +	uint32_t DC_I2C_SW_USE_I2C_REG_REQ;  	uint32_t DC_I2C_NO_QUEUED_SW_GO;  	uint32_t DC_I2C_SW_PRIORITY;  	uint32_t DC_I2C_SOFT_RESET; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 14309fe6f2e6..61fe2596fdb3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -418,6 +418,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(  			break;  		case COLOR_SPACE_YCBCR709:  		case COLOR_SPACE_YCBCR709_LIMITED: +		case COLOR_SPACE_YCBCR709_BLACK:  			misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */  			misc1 = misc1 & ~0x80; /* bit7 = 0*/  			dynamic_range_ycbcr = 1; /*bt709*/ diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index e938bf9986d3..d7a531e9700f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -867,13 +867,55 @@ enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, s  	return DC_FAIL_SURFACE_VALIDATE;  } +struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link( +		struct resource_context *res_ctx, +		const struct resource_pool *pool, +		struct dc_stream_state *stream) +{ +	int i; +	int j = -1; +	struct dc_link *link = stream->link; + +	for (i = 0; i < pool->stream_enc_count; i++) { +		if (!res_ctx->is_stream_enc_acquired[i] && +				pool->stream_enc[i]) { +			/* Store first available for MST second display +			 * in daisy chain use case +			 */ +			j = i; +			if (pool->stream_enc[i]->id == +					link->link_enc->preferred_engine) +				return pool->stream_enc[i]; +		} +	} + +	/* +	 * below can happen in cases when stream encoder is acquired: +	 * 1) for second MST display in chain, so preferred engine already +	 * acquired; +	 * 2) for another link, which preferred engine already acquired by any +	 * MST configuration. +	 * +	 * If signal is of DP type and preferred engine not found, return last available +	 * +	 * TODO - This is just a patch up and a generic solution is +	 * required for non DP connectors. 
+	 */ + +	if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT) +		return pool->stream_enc[j]; + +	return NULL; +} +  static const struct resource_funcs dce100_res_pool_funcs = {  	.destroy = dce100_destroy_resource_pool,  	.link_enc_create = dce100_link_encoder_create,  	.validate_bandwidth = dce100_validate_bandwidth,  	.validate_plane = dce100_validate_plane,  	.add_stream_to_ctx = dce100_add_stream_to_ctx, -	.validate_global = dce100_validate_global +	.validate_global = dce100_validate_global, +	.find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link  };  static bool construct( diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h index 2f366d66635d..fecab7c560f5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h @@ -46,4 +46,9 @@ enum dc_status dce100_add_stream_to_ctx(  		struct dc_state *new_ctx,  		struct dc_stream_state *dc_stream); +struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link( +		struct resource_context *res_ctx, +		const struct resource_pool *pool, +		struct dc_stream_state *stream); +  #endif /* DCE100_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 7ac50ab1b762..69f215967af3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -242,6 +242,9 @@ static void build_prescale_params(struct ipp_prescale_params *prescale_params,  	prescale_params->mode = IPP_PRESCALE_MODE_FIXED_UNSIGNED;  	switch (plane_state->format) { +	case SURFACE_PIXEL_FORMAT_GRPH_RGB565: +		prescale_params->scale = 0x2082; +		break;  	case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:  	case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:  		prescale_params->scale = 0x2020; @@ -1296,6 +1299,11 @@ static enum dc_status dce110_enable_stream_timing(  		pipe_ctx->stream_res.tg->funcs->program_timing(  				pipe_ctx->stream_res.tg,  				&stream->timing, +				0, +				0, +				0, +				0, +				pipe_ctx->stream->signal,  				true);  	} @@ -1488,10 +1496,11 @@ static void disable_vga_and_power_gate_all_controllers(  	}  } -static struct dc_link *get_link_for_edp(struct dc *dc) +static struct dc_link *get_edp_link(struct dc *dc)  {  	int i; +	// report any eDP links, even unconnected DDI's  	for (i = 0; i < dc->link_count; i++) {  		if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP)  			return dc->links[i]; @@ -1499,23 +1508,13 @@ static struct dc_link *get_link_for_edp(struct dc *dc)  	return NULL;  } -static struct dc_link *get_link_for_edp_to_turn_off( +static struct dc_link *get_edp_link_with_sink(  		struct dc *dc,  		struct dc_state *context)  {  	int i;  	struct dc_link *link = NULL; -	/* check if eDP panel is suppose to be set mode, if yes, no need to disable */ -	for (i = 0; i < context->stream_count; i++) { -		if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { -			if (context->streams[i]->dpms_off == true) -				return context->streams[i]->sink->link; -			else -				return NULL; -		} -	} -  	/* check if there is an eDP panel not in use */  	for (i = 0; i < dc->link_count; i++) {  		if (dc->links[i]->local_sink && @@ -1538,59 +1537,53 @@ static struct dc_link *get_link_for_edp_to_turn_off(  void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)  {  	int i; -	struct dc_link 
*edp_link_to_turnoff = NULL; -	struct dc_link *edp_link = get_link_for_edp(dc); -	bool can_edp_fast_boot_optimize = false; -	bool apply_edp_fast_boot_optimization = false; +	struct dc_link *edp_link_with_sink = get_edp_link_with_sink(dc, context); +	struct dc_link *edp_link = get_edp_link(dc); +	bool can_apply_edp_fast_boot = false;  	bool can_apply_seamless_boot = false; -	for (i = 0; i < context->stream_count; i++) { -		if (context->streams[i]->apply_seamless_boot_optimization) { -			can_apply_seamless_boot = true; -			break; -		} -	} -  	if (dc->hwss.init_pipes)  		dc->hwss.init_pipes(dc, context); -	if (edp_link) { -		/* this seems to cause blank screens on DCE8 */ -		if ((dc->ctx->dce_version == DCE_VERSION_8_0) || -		    (dc->ctx->dce_version == DCE_VERSION_8_1) || -		    (dc->ctx->dce_version == DCE_VERSION_8_3)) -			can_edp_fast_boot_optimize = false; -		else -			can_edp_fast_boot_optimize = -				edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc); +	// Check fastboot support, disable on DCE8 because of blank screens +	if (edp_link && dc->ctx->dce_version != DCE_VERSION_8_0 && +		    dc->ctx->dce_version != DCE_VERSION_8_1 && +		    dc->ctx->dce_version != DCE_VERSION_8_3) { + +		// enable fastboot if backend is enabled on eDP +		if (edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc)) { +			/* Find eDP stream and set optimization flag */ +			for (i = 0; i < context->stream_count; i++) { +				if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { +					context->streams[i]->apply_edp_fast_boot_optimization = true; +					can_apply_edp_fast_boot = true; +					break; +				} +			} +		}  	} -	if (can_edp_fast_boot_optimize) -		edp_link_to_turnoff = get_link_for_edp_to_turn_off(dc, context); - -	/* if OS doesn't light up eDP and eDP link is available, we want to disable -	 * If resume from S4/S5, should optimization. 
-	 */ -	if (can_edp_fast_boot_optimize && !edp_link_to_turnoff) { -		/* Find eDP stream and set optimization flag */ -		for (i = 0; i < context->stream_count; i++) { -			if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { -				context->streams[i]->apply_edp_fast_boot_optimization = true; -				apply_edp_fast_boot_optimization = true; -			} +	// Check seamless boot support +	for (i = 0; i < context->stream_count; i++) { +		if (context->streams[i]->apply_seamless_boot_optimization) { +			can_apply_seamless_boot = true; +			break;  		}  	} -	if (!apply_edp_fast_boot_optimization && !can_apply_seamless_boot) { -		if (edp_link_to_turnoff) { +	/* eDP should not have a stream on resume from S4, so even with VBIOS post +	 * it should get turned off +	 */ +	if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) { +		if (edp_link_with_sink) {  			/*turn off backlight before DP_blank and encoder powered down*/ -			dc->hwss.edp_backlight_control(edp_link_to_turnoff, false); +			dc->hwss.edp_backlight_control(edp_link_with_sink, false);  		}  		/*resume from S3, no vbios posting, no need to power down again*/  		power_down_all_hw_blocks(dc);  		disable_vga_and_power_gate_all_controllers(dc); -		if (edp_link_to_turnoff) -			dc->hwss.edp_power_control(edp_link_to_turnoff, false); +		if (edp_link_with_sink) +			dc->hwss.edp_power_control(edp_link_with_sink, false);  	}  	bios_set_scratch_acc_mode_change(dc->ctx->dc_bios);  } @@ -2030,8 +2023,10 @@ enum dc_status dce110_apply_ctx_to_hw(  		if (pipe_ctx->stream == NULL)  			continue; -		if (pipe_ctx->stream == pipe_ctx_old->stream) +		if (pipe_ctx->stream == pipe_ctx_old->stream && +			pipe_ctx->stream->link->link_state_valid) {  			continue; +		}  		if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))  			continue; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index dcd04e9ea76b..f982c8b196cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1097,6 +1097,11 @@ static struct pipe_ctx *dce110_acquire_underlay(  		pipe_ctx->stream_res.tg->funcs->program_timing(pipe_ctx->stream_res.tg,  				&stream->timing, +				0, +				0, +				0, +				0, +				pipe_ctx->stream->signal,  				false);  		pipe_ctx->stream_res.tg->funcs->enable_advanced_request( @@ -1129,6 +1134,38 @@ static void dce110_destroy_resource_pool(struct resource_pool **pool)  	*pool = NULL;  } +struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link( +		struct resource_context *res_ctx, +		const struct resource_pool *pool, +		struct dc_stream_state *stream) +{ +	int i; +	int j = -1; +	struct dc_link *link = stream->link; + +	for (i = 0; i < pool->stream_enc_count; i++) { +		if (!res_ctx->is_stream_enc_acquired[i] && +				pool->stream_enc[i]) { +			/* Store first available for MST second display +			 * in daisy chain use case +			 */ +			j = i; +			if (pool->stream_enc[i]->id == +					link->link_enc->preferred_engine) +				return pool->stream_enc[i]; +		} +	} + +	/* +	 * For CZ and later, we can allow DIG FE and BE to differ for all display types +	 */ + +	if (j >= 0) +		return pool->stream_enc[j]; + +	return NULL; +} +  static const struct resource_funcs dce110_res_pool_funcs = {  	.destroy = dce110_destroy_resource_pool, @@ -1137,7 +1174,8 @@ static const struct resource_funcs dce110_res_pool_funcs = {  	.validate_plane = dce110_validate_plane,  	.acquire_idle_pipe_for_layer = 
dce110_acquire_underlay,  	.add_stream_to_ctx = dce110_add_stream_to_ctx, -	.validate_global = dce110_validate_global +	.validate_global = dce110_validate_global, +	.find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link  };  static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h index e5f168c1f8c8..aa4531e0800e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h @@ -45,5 +45,10 @@ struct resource_pool *dce110_create_resource_pool(  	struct dc *dc,  	struct hw_asic_id asic_id); +struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link( +		struct resource_context *res_ctx, +		const struct resource_pool *pool, +		struct dc_stream_state *stream); +  #endif /* __DC_RESOURCE_DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 1b2fe0df347f..5f7c2c5641c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -1952,6 +1952,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg,  void dce110_tg_program_timing(struct timing_generator *tg,  	const struct dc_crtc_timing *timing, +	int vready_offset, +	int vstartup_start, +	int vupdate_offset, +	int vupdate_width, +	const enum signal_type signal,  	bool use_vbios)  {  	if (use_vbios) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 734d4965dab1..768ccf27ada9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -256,6 +256,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg,  void dce110_tg_program_timing(struct timing_generator *tg,  	const struct dc_crtc_timing *timing, +	int vready_offset, +	int vstartup_start, +	int vupdate_offset, +	int vupdate_width, +	const enum signal_type signal,  	bool use_vbios);  bool dce110_tg_is_blanked(struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c index a3cef60380ed..a13a2f58944e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c @@ -435,6 +435,11 @@ static void dce110_timing_generator_v_set_blank(struct timing_generator *tg,  static void dce110_timing_generator_v_program_timing(struct timing_generator *tg,  	const struct dc_crtc_timing *timing, +	int vready_offset, +	int vstartup_start, +	int vupdate_offset, +	int vupdate_width, +	const enum signal_type signal,  	bool use_vbios)  {  	if (use_vbios) diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index a480b15f6885..cdf759b0f5f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -993,7 +993,8 @@ static const struct resource_funcs dce112_res_pool_funcs = {  	.validate_bandwidth = dce112_validate_bandwidth,  	.validate_plane = dce100_validate_plane,  	.add_stream_to_ctx = 
dce112_add_stream_to_ctx, -	.validate_global = dce112_validate_global +	.validate_global = dce112_validate_global, +	.find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link  };  static void bw_calcs_data_update_from_pplib(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 6d49c7143c67..9e6a5d84b0a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -480,7 +480,7 @@ static const struct dc_debug_options debug_defaults = {  		.disable_clock_gate = true,  }; -struct clock_source *dce120_clock_source_create( +static struct clock_source *dce120_clock_source_create(  	struct dc_context *ctx,  	struct dc_bios *bios,  	enum clock_source_id id, @@ -503,14 +503,14 @@ struct clock_source *dce120_clock_source_create(  	return NULL;  } -void dce120_clock_source_destroy(struct clock_source **clk_src) +static void dce120_clock_source_destroy(struct clock_source **clk_src)  {  	kfree(TO_DCE110_CLK_SRC(*clk_src));  	*clk_src = NULL;  } -bool dce120_hw_sequencer_create(struct dc *dc) +static bool dce120_hw_sequencer_create(struct dc *dc)  {  	/* All registers used by dce11.2 match those in dce11 in offset and  	 * structure @@ -837,7 +837,8 @@ static const struct resource_funcs dce120_res_pool_funcs = {  	.link_enc_create = dce120_link_encoder_create,  	.validate_bandwidth = dce112_validate_bandwidth,  	.validate_plane = dce100_validate_plane, -	.add_stream_to_ctx = dce112_add_stream_to_ctx +	.add_stream_to_ctx = dce112_add_stream_to_ctx, +	.find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link  };  static void bw_calcs_data_update_from_pplib(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 04b866f0fa1f..098e56962f2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -734,8 +734,13 @@ void dce120_tg_set_overscan_color(struct timing_generator *tg,  		CRTC_OVERSCAN_COLOR_RED, overscan_color->color_r_cr);  } -void dce120_tg_program_timing(struct timing_generator *tg, +static void dce120_tg_program_timing(struct timing_generator *tg,  	const struct dc_crtc_timing *timing, +	int vready_offset, +	int vstartup_start, +	int vupdate_offset, +	int vupdate_width, +	const enum signal_type signal,  	bool use_vbios)  {  	if (use_vbios) @@ -1109,6 +1114,92 @@ static bool dce120_arm_vert_intr(  	return true;  } + +static bool dce120_is_tg_enabled(struct timing_generator *tg) +{ +	struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); +	uint32_t value, field; + +	value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CONTROL, +				  tg110->offsets.crtc); +	field = get_reg_field_value(value, CRTC0_CRTC_CONTROL, +				    CRTC_CURRENT_MASTER_EN_STATE); + +	return field == 1; +} + +static bool dce120_configure_crc(struct timing_generator *tg, +				 const struct crc_params *params) +{ +	struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + +	/* Cannot configure crc on a CRTC that is disabled */ +	if (!dce120_is_tg_enabled(tg)) +		return false; + +	/* First, disable CRC before we configure it. 
*/
+	dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
+			   tg110->offsets.crtc, 0);
+
+	if (!params->enable)
+		return true;
+
+	/* Program frame boundaries */
+	/* Window A x axis start and end. */
+	CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL,
+			  CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start,
+			  CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+	/* Window A y axis start and end. */
+	CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL,
+			  CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+			  CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+	/* Window B x axis start and end. */
+	CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL,
+			  CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start,
+			  CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+	/* Window B y axis start and end. */
+	CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL,
+			  CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+			  CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+	/* Set crc mode and selection, and enable. Only using CRC0 */
+	CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
+			  CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+			  CRTC_CRC0_SELECT, params->selection,
+			  CRTC_CRC_EN, 1);
+
+	return true;
+}
+
+static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr,
+			   uint32_t *g_y, uint32_t *b_cb)
+{
+	struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
+	uint32_t value, field;
+
+	value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
+				  tg110->offsets.crtc);
+	field = get_reg_field_value(value, CRTC0_CRTC_CRC_CNTL, CRTC_CRC_EN);
+
+	/* Early return if CRC is not enabled for this CRTC */
+	if (!field)
+		return false;
+
+	value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG,
+				  tg110->offsets.crtc);
+	*r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR);
+	*g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y);
+
+	value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B,
+				  tg110->offsets.crtc);
+	*b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB);
+
+	return true;
+}
+
 static const struct timing_generator_funcs dce120_tg_funcs = {
 		.validate_timing = dce120_tg_validate_timing,
 		.program_timing = dce120_tg_program_timing,
@@ -1140,6 +1231,9 @@ static const struct timing_generator_funcs dce120_tg_funcs = {
 		.set_static_screen_control = dce120_timing_generator_set_static_screen_control,
 		.set_test_pattern = dce120_timing_generator_set_test_pattern,
 		.arm_vert_intr = dce120_arm_vert_intr,
+		.is_tg_enabled = dce120_is_tg_enabled,
+		.configure_crc = dce120_configure_crc,
+		.get_crc = dce120_get_crc,
 };
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index 27d0cc394963..2c21135a8510 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -880,7 +880,8 @@ static const struct resource_funcs dce80_res_pool_funcs = {
 	.validate_bandwidth = dce80_validate_bandwidth,
 	.validate_plane = dce100_validate_plane,
 	.add_stream_to_ctx = dce100_add_stream_to_ctx,
-	.validate_global = dce80_validate_global
+	.validate_global = dce80_validate_global,
+	.find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
 };

 static bool dce80_construct(
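/*
 * Illustrative sketch, not part of this patch: how the two new hooks fit
 * together for CRC-based screen validation (the kind of loop a
 * kms_crc-style test drives).  The helper below is hypothetical; only
 * configure_crc/get_crc and the crc_params fields come from the hunks
 * above.
 */
static bool dce120_capture_crc_once(struct timing_generator *tg,
				    uint32_t *r_cr, uint32_t *g_y,
				    uint32_t *b_cb)
{
	struct crc_params params = { 0 };

	params.enable = true;		/* program and arm CRC0 */
	params.continuous_mode = true;	/* recompute on every frame */
	params.selection = 0;		/* feeds CRTC_CRC0_SELECT; the enum
					 * values live outside this hunk */

	/* Fails (returns false) while the CRTC master enable is off. */
	if (!tg->funcs->configure_crc(tg, &params))
		return false;

	/* Reads back the per-channel CRCs; false until CRTC_CRC_EN is set. */
	return tg->funcs->get_crc(tg, r_cr, g_y, b_cb);
}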
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
index 8b5ce557ee71..397e7f94e1e8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
@@ -107,12 +107,17 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz)

 static void program_timing(struct timing_generator *tg,
 	const struct dc_crtc_timing *timing,
+	int vready_offset,
+	int vstartup_start,
+	int vupdate_offset,
+	int vupdate_width,
+	const enum signal_type signal,
 	bool use_vbios)
 {
 	if (!use_vbios)
 		program_pix_dur(tg, timing->pix_clk_100hz);

-	dce110_tg_program_timing(tg, timing, use_vbios);
+	dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios);
 }

 static void dce80_timing_generator_enable_advanced_request(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
index 2b2de1d913c9..9f2ffce10e12 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
@@ -27,6 +27,7 @@

 #include "reg_helper.h"
 #include "core_types.h"
+#include "dal_asic_id.h"

 #define TO_DCE_CLK_MGR(clocks)\
 	container_of(clocks, struct dce_clk_mgr, base)
@@ -91,13 +92,18 @@ static int dcn1_determine_dppclk_threshold(struct clk_mgr *clk_mgr, struct dc_cl

 static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks)
 {
+	int i;
 	struct dc *dc = clk_mgr->ctx->dc;
 	int dispclk_to_dpp_threshold = dcn1_determine_dppclk_threshold(clk_mgr, new_clocks);
 	bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz;
-	int i;

 	/* set disp clk to dpp clk threshold */
-	dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold);
+
+	if (clk_mgr->funcs->set_dispclk && clk_mgr->funcs->set_dprefclk) {
+		clk_mgr->funcs->set_dispclk(clk_mgr, dispclk_to_dpp_threshold);
+		clk_mgr->funcs->set_dprefclk(clk_mgr);
+	} else
+		dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold);

 	/* update request dpp clk division option */
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -113,8 +119,13 @@ static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clo
 	}

 	/* If target clk not same as dppclk threshold, set to target clock */
-	if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz)
-		dce112_set_clock(clk_mgr, new_clocks->dispclk_khz);
+	if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) {
+		if (clk_mgr->funcs->set_dispclk && clk_mgr->funcs->set_dprefclk) {
+			clk_mgr->funcs->set_dispclk(clk_mgr, new_clocks->dispclk_khz);
+			clk_mgr->funcs->set_dprefclk(clk_mgr);
+		} else
+			dce112_set_clock(clk_mgr, new_clocks->dispclk_khz);
+	}

 	clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz;
 	clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz;
@@ -242,7 +253,62 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 		}
 	}
 }
-static const struct clk_mgr_funcs dcn1_funcs = {
+
+#define VBIOSSMC_MSG_SetDispclkFreq           0x4
+#define VBIOSSMC_MSG_SetDprefclkFreq          0x5
+
+int dcn10_set_dispclk(struct clk_mgr *clk_mgr_base, int requested_dispclk_khz)
+{
+	int actual_dispclk_set_khz = -1;
+	struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr_base);
+
+	/* First clear response register */
+	//dm_write_reg(ctx, mmMP1_SMN_C2PMSG_91, 0);
+	REG_WRITE(MP1_SMN_C2PMSG_91, 0);
+
+	/* Set the parameter register for the SMU message, unit is Mhz */
+	//dm_write_reg(ctx, mmMP1_SMN_C2PMSG_83, requested_dispclk_khz / 1000);
+	REG_WRITE(MP1_SMN_C2PMSG_83, requested_dispclk_khz / 1000);
+
+	/* Trigger the message transaction by writing the message ID */
+	//dm_write_reg(ctx, mmMP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDispclkFreq);
+	REG_WRITE(MP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDispclkFreq);
+
+	REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000);
+
+	/* Actual dispclk set is returned in the parameter register */
+	actual_dispclk_set_khz = REG_READ(MP1_SMN_C2PMSG_83) * 1000;
+
+	return actual_dispclk_set_khz;
+
+}
+
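/*
 * Illustrative sketch, not part of this patch: the mailbox handshake that
 * dcn10_set_dispclk() above and dcn10_set_dprefclk() below both follow,
 * reduced to a self-contained model.  smu_write/smu_read/smu_poll_response
 * and the register indices are stand-ins, not DC or SMU APIs.
 */
#include <stdint.h>
#include <stdbool.h>

enum { C2PMSG_67, C2PMSG_83, C2PMSG_91, NUM_MSG_REGS };
static uint32_t msg_regs[NUM_MSG_REGS];

static void smu_write(int reg, uint32_t val) { msg_regs[reg] = val; }
static uint32_t smu_read(int reg) { return msg_regs[reg]; }

static bool smu_poll_response(int reg)
{
	/* Real code polls with REG_WAIT until the response register reads 1;
	 * this model acks immediately. */
	msg_regs[reg] = 1;
	return msg_regs[reg] == 1;
}

static int smu_send_msg_with_param(uint32_t msg_id, uint32_t param_mhz)
{
	smu_write(C2PMSG_91, 0);		/* 1. clear the response register */
	smu_write(C2PMSG_83, param_mhz);	/* 2. stage the argument (MHz) */
	smu_write(C2PMSG_67, msg_id);		/* 3. writing the ID fires the request */
	if (!smu_poll_response(C2PMSG_91))	/* 4. wait for the SMU to ack */
		return -1;
	return (int)smu_read(C2PMSG_83);	/* 5. SMU overwrites the argument
						 *    with the value it actually set */
}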
+int dcn10_set_dprefclk(struct clk_mgr *clk_mgr_base)
+{
+	int actual_dprefclk_set_khz = -1;
+	struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr_base);
+
+	REG_WRITE(MP1_SMN_C2PMSG_91, 0);
+
+	/* Set the parameter register for the SMU message */
+	REG_WRITE(MP1_SMN_C2PMSG_83, clk_mgr_dce->dprefclk_khz / 1000);
+
+	/* Trigger the message transaction by writing the message ID */
+	REG_WRITE(MP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDprefclkFreq);
+
+	/* Wait for SMU response */
+	REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000);
+
+	actual_dprefclk_set_khz = REG_READ(MP1_SMN_C2PMSG_83) * 1000;
+
+	return actual_dprefclk_set_khz;
+}
+
+int (*set_dispclk)(struct pp_smu *pp_smu, int dispclk);
+
+int (*set_dprefclk)(struct pp_smu *pp_smu);
+
+static struct clk_mgr_funcs dcn1_funcs = {
 	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
 	.update_clocks = dcn1_update_clocks
 };
@@ -266,8 +332,8 @@ struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx)
 	clk_mgr_dce->dprefclk_ss_percentage = 0;
 	clk_mgr_dce->dprefclk_ss_divider = 1000;
 	clk_mgr_dce->ss_on_dprefclk = false;
-
 	clk_mgr_dce->dprefclk_khz = 600000;
+
 	if (bp->integrated_info)
 		clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq;
 	if (clk_mgr_dce->dentist_vco_freq_khz == 0) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index 0db2a6e96fc0..bf978831bb0e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -263,20 +263,15 @@ void hubbub1_wm_change_req_wa(struct hubbub *hubbub)
 			DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 1);
 }

-void hubbub1_program_watermarks(
+void hubbub1_program_urgent_watermarks(
 		struct hubbub *hubbub,
 		struct dcn_watermark_set *watermarks,
 		unsigned int refclk_mhz,
 		bool safe_to_lower)
 {
 	struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
-	/*
-	 * Need to clamp to max of the register values (i.e. no wrap)
-	 * for dcn1, all wm registers are 21-bit wide
-	 */
 	uint32_t prog_wm_value;
-
 	/* Repeat for water mark set A, B, C and D.
*/  	/* clock state A */  	if (safe_to_lower || watermarks->a.urgent_ns > hubbub1->watermarks.a.urgent_ns) { @@ -291,60 +286,14 @@ void hubbub1_program_watermarks(  			watermarks->a.urgent_ns, prog_wm_value);  	} -	if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A)) { -		if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) { -			hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; -			prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns, -					refclk_mhz, 0x1fffff); -			REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->a.pte_meta_urgent_ns, prog_wm_value); -		} -	} - -	if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A)) { -		if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns -				> hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) { -			hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = -					watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns; -			prog_wm_value = convert_and_clamp( -					watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, -					refclk_mhz, 0x1fffff); -			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, -					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); -		} - -		if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns -				> hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) { -			hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns = -					watermarks->a.cstate_pstate.cstate_exit_ns; -			prog_wm_value = convert_and_clamp( -					watermarks->a.cstate_pstate.cstate_exit_ns, -					refclk_mhz, 0x1fffff); -			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, -					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value); -		} -	} - -	if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns -			> hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) { -		hubbub1->watermarks.a.cstate_pstate.pstate_change_ns = -				watermarks->a.cstate_pstate.pstate_change_ns; -		prog_wm_value = convert_and_clamp( -				watermarks->a.cstate_pstate.pstate_change_ns, +	if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) { +		hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; +		prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns,  				refclk_mhz, 0x1fffff); -		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0, -				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value); -		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" -			"HW register value = 0x%x\n\n", -			watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value); +		REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->a.pte_meta_urgent_ns, prog_wm_value);  	}  	/* clock state B */ @@ -360,60 +309,14 @@ void hubbub1_program_watermarks(  			watermarks->b.urgent_ns, prog_wm_value);  	} -	if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B)) 
{ -		if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) { -			hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns; -			prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns, -					refclk_mhz, 0x1fffff); -			REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->b.pte_meta_urgent_ns, prog_wm_value); -		} -	} - -	if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B)) { -		if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns -				> hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) { -			hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = -					watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns; -			prog_wm_value = convert_and_clamp( -					watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, -					refclk_mhz, 0x1fffff); -			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, -					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); -		} - -		if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns -				> hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) { -			hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns = -					watermarks->b.cstate_pstate.cstate_exit_ns; -			prog_wm_value = convert_and_clamp( -					watermarks->b.cstate_pstate.cstate_exit_ns, -					refclk_mhz, 0x1fffff); -			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, -					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value); -		} -	} - -	if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns -			> hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) { -		hubbub1->watermarks.b.cstate_pstate.pstate_change_ns = -				watermarks->b.cstate_pstate.pstate_change_ns; -		prog_wm_value = convert_and_clamp( -				watermarks->b.cstate_pstate.pstate_change_ns, +	if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) { +		hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns; +		prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns,  				refclk_mhz, 0x1fffff); -		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0, -				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value); -		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" -			"HW register value = 0x%x\n\n", -			watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value); +		REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->b.pte_meta_urgent_ns, prog_wm_value);  	}  	/* clock state C */ @@ -429,60 +332,14 @@ void hubbub1_program_watermarks(  			watermarks->c.urgent_ns, prog_wm_value);  	} -	if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C)) { -		if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) { -			hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; -			prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns, -					
refclk_mhz, 0x1fffff); -			REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->c.pte_meta_urgent_ns, prog_wm_value); -		} -	} - -	if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C)) { -		if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns -				> hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) { -			hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = -					watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns; -			prog_wm_value = convert_and_clamp( -					watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, -					refclk_mhz, 0x1fffff); -			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0, -					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); -		} - -		if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns -				> hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) { -			hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns = -					watermarks->c.cstate_pstate.cstate_exit_ns; -			prog_wm_value = convert_and_clamp( -					watermarks->c.cstate_pstate.cstate_exit_ns, -					refclk_mhz, 0x1fffff); -			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0, -					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value); -		} -	} - -	if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns -			> hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) { -		hubbub1->watermarks.c.cstate_pstate.pstate_change_ns = -				watermarks->c.cstate_pstate.pstate_change_ns; -		prog_wm_value = convert_and_clamp( -				watermarks->c.cstate_pstate.pstate_change_ns, +	if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) { +		hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; +		prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns,  				refclk_mhz, 0x1fffff); -		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0, -				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value); -		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n" -			"HW register value = 0x%x\n\n", -			watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value); +		REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->c.pte_meta_urgent_ns, prog_wm_value);  	}  	/* clock state D */ @@ -498,48 +355,199 @@ void hubbub1_program_watermarks(  			watermarks->d.urgent_ns, prog_wm_value);  	} -	if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D)) { -		if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) { -			hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; -			prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns, -					refclk_mhz, 0x1fffff); -			REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->d.pte_meta_urgent_ns, prog_wm_value); -		} 
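/*
 * Illustrative sketch, not part of this patch: every branch in this file
 * funnels through convert_and_clamp() before touching a watermark
 * register.  Its definition is outside this diff; based on the call sites
 * (a nanosecond value, the reference clock in MHz, and a 0x1fffff limit
 * for DCN1's 21-bit fields), it plausibly reduces to the helper below.
 */
static uint32_t convert_and_clamp_sketch(uint32_t wm_ns,
					 uint32_t refclk_mhz,
					 uint32_t clamp_value)
{
	/* ns * (refclk cycles per us) / 1000 = refclk cycles */
	uint32_t ret_val = wm_ns * refclk_mhz / 1000;

	/* Saturate instead of wrapping past the register width */
	if (ret_val > clamp_value)
		ret_val = clamp_value;

	return ret_val;
}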
+	if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) { +		hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; +		prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns, +				refclk_mhz, 0x1fffff); +		REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->d.pte_meta_urgent_ns, prog_wm_value);  	} +} -	if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D)) { -		if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns -				> hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) { -			hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = -					watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns; -			prog_wm_value = convert_and_clamp( -					watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, -					refclk_mhz, 0x1fffff); -			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0, -					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); -		} +void hubbub1_program_stutter_watermarks( +		struct hubbub *hubbub, +		struct dcn_watermark_set *watermarks, +		unsigned int refclk_mhz, +		bool safe_to_lower) +{ +	struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); +	uint32_t prog_wm_value; -		if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns -				> hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) { -			hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns = -					watermarks->d.cstate_pstate.cstate_exit_ns; -			prog_wm_value = convert_and_clamp( -					watermarks->d.cstate_pstate.cstate_exit_ns, -					refclk_mhz, 0x1fffff); -			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0, -					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value); -			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n" -				"HW register value = 0x%x\n", -				watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value); -		} +	/* clock state A */ +	if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns +			> hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) { +		hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = +				watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, +				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); +	} + +	if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns +			> hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) { +		hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns = +				watermarks->a.cstate_pstate.cstate_exit_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->a.cstate_pstate.cstate_exit_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, +				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value); +	} + +	/* clock state B 
*/ +	if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns +			> hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) { +		hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = +				watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, +				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);  	} +	if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns +			> hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) { +		hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns = +				watermarks->b.cstate_pstate.cstate_exit_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->b.cstate_pstate.cstate_exit_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, +				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value); +	} + +	/* clock state C */ +	if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns +			> hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) { +		hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = +				watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0, +				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); +	} + +	if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns +			> hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) { +		hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns = +				watermarks->c.cstate_pstate.cstate_exit_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->c.cstate_pstate.cstate_exit_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0, +				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value); +	} + +	/* clock state D */ +	if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns +			> hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) { +		hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = +				watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0, +				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); +	} + +	if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns +			> hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) { +		
hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns = +				watermarks->d.cstate_pstate.cstate_exit_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->d.cstate_pstate.cstate_exit_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0, +				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n" +			"HW register value = 0x%x\n", +			watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value); +	} + +} + +void hubbub1_program_pstate_watermarks( +		struct hubbub *hubbub, +		struct dcn_watermark_set *watermarks, +		unsigned int refclk_mhz, +		bool safe_to_lower) +{ +	struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); +	uint32_t prog_wm_value; + +	/* clock state A */ +	if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns +			> hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) { +		hubbub1->watermarks.a.cstate_pstate.pstate_change_ns = +				watermarks->a.cstate_pstate.pstate_change_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->a.cstate_pstate.pstate_change_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0, +				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" +			"HW register value = 0x%x\n\n", +			watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value); +	} + +	/* clock state B */ +	if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns +			> hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) { +		hubbub1->watermarks.b.cstate_pstate.pstate_change_ns = +				watermarks->b.cstate_pstate.pstate_change_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->b.cstate_pstate.pstate_change_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0, +				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" +			"HW register value = 0x%x\n\n", +			watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value); +	} + +	/* clock state C */ +	if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns +			> hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) { +		hubbub1->watermarks.c.cstate_pstate.pstate_change_ns = +				watermarks->c.cstate_pstate.pstate_change_ns; +		prog_wm_value = convert_and_clamp( +				watermarks->c.cstate_pstate.pstate_change_ns, +				refclk_mhz, 0x1fffff); +		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0, +				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value); +		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n" +			"HW register value = 0x%x\n\n", +			watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value); +	} + +	/* clock state D */  	if (safe_to_lower || watermarks->d.cstate_pstate.pstate_change_ns  			> hubbub1->watermarks.d.cstate_pstate.pstate_change_ns) {  		hubbub1->watermarks.d.cstate_pstate.pstate_change_ns = @@ -553,6 +561,22 @@ void hubbub1_program_watermarks(  			"HW register value = 0x%x\n\n",  			watermarks->d.cstate_pstate.pstate_change_ns, prog_wm_value);  	} +} + +void hubbub1_program_watermarks( +		struct hubbub *hubbub, +		struct dcn_watermark_set *watermarks, +		unsigned int refclk_mhz, +		bool safe_to_lower) +{ +	struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); +	/* +	 * Need to clamp to max of the register values (i.e. 
no wrap) +	 * for dcn1, all wm registers are 21-bit wide +	 */ +	hubbub1_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); +	hubbub1_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); +	hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);  	REG_UPDATE(DCHUBBUB_ARB_SAT_LEVEL,  			DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz); @@ -903,9 +927,7 @@ void hubbub1_construct(struct hubbub *hubbub,  	hubbub1->masks = hubbub_mask;  	hubbub1->debug_test_index_pstate = 0x7; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  	if (ctx->dce_version == DCN_VERSION_1_01)  		hubbub1->debug_test_index_pstate = 0xB; -#endif  } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index 85811b24a497..7c2559c9ae23 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -262,4 +262,20 @@ void hubbub1_construct(struct hubbub *hubbub,  	const struct dcn_hubbub_shift *hubbub_shift,  	const struct dcn_hubbub_mask *hubbub_mask); +void hubbub1_program_urgent_watermarks( +		struct hubbub *hubbub, +		struct dcn_watermark_set *watermarks, +		unsigned int refclk_mhz, +		bool safe_to_lower); +void hubbub1_program_stutter_watermarks( +		struct hubbub *hubbub, +		struct dcn_watermark_set *watermarks, +		unsigned int refclk_mhz, +		bool safe_to_lower); +void hubbub1_program_pstate_watermarks( +		struct hubbub *hubbub, +		struct dcn_watermark_set *watermarks, +		unsigned int refclk_mhz, +		bool safe_to_lower); +  #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 33d311cea28c..66bb0e7db25c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -658,16 +658,15 @@ static enum dc_status dcn10_enable_stream_timing(  		BREAK_TO_DEBUGGER();  		return DC_ERROR_UNEXPECTED;  	} -	pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset; -	pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start; -	pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset; -	pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width; - -	pipe_ctx->stream_res.tg->dlg_otg_param.signal =  pipe_ctx->stream->signal;  	pipe_ctx->stream_res.tg->funcs->program_timing(  			pipe_ctx->stream_res.tg,  			&stream->timing, +			pipe_ctx->pipe_dlg_param.vready_offset, +			pipe_ctx->pipe_dlg_param.vstartup_start, +			pipe_ctx->pipe_dlg_param.vupdate_offset, +			pipe_ctx->pipe_dlg_param.vupdate_width, +			pipe_ctx->stream->signal,  			true);  #if 0 /* move to after enable_crtc */ @@ -1756,7 +1755,7 @@ static void dcn10_program_output_csc(struct dc *dc,  bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)  { -	if (pipe_ctx->plane_state->visible) +	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)  		return true;  	if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe))  		return true; @@ -1765,7 +1764,7 @@ bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)  bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx)  { -	if (pipe_ctx->plane_state->visible) +	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)  		return true;  	if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))  		
return true; @@ -1774,7 +1773,7 @@ bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx)  bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx)  { -	if (pipe_ctx->plane_state->visible) +	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)  		return true;  	if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))  		return true; @@ -1920,7 +1919,7 @@ static uint16_t fixed_point_to_int_frac(  	return result;  } -void build_prescale_params(struct  dc_bias_and_scale *bias_and_scale, +void dcn10_build_prescale_params(struct  dc_bias_and_scale *bias_and_scale,  		const struct dc_plane_state *plane_state)  {  	if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN @@ -1953,7 +1952,7 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state)  			plane_state->color_space);  	//set scale and bias registers -	build_prescale_params(&bns_params, plane_state); +	dcn10_build_prescale_params(&bns_params, plane_state);  	if (dpp->funcs->dpp_program_bias_and_scale)  		dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);  } @@ -2279,14 +2278,15 @@ static void program_all_pipe_in_tree(  	if (pipe_ctx->top_pipe == NULL) {  		bool blank = !is_pipe_tree_visible(pipe_ctx); -		pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset; -		pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start; -		pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset; -		pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width; -		pipe_ctx->stream_res.tg->dlg_otg_param.signal =  pipe_ctx->stream->signal; -  		pipe_ctx->stream_res.tg->funcs->program_global_sync( -				pipe_ctx->stream_res.tg); +				pipe_ctx->stream_res.tg, +				pipe_ctx->pipe_dlg_param.vready_offset, +				pipe_ctx->pipe_dlg_param.vstartup_start, +				pipe_ctx->pipe_dlg_param.vupdate_offset, +				pipe_ctx->pipe_dlg_param.vupdate_width); + +		pipe_ctx->stream_res.tg->funcs->set_vtg_params( +				pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);  		dc->hwss.blank_pixel_data(dc, pipe_ctx, blank); @@ -2644,9 +2644,6 @@ static void dcn10_wait_for_mpcc_disconnect(  			res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst);  			pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false;  			hubp->funcs->set_blank(hubp, true); -			/*DC_LOG_ERROR(dc->ctx->logger, -					"[debug_mpo: wait_for_mpcc finished waiting on mpcc %d]\n", -					i);*/  		}  	} @@ -2790,7 +2787,6 @@ static void apply_front_porch_workaround(  int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)  { -	struct timing_generator *optc = pipe_ctx->stream_res.tg;  	const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;  	struct dc_crtc_timing patched_crtc_timing;  	int vesa_sync_start; @@ -2813,7 +2809,7 @@ int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)  			* interlace_factor;  	vertical_line_start = asic_blank_end - -			optc->dlg_otg_param.vstartup_start + 1; +			pipe_ctx->pipe_dlg_param.vstartup_start + 1;  	return vertical_line_start;  } @@ -2961,6 +2957,18 @@ static void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx,  	}  } +static void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx, +				const uint8_t *custom_sdp_message, +				unsigned int sdp_message_size) +{ +	if (dc_is_dp_signal(pipe_ctx->stream->signal)) { +		pipe_ctx->stream_res.stream_enc->funcs->send_immediate_sdp_message( +				pipe_ctx->stream_res.stream_enc, +				
custom_sdp_message,
+				sdp_message_size);
+	}
+}
+
 static const struct hw_sequencer_funcs dcn10_funcs = {
 	.program_gamut_remap = program_gamut_remap,
 	.init_hw = dcn10_init_hw,
@@ -2980,6 +2988,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
 	.enable_timing_synchronization = dcn10_enable_timing_synchronization,
 	.enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
 	.update_info_frame = dce110_update_info_frame,
+	.send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
 	.enable_stream = dce110_enable_stream,
 	.disable_stream = dce110_disable_stream,
 	.unblank_stream = dcn10_unblank_stream,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
index 4b3b27a5d23b..ef94d6b15843 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
@@ -83,6 +83,8 @@ struct pipe_ctx *find_top_pipe_for_stream(

 int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx);

+void dcn10_build_prescale_params(struct  dc_bias_and_scale *bias_and_scale,
+		const struct dc_plane_state *plane_state);
 void lock_all_pipes(struct dc *dc,
 	struct dc_state *context,
 	bool lock);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
index 0126a44ba012..e25ae43f8d32 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
@@ -726,6 +726,8 @@ void dcn10_link_encoder_construct(
 		enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
 				bp_cap_info.DP_HBR3_EN;
 		enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+		enc10->base.features.flags.bits.DP_IS_USB_C =
+				bp_cap_info.DP_IS_USB_C;
 	} else {
 		DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
 				__func__,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index 0345d51e9d6f..533b0f3cf6c3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -46,9 +46,7 @@
 * This is a workaround for a bug that has existed since R5xx and has not been
 * fixed; keep front porch at minimum 2 for interlaced mode or 1 for progressive.
*/ -static void optc1_apply_front_porch_workaround( -	struct timing_generator *optc, -	struct dc_crtc_timing *timing) +static void apply_front_porch_workaround(struct dc_crtc_timing *timing)  {  	if (timing->flags.INTERLACE == 1) {  		if (timing->v_front_porch < 2) @@ -60,24 +58,33 @@ static void optc1_apply_front_porch_workaround(  }  void optc1_program_global_sync( -		struct timing_generator *optc) +		struct timing_generator *optc, +		int vready_offset, +		int vstartup_start, +		int vupdate_offset, +		int vupdate_width)  {  	struct optc *optc1 = DCN10TG_FROM_TG(optc); -	if (optc->dlg_otg_param.vstartup_start == 0) { +	optc1->vready_offset = vready_offset; +	optc1->vstartup_start = vstartup_start; +	optc1->vupdate_offset = vupdate_offset; +	optc1->vupdate_width = vupdate_width; + +	if (optc1->vstartup_start == 0) {  		BREAK_TO_DEBUGGER();  		return;  	}  	REG_SET(OTG_VSTARTUP_PARAM, 0, -		VSTARTUP_START, optc->dlg_otg_param.vstartup_start); +		VSTARTUP_START, optc1->vstartup_start);  	REG_SET_2(OTG_VUPDATE_PARAM, 0, -			VUPDATE_OFFSET, optc->dlg_otg_param.vupdate_offset, -			VUPDATE_WIDTH, optc->dlg_otg_param.vupdate_width); +			VUPDATE_OFFSET, optc1->vupdate_offset, +			VUPDATE_WIDTH, optc1->vupdate_width);  	REG_SET(OTG_VREADY_PARAM, 0, -			VREADY_OFFSET, optc->dlg_otg_param.vready_offset); +			VREADY_OFFSET, optc1->vready_offset);  }  static void optc1_disable_stereo(struct timing_generator *optc) @@ -132,25 +139,32 @@ void optc1_setup_vertical_interrupt2(  void optc1_program_timing(  	struct timing_generator *optc,  	const struct dc_crtc_timing *dc_crtc_timing, +	int vready_offset, +	int vstartup_start, +	int vupdate_offset, +	int vupdate_width, +	const enum signal_type signal,  	bool use_vbios)  {  	struct dc_crtc_timing patched_crtc_timing; -	uint32_t vesa_sync_start;  	uint32_t asic_blank_end;  	uint32_t asic_blank_start;  	uint32_t v_total;  	uint32_t v_sync_end; -	uint32_t v_init, v_fp2;  	uint32_t h_sync_polarity, v_sync_polarity;  	uint32_t start_point = 0;  	uint32_t field_num = 0;  	uint32_t h_div_2; -	int32_t vertical_line_start;  	struct optc *optc1 = DCN10TG_FROM_TG(optc); +	optc1->signal = signal; +	optc1->vready_offset = vready_offset; +	optc1->vstartup_start = vstartup_start; +	optc1->vupdate_offset = vupdate_offset; +	optc1->vupdate_width = vupdate_width;  	patched_crtc_timing = *dc_crtc_timing; -	optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); +	apply_front_porch_workaround(&patched_crtc_timing);  	/* Load horizontal timing */ @@ -163,24 +177,16 @@ void optc1_program_timing(  			OTG_H_SYNC_A_START, 0,  			OTG_H_SYNC_A_END, patched_crtc_timing.h_sync_width); -	/* asic_h_blank_end = HsyncWidth + HbackPorch = -	 * vesa. usHorizontalTotal - vesa. 
usHorizontalSyncStart - -	 * vesa.h_left_border -	 */ -	vesa_sync_start = patched_crtc_timing.h_addressable + -			patched_crtc_timing.h_border_right + +	/* blank_start = line end - front porch */ +	asic_blank_start = patched_crtc_timing.h_total -  			patched_crtc_timing.h_front_porch; -	asic_blank_end = patched_crtc_timing.h_total - -			vesa_sync_start - +	/* blank_end = blank_start - active */ +	asic_blank_end = asic_blank_start - +			patched_crtc_timing.h_border_right - +			patched_crtc_timing.h_addressable -  			patched_crtc_timing.h_border_left; -	/* h_blank_start = v_blank_end + v_active */ -	asic_blank_start = asic_blank_end + -			patched_crtc_timing.h_border_left + -			patched_crtc_timing.h_addressable + -			patched_crtc_timing.h_border_right; -  	REG_UPDATE_2(OTG_H_BLANK_START_END,  			OTG_H_BLANK_START, asic_blank_start,  			OTG_H_BLANK_END, asic_blank_end); @@ -212,24 +218,15 @@ void optc1_program_timing(  			OTG_V_SYNC_A_START, 0,  			OTG_V_SYNC_A_END, v_sync_end); -	vesa_sync_start = patched_crtc_timing.v_addressable + -			patched_crtc_timing.v_border_bottom + +	/* blank_start = frame end - front porch */ +	asic_blank_start = patched_crtc_timing.v_total -  			patched_crtc_timing.v_front_porch; -	asic_blank_end = (patched_crtc_timing.v_total - -			vesa_sync_start - -			patched_crtc_timing.v_border_top); - -	/* v_blank_start = v_blank_end + v_active */ -	asic_blank_start = asic_blank_end + -			(patched_crtc_timing.v_border_top + -			patched_crtc_timing.v_addressable + -			patched_crtc_timing.v_border_bottom); - -	vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; -	v_fp2 = 0; -	if (vertical_line_start < 0) -		v_fp2 = -vertical_line_start; +	/* blank_end = blank_start - active */ +	asic_blank_end = asic_blank_start - +			patched_crtc_timing.v_border_bottom - +			patched_crtc_timing.v_addressable - +			patched_crtc_timing.v_border_top;  	REG_UPDATE_2(OTG_V_BLANK_START_END,  			OTG_V_BLANK_START, asic_blank_start, @@ -242,10 +239,9 @@ void optc1_program_timing(  	REG_UPDATE(OTG_V_SYNC_A_CNTL,  		OTG_V_SYNC_A_POL, v_sync_polarity); -	v_init = asic_blank_start; -	if (optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT || -		optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT_MST || -		optc->dlg_otg_param.signal == SIGNAL_TYPE_EDP) { +	if (optc1->signal == SIGNAL_TYPE_DISPLAY_PORT || +			optc1->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || +			optc1->signal == SIGNAL_TYPE_EDP) {  		start_point = 1;  		if (patched_crtc_timing.flags.INTERLACE == 1)  			field_num = 1; @@ -253,13 +249,10 @@ void optc1_program_timing(  	/* Interlace */  	if (REG(OTG_INTERLACE_CONTROL)) { -		if (patched_crtc_timing.flags.INTERLACE == 1) { +		if (patched_crtc_timing.flags.INTERLACE == 1)  			REG_UPDATE(OTG_INTERLACE_CONTROL,  					OTG_INTERLACE_ENABLE, 1); -			v_init = v_init / 2; -			if ((optc->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end) -				v_fp2 = v_fp2 / 2; -		} else +		else  			REG_UPDATE(OTG_INTERLACE_CONTROL,  					OTG_INTERLACE_ENABLE, 0);  	} @@ -268,16 +261,18 @@ void optc1_program_timing(  	REG_UPDATE(CONTROL,  			VTG0_ENABLE, 0); -	REG_UPDATE_2(CONTROL, -			VTG0_FP2, v_fp2, -			VTG0_VCOUNT_INIT, v_init); -  	/* original code is using VTG offset to address OTG reg, seems wrong */  	REG_UPDATE_2(OTG_CONTROL,  			OTG_START_POINT_CNTL, start_point,  			OTG_FIELD_NUMBER_CNTL, field_num); -	optc1_program_global_sync(optc); +	optc->funcs->program_global_sync(optc, +			vready_offset, +			vstartup_start, +			vupdate_offset, +			vupdate_width); + +	
optc->funcs->set_vtg_params(optc, dc_crtc_timing);  	/* TODO  	 * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1 @@ -296,6 +291,48 @@ void optc1_program_timing(  } +void optc1_set_vtg_params(struct timing_generator *optc, +		const struct dc_crtc_timing *dc_crtc_timing) +{ +	struct dc_crtc_timing patched_crtc_timing; +	uint32_t asic_blank_end; +	uint32_t v_init; +	uint32_t v_fp2 = 0; +	int32_t vertical_line_start; + +	struct optc *optc1 = DCN10TG_FROM_TG(optc); + +	patched_crtc_timing = *dc_crtc_timing; +	apply_front_porch_workaround(&patched_crtc_timing); + +	/* VCOUNT_INIT is the start of blank */ +	v_init = patched_crtc_timing.v_total - patched_crtc_timing.v_front_porch; + +	/* end of blank = v_init - active */ +	asic_blank_end = v_init - +			patched_crtc_timing.v_border_bottom - +			patched_crtc_timing.v_addressable - +			patched_crtc_timing.v_border_top; + +	/* if VSTARTUP is before VSYNC, FP2 is the offset, otherwise 0 */ +	vertical_line_start = asic_blank_end - optc1->vstartup_start + 1; +	if (vertical_line_start < 0) +		v_fp2 = -vertical_line_start; + +	/* Interlace */ +	if (REG(OTG_INTERLACE_CONTROL)) { +		if (patched_crtc_timing.flags.INTERLACE == 1) { +			v_init = v_init / 2; +			if ((optc1->vstartup_start/2)*2 > asic_blank_end) +				v_fp2 = v_fp2 / 2; +		} +	} + +	REG_UPDATE_2(CONTROL, +			VTG0_FP2, v_fp2, +			VTG0_VCOUNT_INIT, v_init); +} +  void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable)  {  	struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -1420,6 +1457,7 @@ static const struct timing_generator_funcs dcn10_tg_funcs = {  		.clear_optc_underflow = optc1_clear_optc_underflow,  		.get_crc = optc1_get_crc,  		.configure_crc = optc1_configure_crc, +		.set_vtg_params = optc1_set_vtg_params,  };  void dcn10_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 4eb9a898c237..651b8caa4b9f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -446,6 +446,12 @@ struct optc {  	uint32_t min_v_sync_width;  	uint32_t min_v_blank;  	uint32_t min_v_blank_interlace; + +	int vstartup_start; +	int vupdate_offset; +	int vupdate_width; +	int vready_offset; +	enum signal_type signal;  };  void dcn10_timing_generator_init(struct optc *optc); @@ -481,6 +487,11 @@ bool optc1_validate_timing(  void optc1_program_timing(  	struct timing_generator *optc,  	const struct dc_crtc_timing *dc_crtc_timing, +	int vready_offset, +	int vstartup_start, +	int vupdate_offset, +	int vupdate_width, +	const enum signal_type signal,  	bool use_vbios);  void optc1_setup_vertical_interrupt0( @@ -495,7 +506,11 @@ void optc1_setup_vertical_interrupt2(  		uint32_t start_line);  void optc1_program_global_sync( -		struct timing_generator *optc); +		struct timing_generator *optc, +		int vready_offset, +		int vstartup_start, +		int vupdate_offset, +		int vupdate_width);  bool optc1_disable_crtc(struct timing_generator *optc); @@ -582,4 +597,7 @@ bool optc1_get_crc(struct timing_generator *optc,  bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); +void optc1_set_vtg_params(struct timing_generator *optc, +		const struct dc_crtc_timing *dc_crtc_timing); +  #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 7eccb54c421d..bfddd51294a2 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -29,7 +29,6 @@  #include "resource.h"  #include "include/irq_service_interface.h"  #include "dcn10_resource.h" -  #include "dcn10_ipp.h"  #include "dcn10_mpc.h"  #include "irq/dcn10/irq_service_dcn10.h" @@ -153,9 +152,7 @@ enum dcn10_clk_src_array_id {  	DCN10_CLK_SRC_PLL2,  	DCN10_CLK_SRC_PLL3,  	DCN10_CLK_SRC_TOTAL, -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  	DCN101_CLK_SRC_TOTAL = DCN10_CLK_SRC_PLL3 -#endif  };  /* begin ********************* @@ -445,7 +442,6 @@ static const struct bios_registers bios_regs = {  	HUBP_REG_LIST_DCN10(id)\  } -  static const struct dcn_mi_registers hubp_regs[] = {  	hubp_regs(0),  	hubp_regs(1), @@ -461,7 +457,6 @@ static const struct dcn_mi_mask hubp_mask = {  		HUBP_MASK_SH_LIST_DCN10(_MASK)  }; -  static const struct dcn_hubbub_registers hubbub_reg = {  		HUBBUB_REG_LIST_DCN10(0)  }; @@ -494,6 +489,27 @@ static const struct dce110_clk_src_mask cs_mask = {  		CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK)  }; + +#define mmMP1_SMN_C2PMSG_91            0x1629B +#define mmMP1_SMN_C2PMSG_83            0x16293 +#define mmMP1_SMN_C2PMSG_67            0x16283 + +#define MP1_SMN_C2PMSG_91__CONTENT_MASK                    0xffffffffL +#define MP1_SMN_C2PMSG_83__CONTENT_MASK                    0xffffffffL +#define MP1_SMN_C2PMSG_67__CONTENT_MASK                    0xffffffffL +#define	MP1_SMN_C2PMSG_91__CONTENT__SHIFT                  0x00000000 +#define	MP1_SMN_C2PMSG_83__CONTENT__SHIFT                  0x00000000 +#define	MP1_SMN_C2PMSG_67__CONTENT__SHIFT                  0x00000000 + + +static const struct clk_mgr_shift clk_mgr_shift = { +		CLK_MASK_SH_LIST_RV1(__SHIFT) +}; + +static const struct clk_mgr_mask clk_mgr_mask = { +		CLK_MASK_SH_LIST_RV1(_MASK) +}; +  static const struct resource_caps res_cap = {  		.num_timing_generator = 4,  		.num_opp = 4, @@ -504,7 +520,6 @@ static const struct resource_caps res_cap = {  		.num_ddc = 4,  }; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  static const struct resource_caps rv2_res_cap = {  		.num_timing_generator = 3,  		.num_opp = 3, @@ -514,7 +529,6 @@ static const struct resource_caps rv2_res_cap = {  		.num_pll = 3,  		.num_ddc = 3,  }; -#endif  static const struct dc_plane_cap plane_cap = {  	.type = DC_PLANE_TYPE_DCN_UNIVERSAL, @@ -1217,6 +1231,38 @@ static enum dc_status dcn10_get_default_swizzle_mode(struct dc_plane_state *plan  	return result;  } +struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( +		struct resource_context *res_ctx, +		const struct resource_pool *pool, +		struct dc_stream_state *stream) +{ +	int i; +	int j = -1; +	struct dc_link *link = stream->link; + +	for (i = 0; i < pool->stream_enc_count; i++) { +		if (!res_ctx->is_stream_enc_acquired[i] && +				pool->stream_enc[i]) { +			/* Store first available for MST second display +			 * in daisy chain use case +			 */ +			j = i; +			if (pool->stream_enc[i]->id == +					link->link_enc->preferred_engine) +				return pool->stream_enc[i]; +		} +	} + +	/* +	 * For CZ and later, we can allow DIG FE and BE to differ for all display types +	 */ + +	if (j >= 0) +		return pool->stream_enc[j]; + +	return NULL; +} +  static const struct dc_cap_funcs cap_funcs = {  	.get_dcc_compression_cap = dcn10_get_dcc_compression_cap  }; @@ -1229,7 +1275,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = {  	.validate_plane = dcn10_validate_plane,  	.validate_global = dcn10_validate_global,  	.add_stream_to_ctx = dcn10_add_stream_to_ctx, 
-	.get_default_swizzle_mode = dcn10_get_default_swizzle_mode +	.get_default_swizzle_mode = dcn10_get_default_swizzle_mode, +	.find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link  };  static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -1252,11 +1299,9 @@ static bool construct(  	ctx->dc_bios->regs = &bios_regs; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  	if (ctx->dce_version == DCN_VERSION_1_01)  		pool->base.res_cap = &rv2_res_cap;  	else -#endif  		pool->base.res_cap = &res_cap;  	pool->base.funcs = &dcn10_res_pool_funcs; @@ -1273,10 +1318,8 @@ static bool construct(  	/* max pipe num for ASIC before check pipe fuses */  	pool->base.pipe_count = pool->base.res_cap->num_timing_generator; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  	if (dc->ctx->dce_version == DCN_VERSION_1_01)  		pool->base.pipe_count = 3; -#endif  	dc->caps.max_video_width = 3840;  	dc->caps.max_downscale_ratio = 200;  	dc->caps.i2c_speed_in_khz = 100; @@ -1309,26 +1352,17 @@ static bool construct(  				CLOCK_SOURCE_COMBO_PHY_PLL2,  				&clk_src_regs[2], false); -#ifdef CONFIG_DRM_AMD_DC_DCN1_01  	if (dc->ctx->dce_version == DCN_VERSION_1_0) {  		pool->base.clock_sources[DCN10_CLK_SRC_PLL3] =  				dcn10_clock_source_create(ctx, ctx->dc_bios,  					CLOCK_SOURCE_COMBO_PHY_PLL3,  					&clk_src_regs[3], false);  	} -#else -	pool->base.clock_sources[DCN10_CLK_SRC_PLL3] = -			dcn10_clock_source_create(ctx, ctx->dc_bios, -				CLOCK_SOURCE_COMBO_PHY_PLL3, -				&clk_src_regs[3], false); -#endif  	pool->base.clk_src_count = DCN10_CLK_SRC_TOTAL; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  	if (dc->ctx->dce_version == DCN_VERSION_1_01)  		pool->base.clk_src_count = DCN101_CLK_SRC_TOTAL; -#endif  	pool->base.dp_clock_source =  			dcn10_clock_source_create(ctx, ctx->dc_bios, @@ -1343,12 +1377,6 @@ static bool construct(  			goto fail;  		}  	} -	pool->base.clk_mgr = dcn1_clk_mgr_create(ctx); -	if (pool->base.clk_mgr == NULL) { -		dm_error("DC: failed to create display clock!\n"); -		BREAK_TO_DEBUGGER(); -		goto fail; -	}  	pool->base.dmcu = dcn10_dmcu_create(ctx,  			&dmcu_regs, @@ -1374,7 +1402,6 @@ static bool construct(  	memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));  	memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  	if (dc->ctx->dce_version == DCN_VERSION_1_01) {  		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;  		struct dcn_ip_params *dcn_ip = dc->dcn_ip; @@ -1385,7 +1412,6 @@ static bool construct(  		dcn_soc->dram_clock_change_latency = 23;  		dcn_ip->max_num_dpp = 3;  	} -#endif  	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {  		dc->dcn_soc->urgent_latency = 3;  		dc->debug.disable_dmcu = true; @@ -1410,6 +1436,13 @@ static bool construct(  	pool->base.pp_smu = dcn10_pp_smu_create(ctx); +	pool->base.clk_mgr = dcn1_clk_mgr_create(ctx); +	if (pool->base.clk_mgr == NULL) { +		dm_error("DC: failed to create display clock!\n"); +		BREAK_TO_DEBUGGER(); +		goto fail; +	} +  	if (!dc->debug.disable_pplib_clock_request)  		dcn_bw_update_from_pplib(dc);  	dcn_bw_sync_calcs_and_dml(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h index 999c684a0b36..633025ccb870 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h @@ -42,6 +42,11 @@ struct resource_pool *dcn10_create_resource_pool(  		const struct dc_init_data *init_data,  		struct dc *dc); +struct 
stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( +		struct resource_context *res_ctx, +		const struct resource_pool *pool, +		struct dc_stream_state *stream); +  #endif /* __DC_RESOURCE_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index 8ee9f6dc1d62..ba71b5224e7f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -415,6 +415,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(  	case COLOR_SPACE_APPCTRL:  	case COLOR_SPACE_CUSTOMPOINTS:  	case COLOR_SPACE_UNKNOWN: +	case COLOR_SPACE_YCBCR709_BLACK:  		/* do nothing */  		break;  	} @@ -726,11 +727,9 @@ void enc1_stream_encoder_update_dp_info_packets(  				3,  /* packetIndex */  				&info_frame->hdrsmd); -	if (info_frame->dpsdp.valid) -		enc1_update_generic_info_packet( -				enc1, -				4,/* packetIndex */ -				&info_frame->dpsdp); +	/* packetIndex 4 is used for sending immediate sdp messages; please +	 * use other packet indexes (such as 5 and 6) for other info packets +	 */  	/* enable/disable transmission of packet(s).  	 * If enabled, packet transmission begins on the next frame @@ -738,7 +737,101 @@  	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid);  	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid);  	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid); -	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, info_frame->dpsdp.valid); + + +	/* This bit is the master enable bit. +	 * When enabling the secondary stream engine, +	 * this master bit must also be set. +	 * This register is shared with the audio info frame. +	 * Therefore we need to enable the master bit +	 * if at least one of the fields is not 0 +	 */ +	value = REG_READ(DP_SEC_CNTL); +	if (value) +		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); +} + +void enc1_stream_encoder_send_immediate_sdp_message( +	struct stream_encoder *enc, +	const uint8_t *custom_sdp_message, +	unsigned int sdp_message_size) +{ +	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); +	uint32_t value = 0; + +	/* TODOFPGA: figure out a proper number for max_retries when polling for lock; +	 * use 50 for now. +	 */ +	uint32_t max_retries = 50; + +	/* check if GSP4 is transmitted */ +	REG_WAIT(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, +		0, 10, max_retries); + +	/* disable GSP4 transmitting */ +	REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 0); + +	/* transmit GSP4 at the earliest time in a frame */ +	REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, 1); + +	/* we need to turn on the clock before programming the AFMT block */ +	REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + +	/* check if HW reading GSP memory */ +	REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT, +			0, 10, max_retries); + +	/* HW reading GSP memory for too long -> something is wrong; +	 * clear the GSP memory access conflict (and notify?) before +	 * SW writes to GSP memory +	 */ +	REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1); + +	/* use generic packet 4 for immediate sdp message */ +	REG_UPDATE(AFMT_VBI_PACKET_CONTROL, +			AFMT_GENERIC_INDEX, 4); + +	/* write generic packet header +	 * (4th byte is for GENERIC0 only) +	 */ +	REG_SET_4(AFMT_GENERIC_HDR, 0, +			AFMT_GENERIC_HB0, custom_sdp_message[0], +			AFMT_GENERIC_HB1, custom_sdp_message[1], +			AFMT_GENERIC_HB2, custom_sdp_message[2], +			AFMT_GENERIC_HB3, custom_sdp_message[3]); + +	/* write generic packet contents +	 * (we never use last 4 bytes) +	 * there are 8 (0-7) mmDIG0_AFMT_GENERIC0_x registers +	 */ +	{ +		const uint32_t *content = +			(const uint32_t *) &custom_sdp_message[4]; + +		REG_WRITE(AFMT_GENERIC_0, *content++); +		REG_WRITE(AFMT_GENERIC_1, *content++); +		REG_WRITE(AFMT_GENERIC_2, *content++); +		REG_WRITE(AFMT_GENERIC_3, *content++); +		REG_WRITE(AFMT_GENERIC_4, *content++); +		REG_WRITE(AFMT_GENERIC_5, *content++); +		REG_WRITE(AFMT_GENERIC_6, *content++); +		REG_WRITE(AFMT_GENERIC_7, *content); +	} + +	/* check whether a GENERIC4 register double-buffer update in immediate mode +	 * is pending +	 */ +	REG_WAIT(AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING, +			0, 10, max_retries); + +	/* atomically update double-buffered GENERIC4 registers in immediate mode +	 * (update immediately) +	 */ +	REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, +			AFMT_GENERIC4_IMMEDIATE_UPDATE, 1); + +	/* enable GSP4 transmitting */ +	REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 1);  	/* This bit is the master enable bit.  	 * When enabling secondary stream engine, @@ -1462,6 +1555,8 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = {  		enc1_stream_encoder_stop_hdmi_info_packets,  	.update_dp_info_packets =  		enc1_stream_encoder_update_dp_info_packets, +	.send_immediate_sdp_message = +		enc1_stream_encoder_send_immediate_sdp_message,  	.stop_dp_info_packets =  		enc1_stream_encoder_stop_dp_info_packets,  	.dp_blank =
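The GSP4 path added above transmits a one-shot SDP packet outside the per-frame info-packet programming; the encoder consumes 4 header bytes plus eight 32-bit content words, so callers must hand it a buffer of at least 36 bytes. A minimal sketch of a hw_sequencer-level caller driving the new stream_encoder_funcs hook (the wrapper itself is illustrative; only send_immediate_sdp_message comes from this patch):

/* Illustrative caller sketch, not part of the patch: forward an SDP
 * payload to the stream encoder's new immediate-send hook.
 */
static void send_immediate_sdp_message_sketch(struct pipe_ctx *pipe_ctx,
		const uint8_t *custom_sdp_message,
		unsigned int sdp_message_size)
{
	struct stream_encoder *enc = pipe_ctx->stream_res.stream_enc;

	/* DP only; the DP_SEC/GSP registers have no HDMI counterpart here */
	if (dc_is_dp_signal(pipe_ctx->stream->signal) &&
			enc->funcs->send_immediate_sdp_message)
		enc->funcs->send_immediate_sdp_message(enc,
				custom_sdp_message, sdp_message_size);
}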
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h index e654c2f55971..a292b106a8b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -81,6 +81,7 @@  	SRI(DP_MSE_RATE_UPDATE, DP, id), \  	SRI(DP_PIXEL_FORMAT, DP, id), \  	SRI(DP_SEC_CNTL, DP, id), \ +	SRI(DP_SEC_CNTL2, DP, id), \  	SRI(DP_STEER_FIFO, DP, id), \  	SRI(DP_VID_M, DP, id), \  	SRI(DP_VID_N, DP, id), \ @@ -118,10 +119,12 @@ struct dcn10_stream_enc_registers {  	uint32_t AFMT_60958_1;  	uint32_t AFMT_60958_2;  	uint32_t DIG_FE_CNTL; +	uint32_t DIG_FE_CNTL2;  	uint32_t DP_MSE_RATE_CNTL;  	uint32_t DP_MSE_RATE_UPDATE;  	uint32_t DP_PIXEL_FORMAT;  	uint32_t DP_SEC_CNTL; +	uint32_t DP_SEC_CNTL2;  	uint32_t DP_STEER_FIFO;  	uint32_t DP_VID_M;  	uint32_t DP_VID_N; @@ -191,6 +194,10 @@ struct dcn10_stream_enc_registers {  	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\  	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\  	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\ +	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\ +	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\ +	SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\ +	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\  	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\  	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\  	SE_SF(DP0_DP_VID_STREAM_CNTL, 
DP_VID_STREAM_STATUS, mask_sh),\ @@ -245,6 +252,7 @@ struct dcn10_stream_enc_registers {  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\ +	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING, mask_sh),\  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\ @@ -253,6 +261,7 @@ struct dcn10_stream_enc_registers {  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\ +	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE, mask_sh),\  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\  	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\ @@ -260,6 +269,7 @@ struct dcn10_stream_enc_registers {  	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\  	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\  	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\ +	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_PPS, mask_sh),\  	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\  	SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\  	SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\ @@ -304,6 +314,7 @@ struct dcn10_stream_enc_registers {  	type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\  	type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\  	type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\ +	type AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING;\  	type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\  	type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\  	type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\ @@ -312,6 +323,7 @@ struct dcn10_stream_enc_registers {  	type AFMT_GENERIC2_FRAME_UPDATE;\  	type AFMT_GENERIC3_FRAME_UPDATE;\  	type AFMT_GENERIC4_FRAME_UPDATE;\ +	type AFMT_GENERIC4_IMMEDIATE_UPDATE;\  	type AFMT_GENERIC5_FRAME_UPDATE;\  	type AFMT_GENERIC6_FRAME_UPDATE;\  	type AFMT_GENERIC7_FRAME_UPDATE;\ @@ -366,7 +378,12 @@ struct dcn10_stream_enc_registers {  	type DP_SEC_GSP5_ENABLE;\  	type DP_SEC_GSP6_ENABLE;\  	type DP_SEC_GSP7_ENABLE;\ +	type DP_SEC_GSP7_PPS;\  	type DP_SEC_GSP7_SEND;\ +	type DP_SEC_GSP4_SEND;\ +	type DP_SEC_GSP4_SEND_PENDING;\ +	type DP_SEC_GSP4_LINE_NUM;\ +	type DP_SEC_GSP4_SEND_ANY_LINE;\  	type DP_SEC_MPG_ENABLE;\  	type DP_VID_STREAM_DIS_DEFER;\  	type DP_VID_STREAM_ENABLE;\ @@ -484,6 +501,11 @@ void enc1_stream_encoder_update_dp_info_packets(  	struct stream_encoder *enc,  	const struct encoder_info_frame *info_frame); +void enc1_stream_encoder_send_immediate_sdp_message( +	struct stream_encoder *enc, +	const uint8_t *custom_sdp_message, +				unsigned int sdp_message_size); +  void enc1_stream_encoder_stop_dp_info_packets(  	struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index 4fc4208d1472..9f7ebf6a4e40 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -80,6 +80,7 @@ struct pp_smu_funcs_rv {  	/* PPSMC_MSG_SetDisplayCount  	 * 0 triggers S0i2 
optimization  	 */ +  	void (*set_display_count)(struct pp_smu *pp, int count);  	/* reader and writer WM's are sent together as part of one table*/ @@ -115,7 +116,6 @@ struct pp_smu_funcs_rv {  	/* PME w/a */  	void (*set_pme_wa_enable)(struct pp_smu *pp); -  };  struct pp_smu_funcs { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index c5b791d158a7..6cc59f138095 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -219,6 +219,9 @@ struct _vcs_dpi_display_pipe_source_params_st {  	unsigned char xfc_enable;  	unsigned char xfc_slave;  	struct _vcs_dpi_display_xfc_params_st xfc_params; +	// for the FreeSync vstartup-lines calculation +	unsigned char v_total_min; +	unsigned char v_total_max;  };  struct writeback_st {  	int wb_src_height; @@ -289,6 +292,8 @@ struct _vcs_dpi_display_pipe_dest_params_st {  	unsigned char otg_inst;  	unsigned char odm_combine;  	unsigned char use_maximum_vstartup; +	unsigned int vtotal_max; +	unsigned int vtotal_min;  };  struct _vcs_dpi_display_pipe_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c index c2028c4744a6..a610fae16280 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c @@ -84,10 +84,6 @@ bool dal_hw_factory_init(  		return true;  #if defined(CONFIG_DRM_AMD_DC_DCN1_0)  	case DCN_VERSION_1_0: -		dal_hw_factory_dcn10_init(factory); -		return true; -#endif -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  	case DCN_VERSION_1_01:  		dal_hw_factory_dcn10_init(factory);  		return true; diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c index 236ca28784a9..77615146b96e 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c @@ -84,11 +84,6 @@ bool dal_hw_translate_init(  		dal_hw_translate_dcn10_init(translate);  		return true;  #endif -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) -	case DCN_VERSION_1_01: -		dal_hw_translate_dcn10_init(translate); -		return true; -#endif  	default:  		BREAK_TO_DEBUGGER(); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 6f5ab05d6467..539d34d3439c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -123,6 +123,11 @@ struct resource_funcs {  	enum dc_status (*get_default_swizzle_mode)(  			struct dc_plane_state *plane_state); +	struct stream_encoder *(*find_first_free_match_stream_enc_for_link)( +			struct resource_context *res_ctx, +			const struct resource_pool *pool, +			struct dc_stream_state *stream); +  };  struct audio_support{ @@ -212,6 +217,25 @@ struct plane_resource {  	struct dcn_fe_bandwidth bw;  }; +union pipe_update_flags { +	struct { +		uint32_t enable : 1; +		uint32_t disable : 1; +		uint32_t odm : 1; +		uint32_t global_sync : 1; +		uint32_t opp_changed : 1; +		uint32_t tg_changed : 1; +		uint32_t mpcc : 1; +		uint32_t dppclk : 1; +		uint32_t hubp_interdependent : 1; +		uint32_t hubp_rq_dlg_ttu : 1; +		uint32_t gamut_remap : 1; +		uint32_t scaler : 1; +		uint32_t viewport : 1; +	} bits; +	uint32_t raw; +}; +  struct pipe_ctx {  	struct dc_plane_state *plane_state;  	struct dc_stream_state *stream; @@ -234,6 +258,7 @@  	struct _vcs_dpi_display_rq_regs_st rq_regs;  	struct _vcs_dpi_display_pipe_dest_params_st pipe_dlg_param;  #endif +	union pipe_update_flags update_flags;  };  struct resource_context {
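The pipe_update_flags union added to core_types.h above packs per-pipe dirty bits into one word, so "does this pipe need any reprogramming" collapses to a single test of .raw. A standalone sketch of that pattern, using plain C and only a subset of the fields (the program around it is illustrative, not driver code):

#include <stdint.h>
#include <stdio.h>

union pipe_update_flags {
	struct {
		uint32_t enable : 1;
		uint32_t global_sync : 1;
		uint32_t scaler : 1;
		uint32_t viewport : 1;
	} bits;
	uint32_t raw;
};

int main(void)
{
	union pipe_update_flags update = { .raw = 0 };

	update.bits.scaler = 1;    /* scaler configuration changed */
	update.bits.viewport = 1;  /* viewport moved */

	if (update.raw)            /* any bit set -> reprogram the pipe */
		printf("pipe dirty, flags 0x%x\n", (unsigned)update.raw);

	update.raw = 0;            /* one store clears every flag */
	return 0;
}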
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 31bd6d5183ab..f3fd3f8cac26 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -31,7 +31,7 @@  struct clk_mgr {  	struct dc_context *ctx; -	const struct clk_mgr_funcs *funcs; +	struct clk_mgr_funcs *funcs;  	struct dc_clocks clks;  }; @@ -44,6 +44,12 @@ struct clk_mgr_funcs {  	int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr);  	void (*init_clocks)(struct clk_mgr *clk_mgr); + +	/* Returns actual clk that's set */ +	int (*set_dispclk)(struct clk_mgr *clk_mgr, int requested_dispclk_khz); +	int (*set_dprefclk)(struct clk_mgr *clk_mgr);  }; + +  #endif /* __DAL_CLK_MGR_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index c9d3e37e9531..ca162079a41b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -59,6 +59,7 @@ struct encoder_feature_support {  			uint32_t IS_TPS3_CAPABLE:1;  			uint32_t IS_TPS4_CAPABLE:1;  			uint32_t HDMI_6GB_EN:1; +			uint32_t DP_IS_USB_C:1;  		} bits;  		uint32_t raw;  	} flags; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 49854eb73d1d..537563888f87 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -63,8 +63,6 @@ struct encoder_info_frame {  	struct dc_info_packet vsc;  	/* HDR Static MetaData */  	struct dc_info_packet hdrsmd; -	/* custom sdp message */ -	struct dc_info_packet dpsdp;  };  struct encoder_unblank_param { @@ -123,6 +121,11 @@ struct stream_encoder_funcs {  		struct stream_encoder *enc,  		const struct encoder_info_frame *info_frame); +	void (*send_immediate_sdp_message)( +				struct stream_encoder *enc, +				const uint8_t *custom_sdp_message, +				unsigned int sdp_message_size); +  	void (*stop_dp_info_packets)(  		struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 067d53caf28a..0b8c6896581f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -70,14 +70,6 @@ enum crtc_state {  	CRTC_STATE_VACTIVE  }; -struct _dlg_otg_param { -	int vstartup_start; -	int vupdate_offset; -	int vupdate_width; -	int vready_offset; -	enum signal_type signal; -}; -  struct vupdate_keepout_params {  	int start_offset;  	int end_offset; @@ -126,7 +118,6 @@ struct timing_generator {  	const struct timing_generator_funcs *funcs;  	struct dc_bios *bp;  	struct dc_context *ctx; -	struct _dlg_otg_param dlg_otg_param;  	int inst;  }; @@ -140,7 +131,13 @@ struct timing_generator_funcs {  							const struct dc_crtc_timing *timing);  	void (*program_timing)(struct timing_generator *tg,  							const struct dc_crtc_timing *timing, -							bool use_vbios); +							int vready_offset, +							int vstartup_start, +							int vupdate_offset, +							int vupdate_width, +							const enum signal_type signal, +							bool use_vbios +	);  	void (*setup_vertical_interrupt0)(  			struct timing_generator *optc,  			uint32_t 
start_line, @@ -210,7 +207,11 @@ struct timing_generator_funcs {  	bool (*arm_vert_intr)(struct timing_generator *tg, uint8_t width); -	void (*program_global_sync)(struct timing_generator *tg); +	void (*program_global_sync)(struct timing_generator *tg, +			int vready_offset, +			int vstartup_start, +			int vupdate_offset, +			int vupdate_width);  	void (*enable_optc_clock)(struct timing_generator *tg, bool enable);  	void (*program_stereo)(struct timing_generator *tg,  		const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); @@ -237,6 +238,8 @@ struct timing_generator_funcs {  	bool (*get_crc)(struct timing_generator *tg,  			uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); +	void (*set_vtg_params)(struct timing_generator *optc, +			const struct dc_crtc_timing *dc_crtc_timing);  };  #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 33905468e2b9..eb1c12ed026a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -158,6 +158,11 @@ struct hw_sequencer_funcs {  	void (*update_info_frame)(struct pipe_ctx *pipe_ctx); +	void (*send_immediate_sdp_message)( +				struct pipe_ctx *pipe_ctx, +				const uint8_t *custom_sdp_message, +				unsigned int sdp_message_size); +  	void (*enable_stream)(struct pipe_ctx *pipe_ctx);  	void (*disable_stream)(struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h index 01bf01a34a08..c30437ae8395 100644 --- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h +++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h @@ -307,7 +307,8 @@ struct bp_encoder_cap_info {  	uint32_t DP_HBR2_EN:1;  	uint32_t DP_HBR3_EN:1;  	uint32_t HDMI_6GB_EN:1; -	uint32_t RESERVED:30; +	uint32_t DP_IS_USB_C:1; +	uint32_t RESERVED:27;  };  #endif /*__DAL_BIOS_PARSER_TYPES_H__ */ diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 4c8ce7938f01..63c3e77159d9 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -131,21 +131,18 @@  #define INTERNAL_REV_RAVEN_A0             0x00    /* First spin of Raven */  #define RAVEN_A0 0x01  #define RAVEN_B0 0x21 -#define PICASSO_A0 0x41 -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  /* DCN1_01 */ +#define PICASSO_A0 0x41  #define RAVEN2_A0 0x81 -#endif +#define RAVEN1_F0 0xF0  #define RAVEN_UNKNOWN 0xFF  #define ASIC_REV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN)  #define RAVEN1_F0 0xF0  #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  #define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0))  #define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) -#endif /* DCN1_01 */  #define FAMILY_RV 142 /* DCN 1*/ diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h index f5bd869d4320..dabdbc0999d4 100644 --- a/drivers/gpu/drm/amd/display/include/dal_types.h +++ b/drivers/gpu/drm/amd/display/include/dal_types.h @@ -45,9 +45,7 @@ enum dce_version {  	DCE_VERSION_12_1,  	DCE_VERSION_MAX,  	DCN_VERSION_1_0, -#if defined(CONFIG_DRM_AMD_DC_DCN1_01)  	DCN_VERSION_1_01, -#endif /* DCN1_01 */  	DCN_VERSION_MAX  }; diff --git 
a/drivers/gpu/drm/amd/display/include/set_mode_types.h b/drivers/gpu/drm/amd/display/include/set_mode_types.h index 2b836e582c08..845fea8a387f 100644 --- a/drivers/gpu/drm/amd/display/include/set_mode_types.h +++ b/drivers/gpu/drm/amd/display/include/set_mode_types.h @@ -84,7 +84,10 @@ union hdmi_info_packet {  		uint16_t bar_left;  		uint16_t bar_right; -		uint8_t reserved[14]; +		uint8_t F140_F143:4; +		uint8_t ACE0_ACE3:4; + +		uint8_t reserved[13];  	} bits;  	struct info_packet_raw_data packet_raw_data; diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index a1055413bade..8601d371776e 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -240,16 +240,27 @@ struct dividers {  	struct fixed31_32 divider3;  }; -static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4) +enum gamma_type_index { +	gamma_type_index_2_4, +	gamma_type_index_2_2, +	gamma_type_index_2_2_flat +}; + +static void build_coefficients(struct gamma_coefficients *coefficients, enum gamma_type_index type)  { -	static const int32_t numerator01[] = { 31308, 180000}; -	static const int32_t numerator02[] = { 12920, 4500}; -	static const int32_t numerator03[] = { 55, 99}; -	static const int32_t numerator04[] = { 55, 99}; -	static const int32_t numerator05[] = { 2400, 2200}; +	static const int32_t numerator01[] = { 31308,	180000,	0}; +	static const int32_t numerator02[] = { 12920,	4500,	0}; +	static const int32_t numerator03[] = { 55,		99,		0}; +	static const int32_t numerator04[] = { 55,		99,		0}; +	static const int32_t numerator05[] = { 2400,	2200, 2200};  	uint32_t i = 0; -	uint32_t index = is_2_4 == true ? 
0:1; +	uint32_t index = 0; + +	if (type == gamma_type_index_2_2) +		index = 1; +	else if (type == gamma_type_index_2_2_flat) +		index = 2;  	do {  		coefficients->a0[i] = dc_fixpt_from_fraction( @@ -697,7 +708,7 @@ static void build_de_pq(struct pwl_float_data_ex *de_pq,  static void build_regamma(struct pwl_float_data_ex *rgb_regamma,  		uint32_t hw_points_num, -		const struct hw_x_point *coordinate_x, bool is_2_4) +		const struct hw_x_point *coordinate_x, enum gamma_type_index type)  {  	uint32_t i; @@ -705,7 +716,7 @@ static void build_regamma(struct pwl_float_data_ex *rgb_regamma,  	struct pwl_float_data_ex *rgb = rgb_regamma;  	const struct hw_x_point *coord_x = coordinate_x; -	build_coefficients(&coeff, is_2_4); +	build_coefficients(&coeff, type);  	i = 0; @@ -892,13 +903,13 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,  static void build_degamma(struct pwl_float_data_ex *curve,  		uint32_t hw_points_num, -		const struct hw_x_point *coordinate_x, bool is_2_4) +		const struct hw_x_point *coordinate_x, enum gamma_type_index type)  {  	uint32_t i;  	struct gamma_coefficients coeff;  	uint32_t begin_index, end_index; -	build_coefficients(&coeff, is_2_4); +	build_coefficients(&coeff, type);  	i = 0;  	/* X points is 2^-25 to 2^7 @@ -1614,7 +1625,7 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,  				coordinates_x,  				output_tf->sdr_ref_white_level);  	} else if (tf == TRANSFER_FUNCTION_GAMMA22 && -			fs_params != NULL) { +			fs_params != NULL && fs_params->skip_tm == 0) {  		build_freesync_hdr(rgb_regamma,  				MAX_HW_POINTS,  				coordinates_x, @@ -1627,7 +1638,9 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,  		build_regamma(rgb_regamma,  				MAX_HW_POINTS, -				coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? true:false); +				coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? gamma_type_index_2_4 : +					tf == TRANSFER_FUNCTION_GAMMA22 ? +					gamma_type_index_2_2_flat : gamma_type_index_2_2);  	}  	map_regamma_hw_to_x_user(ramp, coeff, rgb_user,  			coordinates_x, axis_x, rgb_regamma, @@ -1832,7 +1845,9 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,  		build_degamma(curve,  				MAX_HW_POINTS,  				coordinates_x, -				tf == TRANSFER_FUNCTION_SRGB ? true : false); +				tf == TRANSFER_FUNCTION_SRGB ? +				gamma_type_index_2_4 : tf == TRANSFER_FUNCTION_GAMMA22 ? +				gamma_type_index_2_2_flat : gamma_type_index_2_2);  	else if (tf == TRANSFER_FUNCTION_LINEAR) {  		// just copy coordinates_x into curve  		i = 0; @@ -1932,7 +1947,10 @@ bool  mod_color_calculate_curve(enum dc_transfer_func_predefined trans,  		build_regamma(rgb_regamma,  				MAX_HW_POINTS, -				coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false); +				coordinates_x, +				trans == TRANSFER_FUNCTION_SRGB ? +				gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ? +				gamma_type_index_2_2_flat : gamma_type_index_2_2);  		for (i = 0; i <= MAX_HW_POINTS ; i++) {  			points->red[i]    = rgb_regamma[i].r;  			points->green[i]  = rgb_regamma[i].g; @@ -2002,7 +2020,8 @@ bool  mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,  		kvfree(rgb_degamma);  	} else if (trans == TRANSFER_FUNCTION_SRGB || -			  trans == TRANSFER_FUNCTION_BT709) { +			  trans == TRANSFER_FUNCTION_BT709 || +			  trans == TRANSFER_FUNCTION_GAMMA22) {  		rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,  				       sizeof(*rgb_degamma),  				       GFP_KERNEL); @@ -2011,7 +2030,10 @@ bool  mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,  		build_degamma(rgb_degamma,  				MAX_HW_POINTS, -				coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false); +				coordinates_x, +				trans == TRANSFER_FUNCTION_SRGB ? +				gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ? +				gamma_type_index_2_2_flat : gamma_type_index_2_2);  		for (i = 0; i <= MAX_HW_POINTS ; i++) {  			points->red[i]    = rgb_degamma[i].r;  			points->green[i]  = rgb_degamma[i].g;
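Note that the same three-way mapping from transfer function to gamma_type_index now appears at four call sites in color_gamma.c (the regamma and degamma paths of both the params and curve variants). A small helper would keep them in sync; a possible consolidation, where tf_to_gamma_type is a hypothetical name and not part of the patch:

/* Hypothetical helper centralizing the repeated ternary above. */
static enum gamma_type_index tf_to_gamma_type(enum dc_transfer_func_predefined tf)
{
	if (tf == TRANSFER_FUNCTION_SRGB)
		return gamma_type_index_2_4;      /* sRGB piecewise 2.4 curve */
	if (tf == TRANSFER_FUNCTION_GAMMA22)
		return gamma_type_index_2_2_flat; /* pure 2.2 power, no linear toe */
	return gamma_type_index_2_2;              /* 2.2 with BT.709-style linear segment */
}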
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h index a6e164df090a..369953fafadf 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -79,6 +79,7 @@ struct freesync_hdr_tf_params {  	unsigned int max_content; // luminance in nits  	unsigned int min_display; // luminance in 1/10000 nits  	unsigned int max_display; // luminance in nits +	unsigned int skip_tm; // skip tone mapping when set  };  void setup_x_points_distribution(void); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index db06fab2ad5c..bc13c552797f 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -63,7 +63,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,  	if (stream->psr_version != 0)  		vscPacketRevision = 2; -	if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) +	/* Update to revision 5 for extended colorimetry support for DPCD 1.4+ */ +	if (stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 && +			stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)  		vscPacketRevision = 5;  	/* VSC packet not needed based on the features diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h index a9575db8d7aa..6efcaa93e17b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h @@ -30,4 +30,22 @@  #define mmDF_CS_UMC_AON0_DramBaseAddress0								0x0044  #define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX							0 +#define smnPerfMonCtlLo0					0x01d440UL +#define smnPerfMonCtlHi0					0x01d444UL +#define smnPerfMonCtlLo1					0x01d450UL +#define smnPerfMonCtlHi1					0x01d454UL +#define smnPerfMonCtlLo2					0x01d460UL +#define smnPerfMonCtlHi2					0x01d464UL +#define smnPerfMonCtlLo3					0x01d470UL +#define smnPerfMonCtlHi3					0x01d474UL + +#define smnPerfMonCtrLo0					0x01d448UL +#define smnPerfMonCtrHi0					0x01d44cUL +#define smnPerfMonCtrLo1					0x01d458UL +#define smnPerfMonCtrHi1					0x01d45cUL +#define smnPerfMonCtrLo2					0x01d468UL +#define smnPerfMonCtrHi2					0x01d46cUL +#define smnPerfMonCtrLo3					0x01d478UL 
+#define smnPerfMonCtrHi3					0x01d47cUL +  #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h index 529b37db274c..f1d048e0ed2c 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h @@ -829,6 +829,8 @@  #define mmTD_CNTL_BASE_IDX                                                                             0  #define mmTD_STATUS                                                                                    0x0526  #define mmTD_STATUS_BASE_IDX                                                                           0 +#define mmTD_EDC_CNT                                                                                   0x052e +#define mmTD_EDC_CNT_BASE_IDX                                                                          0  #define mmTD_DSM_CNTL                                                                                  0x052f  #define mmTD_DSM_CNTL_BASE_IDX                                                                         0  #define mmTD_DSM_CNTL2                                                                                 0x0530 @@ -845,6 +847,8 @@  #define mmTA_STATUS_BASE_IDX                                                                           0  #define mmTA_SCRATCH                                                                                   0x0564  #define mmTA_SCRATCH_BASE_IDX                                                                          0 +#define mmTA_EDC_CNT                                                                                   0x0586 +#define mmTA_EDC_CNT_BASE_IDX                                                                          0  // addressBlock: gc_gdsdec @@ -1051,6 +1055,13 @@  #define mmGC_USER_RB_BACKEND_DISABLE_BASE_IDX                                                          0 +// addressBlock: gc_ea_gceadec2 +// base address: 0x9c00 +#define mmGCEA_EDC_CNT                                                                                 0x0706 +#define mmGCEA_EDC_CNT_BASE_IDX                                                                        0 +#define mmGCEA_EDC_CNT2                                                                                0x0707 +#define mmGCEA_EDC_CNT2_BASE_IDX                                                                       0 +  // addressBlock: gc_rmi_rmidec  // base address: 0x9e00  #define mmRMI_GENERAL_CNTL                                                                             0x0780 @@ -1709,6 +1720,8 @@  #define mmTC_CFG_L1_VOLATILE_BASE_IDX                                                                  0  #define mmTC_CFG_L2_VOLATILE                                                                           0x0b23  #define mmTC_CFG_L2_VOLATILE_BASE_IDX                                                                  0 +#define mmTCI_EDC_CNT                                                                                  0x0b60 +#define mmTCI_EDC_CNT_BASE_IDX                                                                         0  #define mmTCI_STATUS                                                                                   0x0b61  #define mmTCI_STATUS_BASE_IDX                                                                          0  #define mmTCI_CNTL_1                                                                                   0x0b62 @@ -2594,6 +2607,24 @@  #define 
mmCP_RB_DOORBELL_CONTROL_SCH_7_BASE_IDX                                                        0  #define mmCP_RB_DOORBELL_CLEAR                                                                         0x1188  #define mmCP_RB_DOORBELL_CLEAR_BASE_IDX                                                                0 +#define mmCPF_EDC_TAG_CNT                                                                              0x1189 +#define mmCPF_EDC_TAG_CNT_BASE_IDX                                                                     0 +#define mmCPF_EDC_ROQ_CNT                                                                              0x118a +#define mmCPF_EDC_ROQ_CNT_BASE_IDX                                                                     0 +#define mmCPG_EDC_TAG_CNT                                                                              0x118b +#define mmCPG_EDC_TAG_CNT_BASE_IDX                                                                     0 +#define mmCPG_EDC_DMA_CNT                                                                              0x118d +#define mmCPG_EDC_DMA_CNT_BASE_IDX                                                                     0 +#define mmCPC_EDC_SCRATCH_CNT                                                                          0x118e +#define mmCPC_EDC_SCRATCH_CNT_BASE_IDX                                                                 0 +#define mmCPC_EDC_UCODE_CNT                                                                            0x118f +#define mmCPC_EDC_UCODE_CNT_BASE_IDX                                                                   0 +#define mmDC_EDC_STATE_CNT                                                                             0x1191 +#define mmDC_EDC_STATE_CNT_BASE_IDX                                                                    0 +#define mmDC_EDC_CSINVOC_CNT                                                                           0x1192 +#define mmDC_EDC_CSINVOC_CNT_BASE_IDX                                                                  0 +#define mmDC_EDC_RESTORE_CNT                                                                           0x1193 +#define mmDC_EDC_RESTORE_CNT_BASE_IDX                                                                  0  #define mmCP_GFX_MQD_CONTROL                                                                           0x11a0  #define mmCP_GFX_MQD_CONTROL_BASE_IDX                                                                  0  #define mmCP_GFX_MQD_BASE_ADDR                                                                         0x11a1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h index 8c75669eb500..9470ec5e0f42 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h @@ -54,5 +54,8 @@  #define smnPCIE_PERF_COUNT0_TXCLK2			0x11180258  #define smnPCIE_PERF_COUNT1_TXCLK2			0x1118025c +#define smnPCIE_RX_NUM_NAK				0x11180038 +#define smnPCIE_RX_NUM_NAK_GENERATED			0x1118003c +  #endif	// _nbio_6_1_SMN_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h index 5563f0715896..caf5ffdc130a 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h @@ -51,4 +51,7 @@  #define smnPCIE_PERF_COUNT0_TXCLK2			0x11180258  #define smnPCIE_PERF_COUNT1_TXCLK2			0x1118025c +#define 
smnPCIE_RX_NUM_NAK				0x11180038 +#define smnPCIE_RX_NUM_NAK_GENERATED			0x1118003c +  #endif	// _nbio_7_0_SMN_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h index c1457d880c4d..4bcacf529852 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h @@ -50,4 +50,7 @@  #define smnPCIE_PERF_CNTL_EVENT_LC_PORT_SEL		0x1118024c  #define smnPCIE_PERF_CNTL_EVENT_CI_PORT_SEL		0x11180250 +#define smnPCIE_RX_NUM_NAK				0x11180038 +#define smnPCIE_RX_NUM_NAK_GENERATED			0x1118003c +  #endif	// _nbio_7_4_0_SMN_HEADER diff --git a/drivers/gpu/drm/amd/include/cik_structs.h b/drivers/gpu/drm/amd/include/cik_structs.h index 749eab94e335..699e658c3cec 100644 --- a/drivers/gpu/drm/amd/include/cik_structs.h +++ b/drivers/gpu/drm/amd/include/cik_structs.h @@ -282,8 +282,7 @@ struct cik_sdma_rlc_registers {  	uint32_t reserved_123;  	uint32_t reserved_124;  	uint32_t reserved_125; -	uint32_t reserved_126; -	uint32_t reserved_127; +	/* reserved_126,127: repurposed for driver-internal use */  	uint32_t sdma_engine_id;  	uint32_t sdma_queue_id;  }; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index b897aca9b4c9..98b9533e672b 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -174,6 +174,7 @@ struct tile_config {  #define ALLOC_MEM_FLAGS_GTT		(1 << 1)  #define ALLOC_MEM_FLAGS_USERPTR		(1 << 2)  #define ALLOC_MEM_FLAGS_DOORBELL	(1 << 3) +#define ALLOC_MEM_FLAGS_MMIO_REMAP	(1 << 4)  /*   * Allocation flags attributes/access options. diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2b579ba9b685..9f661bf96ed0 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -109,8 +109,12 @@ enum amd_pp_sensors {  	AMDGPU_PP_SENSOR_UVD_DCLK,  	AMDGPU_PP_SENSOR_VCE_ECCLK,  	AMDGPU_PP_SENSOR_GPU_LOAD, +	AMDGPU_PP_SENSOR_MEM_LOAD,  	AMDGPU_PP_SENSOR_GFX_MCLK,  	AMDGPU_PP_SENSOR_GPU_TEMP, +	AMDGPU_PP_SENSOR_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP, +	AMDGPU_PP_SENSOR_HOTSPOT_TEMP, +	AMDGPU_PP_SENSOR_MEM_TEMP,  	AMDGPU_PP_SENSOR_VCE_POWER,  	AMDGPU_PP_SENSOR_UVD_POWER,  	AMDGPU_PP_SENSOR_GPU_POWER, @@ -159,6 +163,13 @@ struct pp_states_info {  	uint32_t states[16];  }; +enum PP_HWMON_TEMP { +	PP_TEMP_EDGE = 0, +	PP_TEMP_JUNCTION, +	PP_TEMP_MEM, +	PP_TEMP_MAX +}; +  #define PP_GROUP_MASK        0xF0000000  #define PP_GROUP_SHIFT       28 diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h index ceaf4932258d..8b383dbe1cda 100644 --- a/drivers/gpu/drm/amd/include/v9_structs.h +++ b/drivers/gpu/drm/amd/include/v9_structs.h @@ -151,8 +151,7 @@ struct v9_sdma_mqd {  	uint32_t reserved_123;  	uint32_t reserved_124;  	uint32_t reserved_125; -	uint32_t reserved_126; -	uint32_t reserved_127; +	/* reserved_126,127: repurposed for driver-internal use */  	uint32_t sdma_engine_id;  	uint32_t sdma_queue_id;  }; diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h index 717fbae1d362..c17613287cd0 100644 --- a/drivers/gpu/drm/amd/include/vi_structs.h +++ b/drivers/gpu/drm/amd/include/vi_structs.h @@ -151,8 +151,7 @@ struct vi_sdma_mqd {  	uint32_t reserved_123;  	uint32_t reserved_124;  	uint32_t reserved_125; -	uint32_t reserved_126; -	
uint32_t reserved_127; +	/* reserved_126,127: repurposed for driver-internal use */  	uint32_t sdma_engine_id;  	uint32_t sdma_queue_id;  }; diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index eec329ab6037..3026c7e2d3ea 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -30,6 +30,36 @@  #include "atom.h"  #include "amd_pcie.h" +int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version) +{ +	int ret = 0; + +	if (!if_version && !smu_version) +		return -EINVAL; + +	if (if_version) { +		ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion); +		if (ret) +			return ret; + +		ret = smu_read_smc_arg(smu, if_version); +		if (ret) +			return ret; +	} + +	if (smu_version) { +		ret = smu_send_smc_msg(smu, SMU_MSG_GetSmuVersion); +		if (ret) +			return ret; + +		ret = smu_read_smc_arg(smu, smu_version); +		if (ret) +			return ret; +	} + +	return ret; +} +  int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,  			   bool gate)  { @@ -168,6 +198,8 @@ int smu_sys_set_pp_table(struct smu_context *smu,  void *buf, size_t size)  	ATOM_COMMON_TABLE_HEADER *header = (ATOM_COMMON_TABLE_HEADER *)buf;  	int ret = 0; +	if (!smu->pm_enabled) +		return -EINVAL;  	if (header->usStructureSize != size) {  		pr_err("pp table size not matched !\n");  		return -EIO; @@ -203,6 +235,8 @@ int smu_feature_init_dpm(struct smu_context *smu)  	int ret = 0;  	uint32_t unallowed_feature_mask[SMU_FEATURE_MAX/32]; +	if (!smu->pm_enabled) +		return ret;  	mutex_lock(&feature->mutex);  	bitmap_fill(feature->allowed, SMU_FEATURE_MAX);  	mutex_unlock(&feature->mutex); @@ -314,6 +348,7 @@ static int smu_early_init(void *handle)  	struct smu_context *smu = &adev->smu;  	smu->adev = adev; +	smu->pm_enabled = !!amdgpu_dpm;  	mutex_init(&smu->mutex);  	return smu_set_funcs(adev); @@ -323,6 +358,9 @@ static int smu_late_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	struct smu_context *smu = &adev->smu; + +	if (!smu->pm_enabled) +		return 0;  	mutex_lock(&smu->mutex);  	smu_handle_task(&adev->smu,  			smu->smu_dpm.dpm_level, @@ -406,9 +444,6 @@ static int smu_sw_init(void *handle)  	struct smu_context *smu = &adev->smu;  	int ret; -	if (!is_support_sw_smu(adev)) -		return -EINVAL; -  	smu->pool_size = adev->pm.smu_prv_buffer_size;  	smu->smu_feature.feature_num = SMU_FEATURE_MAX;  	mutex_init(&smu->smu_feature.mutex); @@ -460,9 +495,6 @@ static int smu_sw_fini(void *handle)  	struct smu_context *smu = &adev->smu;  	int ret; -	if (!is_support_sw_smu(adev)) -		return -EINVAL; -  	ret = smu_smc_table_sw_fini(smu);  	if (ret) {  		pr_err("Failed to sw fini smc table!\n"); @@ -612,10 +644,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu,  		 * check if the format_revision in vbios is up to pptable header  		 * version, and the structure size is not 0.  		 */ -		ret = smu_get_clk_info_from_vbios(smu); -		if (ret) -			return ret; -  		ret = smu_check_pptable(smu);  		if (ret)  			return ret; @@ -716,6 +744,9 @@ static int smu_smc_table_hw_init(struct smu_context *smu,  	 */  	ret = smu_set_tool_table_location(smu); +	if (!smu_is_dpm_running(smu)) +		pr_info("dpm has been disabled\n"); +  	return ret;  } @@ -788,9 +819,6 @@ static int smu_hw_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	struct smu_context *smu = &adev->smu; -	if (!is_support_sw_smu(adev)) -		return -EINVAL; -  	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {  		ret = smu_load_microcode(smu);  		if (ret)  			return ret; @@ -831,7 +859,10 @@  	mutex_unlock(&smu->mutex); -	adev->pm.dpm_enabled = true; +	if (!smu->pm_enabled) +		adev->pm.dpm_enabled = false; +	else +		adev->pm.dpm_enabled = true;  	pr_info("SMU is initialized successfully!\n"); @@ -849,9 +880,6 @@ static int smu_hw_fini(void *handle)  	struct smu_table_context *table_context = &smu->smu_table;  	int ret = 0; -	if (!is_support_sw_smu(adev)) -		return -EINVAL; -  	kfree(table_context->driver_pptable);  	table_context->driver_pptable = NULL; @@ -906,9 +934,6 @@ static int smu_suspend(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	struct smu_context *smu = &adev->smu; -	if (!is_support_sw_smu(adev)) -		return -EINVAL; -  	ret = smu_system_features_control(smu, false);  	if (ret)  		return ret; @@ -924,9 +949,6 @@ static int smu_resume(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	struct smu_context *smu = &adev->smu; -	if (!is_support_sw_smu(adev)) -		return -EINVAL; -  	pr_info("SMU is resuming...\n");  	mutex_lock(&smu->mutex); @@ -955,7 +977,7 @@ int smu_display_configuration_change(struct smu_context *smu,  	int index = 0;  	int num_of_active_display = 0; -	if (!is_support_sw_smu(smu->adev)) +	if (!smu->pm_enabled || !is_support_sw_smu(smu->adev))  		return -EINVAL;  	if (!display_config) @@ -1083,7 +1105,7 @@ static int smu_enable_umd_pstate(void *handle,  	struct smu_context *smu = (struct smu_context*)(handle);  	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); -	if (!smu_dpm_ctx->dpm_context) +	if (!smu->pm_enabled || !smu_dpm_ctx->dpm_context)  		return -EINVAL;  	if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) { @@ -1126,6 +1148,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,  	long workload;  	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); +	if (!smu->pm_enabled) +		return -EINVAL;  	if (!skip_display_settings) {  		ret = smu_display_config_changed(smu);  		if (ret) { @@ -1134,6 +1158,8 @@  		}  	} +	if (!smu->pm_enabled) +		return -EINVAL;  	ret = smu_apply_clocks_adjust_rules(smu);  	if (ret) {  		pr_err("Failed to apply clocks adjust rules!");
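smu_get_smc_version above bundles two SMU message/readback round-trips behind one call, and either out-pointer may be NULL to skip the corresponding query. A usage sketch under that assumption (the reporting wrapper is illustrative, not part of the patch):

/* Illustrative caller: query both the driver-interface and SMU firmware
 * versions. smu_get_smc_version only sends the matching SMU_MSG_* for a
 * non-NULL pointer, and returns -EINVAL if both pointers are NULL.
 */
static int report_smu_versions(struct smu_context *smu)
{
	uint32_t if_version, smu_version;
	int ret = smu_get_smc_version(smu, &if_version, &smu_version);

	if (ret)
		return ret;

	pr_info("SMU driver if version 0x%08x, fw version 0x%08x\n",
		if_version, smu_version);
	return 0;
}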
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 70f7f47a2fcf..cc57fb953e62 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -225,7 +225,16 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)  {  	int ret = 0; -	struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX}; +	struct PP_TemperatureRange range = { +		TEMP_RANGE_MIN, +		TEMP_RANGE_MAX, +		TEMP_RANGE_MAX, +		TEMP_RANGE_MIN, +		TEMP_RANGE_MAX, +		
TEMP_RANGE_MAX, +		TEMP_RANGE_MIN, +		TEMP_RANGE_MAX, +		TEMP_RANGE_MAX};  	struct amdgpu_device *adev = hwmgr->adev;  	if (hwmgr->hwmgr_func->get_thermal_temperature_range) @@ -239,6 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)  	adev->pm.dpm.thermal.min_temp = range.min;  	adev->pm.dpm.thermal.max_temp = range.max; +	adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max; +	adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; +	adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; +	adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max; +	adev->pm.dpm.thermal.min_mem_temp = range.mem_min; +	adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; +	adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;  	return ret;  } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 048757e8f494..16591be8b0ca 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3532,9 +3532,12 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,  		*size = 4;  		return 0;  	case AMDGPU_PP_SENSOR_GPU_LOAD: +	case AMDGPU_PP_SENSOR_MEM_LOAD:  		offset = data->soft_regs_start + smum_get_offsetof(hwmgr,  								SMU_SoftRegisters, -								AverageGraphicsActivity); +								(idx == AMDGPU_PP_SENSOR_GPU_LOAD) ? +								AverageGraphicsActivity: +								AverageMemoryActivity);  		activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset);  		activity_percent += 0x80; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 384c37875cd0..1d9bb29adaef 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -356,6 +356,7 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)  	struct vega10_hwmgr *data = hwmgr->backend;  	int i;  	uint32_t sub_vendor_id, hw_revision; +	uint32_t top32, bottom32;  	struct amdgpu_device *adev = hwmgr->adev;  	vega10_initialize_power_tune_defaults(hwmgr); @@ -499,6 +500,14 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)  		(hw_revision == 0) &&  		(sub_vendor_id != 0x1002))  		data->smu_features[GNLD_PCC_LIMIT].supported = true; + +	/* Get the SN to turn into a Unique ID */ +	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32); +	top32 = smum_get_argument(hwmgr); +	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32); +	bottom32 = smum_get_argument(hwmgr); + +	adev->unique_id = ((uint64_t)bottom32 << 32) | top32;  }  #ifdef PPLIB_VEGA10_EVV_SUPPORT @@ -2267,8 +2276,8 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)  			pp_table->AcgAvfsGb.m1                   = avfs_params.ulAcgGbFuseTableM1;  			pp_table->AcgAvfsGb.m2                   = avfs_params.ulAcgGbFuseTableM2;  			pp_table->AcgAvfsGb.b                    = avfs_params.ulAcgGbFuseTableB; -			pp_table->AcgAvfsGb.m1_shift             = 0; -			pp_table->AcgAvfsGb.m2_shift             = 0; +			pp_table->AcgAvfsGb.m1_shift             = 24; +			pp_table->AcgAvfsGb.m2_shift             = 12;  			pp_table->AcgAvfsGb.b_shift              = 0;  		} else { @@ -2364,6 +2373,10 @@ static int vega10_avfs_enable(struct pp_hwmgr *hwmgr, bool enable)  	struct vega10_hwmgr *data = hwmgr->backend;  	if (data->smu_features[GNLD_AVFS].supported) { +		/* Already enabled or disabled */ +		if 
(!(enable ^ data->smu_features[GNLD_AVFS].enabled)) +			return 0; +  		if (enable) {  			PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,  					true, @@ -2466,11 +2479,6 @@ static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr)  			return;  		}  	} - -	if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { -		data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; -		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; -	}  }  /** @@ -3683,6 +3691,10 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr,  	vega10_update_avfs(hwmgr); +	/* +	 * Clear all OD flags except DPMTABLE_OD_UPDATE_VDDC. +	 * That will help to keep AVFS disabled. +	 */  	data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC;  	return 0; @@ -3785,6 +3797,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,  		*((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr);  		*size = 4;  		break; +	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: +		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHotspot); +		*((uint32_t *)value) = smum_get_argument(hwmgr) * +			PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +		*size = 4; +		break; +	case AMDGPU_PP_SENSOR_MEM_TEMP: +		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM); +		*((uint32_t *)value) = smum_get_argument(hwmgr) * +			PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +		*size = 4; +		break;  	case AMDGPU_PP_SENSOR_UVD_POWER:  		*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;  		*size = 4; @@ -4852,12 +4876,22 @@ static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,  static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,  		struct PP_TemperatureRange *thermal_data)  { -	struct phm_ppt_v2_information *table_info = -			(struct phm_ppt_v2_information *)hwmgr->pptable; +	struct vega10_hwmgr *data = hwmgr->backend; +	PPTable_t *pp_table = &(data->smc_state_table.pp_table);  	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); -	thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp * +	thermal_data->max = pp_table->TedgeLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->mem_crit_max = pp_table->ThbmLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*  		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;  	return 0; @@ -4988,13 +5022,70 @@ static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr,  	return true;  } +static void vega10_odn_update_power_state(struct pp_hwmgr *hwmgr) +{ +	struct vega10_hwmgr *data = hwmgr->backend; +	struct pp_power_state *ps = hwmgr->request_ps; +	struct vega10_power_state *vega10_ps; +	struct vega10_single_dpm_table *gfx_dpm_table = +		&data->dpm_table.gfx_table; +	struct vega10_single_dpm_table *soc_dpm_table = +		&data->dpm_table.soc_table; +	struct vega10_single_dpm_table *mem_dpm_table = +		&data->dpm_table.mem_table; +	int max_level; + +	if (!ps) +		return; + +	vega10_ps = cast_phw_vega10_power_state(&ps->hardware); +	max_level = vega10_ps->performance_level_count - 1; + +	if (vega10_ps->performance_levels[max_level].gfx_clock != +	    
gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value) +		vega10_ps->performance_levels[max_level].gfx_clock = +			gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value; + +	if (vega10_ps->performance_levels[max_level].soc_clock != +	    soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value) +		vega10_ps->performance_levels[max_level].soc_clock = +			soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value; + +	if (vega10_ps->performance_levels[max_level].mem_clock != +	    mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value) +		vega10_ps->performance_levels[max_level].mem_clock = +			mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value; + +	if (!hwmgr->ps) +		return; + +	ps = (struct pp_power_state *)((unsigned long)(hwmgr->ps) + hwmgr->ps_size * (hwmgr->num_ps - 1)); +	vega10_ps = cast_phw_vega10_power_state(&ps->hardware); +	max_level = vega10_ps->performance_level_count - 1; + +	if (vega10_ps->performance_levels[max_level].gfx_clock != +	    gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value) +		vega10_ps->performance_levels[max_level].gfx_clock = +			gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value; + +	if (vega10_ps->performance_levels[max_level].soc_clock != +	    soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value) +		vega10_ps->performance_levels[max_level].soc_clock = +			soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value; + +	if (vega10_ps->performance_levels[max_level].mem_clock != +	    mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value) +		vega10_ps->performance_levels[max_level].mem_clock = +			mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value; +} +  static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,  						enum PP_OD_DPM_TABLE_COMMAND type)  {  	struct vega10_hwmgr *data = hwmgr->backend;  	struct phm_ppt_v2_information *table_info = hwmgr->pptable;  	struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk; -	struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.soc_table; +	struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.mem_table;  	struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk =  							&data->odn_dpm_table.vdd_dep_on_socclk; @@ -5018,7 +5109,8 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,  					break;  			}  			if (j == od_vddc_lookup_table->count) { -				od_vddc_lookup_table->entries[j-1].us_vdd = +				j = od_vddc_lookup_table->count - 1; +				od_vddc_lookup_table->entries[j].us_vdd =  					podn_vdd_dep->entries[i].vddc;  				data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;  			} @@ -5026,25 +5118,38 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,  		}  		dpm_table = &data->dpm_table.soc_table;  		for (i = 0; i < dep_table->count; i++) { -			if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[dep_table->count-1].vddInd && -					dep_table->entries[i].clk < podn_vdd_dep->entries[dep_table->count-1].clk) { +			if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[podn_vdd_dep->count-1].vddInd && +					dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count-1].clk) {  				data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; -				podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[dep_table->count-1].clk; -				dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk; +				for (; (i < dep_table->count) && +				       (dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk); i++) 
{ +					podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[podn_vdd_dep->count-1].clk; +					dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk; +				} +				break; +			} else { +				dpm_table->dpm_levels[i].value = dep_table->entries[i].clk; +				podn_vdd_dep_on_socclk->entries[i].vddc = dep_table->entries[i].vddc; +				podn_vdd_dep_on_socclk->entries[i].vddInd = dep_table->entries[i].vddInd; +				podn_vdd_dep_on_socclk->entries[i].clk = dep_table->entries[i].clk;  			}  		}  		if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk < -					podn_vdd_dep->entries[dep_table->count-1].clk) { +					podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk) {  			data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; -			podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = podn_vdd_dep->entries[dep_table->count-1].clk; -			dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = podn_vdd_dep->entries[dep_table->count-1].clk; +			podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = +				podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk; +			dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = +				podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk;  		}  		if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd < -					podn_vdd_dep->entries[dep_table->count-1].vddInd) { +					podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd) {  			data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; -			podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = podn_vdd_dep->entries[dep_table->count-1].vddInd; +			podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = +				podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd;  		}  	} +	vega10_odn_update_power_state(hwmgr);  }  static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, @@ -5079,6 +5184,11 @@ static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,  	} else if (PP_OD_RESTORE_DEFAULT_TABLE == type) {  		memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table));  		vega10_odn_initial_default_setting(hwmgr); +		vega10_odn_update_power_state(hwmgr); +		/* force to update all clock tables */ +		data->need_update_dpm_table = DPMTABLE_UPDATE_SCLK | +					      DPMTABLE_UPDATE_MCLK | +					      DPMTABLE_UPDATE_SOCCLK;  		return 0;  	} else if (PP_OD_COMMIT_DPM_TABLE == type) {  		vega10_check_dpm_table_updated(hwmgr); @@ -5201,8 +5311,12 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {  int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)  { +	struct amdgpu_device *adev = hwmgr->adev; +  	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;  	hwmgr->pptable_func = &vega10_pptable_funcs; +	if (amdgpu_passthrough(adev)) +		return vega10_baco_set_cap(hwmgr);  	return 0;  } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index b6767d74dc85..83d22cdeaa29 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,  	return result;  } + +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) +{ +	int result = 0; + +	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table; + +	powerplay_table = get_powerplay_table(hwmgr); + +	PP_ASSERT_WITH_CODE((powerplay_table != NULL), +		"Missing PowerPlay Table!", return -1); + +	result = 
check_powerplay_tables(hwmgr, powerplay_table); + +	PP_ASSERT_WITH_CODE((result == 0), +			    "check_powerplay_tables failed", return result); + +	set_hw_cap( +			hwmgr, +			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) & ATOM_VEGA10_PP_PLATFORM_CAP_BACO), +			PHM_PlatformCaps_BACO); +	return result; +} + diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h index d83ed2af7aa3..da5fbec9b0cd 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h @@ -59,4 +59,5 @@ extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr);  extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index,  		struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *,  				struct pp_power_state *, void *, uint32_t)); +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);  #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 707cd4b0357f..efb6d3762feb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -289,6 +289,8 @@ static int vega12_set_features_platform_caps(struct pp_hwmgr *hwmgr)  static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr)  {  	struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); +	struct amdgpu_device *adev = hwmgr->adev; +	uint32_t top32, bottom32;  	int i;  	data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id = @@ -353,6 +355,14 @@ static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr)  			((data->registry_data.disallowed_features >> i) & 1) ?  			
false : true;  	} + +	/* Get the SN to turn into a Unique ID */ +	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32); +	top32 = smum_get_argument(hwmgr); +	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32); +	bottom32 = smum_get_argument(hwmgr); + +	adev->unique_id = ((uint64_t)bottom32 << 32) | top32;  }  static int vega12_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) @@ -1237,21 +1247,39 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low)  	return (mem_clk * 100);  } +static int vega12_get_metrics_table(struct pp_hwmgr *hwmgr, SmuMetrics_t *metrics_table) +{ +	struct vega12_hwmgr *data = +			(struct vega12_hwmgr *)(hwmgr->backend); +	int ret = 0; + +	if (!data->metrics_time || time_after(jiffies, data->metrics_time + HZ / 2)) { +		ret = smum_smc_table_manager(hwmgr, (uint8_t *)metrics_table, +				TABLE_SMU_METRICS, true); +		if (ret) { +			pr_info("Failed to export SMU metrics table!\n"); +			return ret; +		} +		memcpy(&data->metrics_table, metrics_table, sizeof(SmuMetrics_t)); +		data->metrics_time = jiffies; +	} else +		memcpy(metrics_table, &data->metrics_table, sizeof(SmuMetrics_t)); + +	return ret; +} +  static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query)  { -#if 0 -	uint32_t value; +	SmuMetrics_t metrics_table; +	int ret = 0; -	PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr, -			PPSMC_MSG_GetCurrPkgPwr), -			"Failed to get current package power!", -			return -EINVAL); +	ret = vega12_get_metrics_table(hwmgr, &metrics_table); +	if (ret) +		return ret; -	value = smum_get_argument(hwmgr); -	/* power value is an integer */ -	*query = value << 8; -#endif -	return 0; +	*query = metrics_table.CurrSocketPower << 8; + +	return ret;  }  static int vega12_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq) @@ -1290,25 +1318,27 @@ static int vega12_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_f  static int vega12_get_current_activity_percent(  		struct pp_hwmgr *hwmgr, +		int idx,  		uint32_t *activity_percent)  { +	SmuMetrics_t metrics_table;  	int ret = 0; -	uint32_t current_activity = 50; -#if 0 -	ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetAverageGfxActivity, 0); -	if (!ret) { -		current_activity = smum_get_argument(hwmgr); -		if (current_activity > 100) { -			PP_ASSERT(false, -				  "[GetCurrentActivityPercent] Activity Percentage Exceeds 100!"); -			current_activity = 100; -		} -	} else -		PP_ASSERT(false, -			"[GetCurrentActivityPercent] Attempt To Send Get Average Graphics Activity to SMU Failed!"); -#endif -	*activity_percent = current_activity; +	ret = vega12_get_metrics_table(hwmgr, &metrics_table); +	if (ret) +		return ret; + +	switch (idx) { +	case AMDGPU_PP_SENSOR_GPU_LOAD: +		*activity_percent = metrics_table.AverageGfxActivity; +		break; +	case AMDGPU_PP_SENSOR_MEM_LOAD: +		*activity_percent = metrics_table.AverageUclkActivity; +		break; +	default: +		pr_err("Invalid index for retrieving clock activity\n"); +		return -EINVAL; +	}  	return ret;  } @@ -1317,6 +1347,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,  			      void *value, int *size)  {  	struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); +	SmuMetrics_t metrics_table;  	int ret = 0;  	switch (idx) { @@ -1331,7 +1362,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,  			*size = 4;  		break;  	case AMDGPU_PP_SENSOR_GPU_LOAD: -		ret = vega12_get_current_activity_percent(hwmgr, (uint32_t *)value); +	case AMDGPU_PP_SENSOR_MEM_LOAD: +		ret = 
vega12_get_current_activity_percent(hwmgr, idx, (uint32_t *)value);  		if (!ret)  			*size = 4;  		break; @@ -1339,6 +1371,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,  		*((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr);  		*size = 4;  		break; +	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: +		ret = vega12_get_metrics_table(hwmgr, &metrics_table); +		if (ret) +			return ret; + +		*((uint32_t *)value) = metrics_table.TemperatureHotspot * +			PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +		*size = 4; +		break; +	case AMDGPU_PP_SENSOR_MEM_TEMP: +		ret = vega12_get_metrics_table(hwmgr, &metrics_table); +		if (ret) +			return ret; + +		*((uint32_t *)value) = metrics_table.TemperatureHBM * +			PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +		*size = 4; +		break;  	case AMDGPU_PP_SENSOR_UVD_POWER:  		*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;  		*size = 4; @@ -1349,6 +1399,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,  		break;  	case AMDGPU_PP_SENSOR_GPU_POWER:  		ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value); +		if (!ret) +			*size = 4;  		break;  	case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK:  		ret = vega12_get_enabled_smc_features(hwmgr, (uint64_t *)value); @@ -2526,12 +2578,23 @@ static int vega12_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,  static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,  		struct PP_TemperatureRange *thermal_data)  { -	struct phm_ppt_v3_information *pptable_information = -		(struct phm_ppt_v3_information *)hwmgr->pptable; +	struct vega12_hwmgr *data = +			(struct vega12_hwmgr *)(hwmgr->backend); +	PPTable_t *pp_table = &(data->smc_state_table.pp_table);  	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); -	thermal_data->max = pptable_information->us_software_shutdown_temp * +	thermal_data->max = pp_table->TedgeLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->mem_crit_max = pp_table->ThbmLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*  		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;  	return 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h index b3e424d28994..73875399666a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h @@ -396,6 +396,9 @@ struct vega12_hwmgr {  	/* ---- Gfxoff ---- */  	bool                           gfxoff_controlled_by_driver; + +	unsigned long                  metrics_time; +	SmuMetrics_t                   metrics_table;  };  #define VEGA12_DPM2_NEAR_TDP_DEC                      10 diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 9b9f87b84910..f27c6fbb192e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -97,6 +97,27 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)  	if (hwmgr->smu_version < 0x282100)  		data->registry_data.disallowed_features |= FEATURE_ECC_MASK; +	if (!(hwmgr->feature_mask & 
PP_PCIE_DPM_MASK)) +		data->registry_data.disallowed_features |= FEATURE_DPM_LINK_MASK; + +	if (!(hwmgr->feature_mask & PP_SCLK_DPM_MASK)) +		data->registry_data.disallowed_features |= FEATURE_DPM_GFXCLK_MASK; + +	if (!(hwmgr->feature_mask & PP_SOCCLK_DPM_MASK)) +		data->registry_data.disallowed_features |= FEATURE_DPM_SOCCLK_MASK; + +	if (!(hwmgr->feature_mask & PP_MCLK_DPM_MASK)) +		data->registry_data.disallowed_features |= FEATURE_DPM_UCLK_MASK; + +	if (!(hwmgr->feature_mask & PP_DCEFCLK_DPM_MASK)) +		data->registry_data.disallowed_features |= FEATURE_DPM_DCEFCLK_MASK; + +	if (!(hwmgr->feature_mask & PP_ULV_MASK)) +		data->registry_data.disallowed_features |= FEATURE_ULV_MASK; + +	if (!(hwmgr->feature_mask & PP_SCLK_DEEP_SLEEP_MASK)) +		data->registry_data.disallowed_features |= FEATURE_DS_GFXCLK_MASK; +  	data->registry_data.od_state_in_dc_support = 0;  	data->registry_data.thermal_support = 1;  	data->registry_data.skip_baco_hardware = 0; @@ -303,6 +324,8 @@ static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr)  static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr)  {  	struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); +	struct amdgpu_device *adev = hwmgr->adev; +	uint32_t top32, bottom32;  	int i;  	data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id = @@ -372,6 +395,14 @@ static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr)  			((data->registry_data.disallowed_features >> i) & 1) ?  			false : true;  	} + +	/* Get the SN to turn into a Unique ID */ +	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32); +	top32 = smum_get_argument(hwmgr); +	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32); +	bottom32 = smum_get_argument(hwmgr); + +	adev->unique_id = ((uint64_t)bottom32 << 32) | top32;  }  static int vega20_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) @@ -2094,6 +2125,7 @@ static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr,  }  static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr, +		int idx,  		uint32_t *activity_percent)  {  	int ret = 0; @@ -2103,7 +2135,17 @@ static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr,  	if (ret)  		return ret; -	*activity_percent = metrics_table.AverageGfxActivity; +	switch (idx) { +	case AMDGPU_PP_SENSOR_GPU_LOAD: +		*activity_percent = metrics_table.AverageGfxActivity; +		break; +	case AMDGPU_PP_SENSOR_MEM_LOAD: +		*activity_percent = metrics_table.AverageUclkActivity; +		break; +	default: +		pr_err("Invalid index for retrieving clock activity\n"); +		return -EINVAL; +	}  	return ret;  } @@ -2134,14 +2176,33 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,  			*size = 4;  		break;  	case AMDGPU_PP_SENSOR_GPU_LOAD: -		ret = vega20_get_current_activity_percent(hwmgr, (uint32_t *)value); +	case AMDGPU_PP_SENSOR_MEM_LOAD: +		ret = vega20_get_current_activity_percent(hwmgr, idx, (uint32_t *)value);  		if (!ret)  			*size = 4;  		break; -	case AMDGPU_PP_SENSOR_GPU_TEMP: +	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:  		*((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr);  		*size = 4;  		break; +	case AMDGPU_PP_SENSOR_EDGE_TEMP: +		ret = vega20_get_metrics_table(hwmgr, &metrics_table); +		if (ret) +			return ret; + +		*((uint32_t *)value) = metrics_table.TemperatureEdge * +			PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +		*size = 4; +		break; +	case AMDGPU_PP_SENSOR_MEM_TEMP: +		ret = vega20_get_metrics_table(hwmgr, &metrics_table); +		if (ret) +			return ret; + +		*((uint32_t *)value) = metrics_table.TemperatureHBM * +	
		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +		*size = 4; +		break;  	case AMDGPU_PP_SENSOR_UVD_POWER:  		*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;  		*size = 4; @@ -3974,12 +4035,23 @@ static int vega20_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,  static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,  		struct PP_TemperatureRange *thermal_data)  { -	struct phm_ppt_v3_information *pptable_information = -		(struct phm_ppt_v3_information *)hwmgr->pptable; +	struct vega20_hwmgr *data = +			(struct vega20_hwmgr *)(hwmgr->backend); +	PPTable_t *pp_table = &(data->smc_state_table.pp_table);  	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); -	thermal_data->max = pptable_information->us_software_shutdown_temp * +	thermal_data->max = pp_table->TedgeLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->mem_crit_max = pp_table->ThbmLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*  		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;  	return 0; diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index c8b168b3413b..3eb1de9ecf73 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -401,8 +401,12 @@ struct smu_context  	uint32_t workload_setting[WORKLOAD_POLICY_MAX];  	uint32_t power_profile_mode;  	uint32_t default_power_profile_mode; +	bool pm_enabled;  	uint32_t smc_if_version; + +	unsigned long metrics_time; +	void *metrics_table;  };  struct pptable_funcs { @@ -458,6 +462,8 @@ struct pptable_funcs {  				      uint32_t *mclk_mask,  				      uint32_t *soc_mask);  	int (*set_cpu_power_state)(struct smu_context *smu); +	int (*set_ppfeature_status)(struct smu_context *smu, uint64_t ppfeatures); +	int (*get_ppfeature_status)(struct smu_context *smu, char *buf);  };  struct smu_funcs @@ -727,7 +733,10 @@ struct smu_funcs  	((smu)->funcs->get_mclk ? (smu)->funcs->get_mclk((smu), (low)) : 0)  #define smu_set_xgmi_pstate(smu, pstate) \  		((smu)->funcs->set_xgmi_pstate ? (smu)->funcs->set_xgmi_pstate((smu), (pstate)) : 0) - +#define smu_set_ppfeature_status(smu, ppfeatures) \ +	((smu)->ppt_funcs->set_ppfeature_status ? (smu)->ppt_funcs->set_ppfeature_status((smu), (ppfeatures)) : -EINVAL) +#define smu_get_ppfeature_status(smu, buf) \ +	((smu)->ppt_funcs->get_ppfeature_status ? 
(smu)->ppt_funcs->get_ppfeature_status((smu), (buf)) : -EINVAL)  extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table,  				   uint16_t *size, uint8_t *frev, uint8_t *crev, @@ -767,4 +776,5 @@ extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, b  extern int smu_handle_task(struct smu_context *smu,  			   enum amd_dpm_forced_level level,  			   enum amd_pp_task task_id); +int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version);  #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h index a99b5cbb113e..a5f2227a3971 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h +++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h @@ -124,6 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock {  struct PP_TemperatureRange {  	int min;  	int max; +	int edge_emergency_max; +	int hotspot_min; +	int hotspot_crit_max; +	int hotspot_emergency_max; +	int mem_min; +	int mem_crit_max; +	int mem_emergency_max;  };  struct PP_StateValidationBlock { diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h index 201d2b6329ab..3e30768f9e1c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h @@ -27,14 +27,18 @@  static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =  { -	{-273150,  99000}, -	{ 120000, 120000}, +	{-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, +	{ 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},  };  static const struct PP_TemperatureRange SMU7ThermalPolicy[] =  { -	{-273150,  99000}, -	{ 120000, 120000}, +	{-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, +	{ 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},  }; +#define CTF_OFFSET_EDGE			5 +#define CTF_OFFSET_HOTSPOT		5 +#define CTF_OFFSET_HBM			5 +  #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index aa8d81f4111e..02c965d64256 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -36,6 +36,9 @@  #define smnMP0_FW_INTF			0x30101c0  #define smnMP1_PUB_CTRL			0x3010b14 +#define TEMP_RANGE_MIN			(0) +#define TEMP_RANGE_MAX			(80 * 1000) +  struct smu_11_0_max_sustainable_clocks {  	uint32_t display_clock;  	uint32_t phy_clock; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index 82550a8a3a3f..c5288831aa15 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -41,6 +41,7 @@ enum SMU_MEMBER {  	HandshakeDisables = 0,  	VoltageChangeTimeout,  	AverageGraphicsActivity, +	AverageMemoryActivity,  	PreVBlankGap,  	VBlankTimeout,  	UcodeLoadStatus, diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 92903a4cc4d8..d2eeb6240484 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -223,20 +223,27 @@ static int smu_v11_0_check_fw_status(struct smu_context *smu)  static int smu_v11_0_check_fw_version(struct smu_context *smu)  { -	uint32_t smu_version = 0xff; +	uint32_t if_version = 0xff, smu_version = 0xff; +	uint16_t smu_major; +	uint8_t smu_minor, smu_debug;  	int ret = 0; -	ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion); +	ret = 
smu_get_smc_version(smu, &if_version, &smu_version);
 	if (ret)
-		goto err;
+		return ret;
 
-	ret = smu_read_smc_arg(smu, &smu_version);
-	if (ret)
-		goto err;
+	smu_major = (smu_version >> 16) & 0xffff;
+	smu_minor = (smu_version >> 8) & 0xff;
+	smu_debug = (smu_version >> 0) & 0xff;
+
+	pr_info("SMU Driver IF Version = 0x%08x, SMU FW Version = 0x%08x (%d.%d.%d)\n",
+		if_version, smu_version, smu_major, smu_minor, smu_debug);
 
-	if (smu_version != smu->smc_if_version)
+	if (if_version != smu->smc_if_version) {
+		pr_err("SMU driver interface version mismatch\n");
 		ret = -EINVAL;
-err:
+	}
+
 	return ret;
 }
 
@@ -353,6 +360,8 @@ static int smu_v11_0_init_power(struct smu_context *smu)
 {
 	struct smu_power_context *smu_power = &smu->smu_power;
 
+	if (!smu->pm_enabled)
+		return 0;
 	if (smu_power->power_context || smu_power->power_context_size != 0)
 		return -EINVAL;
 
@@ -362,6 +371,13 @@ static int smu_v11_0_init_power(struct smu_context *smu)
 		return -ENOMEM;
 	smu_power->power_context_size = sizeof(struct smu_11_0_dpm_context);
 
+	smu->metrics_time = 0;
+	smu->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);
+	if (!smu->metrics_table) {
+		kfree(smu_power->power_context);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -369,10 +385,14 @@ static int smu_v11_0_fini_power(struct smu_context *smu)
 {
 	struct smu_power_context *smu_power = &smu->smu_power;
 
+	if (!smu->pm_enabled)
+		return 0;
 	if (!smu_power->power_context || smu_power->power_context_size == 0)
 		return -EINVAL;
 
+	kfree(smu->metrics_table);
 	kfree(smu_power->power_context);
+	smu->metrics_table = NULL;
 	smu_power->power_context = NULL;
 	smu_power->power_context_size = 0;
 
@@ -634,6 +654,8 @@ static int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu)
 {
 	struct smu_table_context *table_context = &smu->smu_table;
 
+	if (!smu->pm_enabled)
+		return 0;
 	if (!table_context)
 		return -EINVAL;
 
@@ -662,6 +684,9 @@ static int smu_v11_0_set_tool_table_location(struct smu_context *smu)
 static int smu_v11_0_init_display(struct smu_context *smu)
 {
 	int ret = 0;
+
+	if (!smu->pm_enabled)
+		return ret;
 	ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, 0);
 	return ret;
 }
@@ -671,6 +696,8 @@ static int smu_v11_0_update_feature_enable_state(struct smu_context *smu, uint32
 	uint32_t feature_low = 0, feature_high = 0;
 	int ret = 0;
 
+	if (!smu->pm_enabled)
+		return ret;
 	if (feature_id >= 0 && feature_id < 31)
 		feature_low = (1 << feature_id);
 	else if (feature_id > 31 && feature_id < 63)
@@ -777,10 +804,13 @@ static int smu_v11_0_system_features_control(struct smu_context *smu,
 	uint32_t feature_mask[2];
 	int ret = 0;
 
-	ret = smu_send_smc_msg(smu, (en ?
SMU_MSG_EnableAllSmuFeatures : +					     SMU_MSG_DisableAllSmuFeatures)); +		if (ret) +			return ret; +	} +  	ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);  	if (ret)  		return ret; @@ -797,6 +827,8 @@ static int smu_v11_0_notify_display_change(struct smu_context *smu)  {  	int ret = 0; +	if (!smu->pm_enabled) +		return ret;  	if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT))  	    ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1); @@ -809,6 +841,8 @@ smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock,  {  	int ret = 0; +	if (!smu->pm_enabled) +		return ret;  	ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDcModeMaxDpmFreq,  					  clock_select << 16);  	if (ret) { @@ -995,9 +1029,20 @@ static int smu_v11_0_get_current_clk_freq(struct smu_context *smu, uint32_t clk_  static int smu_v11_0_get_thermal_range(struct smu_context *smu,  				struct PP_TemperatureRange *range)  { +	PPTable_t *pptable = smu->smu_table.driver_pptable;  	memcpy(range, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); -	range->max = smu->smu_table.software_shutdown_temp * +	range->max = pptable->TedgeLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	range->edge_emergency_max = (pptable->TedgeLimit + CTF_OFFSET_EDGE) * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	range->hotspot_crit_max = pptable->ThotspotLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	range->hotspot_emergency_max = (pptable->ThotspotLimit + CTF_OFFSET_HOTSPOT) * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	range->mem_crit_max = pptable->ThbmLimit * +		PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +	range->mem_emergency_max = (pptable->ThbmLimit + CTF_OFFSET_HBM)*  		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;  	return 0; @@ -1062,9 +1107,20 @@ static int smu_v11_0_set_thermal_fan_table(struct smu_context *smu)  static int smu_v11_0_start_thermal_control(struct smu_context *smu)  {  	int ret = 0; -	struct PP_TemperatureRange range; +	struct PP_TemperatureRange range = { +		TEMP_RANGE_MIN, +		TEMP_RANGE_MAX, +		TEMP_RANGE_MAX, +		TEMP_RANGE_MIN, +		TEMP_RANGE_MAX, +		TEMP_RANGE_MAX, +		TEMP_RANGE_MIN, +		TEMP_RANGE_MAX, +		TEMP_RANGE_MAX};  	struct amdgpu_device *adev = smu->adev; +	if (!smu->pm_enabled) +		return ret;  	smu_v11_0_get_thermal_range(smu, &range);  	if (smu->smu_table.thermal_controller_type) { @@ -1082,11 +1138,39 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)  	adev->pm.dpm.thermal.min_temp = range.min;  	adev->pm.dpm.thermal.max_temp = range.max; +	adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max; +	adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; +	adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; +	adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max; +	adev->pm.dpm.thermal.min_mem_temp = range.mem_min; +	adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; +	adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; + +	return ret; +} + +static int smu_v11_0_get_metrics_table(struct smu_context *smu, +		SmuMetrics_t *metrics_table) +{ +	int ret = 0; + +	if (!smu->metrics_time || time_after(jiffies, smu->metrics_time + HZ / 1000)) { +		ret = smu_update_table(smu, TABLE_SMU_METRICS, +				(void *)metrics_table, false); +		if (ret) { +			pr_info("Failed to export SMU metrics table!\n"); +			return ret; +		} +		memcpy(smu->metrics_table, metrics_table, sizeof(SmuMetrics_t)); +		smu->metrics_time = jiffies; +	} else +		memcpy(metrics_table, 
smu->metrics_table, sizeof(SmuMetrics_t));  	return ret;  }  static int smu_v11_0_get_current_activity_percent(struct smu_context *smu, +						  enum amd_pp_sensors sensor,  						  uint32_t *value)  {  	int ret = 0; @@ -1095,31 +1179,64 @@ static int smu_v11_0_get_current_activity_percent(struct smu_context *smu,  	if (!value)  		return -EINVAL; -	ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false); +	ret = smu_v11_0_get_metrics_table(smu, &metrics);  	if (ret)  		return ret; -	*value = metrics.AverageGfxActivity; +	switch (sensor) { +	case AMDGPU_PP_SENSOR_GPU_LOAD: +		*value = metrics.AverageGfxActivity; +		break; +	case AMDGPU_PP_SENSOR_MEM_LOAD: +		*value = metrics.AverageUclkActivity; +		break; +	default: +		pr_err("Invalid sensor for retrieving clock activity\n"); +		return -EINVAL; +	}  	return 0;  } -static int smu_v11_0_thermal_get_temperature(struct smu_context *smu, uint32_t *value) +static int smu_v11_0_thermal_get_temperature(struct smu_context *smu, +					     enum amd_pp_sensors sensor, +					     uint32_t *value)  {  	struct amdgpu_device *adev = smu->adev; +	SmuMetrics_t metrics;  	uint32_t temp = 0; +	int ret = 0;  	if (!value)  		return -EINVAL; -	temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); -	temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> -			CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; +	ret = smu_v11_0_get_metrics_table(smu, &metrics); +	if (ret) +		return ret; -	temp = temp & 0x1ff; -	temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES; +	switch (sensor) { +	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: +		temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); +		temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> +				CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; + +		temp = temp & 0x1ff; +		temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES; -	*value = temp; +		*value = temp; +		break; +	case AMDGPU_PP_SENSOR_EDGE_TEMP: +		*value = metrics.TemperatureEdge * +			PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +		break; +	case AMDGPU_PP_SENSOR_MEM_TEMP: +		*value = metrics.TemperatureHBM * +			PP_TEMPERATURE_UNITS_PER_CENTIGRADES; +		break; +	default: +		pr_err("Invalid sensor for retrieving temp\n"); +		return -EINVAL; +	}  	return 0;  } @@ -1132,7 +1249,7 @@ static int smu_v11_0_get_gpu_power(struct smu_context *smu, uint32_t *value)  	if (!value)  		return -EINVAL; -	ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false); +	ret = smu_v11_0_get_metrics_table(smu, &metrics);  	if (ret)  		return ret; @@ -1174,7 +1291,9 @@ static int smu_v11_0_read_sensor(struct smu_context *smu,  	int ret = 0;  	switch (sensor) {  	case AMDGPU_PP_SENSOR_GPU_LOAD: +	case AMDGPU_PP_SENSOR_MEM_LOAD:  		ret = smu_v11_0_get_current_activity_percent(smu, +							     sensor,  							     (uint32_t *)data);  		*size = 4;  		break; @@ -1186,8 +1305,10 @@ static int smu_v11_0_read_sensor(struct smu_context *smu,  		ret = smu_get_current_clk_freq(smu, PPCLK_GFXCLK, (uint32_t *)data);  		*size = 4;  		break; -	case AMDGPU_PP_SENSOR_GPU_TEMP: -		ret = smu_v11_0_thermal_get_temperature(smu, (uint32_t *)data); +	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: +	case AMDGPU_PP_SENSOR_EDGE_TEMP: +	case AMDGPU_PP_SENSOR_MEM_TEMP: +		ret = smu_v11_0_thermal_get_temperature(smu, sensor, (uint32_t *)data);  		*size = 4;  		break;  	case AMDGPU_PP_SENSOR_GPU_POWER: @@ -1235,6 +1356,8 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu,  	PPCLK_e clk_select = 0;  	uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; +	if (!smu->pm_enabled) +		return -EINVAL;  	if 
(smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) {  		switch (clk_type) {  		case amd_pp_dcef_clock: @@ -1518,7 +1641,7 @@ static int smu_v11_0_get_power_profile_mode(struct smu_context *smu, char *buf)  			"PD_Data_error_rate_coeff"};  	int result = 0; -	if (!buf) +	if (!smu->pm_enabled || !buf)  		return -EINVAL;  	size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", @@ -1605,6 +1728,8 @@ static int smu_v11_0_set_power_profile_mode(struct smu_context *smu, long *input  	smu->power_profile_mode = input[size]; +	if (!smu->pm_enabled) +		return ret;  	if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {  		pr_err("Invalid power profile mode %d\n", smu->power_profile_mode);  		return -EINVAL; @@ -1710,24 +1835,24 @@ static int smu_v11_0_update_od8_settings(struct smu_context *smu,  static int smu_v11_0_dpm_set_uvd_enable(struct smu_context *smu, bool enable)  { -	if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT)) +	if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT))  		return 0; -	if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT)) +	if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT))  		return 0; -	return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable); +	return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable);  }  static int smu_v11_0_dpm_set_vce_enable(struct smu_context *smu, bool enable)  { -	if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT)) +	if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT))  		return 0; -	if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT)) +	if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT))  		return 0; -	return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable); +	return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable);  }  static int smu_v11_0_get_current_rpm(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 669bd0c2a16c..9ef57fcf7e78 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2254,6 +2254,8 @@ static uint32_t ci_get_offsetof(uint32_t type, uint32_t member)  			return offsetof(SMU7_SoftRegisters, VoltageChangeTimeout);  		case AverageGraphicsActivity:  			return offsetof(SMU7_SoftRegisters, AverageGraphicsA); +		case AverageMemoryActivity: +			return offsetof(SMU7_SoftRegisters, AverageMemoryA);  		case PreVBlankGap:  			return offsetof(SMU7_SoftRegisters, PreVBlankGap);  		case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index bc8375cbf297..0ce85b73338e 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -2304,6 +2304,8 @@ static uint32_t fiji_get_offsetof(uint32_t type, uint32_t member)  			return offsetof(SMU73_SoftRegisters, VoltageChangeTimeout);  		case AverageGraphicsActivity:  			return offsetof(SMU73_SoftRegisters, AverageGraphicsActivity); +		case AverageMemoryActivity: +			return offsetof(SMU73_SoftRegisters, AverageMemoryActivity);  		case PreVBlankGap:  			return offsetof(SMU73_SoftRegisters, PreVBlankGap);  		case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c index 375ccf6ff5f2..f24f13d77808 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c +++ 
b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c @@ -2219,6 +2219,8 @@ static uint32_t iceland_get_offsetof(uint32_t type, uint32_t member)  			return offsetof(SMU71_SoftRegisters, VoltageChangeTimeout);  		case AverageGraphicsActivity:  			return offsetof(SMU71_SoftRegisters, AverageGraphicsActivity); +		case AverageMemoryActivity: +			return offsetof(SMU71_SoftRegisters, AverageMemoryActivity);  		case PreVBlankGap:  			return offsetof(SMU71_SoftRegisters, PreVBlankGap);  		case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 2d4cfe14f72e..0d8958e71b94 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -2313,6 +2313,8 @@ static uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member)  			return offsetof(SMU74_SoftRegisters, VoltageChangeTimeout);  		case AverageGraphicsActivity:  			return offsetof(SMU74_SoftRegisters, AverageGraphicsActivity); +		case AverageMemoryActivity: +			return offsetof(SMU74_SoftRegisters, AverageMemoryActivity);  		case PreVBlankGap:  			return offsetof(SMU74_SoftRegisters, PreVBlankGap);  		case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 3ed6c5f1e5cf..060c0f7f5238 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -2611,6 +2611,8 @@ static uint32_t tonga_get_offsetof(uint32_t type, uint32_t member)  			return offsetof(SMU72_SoftRegisters, VoltageChangeTimeout);  		case AverageGraphicsActivity:  			return offsetof(SMU72_SoftRegisters, AverageGraphicsActivity); +		case AverageMemoryActivity: +			return offsetof(SMU72_SoftRegisters, AverageMemoryActivity);  		case PreVBlankGap:  			return offsetof(SMU72_SoftRegisters, PreVBlankGap);  		case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c index ddb801517667..1eaf0fa28ef7 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c @@ -287,8 +287,26 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr)  	priv->smu_tables.entry[TABLE_OVERDRIVE].version = 0x01;  	priv->smu_tables.entry[TABLE_OVERDRIVE].size = sizeof(OverDriveTable_t); +	/* allocate space for SMU_METRICS table */ +	ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, +				      sizeof(SmuMetrics_t), +				      PAGE_SIZE, +				      AMDGPU_GEM_DOMAIN_VRAM, +				      &priv->smu_tables.entry[TABLE_SMU_METRICS].handle, +				      &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr, +				      &priv->smu_tables.entry[TABLE_SMU_METRICS].table); +	if (ret) +		goto err4; + +	priv->smu_tables.entry[TABLE_SMU_METRICS].version = 0x01; +	priv->smu_tables.entry[TABLE_SMU_METRICS].size = sizeof(SmuMetrics_t); +  	return 0; +err4: +	amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle, +				&priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr, +				&priv->smu_tables.entry[TABLE_OVERDRIVE].table);  err3:  	amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].handle,  				&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].mc_addr, @@ -334,6 +352,9 @@ static int vega12_smu_fini(struct pp_hwmgr *hwmgr)  		amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,  				      
&priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,  				      &priv->smu_tables.entry[TABLE_OVERDRIVE].table); +		amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_SMU_METRICS].handle, +				      &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr, +				      &priv->smu_tables.entry[TABLE_SMU_METRICS].table);  		kfree(hwmgr->smu_backend);  		hwmgr->smu_backend = NULL;  	} diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c index 1e69300f6175..d499204b2184 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c @@ -2167,6 +2167,8 @@ static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member)  			return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout);  		case AverageGraphicsActivity:  			return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity); +		case AverageMemoryActivity: +			return offsetof(SMU75_SoftRegisters, AverageMemoryActivity);  		case PreVBlankGap:  			return offsetof(SMU75_SoftRegisters, PreVBlankGap);  		case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 8fafcbdb1dfd..4aa8f5a69c4c 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -2374,6 +2374,157 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu,  	return ret;  } +static int vega20_get_enabled_smc_features(struct smu_context *smu, +		uint64_t *features_enabled) +{ +	uint32_t feature_mask[2] = {0, 0}; +	int ret = 0; + +	ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); +	if (ret) +		return ret; + +	*features_enabled = ((((uint64_t)feature_mask[0] << SMU_FEATURES_LOW_SHIFT) & SMU_FEATURES_LOW_MASK) | +			(((uint64_t)feature_mask[1] << SMU_FEATURES_HIGH_SHIFT) & SMU_FEATURES_HIGH_MASK)); + +	return ret; +} + +static int vega20_enable_smc_features(struct smu_context *smu, +		bool enable, uint64_t feature_mask) +{ +	uint32_t smu_features_low, smu_features_high; +	int ret = 0; + +	smu_features_low = (uint32_t)((feature_mask & SMU_FEATURES_LOW_MASK) >> SMU_FEATURES_LOW_SHIFT); +	smu_features_high = (uint32_t)((feature_mask & SMU_FEATURES_HIGH_MASK) >> SMU_FEATURES_HIGH_SHIFT); + +	if (enable) { +		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesLow, +						  smu_features_low); +		if (ret) +			return ret; +		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh, +						  smu_features_high); +		if (ret) +			return ret; +	} else { +		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow, +						  smu_features_low); +		if (ret) +			return ret; +		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh, +						  smu_features_high); +		if (ret) +			return ret; +	} + +	return 0; + +} + +static int vega20_get_ppfeature_status(struct smu_context *smu, char *buf) +{ +	static const char *ppfeature_name[] = { +				"DPM_PREFETCHER", +				"GFXCLK_DPM", +				"UCLK_DPM", +				"SOCCLK_DPM", +				"UVD_DPM", +				"VCE_DPM", +				"ULV", +				"MP0CLK_DPM", +				"LINK_DPM", +				"DCEFCLK_DPM", +				"GFXCLK_DS", +				"SOCCLK_DS", +				"LCLK_DS", +				"PPT", +				"TDC", +				"THERMAL", +				"GFX_PER_CU_CG", +				"RM", +				"DCEFCLK_DS", +				"ACDC", +				"VR0HOT", +				"VR1HOT", +				"FW_CTF", +				"LED_DISPLAY", +				"FAN_CONTROL", +				"GFX_EDC", +				"GFXOFF", +				"CG", +				"FCLK_DPM", +				"FCLK_DS", +				"MP1CLK_DS", +				"MP0CLK_DS", +				"XGMI", +				"ECC"}; +	static const char 
*output_title[] = {
+				"FEATURES",
+				"BITMASK",
+				"ENABLEMENT"};
+	uint64_t features_enabled;
+	int i;
+	int ret = 0;
+	int size = 0;
+
+	ret = vega20_get_enabled_smc_features(smu, &features_enabled);
+	if (ret)
+		return ret;
+
+	size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled);
+	size += sprintf(buf + size, "%-19s %-22s %s\n",
+				output_title[0],
+				output_title[1],
+				output_title[2]);
+	for (i = 0; i < GNLD_FEATURES_MAX; i++) {
+		size += sprintf(buf + size, "%-19s 0x%016llx %6s\n",
+					ppfeature_name[i],
+					1ULL << i,
+					(features_enabled & (1ULL << i)) ? "Y" : "N");
+	}
+
+	return size;
+}
+
+static int vega20_set_ppfeature_status(struct smu_context *smu, uint64_t new_ppfeature_masks)
+{
+	uint64_t features_enabled;
+	uint64_t features_to_enable;
+	uint64_t features_to_disable;
+	int ret = 0;
+
+	if (new_ppfeature_masks >= (1ULL << GNLD_FEATURES_MAX))
+		return -EINVAL;
+
+	ret = vega20_get_enabled_smc_features(smu, &features_enabled);
+	if (ret)
+		return ret;
+
+	features_to_disable =
+		features_enabled & ~new_ppfeature_masks;
+	features_to_enable =
+		~features_enabled & new_ppfeature_masks;
+
+	pr_debug("features_to_disable 0x%llx\n", features_to_disable);
+	pr_debug("features_to_enable 0x%llx\n", features_to_enable);
+
+	if (features_to_disable) {
+		ret = vega20_enable_smc_features(smu, false, features_to_disable);
+		if (ret)
+			return ret;
+	}
+
+	if (features_to_enable) {
+		ret = vega20_enable_smc_features(smu, true, features_to_enable);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static const struct pptable_funcs vega20_ppt_funcs = {
 	.alloc_dpm_context = vega20_allocate_dpm_context,
 	.store_powerplay_table = vega20_store_powerplay_table,
@@ -2404,6 +2555,8 @@ static const struct pptable_funcs vega20_ppt_funcs = {
 	.unforce_dpm_levels = vega20_unforce_dpm_levels,
 	.upload_dpm_level = vega20_upload_dpm_level,
 	.get_profiling_clk_mask = vega20_get_profiling_clk_mask,
+	.set_ppfeature_status = vega20_set_ppfeature_status,
+	.get_ppfeature_status = vega20_get_ppfeature_status,
 };
 
 void vega20_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
index 5a0d2af63173..87f3a8303645 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
@@ -36,6 +36,50 @@
 #define AVFS_CURVE 0
 #define OD8_HOTCURVE_TEMPERATURE 85
 
+#define SMU_FEATURES_LOW_MASK        0x00000000FFFFFFFF
+#define SMU_FEATURES_LOW_SHIFT       0
+#define SMU_FEATURES_HIGH_MASK       0xFFFFFFFF00000000
+#define SMU_FEATURES_HIGH_SHIFT      32
+
+enum {
+	GNLD_DPM_PREFETCHER = 0,
+	GNLD_DPM_GFXCLK,
+	GNLD_DPM_UCLK,
+	GNLD_DPM_SOCCLK,
+	GNLD_DPM_UVD,
+	GNLD_DPM_VCE,
+	GNLD_ULV,
+	GNLD_DPM_MP0CLK,
+	GNLD_DPM_LINK,
+	GNLD_DPM_DCEFCLK,
+	GNLD_DS_GFXCLK,
+	GNLD_DS_SOCCLK,
+	GNLD_DS_LCLK,
+	GNLD_PPT,
+	GNLD_TDC,
+	GNLD_THERMAL,
+	GNLD_GFX_PER_CU_CG,
+	GNLD_RM,
+	GNLD_DS_DCEFCLK,
+	GNLD_ACDC,
+	GNLD_VR0HOT,
+	GNLD_VR1HOT,
+	GNLD_FW_CTF,
+	GNLD_LED_DISPLAY,
+	GNLD_FAN_CONTROL,
+	GNLD_DIDT,
+	GNLD_GFXOFF,
+	GNLD_CG,
+	GNLD_DPM_FCLK,
+	GNLD_DS_FCLK,
+	GNLD_DS_MP1CLK,
+	GNLD_DS_MP0CLK,
+	GNLD_XGMI,
+	GNLD_ECC,
+
+	GNLD_FEATURES_MAX
+};
+
 struct vega20_dpm_level {
         bool            enabled;
         uint32_t        value;